#!/usr/bin/env python # -*- coding: utf-8 -*- """ Ежедневное обновление статистики «Метрики МП» из Impala. Модель данных: • Грузим помесячно (группировка по report_period_id = YYYYmm), начиная с января прошлого года и до текущего месяца текущего года. • Текущий (незавершённый) месяц обрезаем по entry_date до ВЧЕРАШНЕГО дня — симметрично для обоих лет (например, на 16 июня берём данные по 15 июня включительно и в 2026, и в 2025), чтобы сравнение было «равный период». • Внутри каждого года считаем нарастающий итог (кумулятив) по месяцам; на старте нового года накопление сбрасывается. • Сравнение в карточке = кумулятив-на-дату текущего года vs прошлого года. Результат пишется в ../app_stats/app_metrics.json и пушится в ветку pages. Подключение к Impala, конфиг и git-push переиспользуются из update_kpi.py. """ import sys import json import datetime as dt from pathlib import Path import update_kpi as base # общие функции: load_config, connect_impala, git_commit_push, ... log = base.log REPO_DIR = base.REPO_DIR OUT_PATH = REPO_DIR / "app_stats" / "app_metrics.json" OUT_REL = "app_stats/app_metrics.json" # ─── Метрики: ключ в SQL → человекочитаемое название (в порядке SELECT) ─── METRICS = [ ("my_services", "Мои услуги"), ("traffic", "Детализация трафика"), ("payments", "Платежи"), ("orders", "Заявки"), ("loyalty", "Лояльность"), ("pay", "Оплата"), ("billing_detail", "Детали счета"), ("viktorina", "Викторина KT Club"), ("partners", "Акции партнеров"), ("tv_plus", "TV+"), ("boosters", "Бустеры"), ("roaming", "Роуминг"), ("pereoform", "Переоформление"), ("aitu_music", "Aitu Music"), ("online_booking", "Онлайн очередь"), ("my_docs", "Мои документы"), ("dz_statement", "Справка о ДЗ"), ("new_boosters_roaming_kcell", "Новая линейка бустеров и роумингов Кселл"), ("adsl", "ADSL отключение услуги"), ("law_and_order", "Закон и порядок"), ("acs", "ACS"), ("kaspi_freedom_pay", "Прием платежей через Freedom и Kaspi"), ("csat", "CSAT"), ("multicustomer", "Мультикастомер"), ("tv_plus_setup", "Настройка TV+"), ("static_ip", "Статический IP"), ("turbo_button", "Turbo кнопка"), ("real_estate_docs", "Справка о недвижимости"), ] METRIC_KEYS = [k for k, _ in METRICS] _MONTHS_RU_GEN = ["", "января", "февраля", "марта", "апреля", "мая", "июня", "июля", "августа", "сентября", "октября", "ноября", "декабря"] _MONTHS_RU_SHORT = ["", "Янв", "Фев", "Мар", "Апр", "Май", "Июн", "Июл", "Авг", "Сен", "Окт", "Ноя", "Дек"] def date_bounds(today: dt.date | None = None): """Границы выборки. Текущий месяц обрезается по вчерашнему дню (симметрично по годам).""" today = today or dt.date.today() end_cur = today - dt.timedelta(days=1) # вчера cur_year = today.year prev_year = cur_year - 1 cap_month, cap_day = end_cur.month, end_cur.day start_cur = dt.date(cur_year, 1, 1) start_prev = dt.date(prev_year, 1, 1) try: end_prev = dt.date(prev_year, cap_month, cap_day) except ValueError: # 29 февраля в невисокосный год end_prev = dt.date(prev_year, cap_month, 28) return { "cur_year": cur_year, "prev_year": prev_year, "cap_month": cap_month, "cap_day": cap_day, "start_cur": start_cur, "end_cur": end_cur, "start_prev": start_prev, "end_prev": end_prev, } def build_sql(b: dict) -> str: # created_at — timestamp, поэтому верхнюю границу берём как «< следующий день» cur_end_excl = b["end_cur"] + dt.timedelta(days=1) prev_end_excl = b["end_prev"] + dt.timedelta(days=1) return f""" with t as ( select report_period_id, count(case when event_type = 'OPENWSCREENMYSERVICES' then 1 end) as my_services, count(case when event_type = 'OPENWINDOWDETALIZTION' then 1 end) as traffic, count(case when event_type = 'OPENWINDOWPAYMENT' then 1 end) as payments, count(case when event_type = 'OPENSCREENAPPEALS' then 1 end) as orders, count(case when event_type in ('banner_auth', 'banner_unauth', 'loyalty_banner_slider_auth', 'loyalty_banner_slider_unauth', 'get_bonus_opened', 'bonus_opened','promo_partners_opened','company_promo_opened') then 1 end) as loyalty, count(case when event_type = 'WINDOWPAYMENT' then 1 end) as pay, count(case when event_type = 'OPENWINDOWBILLING' then 1 end) as billing_detail, count(case when event_type = 'game_page' then 1 end) as viktorina, count(case when event_type = 'promo_partners_opened' then 1 end) as partners, count(case when event_type = 'OPENWINDOWTVPLUS' then 1 end) as tv_plus, count(case when event_type = 'MOBCONNECTIONOPENWINDOWADDITIONALTRAFFIC' then 1 end) as boosters, count(case when event_type = 'OPENWINDOWROAMING' then 1 end) as roaming, count(case when event_type = 'reregistration_comm_start' then 1 end) as pereoform, count(case when event_type = 'aitu_music_banner_clicked' then 1 end) as aitu_music, count(case when event_type = 'ONLINE_BOOKING_SERVICES' then 1 end) as online_booking, count(case when event_type in ('EMPTYLISTDOCS', 'HASLISTDOCS') then 1 end) as my_docs, count(case when event_type = 'PDFSTATEMENT' then 1 end) as dz_statement, count(case when event_type in ('booster_success_screen_kcell', 'ROAMINGPACKAGEMOBILEKCELL') then 1 end) as new_boosters_roaming_kcell, count(case when event_type = 'law_and_order_service_clicked' then 1 end) as law_and_order, count(case when event_type = 'ACS_DEVICE_SELECTION_OPEN' then 1 end) as acs, count(case when event_type in ('PAYMENTWASSUCCESSFULFREEDOM', 'PAYWITHKASPI') then 1 end) as kaspi_freedom_pay, count(case when event_type = 'csat_screen_sent' then 1 end) as csat, count(case when event_type = 'multicustomer_completed_screen_viewed' then 1 end) as multicustomer, count(case when event_type = 'tv_plus_setup_success_viewed' then 1 end) as tv_plus_setup, count(case when event_type = 'static_ip_connect_success_viewed' then 1 end) as static_ip, count(case when event_type = 'turbo_activation_success_viewed' then 1 end) as turbo_button, count(case when event_type = 'real_estate_docs_screen_shown' then 1 end) as real_estate_docs from drb.drb_iliyas_amplitude_metrics_full where entry_date between '{b['start_cur']:%Y-%m-%d}' and '{b['end_cur']:%Y-%m-%d}' or entry_date between '{b['start_prev']:%Y-%m-%d}' and '{b['end_prev']:%Y-%m-%d}' group by report_period_id ) , a as ( select (year(created_at) * 100 + month(created_at)) as report_period_id, count(order_id) as adsl from telecomkz.telecomkz_retention_service_prod_tariff_change_validations where (created_at >= '{b['start_cur']:%Y-%m-%d}' and created_at < '{cur_end_excl:%Y-%m-%d}') or (created_at >= '{b['start_prev']:%Y-%m-%d}' and created_at < '{prev_end_excl:%Y-%m-%d}') group by 1 ) select t.report_period_id, my_services, traffic, payments, orders, loyalty, pay, billing_detail, viktorina, partners, tv_plus, boosters, roaming, pereoform, aitu_music, online_booking, my_docs, dz_statement, new_boosters_roaming_kcell, adsl, law_and_order, acs, kaspi_freedom_pay, csat, multicustomer, tv_plus_setup, static_ip, turbo_button, real_estate_docs from t left join a on t.report_period_id = a.report_period_id order by t.report_period_id """.strip() def fetch_monthly(conn, sql): cur = conn.cursor() log.info("Выполнение запроса метрик МП (помесячно)...") cur.execute(sql) rows = cur.fetchall() names = [d[0].lower() for d in cur.description] cur.close() idx = {n: i for i, n in enumerate(names)} out = [] for r in rows: rec = {"report_period_id": int(r[idx["report_period_id"]])} for key in METRIC_KEYS: v = r[idx[key]] if key in idx else None rec[key] = int(v) if v is not None else 0 out.append(rec) log.info("Получено помесячных строк: %d (периоды: %s)", len(out), ", ".join(str(x["report_period_id"]) for x in out)) return out def _cumsum(arr): acc, out = 0, [] for v in arr: acc += v out.append(acc) return out def build_payload(rows, b: dict): cur_year, prev_year, cap_month = b["cur_year"], b["prev_year"], b["cap_month"] months = list(range(1, cap_month + 1)) month_labels = [_MONTHS_RU_SHORT[m] for m in months] # key -> {year -> [monthly values по месяцам months]} monthly = {k: {cur_year: [0] * len(months), prev_year: [0] * len(months)} for k in METRIC_KEYS} for rec in rows: rpid = rec["report_period_id"] y, m = rpid // 100, rpid % 100 if y not in (cur_year, prev_year) or m not in months: continue i = months.index(m) for k in METRIC_KEYS: monthly[k][y][i] = rec[k] metrics = [] for key, label in METRICS: cur_cum = _cumsum(monthly[key][cur_year]) prev_cum = _cumsum(monthly[key][prev_year]) cur_v = cur_cum[-1] if cur_cum else 0 prev_v = prev_cum[-1] if prev_cum else 0 is_new = prev_v == 0 growth = None if is_new else (cur_v - prev_v) / prev_v metrics.append({ "key": key, "label": label, "cur": cur_v, "prev": prev_v, "growth": growth, "is_new": is_new, "cur_cum": cur_cum, "prev_cum": prev_cum, }) end_cur = b["end_cur"] period_label = f"с 1 января по {end_cur.day} {_MONTHS_RU_GEN[end_cur.month]}" return { "generated_at": dt.datetime.now().isoformat(timespec="seconds"), "cur_year": cur_year, "prev_year": prev_year, "period_label": period_label, "range": {"start": f"{b['start_cur']:%Y-%m-%d}", "end": f"{end_cur:%Y-%m-%d}"}, "months": months, "month_labels": month_labels, "cumulative": True, "metrics": metrics, } def write_if_changed(payload) -> bool: text = json.dumps(payload, ensure_ascii=False, indent=2) + "\n" old = OUT_PATH.read_text(encoding="utf-8") if OUT_PATH.exists() else "" def strip_ts(s): try: d = json.loads(s) d.pop("generated_at", None) return json.dumps(d, ensure_ascii=False, sort_keys=True) except Exception: return s if strip_ts(old) == strip_ts(text): log.info("Метрики МП не изменились — коммит не требуется.") return False OUT_PATH.parent.mkdir(parents=True, exist_ok=True) OUT_PATH.write_text(text, encoding="utf-8") log.info("app_metrics.json обновлён (%d метрик, %d мес.).", len(payload["metrics"]), len(payload["months"])) return True def main() -> int: log.info("=" * 60) log.info("Старт обновления Метрик МП") cfg = base.load_config() b = date_bounds() log.info("Период (тек.): %s..%s | (пред.): %s..%s | месяцев: %d", b["start_cur"], b["end_cur"], b["start_prev"], b["end_prev"], b["cap_month"]) sql = build_sql(b) base.patch_thrift_ssl() conn = base.connect_impala(cfg) try: rows = fetch_monthly(conn, sql) finally: try: conn.close() except Exception: pass cur_year = b["cur_year"] if not any(r["report_period_id"] // 100 == cur_year for r in rows): log.error("В ответе нет данных за %s — JSON НЕ перезаписан.", cur_year) return 1 payload = build_payload(rows, b) if write_if_changed(payload): base.git_commit_push(cfg, [OUT_REL], f"data: update app metrics {dt.date.today():%Y-%m-%d}") log.info("Готово (Метрики МП).") return 0 if __name__ == "__main__": try: sys.exit(main()) except Exception as e: # noqa: BLE001 log.exception("ОШИБКА (Метрики МП): %s", e) sys.exit(1)