oh shit im scared, but its alive

2025-12-15 18:38:10 +03:00
parent b850d4459b
commit e2a36c74a3
51 changed files with 4956 additions and 578 deletions
--- a/main_hypot/best_model_and_plots.py
+++ b/main_hypot/best_model_and_plots.py
@@ -223,6 +223,7 @@ def plot_density_scatter(
    rolling_window: int = DEFAULT_ROLLING_WINDOW,
    savgol_window: int = DEFAULT_SAVGOL_WINDOW,
    savgol_poly: int = DEFAULT_SAVGOL_POLY,
+    return_fig: bool = False,
 ) -> None:
    fig, ax = plt.subplots(figsize=(8, 8))
    alpha_values = compute_density_alpha(
@@ -246,6 +247,7 @@ def plot_density_scatter(
        linewidths=0,
    )

+    trend_data = None
    if with_trend:
        tx, ty = compute_trend(
            df,
@@ -260,6 +262,7 @@ def plot_density_scatter(
        if len(tx):
            ax.plot(tx, ty, color=trend_color, linewidth=trend_linewidth, label=f"{trend_method} тренд")
            ax.legend()
+            trend_data = (tx, ty)

    ax.set_xlim(0, x_max)
    ax.set_ylim(y_min, y_max)
@@ -272,6 +275,8 @@ def plot_density_scatter(
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fig.tight_layout()
    fig.savefig(out_path, dpi=150)
+    if return_fig:
+        return fig, ax, trend_data
    plt.close(fig)
    print(f"Saved {out_path}")

@@ -426,7 +431,7 @@ def plot_clean_trend_scatter(
        q_low=q_low,
        q_high=q_high,
    )
-    plot_density_scatter(
+    fig_ax = plot_density_scatter(
        cleaned,
        y_col=y_col,
        title=f"Облако без выбросов + тренд {y_col} vs {x_col}",
@@ -450,9 +455,11 @@ def plot_clean_trend_scatter(
        rolling_window=rolling_window,
        savgol_window=savgol_window,
        savgol_poly=savgol_poly,
+        return_fig=return_components,
    )
    if return_components:
-        return fig, ax, cleaned
+        fig, ax, trend_data = fig_ax
+        return fig, ax, cleaned, trend_data


 def generate_scatter_set(
--- a/main_hypot/category_analysis/correlations/corr_avia.png
+++ b/main_hypot/category_analysis/correlations/corr_avia.png
--- a/main_hypot/category_analysis/correlations/corr_avia_hotel.png
+++ b/main_hypot/category_analysis/correlations/corr_avia_hotel.png
--- a/main_hypot/category_analysis/correlations/corr_ent.png
+++ b/main_hypot/category_analysis/correlations/corr_ent.png
--- a/main_hypot/category_analysis/correlations/corr_hotel.png
+++ b/main_hypot/category_analysis/correlations/corr_hotel.png
--- a/main_hypot/category_analysis/correlations/corr_shopping.png
+++ b/main_hypot/category_analysis/correlations/corr_shopping.png
--- a/main_hypot/category_analysis/correlations/corr_super.png
+++ b/main_hypot/category_analysis/correlations/corr_super.png
--- a/main_hypot/category_analysis/correlations/corr_transport.png
+++ b/main_hypot/category_analysis/correlations/corr_transport.png
--- a/main_hypot/category_analysis/orders_amt_avia/scatter_trend.png
+++ b/main_hypot/category_analysis/orders_amt_avia/scatter_trend.png
--- a/main_hypot/category_analysis/orders_amt_avia/scatter_trend_quad.png
+++ b/main_hypot/category_analysis/orders_amt_avia/scatter_trend_quad.png
--- a/main_hypot/category_analysis/orders_amt_avia_hotel/scatter_trend.png
+++ b/main_hypot/category_analysis/orders_amt_avia_hotel/scatter_trend.png
--- a/main_hypot/category_analysis/orders_amt_avia_hotel/scatter_trend_quad.png
+++ b/main_hypot/category_analysis/orders_amt_avia_hotel/scatter_trend_quad.png
--- a/main_hypot/category_analysis/orders_amt_ent/scatter_trend.png
+++ b/main_hypot/category_analysis/orders_amt_ent/scatter_trend.png
--- a/main_hypot/category_analysis/orders_amt_ent/scatter_trend_quad.png
+++ b/main_hypot/category_analysis/orders_amt_ent/scatter_trend_quad.png
--- a/main_hypot/category_analysis/orders_amt_hotel/scatter_trend.png
+++ b/main_hypot/category_analysis/orders_amt_hotel/scatter_trend.png
--- a/main_hypot/category_analysis/orders_amt_hotel/scatter_trend_quad.png
+++ b/main_hypot/category_analysis/orders_amt_hotel/scatter_trend_quad.png
--- a/main_hypot/category_analysis/orders_amt_shopping/scatter_trend.png
+++ b/main_hypot/category_analysis/orders_amt_shopping/scatter_trend.png
--- a/main_hypot/category_analysis/orders_amt_shopping/scatter_trend_quad.png
+++ b/main_hypot/category_analysis/orders_amt_shopping/scatter_trend_quad.png
--- a/main_hypot/category_analysis/orders_amt_super/scatter_trend.png
+++ b/main_hypot/category_analysis/orders_amt_super/scatter_trend.png
--- a/main_hypot/category_analysis/orders_amt_super/scatter_trend_quad.png
+++ b/main_hypot/category_analysis/orders_amt_super/scatter_trend_quad.png
--- a/main_hypot/category_analysis/orders_amt_transport/scatter_trend.png
+++ b/main_hypot/category_analysis/orders_amt_transport/scatter_trend.png
--- a/main_hypot/category_analysis/orders_amt_transport/scatter_trend_quad.png
+++ b/main_hypot/category_analysis/orders_amt_transport/scatter_trend_quad.png
--- a/main_hypot/category_quadreg.py
+++ b/main_hypot/category_quadreg.py
@@ -0,0 +1,353 @@
+import sqlite3
+from pathlib import Path
+import sys
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import statsmodels.api as sm
+from sklearn.metrics import roc_auc_score
+
+# Make the helper functions from the sibling script (best_model_and_plots) importable
+# by putting this file's directory on sys.path.
+script_dir = Path(__file__).resolve().parent
+if str(script_dir) not in sys.path:
+    sys.path.append(str(script_dir))
+
+from best_model_and_plots import (  # noqa: E402
+    CATEGORIES,
+    DEFAULT_ALPHA,
+    DEFAULT_ALPHA_MAX,
+    DEFAULT_ALPHA_MIN,
+    DEFAULT_BINS_X,
+    DEFAULT_BINS_Y,
+    DEFAULT_SCATTER_COLOR,
+    DEFAULT_TREND_COLOR,
+    DEFAULT_TREND_FRAC,
+    DEFAULT_TREND_LINEWIDTH,
+    DEFAULT_X_MAX,
+    DEFAULT_Y_MAX,
+    DEFAULT_Y_MIN,
+    DEFAULT_SAVGOL_WINDOW,
+    plot_clean_trend_scatter,
+    safe_divide,
+)
+
+# Global plot styling, applied at import time.
+sns.set_theme(style="whitegrid")
+plt.rcParams["figure.figsize"] = (8, 8)
+
+# Two directory levels above this file.
+project_root = Path(__file__).resolve().parent.parent
+# SQLite database that main() passes to load_raw().
+DB_PATH = project_root / "dataset" / "ds.sqlite"
+# Base directory for every plot written by this script.
+OUT_DIR = project_root / "main_hypot" / "category_analysis"
+
+# Metric prefixes; per-category column names are built as f"{metric}_{category}".
+BASE_COLUMNS = ["active_imp", "passive_imp", "active_click", "passive_click", "orders_amt"]
+# Combined categories: new category name -> existing categories whose columns are summed.
+COMBINED = {
+    "avia_hotel": ["avia", "hotel"],
+}
+
+
+def load_raw(db_path: Path) -> pd.DataFrame:
+    """Load the whole ``communications`` table from the SQLite database.
+
+    Args:
+        db_path: Path to the SQLite file.
+
+    Returns:
+        Every row of ``communications`` with ``business_dt`` parsed as dates.
+    """
+    conn = sqlite3.connect(db_path)
+    # Close the connection even when the query raises (missing table, unreadable
+    # file, ...); a sqlite3 connection used as a context manager would not close it.
+    try:
+        return pd.read_sql_query("select * from communications", conn, parse_dates=["business_dt"])
+    finally:
+        conn.close()
+
+
+def build_client_by_category(df: pd.DataFrame) -> pd.DataFrame:
+    """Aggregate raw rows into one row per client ``id`` with per-category metrics.
+
+    Every ``{metric}_{category}`` column is summed, the number of distinct
+    ``business_dt`` values becomes ``contact_days``, and for each category the
+    columns ``imp_total_{category}`` (active + passive impressions) and
+    ``avg_imp_per_day_{category}`` (total impressions / contact days) are added.
+    """
+    how = {}
+    for metric in BASE_COLUMNS:
+        for category in CATEGORIES:
+            how[f"{metric}_{category}"] = "sum"
+    how["business_dt"] = "nunique"
+
+    per_client = df.groupby("id").agg(how).reset_index()
+    per_client = per_client.rename(columns={"business_dt": "contact_days"})
+
+    for category in CATEGORIES:
+        total_name = f"imp_total_{category}"
+        active = per_client[f"active_imp_{category}"]
+        passive = per_client[f"passive_imp_{category}"]
+        per_client[total_name] = active + passive
+        per_client[f"avg_imp_per_day_{category}"] = safe_divide(
+            per_client[total_name],
+            per_client["contact_days"],
+        )
+
+    return per_client
+
+
+def add_combined_category(client: pd.DataFrame, name: str, cats: list[str]) -> pd.DataFrame:
+    """Add summed columns for a combined category.
+
+    For every base metric the columns of the member categories ``cats`` are summed
+    row-wise into ``{metric}_{name}``; ``imp_total_{name}`` and
+    ``avg_imp_per_day_{name}`` are then derived from them. ``client`` is modified
+    in place and also returned.
+    """
+    for metric in BASE_COLUMNS:
+        members = client[[f"{metric}_{member}" for member in cats]]
+        client[f"{metric}_{name}"] = members.sum(axis=1)
+
+    total_name = f"imp_total_{name}"
+    active = client[f"active_imp_{name}"]
+    passive = client[f"passive_imp_{name}"]
+    client[total_name] = active + passive
+    client[f"avg_imp_per_day_{name}"] = safe_divide(client[total_name], client["contact_days"])
+    return client
+
+
+def plot_category_correlation(client: pd.DataFrame, cat: str, out_dir: Path) -> None:
+    """Save a correlation heatmap of the base metrics of one category.
+
+    The image is written to ``out_dir / f"corr_{cat}.png"``; ``out_dir`` is created
+    when it does not exist.
+    """
+    matrix = client[[f"{base}_{cat}" for base in BASE_COLUMNS]].corr()
+
+    heatmap_style = dict(
+        annot=True,
+        fmt=".2f",
+        cmap="coolwarm",
+        vmin=-1,
+        vmax=1,
+        linewidths=0.5,
+    )
+    fig, ax = plt.subplots(figsize=(6, 5))
+    sns.heatmap(matrix, ax=ax, **heatmap_style)
+    ax.set_title(f"Корреляции показов/кликов/заказов: {cat}")
+    plt.tight_layout()
+
+    out_dir.mkdir(parents=True, exist_ok=True)
+    target = out_dir / f"corr_{cat}.png"
+    fig.savefig(target, dpi=150)
+    plt.close(fig)
+    print(f"Saved correlation heatmap for {cat}: {target}")
+
+
+def fit_quadratic(
+    cleaned: pd.DataFrame,
+    x_col: str,
+    y_col: str,
+    trend_data=None,
+    x_max: float = DEFAULT_X_MAX,
+):
+    """Fit ``y ~ 1 + x + x^2`` by OLS (HC3 covariance) and compute quality metrics.
+
+    The model is fitted on the trend curve when ``trend_data`` provides at least
+    three usable points (``tx <= x_max`` and non-NaN ``ty``); otherwise it is fitted
+    on the raw points of ``cleaned``.
+
+    Args:
+        cleaned: Frame holding ``x_col`` and ``y_col``; rows with NaN in either
+            column are dropped.
+        x_col: Name of the regressor column.
+        y_col: Name of the response column.
+        trend_data: Optional ``(tx, ty)`` pair of trend coordinates; ``None`` or
+            ``(None, None)`` means that no trend is available.
+        x_max: Trend points with ``tx > x_max`` are ignored.
+
+    Returns:
+        ``(model, metrics)``, or ``(None, None)`` when fewer than three rows remain.
+        ``metrics`` keys: ``params``, ``pvalues``, ``r2_points`` (R^2 on the data the
+        model was fitted on), ``r2_trend`` (R^2 of the model against the trend
+        curve, NaN when unavailable), ``auc_on_has_orders`` (ROC AUC of the
+        predictions on the raw points against ``y > 0``, NaN when only one class is
+        present) and ``effective_b2`` (the quadratic coefficient).
+    """
+
+    def design(values):
+        # has_constant="add": the default ("skip") silently omits the intercept
+        # column when ``values`` is constant, which would change the design width.
+        return sm.add_constant(np.column_stack([values, values**2]), has_constant="add")
+
+    cleaned = cleaned[[x_col, y_col]].dropna()
+    if len(cleaned) < 3:
+        return None, None
+    x_all = cleaned[x_col].to_numpy()
+    y_true_all = cleaned[y_col].to_numpy()
+
+    # Filter the trend once; the fit and r2_trend both use these points.
+    tx = ty = None
+    if trend_data is not None and trend_data[0] is not None:
+        tx, ty = (np.asarray(part) for part in trend_data)
+        usable = (tx <= x_max) & ~np.isnan(ty)
+        tx, ty = tx[usable], ty[usable]
+
+    if tx is not None and len(tx) >= 3:
+        x, y = tx, ty
+    else:
+        x, y = x_all, y_true_all
+
+    model = sm.OLS(y, design(x)).fit(cov_type="HC3")
+
+    # AUC of "client has orders" on the raw points, scored by the model prediction.
+    auc = float("nan")
+    binary = (y_true_all > 0).astype(int)
+    if len(np.unique(binary)) > 1:
+        auc = roc_auc_score(binary, model.predict(design(x_all)))
+
+    # R^2 of the fitted parabola against the trend curve.
+    r2_trend = float("nan")
+    if tx is not None and len(tx) > 1:
+        ss_tot = np.sum((ty - np.mean(ty)) ** 2)
+        if ss_tot > 0:
+            ss_res = np.sum((ty - model.predict(design(tx))) ** 2)
+            r2_trend = 1 - ss_res / ss_tot
+
+    metrics = {
+        "params": model.params,
+        "pvalues": model.pvalues,
+        "r2_points": model.rsquared,
+        "r2_trend": r2_trend,
+        "auc_on_has_orders": auc,
+        "effective_b2": model.params[2],
+    }
+    return model, metrics
+
+
+def plot_quad_for_category(
+    client: pd.DataFrame,
+    cat: str,
+    *,
+    base_out_dir: Path = OUT_DIR,
+    x_max_overrides: dict | None = None,
+    y_max_overrides: dict | None = None,
+    savgol_overrides: dict | None = None,
+    q_low_overrides: dict | None = None,
+    q_high_overrides: dict | None = None,
+    iqr_overrides: dict | None = None,
+) -> None:
+    """Draw the cleaned scatter with its trend for one category and overlay a quadratic fit.
+
+    Plots ``orders_amt_{cat}`` against ``avg_imp_per_day_{cat}`` through
+    ``plot_clean_trend_scatter``, fits a quadratic regression with
+    ``fit_quadratic``, draws the fitted curve plus a box with the metrics, and
+    saves the figure as ``scatter_trend_quad.png`` under
+    ``base_out_dir / f"orders_amt_{cat}"``. The coefficients are also printed.
+
+    Args:
+        client: Per-client frame containing the two columns named above.
+        cat: Category name; also the lookup key in every ``*_overrides`` mapping.
+        base_out_dir: Parent directory of the per-category output directory.
+        x_max_overrides: Per-category x-axis limit (default ``DEFAULT_X_MAX``).
+        y_max_overrides: Per-category y-axis limit (default ``DEFAULT_Y_MAX``).
+        savgol_overrides: Per-category ``savgol_window`` (default ``DEFAULT_SAVGOL_WINDOW``).
+        q_low_overrides: Per-category ``q_low`` (default 0.05).
+        q_high_overrides: Per-category ``q_high`` (default 0.95).
+        iqr_overrides: Per-category ``iqr_k`` (default 1.5).
+    """
+    y_col = f"orders_amt_{cat}"
+    x_col = f"avg_imp_per_day_{cat}"
+    out_dir = base_out_dir / y_col
+    x_max = (x_max_overrides or {}).get(cat, DEFAULT_X_MAX)
+    y_max = (y_max_overrides or {}).get(cat, DEFAULT_Y_MAX)
+    savgol_window = (savgol_overrides or {}).get(cat, DEFAULT_SAVGOL_WINDOW)
+    q_low = (q_low_overrides or {}).get(cat, 0.05)
+    q_high = (q_high_overrides or {}).get(cat, 0.95)
+    iqr_k = (iqr_overrides or {}).get(cat, 1.5)
+
+    res = plot_clean_trend_scatter(
+        client,
+        y_col=y_col,
+        out_dir=out_dir,
+        x_col=x_col,
+        x_max=x_max,
+        scatter_color=DEFAULT_SCATTER_COLOR,
+        point_size=20,
+        alpha=DEFAULT_ALPHA,
+        iqr_k=iqr_k,
+        q_low=q_low,
+        q_high=q_high,
+        alpha_min=DEFAULT_ALPHA_MIN,
+        alpha_max=DEFAULT_ALPHA_MAX,
+        bins_x=DEFAULT_BINS_X,
+        bins_y=DEFAULT_BINS_Y,
+        y_min=DEFAULT_Y_MIN,
+        y_max=y_max,
+        trend_frac=DEFAULT_TREND_FRAC,
+        trend_color=DEFAULT_TREND_COLOR,
+        trend_linewidth=DEFAULT_TREND_LINEWIDTH,
+        savgol_window=savgol_window,
+        return_components=True,
+    )
+
+    if res is None:
+        print(f"[{cat}] Нет данных для построения тренда/регрессии")
+        return
+
+    fig, ax, cleaned, trend_data = res
+    # fit_quadratic accepts trend_data=None, so the tuple is forwarded unchanged.
+    model, metrics = fit_quadratic(
+        cleaned,
+        x_col,
+        y_col,
+        trend_data=trend_data,
+        x_max=x_max,
+    )
+
+    if model is None:
+        print(f"[{cat}] Недостаточно точек для квадр. регрессии")
+        # Keep the plain scatter/trend figure when no regression can be fitted.
+        out_dir.mkdir(parents=True, exist_ok=True)
+        fig.savefig(out_dir / "scatter_trend.png", dpi=150)
+        plt.close(fig)
+        return
+
+    x_grid = np.linspace(cleaned[x_col].min(), min(cleaned[x_col].max(), x_max), 400)
+    # has_constant="add" keeps three design columns even when x_grid is constant
+    # (min == max); the default would drop the intercept and break predict().
+    X_grid = sm.add_constant(np.column_stack([x_grid, x_grid**2]), has_constant="add")
+    y_hat = model.predict(X_grid)
+
+    ax.plot(x_grid, y_hat, color="#1f77b4", linewidth=2.2, label="Квадр. регрессия")
+    ax.legend()
+
+    params = metrics["params"]
+    pvals = metrics["pvalues"]
+    # Report the coefficients exactly as fitted so that the annotation always agrees
+    # with the printed values and with the p-values shown next to them.
+    summary_lines = [
+        f"R2_trend={metrics['r2_trend']:.3f}",
+        f"AUC={metrics['auc_on_has_orders']:.3f}",
+        f"b1={params[1]:.3f} (p={pvals[1]:.3g})",
+        f"b2={params[2]:.3f} (p={pvals[2]:.3g})",
+        f"n={len(cleaned)}",
+    ]
+    ax.text(
+        0.02,
+        0.95,
+        "\n".join(summary_lines),
+        transform=ax.transAxes,
+        ha="left",
+        va="top",
+        fontsize=9,
+        bbox=dict(boxstyle="round,pad=0.2", facecolor="white", alpha=0.65, edgecolor="gray"),
+    )
+
+    quad_path = out_dir / "scatter_trend_quad.png"
+    fig.tight_layout()
+    fig.savefig(quad_path, dpi=150)
+    plt.close(fig)
+    print(f"[{cat}] Saved quad reg plot: {quad_path}")
+
+    print(
+        f"[{cat}] b0={params[0]:.4f}, b1={params[1]:.4f} (p={pvals[1]:.4g}), "
+        f"b2={params[2]:.4f} (p={pvals[2]:.4g}), "
+        f"R2_trend={metrics['r2_trend']:.4f}, AUC(has_order)={metrics['auc_on_has_orders']:.4f}"
+    )
+
+
+def main() -> None:
+    """Build the per-client category features and render every correlation and regression plot."""
+    client = build_client_by_category(load_raw(DB_PATH))
+    for combo_name, combo_cats in COMBINED.items():
+        client = add_combined_category(client, combo_name, combo_cats)
+
+    # Example per-category overrides: x_max, y_max, savgol_window and outlier thresholds
+    overrides = dict(
+        x_max_overrides={
+            "ent": 4,
+            "transport": 4,
+            "avia": 4,
+            "shopping": 6,
+            "avia_hotel": 5,
+            "super": 4,
+        },
+        y_max_overrides={
+            "ent": 2.5,
+            "transport": 6,
+            "avia": 1.5,
+            "shopping": 2.5,
+            "avia_hotel": 2.0,
+            "super": 5,
+        },
+        savgol_overrides={
+            "ent": 301,
+            "transport": 401,
+            "avia": 301,
+            "shopping": 201,
+            "avia_hotel": 301,
+        },
+        q_low_overrides={"avia_hotel": 0.05},
+        q_high_overrides={"avia_hotel": 0.9},
+        iqr_overrides={"avia_hotel": 1.2},
+    )
+
+    categories = CATEGORIES + list(COMBINED.keys())
+
+    # All correlation heatmaps first, then the regression plots.
+    heatmap_dir = OUT_DIR / "correlations"
+    for category in categories:
+        plot_category_correlation(client, category, heatmap_dir)
+
+    for category in categories:
+        plot_quad_for_category(client, category, **overrides)
+
+
+# Run the pipeline only when the file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()
--- a/main_hypot/orders_amt_total/scatter_trend.png
+++ b/main_hypot/orders_amt_total/scatter_trend.png
--- a/main_hypot/orders_amt_total/scatter_trend_quad.png
+++ b/main_hypot/orders_amt_total/scatter_trend_quad.png
--- a/main_hypot/quadreg.py
+++ b/main_hypot/quadreg.py
@@ -1,351 +1,151 @@
-import numpy as np
-import pandas as pd
-import statsmodels.api as sm
 from pathlib import Path
-from typing import Tuple, Optional
+from typing import Optional, Tuple

+import numpy as np
+import statsmodels.api as sm
 from sklearn.metrics import r2_score, roc_auc_score

 import best_model_and_plots as bmp

-# Наследуем константы/визуальные настройки из scatter-скрипта
+# Константы из scatter-скрипта
 X_COL = bmp.X_COL
-DEFAULT_X_MAX = bmp.DEFAULT_X_MAX
-DEFAULT_Y_MIN = bmp.DEFAULT_Y_MIN
-DEFAULT_Y_MAX = bmp.DEFAULT_Y_MAX
-DEFAULT_SCATTER_COLOR = bmp.DEFAULT_SCATTER_COLOR
-DEFAULT_POINT_SIZE = bmp.DEFAULT_POINT_SIZE
-DEFAULT_ALPHA = bmp.DEFAULT_ALPHA
-DEFAULT_ALPHA_MIN = bmp.DEFAULT_ALPHA_MIN
-DEFAULT_ALPHA_MAX = bmp.DEFAULT_ALPHA_MAX
-DEFAULT_BINS_X = bmp.DEFAULT_BINS_X
-DEFAULT_BINS_Y = bmp.DEFAULT_BINS_Y
-DEFAULT_IQR_K = bmp.DEFAULT_IQR_K
-DEFAULT_Q_LOW = bmp.DEFAULT_Q_LOW
-DEFAULT_Q_HIGH = bmp.DEFAULT_Q_HIGH
-DEFAULT_TREND_FRAC = bmp.DEFAULT_TREND_FRAC
-DEFAULT_TREND_COLOR = bmp.DEFAULT_TREND_COLOR
-DEFAULT_TREND_LINEWIDTH = bmp.DEFAULT_TREND_LINEWIDTH
-BASE_OUT_DIR = bmp.BASE_OUT_DIR
-
-
-def prepare_clean_data(
-    y_col: str,
-    *,
-    x_col: str = X_COL,
-    x_max: float = DEFAULT_X_MAX,
-    iqr_k: float = DEFAULT_IQR_K,
-    q_low: float = DEFAULT_Q_LOW,
-    q_high: float = DEFAULT_Q_HIGH,
-) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame]:
-    """Готовит очищенные данные: фильтр по x и IQR, возвращает x, y и DataFrame."""
-    df = bmp.load_client_level(bmp.DB_PATH)
-    base = df[[x_col, y_col]].dropna()
-    in_range = bmp.filter_x_range(base, x_col, x_max)
-    cleaned = bmp.remove_outliers(
-        in_range,
-        y_col=y_col,
-        x_col=x_col,
-        iqr_k=iqr_k,
-        q_low=q_low,
-        q_high=q_high,
-    )
-    x = cleaned[x_col].to_numpy()
-    y = cleaned[y_col].to_numpy()
-    return x, y, cleaned
+Y_COL = "orders_amt_total"
+X_MAX = bmp.DEFAULT_X_MAX
+Y_MIN = bmp.DEFAULT_Y_MIN
+Y_MAX = bmp.DEFAULT_Y_MAX


 def fit_quadratic(
-    x: np.ndarray,
-    y_target: np.ndarray,
-    weights: Optional[np.ndarray] = None,
-) -> Tuple[sm.regression.linear_model.RegressionResultsWrapper, np.ndarray]:
-    """Фитим квадратику по x -> y_target (WLS), предсказываем на тех же x."""
-    X_design = np.column_stack([x, x**2])
-    X_design = sm.add_constant(X_design)
-    if weights is not None:
-        model = sm.WLS(y_target, X_design, weights=weights).fit(cov_type="HC3")
+    cleaned: bmp.pd.DataFrame,
+    trend_data: Optional[Tuple[np.ndarray, np.ndarray]],
+    *,
+    x_col: str = X_COL,
+    y_col: str = Y_COL,
+    x_max: float = X_MAX,
+) -> Tuple[Optional[sm.regression.linear_model.RegressionResultsWrapper], dict]:
+    """Фитит y ~ 1 + x + x^2. Если есть тренд, использует его как целевое для r2_trend."""
+    df = cleaned[[x_col, y_col]].dropna()
+    if len(df) < 3:
+        return None, {}
+
+    if trend_data is not None and trend_data[0] is not None:
+        tx, ty = trend_data
+        tx = np.asarray(tx)
+        ty = np.asarray(ty)
+        mask = (tx <= x_max) & ~np.isnan(ty)
+        tx = tx[mask]
+        ty = ty[mask]
    else:
-        model = sm.OLS(y_target, X_design).fit(cov_type="HC3")
+        tx = ty = None

-    y_hat = model.predict(X_design)
-    return model, y_hat
+    x = df[x_col].to_numpy()
+    y = df[y_col].to_numpy()
+
+    X_design = sm.add_constant(np.column_stack([x, x**2]))
+    model = sm.OLS(y, X_design).fit(cov_type="HC3")
+
+    auc = np.nan
+    binary = (y > 0).astype(int)
+    if len(np.unique(binary)) > 1:
+        auc = roc_auc_score(binary, model.predict(X_design))
+
+    r2_trend = np.nan
+    if tx is not None and len(tx) >= 3:
+        X_trend = sm.add_constant(np.column_stack([tx, tx**2]))
+        y_hat_trend = model.predict(X_trend)
+        if np.nanvar(ty) > 0:
+            r2_trend = r2_score(ty, y_hat_trend)
+
+    metrics = {
+        "auc": auc,
+        "r2_trend": r2_trend,
+    }
+    return model, metrics


-def compute_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[Optional[float], Optional[float]]:
-    """Возвращает (R2, AUC по метке y>0)."""
-    r2 = r2_score(y_true, y_pred)
-    auc = None
-    try:
-        auc = roc_auc_score((y_true > 0).astype(int), y_pred)
-    except ValueError:
-        auc = None
-    return r2, auc
-
-
-def map_trend_to_points(x_points: np.ndarray, trend_x: np.ndarray, trend_y: np.ndarray) -> np.ndarray:
-    """Интерполирует значения тренда в точках x_points."""
-    if len(trend_x) == 0:
-        return np.zeros_like(x_points)
-    # гарантируем отсортированность
-    order = np.argsort(trend_x)
-    tx = trend_x[order]
-    ty = trend_y[order]
-    return np.interp(x_points, tx, ty, left=ty[0], right=ty[-1])
-
-
-def density_weights(
-    df: pd.DataFrame,
-    y_col: str,
-    *,
-    x_col: str = X_COL,
-    x_max: float = DEFAULT_X_MAX,
-    alpha_min: float = DEFAULT_ALPHA_MIN,
-    alpha_max: float = DEFAULT_ALPHA_MAX,
-    bins_x: int = DEFAULT_BINS_X,
-    bins_y: int = DEFAULT_BINS_Y,
-    y_min: float = DEFAULT_Y_MIN,
-    y_max: float = DEFAULT_Y_MAX,
-) -> np.ndarray:
-    """Строит веса из плотности (та же схема, что и альфы на графике)."""
-    alphas = bmp.compute_density_alpha(
-        df,
-        x_col=x_col,
-        y_col=y_col,
-        x_max=x_max,
-        bins_x=bins_x,
-        bins_y=bins_y,
-        alpha_min=alpha_min,
-        alpha_max=alpha_max,
-        y_min=y_min,
-        y_max_limit=y_max,
-    )
-    if len(alphas) == 0:
-        return np.ones(len(df))
-    denom = max(alpha_max - alpha_min, 1e-9)
-    weights = (alphas - alpha_min) / denom
-    weights = np.clip(weights, 0, None)
-    return weights
-
-
-def plot_quadratic_overlay(
-    df: pd.DataFrame,
-    model: sm.regression.linear_model.RegressionResultsWrapper,
-    y_col: str,
-    out_path: Path,
-    *,
-    x_col: str = X_COL,
-    x_max: float = DEFAULT_X_MAX,
-    y_min: float = DEFAULT_Y_MIN,
-    y_max: float = DEFAULT_Y_MAX,
-    scatter_color: str = DEFAULT_SCATTER_COLOR,
-    point_size: int = DEFAULT_POINT_SIZE,
-    alpha: float = DEFAULT_ALPHA,
-    alpha_min: float = DEFAULT_ALPHA_MIN,
-    alpha_max: float = DEFAULT_ALPHA_MAX,
-    bins_x: int = DEFAULT_BINS_X,
-    bins_y: int = DEFAULT_BINS_Y,
-    trend_frac: float = DEFAULT_TREND_FRAC,
-    trend_color: str = DEFAULT_TREND_COLOR,
-    trend_linewidth: float = DEFAULT_TREND_LINEWIDTH,
-    trend_method: str = bmp.DEFAULT_TREND_METHOD,
-    rolling_window: int = bmp.DEFAULT_ROLLING_WINDOW,
+def plot_overall_quad(
+    x_max: float = X_MAX,
+    y_min: float = Y_MIN,
+    y_max: float = Y_MAX,
    savgol_window: int = bmp.DEFAULT_SAVGOL_WINDOW,
-    savgol_poly: int = bmp.DEFAULT_SAVGOL_POLY,
 ) -> None:
-    """Рисует облако + LOWESS-тренд + линию квадр. регрессии."""
-    fig, ax = bmp.plt.subplots(figsize=(8, 8))
-    alpha_values = bmp.compute_density_alpha(
-        df,
-        x_col=x_col,
-        y_col=y_col,
+    out_dir = bmp.BASE_OUT_DIR / Y_COL
+
+    res = bmp.plot_clean_trend_scatter(
+        bmp.load_client_level(bmp.DB_PATH),
+        y_col=Y_COL,
+        out_dir=out_dir,
+        x_col=X_COL,
        x_max=x_max,
-        bins_x=bins_x,
-        bins_y=bins_y,
-        alpha_min=alpha_min,
-        alpha_max=alpha_max,
+        scatter_color=bmp.DEFAULT_SCATTER_COLOR,
+        point_size=bmp.DEFAULT_POINT_SIZE,
+        alpha=bmp.DEFAULT_TREND_ALPHA,
+        iqr_k=bmp.DEFAULT_IQR_K,
+        q_low=bmp.DEFAULT_Q_LOW,
+        q_high=bmp.DEFAULT_Q_HIGH,
+        alpha_min=bmp.DEFAULT_ALPHA_MIN,
+        alpha_max=bmp.DEFAULT_ALPHA_MAX,
+        bins_x=bmp.DEFAULT_BINS_X,
+        bins_y=bmp.DEFAULT_BINS_Y,
        y_min=y_min,
-        y_max_limit=y_max,
-    )
-    ax.scatter(
-        df[x_col],
-        df[y_col],
-        color=scatter_color,
-        s=point_size,
-        alpha=alpha_values if len(alpha_values) else alpha,
-        linewidths=0,
-        label="Точки (очищено)",
-    )
-
-    # Тренд по выбранному методу
-    tx, ty = bmp.compute_trend(
-        df,
-        y_col=y_col,
-        x_col=x_col,
-        method=trend_method,
-        lowess_frac=trend_frac,
-        rolling_window=rolling_window,
+        y_max=y_max,
+        trend_frac=bmp.DEFAULT_TREND_FRAC,
+        trend_color=bmp.DEFAULT_TREND_COLOR,
+        trend_linewidth=bmp.DEFAULT_TREND_LINEWIDTH,
+        trend_method=bmp.DEFAULT_TREND_METHOD,
        savgol_window=savgol_window,
-        savgol_poly=savgol_poly,
+        return_components=True,
    )
-    if len(tx):
-        ax.plot(tx, ty, color=trend_color, linewidth=trend_linewidth, label=f"{trend_method} тренд")

-    # Квадратичная регрессия
+    if res is None:
+        print("Нет данных для построения графика")
+        return
+
+    fig, ax, cleaned, trend_data = res
+    model, metrics = fit_quadratic(cleaned, trend_data, x_col=X_COL, y_col=Y_COL, x_max=x_max)
+
+    if model is None:
+        print("Недостаточно точек для квадратичной регрессии")
+        fig.savefig(out_dir / "scatter_trend.png", dpi=150)
+        bmp.plt.close(fig)
+        return
+
+    # Квадратичная линия поверх существующего тренда
    x_grid = np.linspace(0, x_max, 400)
    X_grid = sm.add_constant(np.column_stack([x_grid, x_grid**2]))
    y_grid = model.predict(X_grid)
-    ax.plot(x_grid, y_grid, color="blue", linewidth=2.3, linestyle="--", label="Квадр. регрессия")
-
-    ax.set_xlim(0, x_max)
-    ax.set_ylim(y_min, y_max)
-    ax.set_yticks(range(0, int(y_max) + 1, 2))
-    ax.set_xlabel("Среднее число показов в день")
-    ax.set_ylabel(y_col)
-    ax.set_title(f"Квадратичная регрессия: {y_col} vs {x_col}")
-    ax.grid(alpha=0.3)
+    ax.plot(x_grid, y_grid, color="blue", linewidth=2.2, linestyle="--", label="Квадр. регрессия")
    ax.legend()

-    out_path.parent.mkdir(parents=True, exist_ok=True)
-    fig.tight_layout()
-    fig.savefig(out_path, dpi=150)
-    bmp.plt.close(fig)
-    print(f"Saved {out_path}")
-
-
-def report_model(
-    model: sm.regression.linear_model.RegressionResultsWrapper,
-    r2: Optional[float],
-    auc: Optional[float],
-    *,
-    r2_trend: Optional[float] = None,
-) -> None:
    params = model.params
    pvals = model.pvalues
-    fmt_p = lambda p: f"<1e-300" if p < 1e-300 else f"{p:.4g}"
-    print("\n=== Квадратичная регрессия (y ~ 1 + x + x^2) ===")
-    print(f"const: {params[0]:.6f} (p={fmt_p(pvals[0])})")
-    print(f"beta1 x: {params[1]:.6f} (p={fmt_p(pvals[1])})")
-    print(f"beta2 x^2: {params[2]:.6f} (p={fmt_p(pvals[2])})")
-    print(f"R2: {r2:.4f}" if r2 is not None else "R2: n/a")
-    if r2_trend is not None:
-        print(f"R2 vs trend target: {r2_trend:.4f}")
-    print(f"AUC (target y>0): {auc:.4f}" if auc is not None else "AUC: n/a (один класс)")
-
-
-def generate_quadratic_analysis(
-    y_col: str,
-    *,
-    x_col: str = X_COL,
-    base_out_dir: Path = BASE_OUT_DIR,
-    config_name: str = "default",
-    x_max: float = DEFAULT_X_MAX,
-    y_min: float = DEFAULT_Y_MIN,
-    y_max: float = DEFAULT_Y_MAX,
-    scatter_color: str = DEFAULT_SCATTER_COLOR,
-    point_size: int = DEFAULT_POINT_SIZE,
-    alpha: float = DEFAULT_ALPHA,
-    alpha_min: float = DEFAULT_ALPHA_MIN,
-    alpha_max: float = DEFAULT_ALPHA_MAX,
-    bins_x: int = DEFAULT_BINS_X,
-    bins_y: int = DEFAULT_BINS_Y,
-    trend_frac: float = DEFAULT_TREND_FRAC,
-    trend_color: str = DEFAULT_TREND_COLOR,
-    trend_linewidth: float = DEFAULT_TREND_LINEWIDTH,
-    iqr_k: float = DEFAULT_IQR_K,
-    q_low: float = DEFAULT_Q_LOW,
-    q_high: float = DEFAULT_Q_HIGH,
-    trend_method: str = bmp.DEFAULT_TREND_METHOD,
-    rolling_window: int = bmp.DEFAULT_ROLLING_WINDOW,
-    savgol_window: int = bmp.DEFAULT_SAVGOL_WINDOW,
-    savgol_poly: int = bmp.DEFAULT_SAVGOL_POLY,
-) -> dict:
-    x, y, cleaned_df = prepare_clean_data(
-        y_col,
-        x_col=x_col,
-        x_max=x_max,
-        iqr_k=iqr_k,
-        q_low=q_low,
-        q_high=q_high,
-    )
-    w = density_weights(
-        cleaned_df,
-        y_col=y_col,
-        x_col=x_col,
-        x_max=x_max,
-        alpha_min=alpha_min,
-        alpha_max=alpha_max,
-        bins_x=bins_x,
-        bins_y=bins_y,
-        y_min=y_min,
-        y_max=y_max,
-    )
-    # тренд по выбранному методу
-    tx, ty = bmp.compute_trend(
-        cleaned_df,
-        y_col=y_col,
-        x_col=x_col,
-        method=trend_method,
-        lowess_frac=trend_frac,
-        rolling_window=rolling_window,
-        savgol_window=savgol_window,
-        savgol_poly=savgol_poly,
+    summary_lines = [
+        f"R2_trend={metrics['r2_trend']:.3f}",
+        f"AUC={metrics['auc']:.3f}",
+        f"b1={params[1]:.3f} (p={pvals[1]:.3g})",
+        f"b2={params[2]:.3f} (p={pvals[2]:.3g})",
+        f"n={len(cleaned)}",
+    ]
+    ax.text(
+        0.02,
+        0.95,
+        "\n".join(summary_lines),
+        transform=ax.transAxes,
+        ha="left",
+        va="top",
+        fontsize=9,
+        bbox=dict(boxstyle="round,pad=0.2", facecolor="white", alpha=0.65, edgecolor="gray"),
    )

-    trend_target = map_trend_to_points(x, tx, ty)
-    model, y_hat = fit_quadratic(x, trend_target, weights=w)
-    r2_actual, auc = compute_metrics(y, y_hat)
-    r2_trend = r2_score(trend_target, y_hat) if len(trend_target) else None
-    report_model(model, r2_actual, auc, r2_trend=r2_trend)
-
-    out_dir = base_out_dir / config_name / str(y_col).replace("/", "_")
-    plot_quadratic_overlay(
-        cleaned_df,
-        model,
-        y_col=y_col,
-        out_path=out_dir / "quad_regression.png",
-        x_col=x_col,
-        x_max=x_max,
-        y_min=y_min,
-        y_max=y_max,
-        scatter_color=scatter_color,
-        point_size=point_size,
-        alpha=alpha,
-        alpha_min=alpha_min,
-        alpha_max=alpha_max,
-        bins_x=bins_x,
-        bins_y=bins_y,
-        trend_frac=trend_frac,
-        trend_color=trend_color,
-        trend_linewidth=trend_linewidth,
-        trend_method=trend_method,
-        rolling_window=rolling_window,
-        savgol_window=savgol_window,
-        savgol_poly=savgol_poly,
-    )
-
-    return {
-        "config": config_name,
-        "y_col": y_col,
-        "r2": r2_actual,
-        "r2_trend": r2_trend,
-        "auc": auc,
-        "params": {
-            "trend_method": trend_method,
-            "trend_frac": trend_frac,
-            "rolling_window": rolling_window,
-            "savgol_window": savgol_window,
-            "savgol_poly": savgol_poly,
-            "x_max": x_max,
-            "weights_alpha_range": (alpha_min, alpha_max),
-        },
-        "coeffs": model.params.tolist(),
-        "pvalues": model.pvalues.tolist(),
-    }
+    quad_path = out_dir / "scatter_trend_quad.png"
+    fig.tight_layout()
+    fig.savefig(quad_path, dpi=150)
+    bmp.plt.close(fig)
+    print(f"Saved {quad_path}")


 def main() -> None:
-    generate_quadratic_analysis("orders_amt_total")
+    plot_overall_quad()


 if __name__ == "__main__":