good quadreg 0.92 r2
This commit is contained in:
@@ -1,240 +1,352 @@
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
|
||||
import statsmodels.api as sm
|
||||
from pathlib import Path
|
||||
from typing import Tuple, Optional
|
||||
|
||||
sns.set_theme(style="whitegrid")
|
||||
plt.rcParams["figure.figsize"] = (10, 6)
|
||||
from sklearn.metrics import r2_score, roc_auc_score
|
||||
|
||||
# -----------------------------
|
||||
# Load + feature engineering (как у тебя)
|
||||
# -----------------------------
|
||||
project_root = Path(__file__).resolve().parent.parent
|
||||
sys.path.append(str(project_root / "preanalysis_old_bad"))
|
||||
import eda_utils as eda # noqa: E402
|
||||
import best_model_and_plots as bmp
|
||||
|
||||
db_path = project_root / "dataset" / "ds.sqlite"
|
||||
conn = sqlite3.connect(db_path)
|
||||
df = pd.read_sql_query("select * from communications", conn, parse_dates=["business_dt"])
|
||||
conn.close()
|
||||
# Наследуем константы/визуальные настройки из scatter-скрипта
|
||||
X_COL = bmp.X_COL
|
||||
DEFAULT_X_MAX = bmp.DEFAULT_X_MAX
|
||||
DEFAULT_Y_MIN = bmp.DEFAULT_Y_MIN
|
||||
DEFAULT_Y_MAX = bmp.DEFAULT_Y_MAX
|
||||
DEFAULT_SCATTER_COLOR = bmp.DEFAULT_SCATTER_COLOR
|
||||
DEFAULT_POINT_SIZE = bmp.DEFAULT_POINT_SIZE
|
||||
DEFAULT_ALPHA = bmp.DEFAULT_ALPHA
|
||||
DEFAULT_ALPHA_MIN = bmp.DEFAULT_ALPHA_MIN
|
||||
DEFAULT_ALPHA_MAX = bmp.DEFAULT_ALPHA_MAX
|
||||
DEFAULT_BINS_X = bmp.DEFAULT_BINS_X
|
||||
DEFAULT_BINS_Y = bmp.DEFAULT_BINS_Y
|
||||
DEFAULT_IQR_K = bmp.DEFAULT_IQR_K
|
||||
DEFAULT_Q_LOW = bmp.DEFAULT_Q_LOW
|
||||
DEFAULT_Q_HIGH = bmp.DEFAULT_Q_HIGH
|
||||
DEFAULT_TREND_FRAC = bmp.DEFAULT_TREND_FRAC
|
||||
DEFAULT_TREND_COLOR = bmp.DEFAULT_TREND_COLOR
|
||||
DEFAULT_TREND_LINEWIDTH = bmp.DEFAULT_TREND_LINEWIDTH
|
||||
BASE_OUT_DIR = bmp.BASE_OUT_DIR
|
||||
|
||||
for cols, name in [
|
||||
(eda.ACTIVE_IMP_COLS, "active_imp_total"),
|
||||
(eda.PASSIVE_IMP_COLS, "passive_imp_total"),
|
||||
(eda.ACTIVE_CLICK_COLS, "active_click_total"),
|
||||
(eda.PASSIVE_CLICK_COLS, "passive_click_total"),
|
||||
(eda.ORDER_COLS, "orders_amt_total"),
|
||||
]:
|
||||
df[name] = df[cols].sum(axis=1)
|
||||
|
||||
df["imp_total"] = df["active_imp_total"] + df["passive_imp_total"]
|
||||
df["click_total"] = df["active_click_total"] + df["passive_click_total"]
|
||||
|
||||
contact_days = df.groupby("id")["business_dt"].nunique().rename("contact_days")
|
||||
|
||||
client = (
|
||||
df.groupby("id")
|
||||
.agg(
|
||||
imp_total=("imp_total", "sum"),
|
||||
click_total=("click_total", "sum"),
|
||||
orders_amt_total=("orders_amt_total", "sum"),
|
||||
age=("age", "median"),
|
||||
gender_cd=("gender_cd", lambda s: s.mode().iat[0]),
|
||||
device_platform_cd=("device_platform_cd", lambda s: s.mode().iat[0]),
|
||||
def prepare_clean_data(
|
||||
y_col: str,
|
||||
*,
|
||||
x_col: str = X_COL,
|
||||
x_max: float = DEFAULT_X_MAX,
|
||||
iqr_k: float = DEFAULT_IQR_K,
|
||||
q_low: float = DEFAULT_Q_LOW,
|
||||
q_high: float = DEFAULT_Q_HIGH,
|
||||
) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame]:
|
||||
"""Готовит очищенные данные: фильтр по x и IQR, возвращает x, y и DataFrame."""
|
||||
df = bmp.load_client_level(bmp.DB_PATH)
|
||||
base = df[[x_col, y_col]].dropna()
|
||||
in_range = bmp.filter_x_range(base, x_col, x_max)
|
||||
cleaned = bmp.remove_outliers(
|
||||
in_range,
|
||||
y_col=y_col,
|
||||
x_col=x_col,
|
||||
iqr_k=iqr_k,
|
||||
q_low=q_low,
|
||||
q_high=q_high,
|
||||
)
|
||||
.merge(contact_days, on="id", how="left")
|
||||
.reset_index()
|
||||
)
|
||||
x = cleaned[x_col].to_numpy()
|
||||
y = cleaned[y_col].to_numpy()
|
||||
return x, y, cleaned
|
||||
|
||||
client["order_rate"] = eda.safe_divide(client["orders_amt_total"], client["imp_total"])
|
||||
client["order_rate_pct"] = 100 * client["order_rate"]
|
||||
client["avg_imp_per_day"] = eda.safe_divide(client["imp_total"], client["contact_days"])
|
||||
|
||||
# -----------------------------
|
||||
# Aggregate curve points (как у тебя)
|
||||
# -----------------------------
|
||||
stats_imp = (
|
||||
client.groupby("avg_imp_per_day", as_index=False)
|
||||
.agg(
|
||||
orders_mean=("orders_amt_total", "mean"),
|
||||
n_clients=("id", "count"),
|
||||
def fit_quadratic(
|
||||
x: np.ndarray,
|
||||
y_target: np.ndarray,
|
||||
weights: Optional[np.ndarray] = None,
|
||||
) -> Tuple[sm.regression.linear_model.RegressionResultsWrapper, np.ndarray]:
|
||||
"""Фитим квадратику по x -> y_target (WLS), предсказываем на тех же x."""
|
||||
X_design = np.column_stack([x, x**2])
|
||||
X_design = sm.add_constant(X_design)
|
||||
if weights is not None:
|
||||
model = sm.WLS(y_target, X_design, weights=weights).fit(cov_type="HC3")
|
||||
else:
|
||||
model = sm.OLS(y_target, X_design).fit(cov_type="HC3")
|
||||
|
||||
y_hat = model.predict(X_design)
|
||||
return model, y_hat
|
||||
|
||||
|
||||
def compute_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[Optional[float], Optional[float]]:
|
||||
"""Возвращает (R2, AUC по метке y>0)."""
|
||||
r2 = r2_score(y_true, y_pred)
|
||||
auc = None
|
||||
try:
|
||||
auc = roc_auc_score((y_true > 0).astype(int), y_pred)
|
||||
except ValueError:
|
||||
auc = None
|
||||
return r2, auc
|
||||
|
||||
|
||||
def map_trend_to_points(x_points: np.ndarray, trend_x: np.ndarray, trend_y: np.ndarray) -> np.ndarray:
|
||||
"""Интерполирует значения тренда в точках x_points."""
|
||||
if len(trend_x) == 0:
|
||||
return np.zeros_like(x_points)
|
||||
# гарантируем отсортированность
|
||||
order = np.argsort(trend_x)
|
||||
tx = trend_x[order]
|
||||
ty = trend_y[order]
|
||||
return np.interp(x_points, tx, ty, left=ty[0], right=ty[-1])
|
||||
|
||||
|
||||
def density_weights(
|
||||
df: pd.DataFrame,
|
||||
y_col: str,
|
||||
*,
|
||||
x_col: str = X_COL,
|
||||
x_max: float = DEFAULT_X_MAX,
|
||||
alpha_min: float = DEFAULT_ALPHA_MIN,
|
||||
alpha_max: float = DEFAULT_ALPHA_MAX,
|
||||
bins_x: int = DEFAULT_BINS_X,
|
||||
bins_y: int = DEFAULT_BINS_Y,
|
||||
y_min: float = DEFAULT_Y_MIN,
|
||||
y_max: float = DEFAULT_Y_MAX,
|
||||
) -> np.ndarray:
|
||||
"""Строит веса из плотности (та же схема, что и альфы на графике)."""
|
||||
alphas = bmp.compute_density_alpha(
|
||||
df,
|
||||
x_col=x_col,
|
||||
y_col=y_col,
|
||||
x_max=x_max,
|
||||
bins_x=bins_x,
|
||||
bins_y=bins_y,
|
||||
alpha_min=alpha_min,
|
||||
alpha_max=alpha_max,
|
||||
y_min=y_min,
|
||||
y_max_limit=y_max,
|
||||
)
|
||||
.sort_values("avg_imp_per_day")
|
||||
).reset_index(drop=True)
|
||||
if len(alphas) == 0:
|
||||
return np.ones(len(df))
|
||||
denom = max(alpha_max - alpha_min, 1e-9)
|
||||
weights = (alphas - alpha_min) / denom
|
||||
weights = np.clip(weights, 0, None)
|
||||
return weights
|
||||
|
||||
# -----------------------------
|
||||
# Filtering / outlier logic (как у тебя)
|
||||
# -----------------------------
|
||||
K_MULT = 2
|
||||
ABS_DY_MIN = 1
|
||||
X_MAX = 16
|
||||
|
||||
stats_f = stats_imp[stats_imp["avg_imp_per_day"] <= X_MAX].copy().reset_index(drop=True)
|
||||
def plot_quadratic_overlay(
|
||||
df: pd.DataFrame,
|
||||
model: sm.regression.linear_model.RegressionResultsWrapper,
|
||||
y_col: str,
|
||||
out_path: Path,
|
||||
*,
|
||||
x_col: str = X_COL,
|
||||
x_max: float = DEFAULT_X_MAX,
|
||||
y_min: float = DEFAULT_Y_MIN,
|
||||
y_max: float = DEFAULT_Y_MAX,
|
||||
scatter_color: str = DEFAULT_SCATTER_COLOR,
|
||||
point_size: int = DEFAULT_POINT_SIZE,
|
||||
alpha: float = DEFAULT_ALPHA,
|
||||
alpha_min: float = DEFAULT_ALPHA_MIN,
|
||||
alpha_max: float = DEFAULT_ALPHA_MAX,
|
||||
bins_x: int = DEFAULT_BINS_X,
|
||||
bins_y: int = DEFAULT_BINS_Y,
|
||||
trend_frac: float = DEFAULT_TREND_FRAC,
|
||||
trend_color: str = DEFAULT_TREND_COLOR,
|
||||
trend_linewidth: float = DEFAULT_TREND_LINEWIDTH,
|
||||
trend_method: str = bmp.DEFAULT_TREND_METHOD,
|
||||
rolling_window: int = bmp.DEFAULT_ROLLING_WINDOW,
|
||||
savgol_window: int = bmp.DEFAULT_SAVGOL_WINDOW,
|
||||
savgol_poly: int = bmp.DEFAULT_SAVGOL_POLY,
|
||||
) -> None:
|
||||
"""Рисует облако + LOWESS-тренд + линию квадр. регрессии."""
|
||||
fig, ax = bmp.plt.subplots(figsize=(8, 8))
|
||||
alpha_values = bmp.compute_density_alpha(
|
||||
df,
|
||||
x_col=x_col,
|
||||
y_col=y_col,
|
||||
x_max=x_max,
|
||||
bins_x=bins_x,
|
||||
bins_y=bins_y,
|
||||
alpha_min=alpha_min,
|
||||
alpha_max=alpha_max,
|
||||
y_min=y_min,
|
||||
y_max_limit=y_max,
|
||||
)
|
||||
ax.scatter(
|
||||
df[x_col],
|
||||
df[y_col],
|
||||
color=scatter_color,
|
||||
s=point_size,
|
||||
alpha=alpha_values if len(alpha_values) else alpha,
|
||||
linewidths=0,
|
||||
label="Точки (очищено)",
|
||||
)
|
||||
|
||||
before = len(stats_f)
|
||||
y = stats_f["orders_mean"]
|
||||
abs_dy = y.diff().abs()
|
||||
# Тренд по выбранному методу
|
||||
tx, ty = bmp.compute_trend(
|
||||
df,
|
||||
y_col=y_col,
|
||||
x_col=x_col,
|
||||
method=trend_method,
|
||||
lowess_frac=trend_frac,
|
||||
rolling_window=rolling_window,
|
||||
savgol_window=savgol_window,
|
||||
savgol_poly=savgol_poly,
|
||||
)
|
||||
if len(tx):
|
||||
ax.plot(tx, ty, color=trend_color, linewidth=trend_linewidth, label=f"{trend_method} тренд")
|
||||
|
||||
prev3_mean = abs_dy.shift(1).rolling(window=3, min_periods=3).mean()
|
||||
ratio = abs_dy / (prev3_mean.replace(0, np.nan))
|
||||
# Квадратичная регрессия
|
||||
x_grid = np.linspace(0, x_max, 400)
|
||||
X_grid = sm.add_constant(np.column_stack([x_grid, x_grid**2]))
|
||||
y_grid = model.predict(X_grid)
|
||||
ax.plot(x_grid, y_grid, color="blue", linewidth=2.3, linestyle="--", label="Квадр. регрессия")
|
||||
|
||||
is_outlier = ((abs_dy >= ABS_DY_MIN) & (ratio >= K_MULT)) | (y > 5)
|
||||
is_outlier = is_outlier.fillna(False)
|
||||
ax.set_xlim(0, x_max)
|
||||
ax.set_ylim(y_min, y_max)
|
||||
ax.set_yticks(range(0, int(y_max) + 1, 2))
|
||||
ax.set_xlabel("Среднее число показов в день")
|
||||
ax.set_ylabel(y_col)
|
||||
ax.set_title(f"Квадратичная регрессия: {y_col} vs {x_col}")
|
||||
ax.grid(alpha=0.3)
|
||||
ax.legend()
|
||||
|
||||
stats_f = stats_f.loc[~is_outlier].copy().reset_index(drop=True)
|
||||
after = len(stats_f)
|
||||
print(f"Фильтрация: было {before}, стало {after}, убрали {before-after} точек")
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
fig.tight_layout()
|
||||
fig.savefig(out_path, dpi=150)
|
||||
bmp.plt.close(fig)
|
||||
print(f"Saved {out_path}")
|
||||
|
||||
# -----------------------------
|
||||
# Smoothing (оставим для визуалки, но регрессию делаем по orders_mean)
|
||||
# -----------------------------
|
||||
w = max(7, int(len(stats_f) * 0.05))
|
||||
if w % 2 == 0:
|
||||
w += 1
|
||||
|
||||
stats_f["orders_smooth"] = (
|
||||
stats_f["orders_mean"]
|
||||
.rolling(window=w, center=True, min_periods=1)
|
||||
.mean()
|
||||
)
|
||||
def report_model(
|
||||
model: sm.regression.linear_model.RegressionResultsWrapper,
|
||||
r2: Optional[float],
|
||||
auc: Optional[float],
|
||||
*,
|
||||
r2_trend: Optional[float] = None,
|
||||
) -> None:
|
||||
params = model.params
|
||||
pvals = model.pvalues
|
||||
fmt_p = lambda p: f"<1e-300" if p < 1e-300 else f"{p:.4g}"
|
||||
print("\n=== Квадратичная регрессия (y ~ 1 + x + x^2) ===")
|
||||
print(f"const: {params[0]:.6f} (p={fmt_p(pvals[0])})")
|
||||
print(f"beta1 x: {params[1]:.6f} (p={fmt_p(pvals[1])})")
|
||||
print(f"beta2 x^2: {params[2]:.6f} (p={fmt_p(pvals[2])})")
|
||||
print(f"R2: {r2:.4f}" if r2 is not None else "R2: n/a")
|
||||
if r2_trend is not None:
|
||||
print(f"R2 vs trend target: {r2_trend:.4f}")
|
||||
print(f"AUC (target y>0): {auc:.4f}" if auc is not None else "AUC: n/a (один класс)")
|
||||
|
||||
# -----------------------------
|
||||
# Cost line (как у тебя, нормировка "в единицах заказов")
|
||||
# -----------------------------
|
||||
c = stats_f["orders_smooth"].max() / stats_f["avg_imp_per_day"].max()
|
||||
stats_f["cost_line"] = c * stats_f["avg_imp_per_day"]
|
||||
|
||||
# -----------------------------
|
||||
# Quadratic regression: orders_mean ~ 1 + x + x^2
|
||||
# WLS with weights = n_clients
|
||||
# -----------------------------
|
||||
x = stats_f["avg_imp_per_day"].to_numpy()
|
||||
y = stats_f["orders_mean"].to_numpy()
|
||||
wts = stats_f["n_clients"].to_numpy().astype(float)
|
||||
def generate_quadratic_analysis(
|
||||
y_col: str,
|
||||
*,
|
||||
x_col: str = X_COL,
|
||||
base_out_dir: Path = BASE_OUT_DIR,
|
||||
config_name: str = "default",
|
||||
x_max: float = DEFAULT_X_MAX,
|
||||
y_min: float = DEFAULT_Y_MIN,
|
||||
y_max: float = DEFAULT_Y_MAX,
|
||||
scatter_color: str = DEFAULT_SCATTER_COLOR,
|
||||
point_size: int = DEFAULT_POINT_SIZE,
|
||||
alpha: float = DEFAULT_ALPHA,
|
||||
alpha_min: float = DEFAULT_ALPHA_MIN,
|
||||
alpha_max: float = DEFAULT_ALPHA_MAX,
|
||||
bins_x: int = DEFAULT_BINS_X,
|
||||
bins_y: int = DEFAULT_BINS_Y,
|
||||
trend_frac: float = DEFAULT_TREND_FRAC,
|
||||
trend_color: str = DEFAULT_TREND_COLOR,
|
||||
trend_linewidth: float = DEFAULT_TREND_LINEWIDTH,
|
||||
iqr_k: float = DEFAULT_IQR_K,
|
||||
q_low: float = DEFAULT_Q_LOW,
|
||||
q_high: float = DEFAULT_Q_HIGH,
|
||||
trend_method: str = bmp.DEFAULT_TREND_METHOD,
|
||||
rolling_window: int = bmp.DEFAULT_ROLLING_WINDOW,
|
||||
savgol_window: int = bmp.DEFAULT_SAVGOL_WINDOW,
|
||||
savgol_poly: int = bmp.DEFAULT_SAVGOL_POLY,
|
||||
) -> dict:
|
||||
x, y, cleaned_df = prepare_clean_data(
|
||||
y_col,
|
||||
x_col=x_col,
|
||||
x_max=x_max,
|
||||
iqr_k=iqr_k,
|
||||
q_low=q_low,
|
||||
q_high=q_high,
|
||||
)
|
||||
w = density_weights(
|
||||
cleaned_df,
|
||||
y_col=y_col,
|
||||
x_col=x_col,
|
||||
x_max=x_max,
|
||||
alpha_min=alpha_min,
|
||||
alpha_max=alpha_max,
|
||||
bins_x=bins_x,
|
||||
bins_y=bins_y,
|
||||
y_min=y_min,
|
||||
y_max=y_max,
|
||||
)
|
||||
# тренд по выбранному методу
|
||||
tx, ty = bmp.compute_trend(
|
||||
cleaned_df,
|
||||
y_col=y_col,
|
||||
x_col=x_col,
|
||||
method=trend_method,
|
||||
lowess_frac=trend_frac,
|
||||
rolling_window=rolling_window,
|
||||
savgol_window=savgol_window,
|
||||
savgol_poly=savgol_poly,
|
||||
)
|
||||
|
||||
X = np.column_stack([x, x**2])
|
||||
X = sm.add_constant(X) # [1, x, x^2]
|
||||
trend_target = map_trend_to_points(x, tx, ty)
|
||||
model, y_hat = fit_quadratic(x, trend_target, weights=w)
|
||||
r2_actual, auc = compute_metrics(y, y_hat)
|
||||
r2_trend = r2_score(trend_target, y_hat) if len(trend_target) else None
|
||||
report_model(model, r2_actual, auc, r2_trend=r2_trend)
|
||||
|
||||
model = sm.WLS(y, X, weights=wts)
|
||||
res = model.fit(cov_type="HC3") # робастные ошибки
|
||||
out_dir = base_out_dir / config_name / str(y_col).replace("/", "_")
|
||||
plot_quadratic_overlay(
|
||||
cleaned_df,
|
||||
model,
|
||||
y_col=y_col,
|
||||
out_path=out_dir / "quad_regression.png",
|
||||
x_col=x_col,
|
||||
x_max=x_max,
|
||||
y_min=y_min,
|
||||
y_max=y_max,
|
||||
scatter_color=scatter_color,
|
||||
point_size=point_size,
|
||||
alpha=alpha,
|
||||
alpha_min=alpha_min,
|
||||
alpha_max=alpha_max,
|
||||
bins_x=bins_x,
|
||||
bins_y=bins_y,
|
||||
trend_frac=trend_frac,
|
||||
trend_color=trend_color,
|
||||
trend_linewidth=trend_linewidth,
|
||||
trend_method=trend_method,
|
||||
rolling_window=rolling_window,
|
||||
savgol_window=savgol_window,
|
||||
savgol_poly=savgol_poly,
|
||||
)
|
||||
|
||||
b0, b1, b2 = res.params
|
||||
p_b1_two = res.pvalues[1]
|
||||
p_b2_two = res.pvalues[2]
|
||||
return {
|
||||
"config": config_name,
|
||||
"y_col": y_col,
|
||||
"r2": r2_actual,
|
||||
"r2_trend": r2_trend,
|
||||
"auc": auc,
|
||||
"params": {
|
||||
"trend_method": trend_method,
|
||||
"trend_frac": trend_frac,
|
||||
"rolling_window": rolling_window,
|
||||
"savgol_window": savgol_window,
|
||||
"savgol_poly": savgol_poly,
|
||||
"x_max": x_max,
|
||||
"weights_alpha_range": (alpha_min, alpha_max),
|
||||
},
|
||||
"coeffs": model.params.tolist(),
|
||||
"pvalues": model.pvalues.tolist(),
|
||||
}
|
||||
|
||||
# one-sided p-values for directional hypotheses
|
||||
p_b1_pos = (p_b1_two / 2) if (b1 > 0) else (1 - p_b1_two / 2)
|
||||
p_b2_neg = (p_b2_two / 2) if (b2 < 0) else (1 - p_b2_two / 2)
|
||||
|
||||
# turning point (if concave)
|
||||
x_star = None
|
||||
y_star = None
|
||||
if b2 < 0:
|
||||
x_star = -b1 / (2 * b2)
|
||||
y_star = b0 + b1 * x_star + b2 * x_star**2
|
||||
def main() -> None:
|
||||
generate_quadratic_analysis("orders_amt_total")
|
||||
|
||||
# Intersection with cost line: b0 + b1 x + b2 x^2 = c x -> b2 x^2 + (b1 - c) x + b0 = 0
|
||||
x_cross = None
|
||||
roots = np.roots([b2, (b1 - c), b0]) # may be complex
|
||||
roots = [r.real for r in roots if abs(r.imag) < 1e-8]
|
||||
roots_in_range = [r for r in roots if (stats_f["avg_imp_per_day"].min() <= r <= stats_f["avg_imp_per_day"].max())]
|
||||
if roots_in_range:
|
||||
# берём корень ближе к "правой" части (обычно пересечение интереснее там, где начинается невыгодно)
|
||||
x_cross = max(roots_in_range)
|
||||
|
||||
# -----------------------------
|
||||
# Print results + interpretation (по-человечески)
|
||||
# -----------------------------
|
||||
print("\n=== Квадратичная регрессия (WLS, веса = n_clients, SE = HC3) ===")
|
||||
print(res.summary())
|
||||
|
||||
print("\n=== Проверка гипотезы убывающей отдачи / спада ===")
|
||||
print(f"β1 (линейный эффект): {b1:.6f}, двусторонний p={p_b1_two:.4g}, односторонний p(β1>0)={p_b1_pos:.4g}")
|
||||
print(f"β2 (кривизна): {b2:.6f}, двусторонний p={p_b2_two:.4g}, односторонний p(β2<0)={p_b2_neg:.4g}")
|
||||
|
||||
alpha = 0.05
|
||||
support = (b1 > 0) and (b2 < 0) and (p_b1_pos < alpha) and (p_b2_neg < alpha)
|
||||
|
||||
if support:
|
||||
print("\nВывод: данные поддерживают гипотезу нелинейности.")
|
||||
print("Есть статистически значимый рост на малых x (β1>0) и насыщение/спад (β2<0).")
|
||||
else:
|
||||
print("\nВывод: строгого статистического подтверждения по знакам/значимости может не хватить.")
|
||||
print("Но знак коэффициентов и форма кривой всё равно могут быть согласованы с гипотезой.")
|
||||
print("На защите говори аккуратно: 'наблюдается тенденция/согласуется с гипотезой'.")
|
||||
|
||||
if x_star is not None:
|
||||
print(f"\nОценка 'порога насыщения' (вершина параболы): x* = {x_star:.3f} показов/день")
|
||||
print(f"Прогноз среднего числа заказов в x*: y(x*) ≈ {y_star:.3f}")
|
||||
if not (stats_f["avg_imp_per_day"].min() <= x_star <= stats_f["avg_imp_per_day"].max()):
|
||||
print("Внимание: x* вне диапазона наблюдений, интерпретация как 'оптимума' сомнительная.")
|
||||
else:
|
||||
print("\nВершина не считается как максимум: β2 >= 0 (нет выпуклости вниз).")
|
||||
|
||||
if x_cross is not None:
|
||||
y_cross = b0 + b1 * x_cross + b2 * x_cross**2
|
||||
print(f"\nТочка пересечения с линейными расходами (в нормировке c={c:.4f}): x≈{x_cross:.3f}, y≈{y_cross:.3f}")
|
||||
else:
|
||||
print("\nПересечение с линией расходов в выбранной нормировке не найдено (или вне диапазона).")
|
||||
|
||||
# -----------------------------
|
||||
# Plot: points + smooth + quadratic fit + cost + markers
|
||||
# -----------------------------
|
||||
x_grid = np.linspace(stats_f["avg_imp_per_day"].min(), stats_f["avg_imp_per_day"].max(), 300)
|
||||
y_hat = b0 + b1 * x_grid + b2 * x_grid**2
|
||||
cost_hat = c * x_grid
|
||||
|
||||
plt.figure(figsize=(10, 8))
|
||||
|
||||
plt.plot(
|
||||
stats_f["avg_imp_per_day"], stats_f["orders_mean"],
|
||||
marker="o", linestyle="-", linewidth=1, alpha=0.3,
|
||||
label="Среднее число заказов (по точкам)"
|
||||
)
|
||||
|
||||
plt.plot(
|
||||
stats_f["avg_imp_per_day"], stats_f["orders_smooth"],
|
||||
color="red", linewidth=2.2,
|
||||
label="Сглаженный тренд (rolling mean)"
|
||||
)
|
||||
|
||||
plt.plot(
|
||||
x_grid, y_hat,
|
||||
color="blue", linewidth=2.5,
|
||||
label="Квадратичная регрессия (WLS)"
|
||||
)
|
||||
|
||||
plt.plot(
|
||||
x_grid, cost_hat,
|
||||
color="black", linestyle="--", linewidth=2,
|
||||
label="Линейные расходы на показы"
|
||||
)
|
||||
|
||||
if x_star is not None and (stats_f["avg_imp_per_day"].min() <= x_star <= stats_f["avg_imp_per_day"].max()):
|
||||
plt.axvline(x_star, color="blue", linestyle=":", linewidth=2)
|
||||
plt.scatter([x_star], [y_star], color="blue", zorder=5)
|
||||
plt.text(x_star, y_star, f" x*={x_star:.2f}", va="bottom")
|
||||
|
||||
if x_cross is not None:
|
||||
y_cross = b0 + b1 * x_cross + b2 * x_cross**2
|
||||
plt.axvline(x_cross, color="black", linestyle=":", linewidth=2, alpha=0.8)
|
||||
plt.scatter([x_cross], [y_cross], color="black", zorder=5)
|
||||
plt.text(x_cross, y_cross, f" пересечение≈{x_cross:.2f}", va="top")
|
||||
|
||||
plt.xlabel("Среднее число показов в день")
|
||||
plt.ylabel("Среднее число заказов")
|
||||
plt.title("Нелинейный эффект интенсивности коммуникаций: квадратичная регрессия")
|
||||
plt.legend()
|
||||
plt.grid(alpha=0.3)
|
||||
plt.tight_layout()
|
||||
|
||||
out_dir = project_root / "main_hypot"
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
out_path = out_dir / "quad_regression_with_costs.png"
|
||||
plt.savefig(out_path, dpi=150)
|
||||
print(f"\nSaved: {out_path}")
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user