gadem
This commit is contained in:
582
old data/best_model_and_plots.py
Normal file
582
old data/best_model_and_plots.py
Normal file
@@ -0,0 +1,582 @@
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from typing import Tuple
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy.signal import savgol_filter
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
from statsmodels.nonparametric.smoothers_lowess import lowess
|
||||
import numpy as np
|
||||
|
||||
sns.set_theme(style="whitegrid")
|
||||
plt.rcParams["figure.figsize"] = (8, 8)
|
||||
|
||||
project_root = Path(__file__).resolve().parent.parent
|
||||
DB_PATH = project_root / "dataset" / "ds.sqlite"
|
||||
BASE_OUT_DIR = project_root / "main_hypot"
|
||||
|
||||
# Константы данных
|
||||
CATEGORIES = ["ent", "super", "transport", "shopping", "hotel", "avia"]
|
||||
ACTIVE_IMP_COLS = [f"active_imp_{c}" for c in CATEGORIES]
|
||||
PASSIVE_IMP_COLS = [f"passive_imp_{c}" for c in CATEGORIES]
|
||||
ORDER_COLS = [f"orders_amt_{c}" for c in CATEGORIES]
|
||||
|
||||
# Константы визуализации/очистки
|
||||
X_COL = "avg_imp_per_day" # x всегда фиксирован
|
||||
DEFAULT_X_MAX = 18
|
||||
DEFAULT_SCATTER_COLOR = "#2c7bb6"
|
||||
DEFAULT_POINT_SIZE = 20
|
||||
DEFAULT_ALPHA = 0.08
|
||||
DEFAULT_TREND_ALPHA = 0.1
|
||||
DEFAULT_TREND_FRAC = 0.3
|
||||
DEFAULT_TREND_COLOR = "red"
|
||||
DEFAULT_TREND_LINEWIDTH = 2.5
|
||||
DEFAULT_IQR_K = 1.5
|
||||
DEFAULT_Q_LOW = 0.05
|
||||
DEFAULT_Q_HIGH = 0.95
|
||||
DEFAULT_ALPHA_MIN = 0.04
|
||||
DEFAULT_ALPHA_MAX = 0.7
|
||||
DEFAULT_BINS_X = 60
|
||||
DEFAULT_BINS_Y = 60
|
||||
DEFAULT_Y_MIN = -0.5
|
||||
DEFAULT_Y_MAX = 10
|
||||
DEFAULT_TREND_METHOD = "savgol" # options: lowess, rolling, savgol
|
||||
DEFAULT_ROLLING_WINDOW = 200
|
||||
DEFAULT_SAVGOL_WINDOW = 501
|
||||
DEFAULT_SAVGOL_POLY = 2
|
||||
|
||||
|
||||
def safe_divide(numerator: pd.Series, denominator: pd.Series) -> pd.Series:
|
||||
denom = denominator.replace(0, pd.NA)
|
||||
return numerator / denom
|
||||
|
||||
|
||||
def load_client_level(db_path: Path) -> pd.DataFrame:
|
||||
"""Собирает агрегаты по клиентам без зависимостей от eda_utils."""
|
||||
conn = sqlite3.connect(db_path)
|
||||
df = pd.read_sql_query("select * from communications", conn, parse_dates=["business_dt"])
|
||||
conn.close()
|
||||
|
||||
df["imp_total"] = df[ACTIVE_IMP_COLS + PASSIVE_IMP_COLS].sum(axis=1)
|
||||
df["orders_amt_total"] = df[ORDER_COLS].sum(axis=1)
|
||||
|
||||
client = (
|
||||
df.groupby("id")
|
||||
.agg(
|
||||
imp_total=("imp_total", "sum"),
|
||||
orders_amt_total=("orders_amt_total", "sum"),
|
||||
contact_days=("business_dt", "nunique"),
|
||||
)
|
||||
.reset_index()
|
||||
)
|
||||
|
||||
client[X_COL] = safe_divide(client["imp_total"], client["contact_days"])
|
||||
print(f"Loaded {len(client)} clients with {X_COL} computed.")
|
||||
return client
|
||||
|
||||
|
||||
def _bounds(series: pd.Series, q_low: float, q_high: float, iqr_k: float) -> Tuple[float, float]:
|
||||
q1, q3 = series.quantile([q_low, q_high])
|
||||
iqr = q3 - q1
|
||||
return q1 - iqr_k * iqr, q3 + iqr_k * iqr
|
||||
|
||||
|
||||
def remove_outliers(
|
||||
df: pd.DataFrame,
|
||||
y_col: str,
|
||||
x_col: str = X_COL,
|
||||
iqr_k: float = DEFAULT_IQR_K,
|
||||
q_low: float = DEFAULT_Q_LOW,
|
||||
q_high: float = DEFAULT_Q_HIGH,
|
||||
) -> pd.DataFrame:
|
||||
"""Убирает выбросы по IQR отдельно по x и y."""
|
||||
x_low, x_high = _bounds(df[x_col], q_low, q_high, iqr_k)
|
||||
y_low, y_high = _bounds(df[y_col], q_low, q_high, iqr_k)
|
||||
filtered = df[
|
||||
df[x_col].between(max(0, x_low), x_high)
|
||||
& df[y_col].between(max(0, y_low), y_high)
|
||||
].copy()
|
||||
print(f"Outlier cleaning: {len(df)} -> {len(filtered)} points (IQR k={iqr_k}, q=({q_low},{q_high})).")
|
||||
return filtered
|
||||
|
||||
|
||||
def compute_density_alpha(
|
||||
df: pd.DataFrame,
|
||||
x_col: str,
|
||||
y_col: str,
|
||||
x_max: float,
|
||||
*,
|
||||
bins_x: int = DEFAULT_BINS_X,
|
||||
bins_y: int = DEFAULT_BINS_Y,
|
||||
alpha_min: float = DEFAULT_ALPHA_MIN,
|
||||
alpha_max: float = DEFAULT_ALPHA_MAX,
|
||||
y_min: float = DEFAULT_Y_MIN,
|
||||
y_max_limit: float = DEFAULT_Y_MAX,
|
||||
) -> np.ndarray:
|
||||
"""Считает насыщенность цвета как квадратичный скейл по плотности в 2D бинах."""
|
||||
x_vals = df[x_col].to_numpy()
|
||||
y_vals = df[y_col].to_numpy()
|
||||
|
||||
if len(x_vals) == 0:
|
||||
return np.array([])
|
||||
|
||||
x_edges = np.linspace(min(x_vals.min(), 0), x_max, bins_x + 1)
|
||||
y_upper = max(min(y_vals.max(), y_max_limit), 1e-9)
|
||||
y_edges = np.linspace(y_min, y_upper, bins_y + 1)
|
||||
|
||||
x_bins = np.digitize(x_vals, x_edges) - 1
|
||||
y_bins = np.digitize(y_vals, y_edges) - 1
|
||||
|
||||
valid = (
|
||||
(x_bins >= 0) & (x_bins < bins_x) &
|
||||
(y_bins >= 0) & (y_bins < bins_y)
|
||||
)
|
||||
counts = np.zeros((bins_x, bins_y), dtype=int)
|
||||
for xb, yb in zip(x_bins[valid], y_bins[valid]):
|
||||
counts[xb, yb] += 1
|
||||
|
||||
bin_counts = counts[
|
||||
np.clip(x_bins, 0, bins_x - 1),
|
||||
np.clip(y_bins, 0, bins_y - 1),
|
||||
]
|
||||
max_count = bin_counts.max() if len(bin_counts) else 1
|
||||
if max_count == 0:
|
||||
weight = np.zeros_like(bin_counts, dtype=float)
|
||||
else:
|
||||
weight = (bin_counts / max_count) ** np.sqrt(1.5)
|
||||
weight = np.clip(weight, 0, 1)
|
||||
return alpha_min + (alpha_max - alpha_min) * weight
|
||||
|
||||
|
||||
def compute_trend(
|
||||
df: pd.DataFrame,
|
||||
y_col: str,
|
||||
*,
|
||||
x_col: str = X_COL,
|
||||
method: str = DEFAULT_TREND_METHOD,
|
||||
lowess_frac: float = DEFAULT_TREND_FRAC,
|
||||
rolling_window: int = DEFAULT_ROLLING_WINDOW,
|
||||
savgol_window: int = DEFAULT_SAVGOL_WINDOW,
|
||||
savgol_poly: int = DEFAULT_SAVGOL_POLY,
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""Возвращает (x_sorted, trend_y) по выбранному методу."""
|
||||
d = df[[x_col, y_col]].dropna().sort_values(x_col)
|
||||
x_vals = d[x_col].to_numpy()
|
||||
y_vals = d[y_col].to_numpy()
|
||||
|
||||
if len(x_vals) == 0:
|
||||
return np.array([]), np.array([])
|
||||
|
||||
m = method.lower()
|
||||
if m == "lowess":
|
||||
trend = lowess(y_vals, x_vals, frac=lowess_frac, return_sorted=True)
|
||||
return trend[:, 0], trend[:, 1]
|
||||
if m == "rolling":
|
||||
w = max(3, rolling_window)
|
||||
if w % 2 == 0:
|
||||
w += 1
|
||||
y_trend = pd.Series(y_vals).rolling(window=w, center=True, min_periods=1).mean().to_numpy()
|
||||
return x_vals, y_trend
|
||||
if m == "savgol":
|
||||
w = max(5, savgol_window)
|
||||
if w % 2 == 0:
|
||||
w += 1
|
||||
poly = min(savgol_poly, w - 1)
|
||||
y_trend = savgol_filter(y_vals, window_length=w, polyorder=poly, mode="interp")
|
||||
return x_vals, y_trend
|
||||
|
||||
# fallback to lowess
|
||||
trend = lowess(y_vals, x_vals, frac=lowess_frac, return_sorted=True)
|
||||
return trend[:, 0], trend[:, 1]
|
||||
|
||||
|
||||
def filter_x_range(df: pd.DataFrame, x_col: str, x_max: float) -> pd.DataFrame:
|
||||
subset = df[df[x_col] <= x_max].copy()
|
||||
print(f"{len(df)} points; {len(subset)} within x<={x_max}.")
|
||||
return subset
|
||||
|
||||
|
||||
def plot_density_scatter(
|
||||
df: pd.DataFrame,
|
||||
y_col: str,
|
||||
title: str,
|
||||
out_path: Path,
|
||||
*,
|
||||
x_col: str = X_COL,
|
||||
x_max: float = DEFAULT_X_MAX,
|
||||
scatter_color: str = DEFAULT_SCATTER_COLOR,
|
||||
point_size: int = DEFAULT_POINT_SIZE,
|
||||
alpha: float = DEFAULT_ALPHA,
|
||||
alpha_min: float = DEFAULT_ALPHA_MIN,
|
||||
alpha_max: float = DEFAULT_ALPHA_MAX,
|
||||
bins_x: int = DEFAULT_BINS_X,
|
||||
bins_y: int = DEFAULT_BINS_Y,
|
||||
y_min: float = DEFAULT_Y_MIN,
|
||||
y_max: float = DEFAULT_Y_MAX,
|
||||
with_trend: bool = False,
|
||||
trend_method: str = DEFAULT_TREND_METHOD,
|
||||
trend_frac: float = DEFAULT_TREND_FRAC,
|
||||
trend_color: str = DEFAULT_TREND_COLOR,
|
||||
trend_linewidth: float = DEFAULT_TREND_LINEWIDTH,
|
||||
rolling_window: int = DEFAULT_ROLLING_WINDOW,
|
||||
savgol_window: int = DEFAULT_SAVGOL_WINDOW,
|
||||
savgol_poly: int = DEFAULT_SAVGOL_POLY,
|
||||
return_fig: bool = False,
|
||||
) -> None:
|
||||
fig, ax = plt.subplots(figsize=(8, 8))
|
||||
alpha_values = compute_density_alpha(
|
||||
df,
|
||||
x_col=x_col,
|
||||
y_col=y_col,
|
||||
x_max=x_max,
|
||||
bins_x=bins_x,
|
||||
bins_y=bins_y,
|
||||
alpha_min=alpha_min,
|
||||
alpha_max=alpha_max,
|
||||
y_min=y_min,
|
||||
y_max_limit=y_max,
|
||||
)
|
||||
ax.scatter(
|
||||
df[x_col],
|
||||
df[y_col],
|
||||
color=scatter_color,
|
||||
s=point_size,
|
||||
alpha=alpha_values if len(alpha_values) else alpha,
|
||||
linewidths=0,
|
||||
)
|
||||
|
||||
trend_data = None
|
||||
if with_trend:
|
||||
tx, ty = compute_trend(
|
||||
df,
|
||||
y_col=y_col,
|
||||
x_col=x_col,
|
||||
method=trend_method,
|
||||
lowess_frac=trend_frac,
|
||||
rolling_window=rolling_window,
|
||||
savgol_window=savgol_window,
|
||||
savgol_poly=savgol_poly,
|
||||
)
|
||||
if len(tx):
|
||||
ax.plot(tx, ty, color=trend_color, linewidth=trend_linewidth, label=f"{trend_method} тренд")
|
||||
ax.legend()
|
||||
trend_data = (tx, ty)
|
||||
|
||||
ax.set_xlim(0, x_max)
|
||||
ax.set_ylim(y_min, y_max)
|
||||
ax.set_yticks(range(0, int(y_max) + 1, 2))
|
||||
ax.set_xlabel("Среднее число показов в день")
|
||||
ax.set_ylabel(y_col)
|
||||
ax.set_title(title)
|
||||
ax.grid(alpha=0.3)
|
||||
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
fig.tight_layout()
|
||||
fig.savefig(out_path, dpi=150)
|
||||
if return_fig:
|
||||
return fig, ax, trend_data
|
||||
plt.close(fig)
|
||||
print(f"Saved {out_path}")
|
||||
|
||||
|
||||
def plot_raw_scatter(
|
||||
df: pd.DataFrame,
|
||||
y_col: str,
|
||||
out_dir: Path,
|
||||
*,
|
||||
x_col: str = X_COL,
|
||||
x_max: float = DEFAULT_X_MAX,
|
||||
scatter_color: str = DEFAULT_SCATTER_COLOR,
|
||||
point_size: int = DEFAULT_POINT_SIZE,
|
||||
alpha: float = DEFAULT_ALPHA,
|
||||
alpha_min: float = DEFAULT_ALPHA_MIN,
|
||||
alpha_max: float = DEFAULT_ALPHA_MAX,
|
||||
bins_x: int = DEFAULT_BINS_X,
|
||||
bins_y: int = DEFAULT_BINS_Y,
|
||||
y_min: float = DEFAULT_Y_MIN,
|
||||
y_max: float = DEFAULT_Y_MAX,
|
||||
trend_method: str = DEFAULT_TREND_METHOD,
|
||||
trend_frac: float = DEFAULT_TREND_FRAC,
|
||||
trend_color: str = DEFAULT_TREND_COLOR,
|
||||
trend_linewidth: float = DEFAULT_TREND_LINEWIDTH,
|
||||
rolling_window: int = DEFAULT_ROLLING_WINDOW,
|
||||
savgol_window: int = DEFAULT_SAVGOL_WINDOW,
|
||||
savgol_poly: int = DEFAULT_SAVGOL_POLY,
|
||||
) -> None:
|
||||
in_range = filter_x_range(df[[x_col, y_col]].dropna(), x_col, x_max)
|
||||
plot_density_scatter(
|
||||
in_range,
|
||||
y_col=y_col,
|
||||
title=f"Облако: {y_col} vs {x_col} (все клиенты)",
|
||||
out_path=out_dir / "scatter.png",
|
||||
x_col=x_col,
|
||||
x_max=x_max,
|
||||
scatter_color=scatter_color,
|
||||
point_size=point_size,
|
||||
alpha=alpha,
|
||||
alpha_min=alpha_min,
|
||||
alpha_max=alpha_max,
|
||||
bins_x=bins_x,
|
||||
bins_y=bins_y,
|
||||
y_min=y_min,
|
||||
y_max=y_max,
|
||||
trend_method=trend_method,
|
||||
trend_frac=trend_frac,
|
||||
trend_color=trend_color,
|
||||
trend_linewidth=trend_linewidth,
|
||||
rolling_window=rolling_window,
|
||||
savgol_window=savgol_window,
|
||||
savgol_poly=savgol_poly,
|
||||
)
|
||||
|
||||
|
||||
def plot_clean_scatter(
|
||||
df: pd.DataFrame,
|
||||
y_col: str,
|
||||
out_dir: Path,
|
||||
*,
|
||||
x_col: str = X_COL,
|
||||
x_max: float = DEFAULT_X_MAX,
|
||||
scatter_color: str = DEFAULT_SCATTER_COLOR,
|
||||
point_size: int = DEFAULT_POINT_SIZE,
|
||||
alpha: float = DEFAULT_ALPHA,
|
||||
iqr_k: float = DEFAULT_IQR_K,
|
||||
q_low: float = DEFAULT_Q_LOW,
|
||||
q_high: float = DEFAULT_Q_HIGH,
|
||||
alpha_min: float = DEFAULT_ALPHA_MIN,
|
||||
alpha_max: float = DEFAULT_ALPHA_MAX,
|
||||
bins_x: int = DEFAULT_BINS_X,
|
||||
bins_y: int = DEFAULT_BINS_Y,
|
||||
y_min: float = DEFAULT_Y_MIN,
|
||||
y_max: float = DEFAULT_Y_MAX,
|
||||
trend_method: str = DEFAULT_TREND_METHOD,
|
||||
trend_frac: float = DEFAULT_TREND_FRAC,
|
||||
trend_color: str = DEFAULT_TREND_COLOR,
|
||||
trend_linewidth: float = DEFAULT_TREND_LINEWIDTH,
|
||||
rolling_window: int = DEFAULT_ROLLING_WINDOW,
|
||||
savgol_window: int = DEFAULT_SAVGOL_WINDOW,
|
||||
savgol_poly: int = DEFAULT_SAVGOL_POLY,
|
||||
) -> None:
|
||||
in_range = filter_x_range(df[[x_col, y_col]].dropna(), x_col, x_max)
|
||||
cleaned = remove_outliers(
|
||||
in_range,
|
||||
y_col=y_col,
|
||||
x_col=x_col,
|
||||
iqr_k=iqr_k,
|
||||
q_low=q_low,
|
||||
q_high=q_high,
|
||||
)
|
||||
plot_density_scatter(
|
||||
cleaned,
|
||||
y_col=y_col,
|
||||
title=f"Облако без выбросов (IQR) {y_col} vs {x_col}",
|
||||
out_path=out_dir / "scatter_clean.png",
|
||||
x_col=x_col,
|
||||
x_max=x_max,
|
||||
scatter_color=scatter_color,
|
||||
point_size=point_size,
|
||||
alpha=alpha,
|
||||
alpha_min=alpha_min,
|
||||
alpha_max=alpha_max,
|
||||
bins_x=bins_x,
|
||||
bins_y=bins_y,
|
||||
y_min=y_min,
|
||||
y_max=y_max,
|
||||
trend_method=trend_method,
|
||||
trend_frac=trend_frac,
|
||||
trend_color=trend_color,
|
||||
trend_linewidth=trend_linewidth,
|
||||
rolling_window=rolling_window,
|
||||
savgol_window=savgol_window,
|
||||
savgol_poly=savgol_poly,
|
||||
)
|
||||
|
||||
|
||||
def plot_clean_trend_scatter(
|
||||
df: pd.DataFrame,
|
||||
y_col: str,
|
||||
out_dir: Path,
|
||||
*,
|
||||
x_col: str = X_COL,
|
||||
x_max: float = DEFAULT_X_MAX,
|
||||
scatter_color: str = DEFAULT_SCATTER_COLOR,
|
||||
point_size: int = DEFAULT_POINT_SIZE,
|
||||
alpha: float = DEFAULT_TREND_ALPHA,
|
||||
iqr_k: float = DEFAULT_IQR_K,
|
||||
q_low: float = DEFAULT_Q_LOW,
|
||||
q_high: float = DEFAULT_Q_HIGH,
|
||||
trend_frac: float = DEFAULT_TREND_FRAC,
|
||||
trend_color: str = DEFAULT_TREND_COLOR,
|
||||
trend_linewidth: float = DEFAULT_TREND_LINEWIDTH,
|
||||
alpha_min: float = DEFAULT_ALPHA_MIN,
|
||||
alpha_max: float = DEFAULT_ALPHA_MAX,
|
||||
bins_x: int = DEFAULT_BINS_X,
|
||||
bins_y: int = DEFAULT_BINS_Y,
|
||||
y_min: float = DEFAULT_Y_MIN,
|
||||
y_max: float = DEFAULT_Y_MAX,
|
||||
trend_method: str = DEFAULT_TREND_METHOD,
|
||||
rolling_window: int = DEFAULT_ROLLING_WINDOW,
|
||||
savgol_window: int = DEFAULT_SAVGOL_WINDOW,
|
||||
savgol_poly: int = DEFAULT_SAVGOL_POLY,
|
||||
return_components: bool = False,
|
||||
) -> None:
|
||||
in_range = filter_x_range(df[[x_col, y_col]].dropna(), x_col, x_max)
|
||||
cleaned = remove_outliers(
|
||||
in_range,
|
||||
y_col=y_col,
|
||||
x_col=x_col,
|
||||
iqr_k=iqr_k,
|
||||
q_low=q_low,
|
||||
q_high=q_high,
|
||||
)
|
||||
fig_ax = plot_density_scatter(
|
||||
cleaned,
|
||||
y_col=y_col,
|
||||
title=f"Облако без выбросов + тренд {y_col} vs {x_col}",
|
||||
out_path=out_dir / "scatter_trend.png",
|
||||
x_col=x_col,
|
||||
x_max=x_max,
|
||||
scatter_color=scatter_color,
|
||||
point_size=point_size,
|
||||
alpha=alpha,
|
||||
with_trend=True,
|
||||
trend_frac=trend_frac,
|
||||
trend_color=trend_color,
|
||||
trend_linewidth=trend_linewidth,
|
||||
alpha_min=alpha_min,
|
||||
alpha_max=alpha_max,
|
||||
bins_x=bins_x,
|
||||
bins_y=bins_y,
|
||||
y_min=y_min,
|
||||
y_max=y_max,
|
||||
trend_method=trend_method,
|
||||
rolling_window=rolling_window,
|
||||
savgol_window=savgol_window,
|
||||
savgol_poly=savgol_poly,
|
||||
return_fig=return_components,
|
||||
)
|
||||
if return_components:
|
||||
fig, ax, trend_data = fig_ax
|
||||
return fig, ax, cleaned, trend_data
|
||||
|
||||
|
||||
def generate_scatter_set(
|
||||
df: pd.DataFrame,
|
||||
y_col: str,
|
||||
*,
|
||||
base_out_dir: Path = BASE_OUT_DIR,
|
||||
x_col: str = X_COL,
|
||||
x_max: float = DEFAULT_X_MAX,
|
||||
scatter_color: str = DEFAULT_SCATTER_COLOR,
|
||||
point_size: int = DEFAULT_POINT_SIZE,
|
||||
alpha: float = DEFAULT_ALPHA,
|
||||
trend_alpha: float = DEFAULT_TREND_ALPHA,
|
||||
trend_frac: float = DEFAULT_TREND_FRAC,
|
||||
trend_color: str = DEFAULT_TREND_COLOR,
|
||||
trend_linewidth: float = DEFAULT_TREND_LINEWIDTH,
|
||||
iqr_k: float = DEFAULT_IQR_K,
|
||||
q_low: float = DEFAULT_Q_LOW,
|
||||
q_high: float = DEFAULT_Q_HIGH,
|
||||
alpha_min: float = DEFAULT_ALPHA_MIN,
|
||||
alpha_max: float = DEFAULT_ALPHA_MAX,
|
||||
bins_x: int = DEFAULT_BINS_X,
|
||||
bins_y: int = DEFAULT_BINS_Y,
|
||||
y_min: float = DEFAULT_Y_MIN,
|
||||
y_max: float = DEFAULT_Y_MAX,
|
||||
trend_method: str = DEFAULT_TREND_METHOD,
|
||||
rolling_window: int = DEFAULT_ROLLING_WINDOW,
|
||||
savgol_window: int = DEFAULT_SAVGOL_WINDOW,
|
||||
savgol_poly: int = DEFAULT_SAVGOL_POLY,
|
||||
) -> None:
|
||||
"""Генерирует три облака (все, без выбросов, без выбросов + тренд) в папку y_col."""
|
||||
out_dir = base_out_dir / str(y_col).replace("/", "_")
|
||||
plot_raw_scatter(
|
||||
df,
|
||||
y_col=y_col,
|
||||
out_dir=out_dir,
|
||||
x_col=x_col,
|
||||
x_max=x_max,
|
||||
scatter_color=scatter_color,
|
||||
point_size=point_size,
|
||||
alpha=alpha,
|
||||
alpha_min=alpha_min,
|
||||
alpha_max=alpha_max,
|
||||
bins_x=bins_x,
|
||||
bins_y=bins_y,
|
||||
y_min=y_min,
|
||||
y_max=y_max,
|
||||
trend_method=trend_method,
|
||||
trend_frac=trend_frac,
|
||||
trend_color=trend_color,
|
||||
trend_linewidth=trend_linewidth,
|
||||
rolling_window=rolling_window,
|
||||
savgol_window=savgol_window,
|
||||
savgol_poly=savgol_poly,
|
||||
)
|
||||
plot_clean_scatter(
|
||||
df,
|
||||
y_col=y_col,
|
||||
out_dir=out_dir,
|
||||
x_col=x_col,
|
||||
x_max=x_max,
|
||||
scatter_color=scatter_color,
|
||||
point_size=point_size,
|
||||
alpha=alpha,
|
||||
iqr_k=iqr_k,
|
||||
q_low=q_low,
|
||||
q_high=q_high,
|
||||
alpha_min=alpha_min,
|
||||
alpha_max=alpha_max,
|
||||
bins_x=bins_x,
|
||||
bins_y=bins_y,
|
||||
y_min=y_min,
|
||||
y_max=y_max,
|
||||
trend_method=trend_method,
|
||||
trend_frac=trend_frac,
|
||||
trend_color=trend_color,
|
||||
trend_linewidth=trend_linewidth,
|
||||
rolling_window=rolling_window,
|
||||
savgol_window=savgol_window,
|
||||
savgol_poly=savgol_poly,
|
||||
)
|
||||
plot_clean_trend_scatter(
|
||||
df,
|
||||
y_col=y_col,
|
||||
out_dir=out_dir,
|
||||
x_col=x_col,
|
||||
x_max=x_max,
|
||||
scatter_color=scatter_color,
|
||||
point_size=point_size,
|
||||
alpha=trend_alpha,
|
||||
iqr_k=iqr_k,
|
||||
q_low=q_low,
|
||||
q_high=q_high,
|
||||
trend_frac=trend_frac,
|
||||
trend_color=trend_color,
|
||||
trend_linewidth=trend_linewidth,
|
||||
alpha_min=alpha_min,
|
||||
alpha_max=alpha_max,
|
||||
bins_x=bins_x,
|
||||
bins_y=bins_y,
|
||||
y_min=y_min,
|
||||
y_max=y_max,
|
||||
trend_method=trend_method,
|
||||
rolling_window=rolling_window,
|
||||
savgol_window=savgol_window,
|
||||
savgol_poly=savgol_poly,
|
||||
)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
client = load_client_level(DB_PATH)
|
||||
zero_orders = (client["orders_amt_total"] == 0).sum()
|
||||
non_zero = len(client) - zero_orders
|
||||
if len(client):
|
||||
print(f"orders=0: {zero_orders} ({zero_orders / len(client):.2%}); orders>0: {non_zero} ({non_zero / len(client):.2%})")
|
||||
generate_scatter_set(client, y_col="orders_amt_total")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user