{ "cells": [ { "cell_type": "markdown", "id": "34468500", "metadata": {}, "source": [ "# Доля пассивных показов и заказы\n", "\n", "**Вопрос:** повышает ли высокая доля пассивных показов вероятность заказа при контроле объёма коммуникаций?\n", "\n", "**Гипотеза:** большая доля пассивных показов связана с большей вероятностью заказа (проверяем ML)." ] }, { "cell_type": "code", "id": "46fb7ac5", "metadata": { "execution": { "iopub.execute_input": "2025-12-12T19:11:43.639846Z", "iopub.status.busy": "2025-12-12T19:11:43.638998Z", "iopub.status.idle": "2025-12-12T19:11:50.215868Z", "shell.execute_reply": "2025-12-12T19:11:50.213723Z" }, "ExecuteTime": { "end_time": "2025-12-12T19:27:46.168843Z", "start_time": "2025-12-12T19:27:44.987935Z" } }, "source": [ "import sqlite3\n", "from pathlib import Path\n", "import sys\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", "from sklearn.compose import ColumnTransformer\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import roc_auc_score\n", "\n", "sns.set_theme(style=\"whitegrid\")\n", "plt.rcParams[\"figure.figsize\"] = (10, 5)\n", "\n", "project_root = Path.cwd().resolve()\n", "while not (project_root / \"preanalysis\").exists() and project_root.parent != project_root:\n", " project_root = project_root.parent\n", " project_root = project_root.parent\n", "sys.path.append(str(project_root / \"preanalysis\"))\n", "import eda_utils as eda\n", "\n", "db_path = project_root / \"dataset\" / \"ds.sqlite\"\n", "conn = sqlite3.connect(db_path)\n", "df = pd.read_sql_query(\"select * from communications\", conn, parse_dates=[\"business_dt\"])\n", "conn.close()\n" ], "outputs": [], "execution_count": 1 }, { "cell_type": "code", "id": "73842cf6", "metadata": { "execution": { "iopub.execute_input": "2025-12-12T19:11:50.222842Z", "iopub.status.busy": "2025-12-12T19:11:50.222356Z", "iopub.status.idle": "2025-12-12T19:11:52.672337Z", "shell.execute_reply": "2025-12-12T19:11:52.670490Z" }, "ExecuteTime": { "end_time": "2025-12-12T19:27:46.794213Z", "start_time": "2025-12-12T19:27:46.179705Z" } }, "source": [ "for cols, name in [\n", " (eda.ACTIVE_IMP_COLS, \"active_imp_total\"),\n", " (eda.PASSIVE_IMP_COLS, \"passive_imp_total\"),\n", " (eda.ACTIVE_CLICK_COLS, \"active_click_total\"),\n", " (eda.PASSIVE_CLICK_COLS, \"passive_click_total\"),\n", " (eda.ORDER_COLS, \"orders_amt_total\"),\n", "]:\n", " df[name] = df[cols].sum(axis=1)\n", "\n", "df[\"imp_total\"] = df[\"active_imp_total\"] + df[\"passive_imp_total\"]\n", "df[\"click_total\"] = df[\"active_click_total\"] + df[\"passive_click_total\"]\n", "\n", "client = df.groupby(\"id\").agg(\n", " {\n", " \"active_imp_total\": \"sum\",\n", " \"passive_imp_total\": \"sum\",\n", " \"active_click_total\": \"sum\",\n", " \"passive_click_total\": \"sum\",\n", " \"orders_amt_total\": \"sum\",\n", " \"imp_total\": \"sum\",\n", " \"click_total\": \"sum\",\n", " \"age\": \"median\",\n", " \"gender_cd\": lambda s: s.mode().iat[0],\n", " \"device_platform_cd\": lambda s: s.mode().iat[0],\n", " }\n", ")\n", "\n", "client[\"passive_share\"] = eda.safe_divide(client[\"passive_imp_total\"], client[\"imp_total\"])\n", "client[\"ctr_all\"] = eda.safe_divide(client[\"click_total\"], client[\"imp_total\"])\n", "client[\"has_order\"] = (client[\"orders_amt_total\"] > 0).astype(int)\n", "client.head()\n" ], "outputs": [ { "data": { "text/plain": [ " active_imp_total passive_imp_total active_click_total \\\n", "id \n", "1 33.0 35.0 14.0 \n", "2 27.0 89.0 19.0 \n", "3 57.0 236.0 37.0 \n", "4 20.0 37.0 14.0 \n", "5 23.0 20.0 13.0 \n", "\n", " passive_click_total orders_amt_total imp_total click_total age \\\n", "id \n", "1 3.0 0 68.0 17.0 58.0 \n", "2 4.0 3 116.0 23.0 54.0 \n", "3 0.0 2 293.0 37.0 70.0 \n", "4 1.0 0 57.0 15.0 43.0 \n", "5 3.0 1 43.0 16.0 46.0 \n", "\n", " gender_cd device_platform_cd passive_share ctr_all has_order \n", "id \n", "1 M Android 0.514706 0.250000 0 \n", "2 M Android 0.767241 0.198276 1 \n", "3 F Android 0.805461 0.126280 1 \n", "4 F Android 0.649123 0.263158 0 \n", "5 M Android 0.465116 0.372093 1 " ], "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
active_imp_totalpassive_imp_totalactive_click_totalpassive_click_totalorders_amt_totalimp_totalclick_totalagegender_cddevice_platform_cdpassive_sharectr_allhas_order
id
133.035.014.03.0068.017.058.0MAndroid0.5147060.2500000
227.089.019.04.03116.023.054.0MAndroid0.7672410.1982761
357.0236.037.00.02293.037.070.0FAndroid0.8054610.1262801
420.037.014.01.0057.015.043.0FAndroid0.6491230.2631580
523.020.013.03.0143.016.046.0MAndroid0.4651160.3720931
\n", "
" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 2 }, { "cell_type": "markdown", "id": "98ac09e6", "metadata": {}, "source": [ "## Визуализация: заказы vs доля пассивных показов" ] }, { "cell_type": "code", "id": "35bfe71d", "metadata": { "execution": { "iopub.execute_input": "2025-12-12T19:11:52.678022Z", "iopub.status.busy": "2025-12-12T19:11:52.677564Z", "iopub.status.idle": "2025-12-12T19:11:52.998699Z", "shell.execute_reply": "2025-12-12T19:11:52.997056Z" }, "ExecuteTime": { "end_time": "2025-12-12T19:27:46.985756Z", "start_time": "2025-12-12T19:27:46.877380Z" } }, "source": [ "bins = pd.qcut(client[\"passive_share\"], 8, duplicates=\"drop\")\n", "order_rate = client.groupby(bins)[\"has_order\"].mean().reset_index()\n", "order_rate[\"passive_share\"] = order_rate[\"passive_share\"].astype(str)\n", "plt.figure(figsize=(12, 4))\n", "sns.lineplot(data=order_rate, x=\"passive_share\", y=\"has_order\", marker=\"o\")\n", "plt.xticks(rotation=40)\n", "plt.title(\"Доля клиентов с заказом vs доля пассивных показов\")\n", "plt.tight_layout()\n", "plt.show()\n" ], "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/mx/y1qcnthj1154ngqj00r8gz480000gn/T/ipykernel_85284/3960648772.py:2: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", " order_rate = client.groupby(bins)[\"has_order\"].mean().reset_index()\n" ] }, { "data": { "text/plain": [ "
" ], "image/png": "" }, "metadata": {}, "output_type": "display_data", "jetTransient": { "display_id": null } } ], "execution_count": 3 }, { "cell_type": "markdown", "id": "6def67b9", "metadata": {}, "source": [ "## ML-модель: влияние доли пассивных показов на заказ\n", "Target: `has_order`. Фичи: объёмы актив/пассив, клики, возраст, пол, платформа, пассивная доля." ] }, { "cell_type": "code", "id": "ae61b923", "metadata": { "execution": { "iopub.execute_input": "2025-12-12T19:11:53.004801Z", "iopub.status.busy": "2025-12-12T19:11:53.004396Z", "iopub.status.idle": "2025-12-12T19:11:53.143675Z", "shell.execute_reply": "2025-12-12T19:11:53.141866Z" }, "ExecuteTime": { "end_time": "2025-12-12T19:27:47.045615Z", "start_time": "2025-12-12T19:27:47.013172Z" } }, "source": [ "X = client[[\n", " \"active_imp_total\",\n", " \"passive_imp_total\",\n", " \"active_click_total\",\n", " \"passive_click_total\",\n", " \"passive_share\",\n", " \"age\",\n", " \"gender_cd\",\n", " \"device_platform_cd\",\n", "]]\n", "X = X.copy()\n", "X[\"gender_cd\"] = eda.normalize_gender(X[\"gender_cd\"])\n", "X[\"device_platform_cd\"] = eda.normalize_device(X[\"device_platform_cd\"])\n", "y = client[\"has_order\"]\n", "\n", "numeric_cols = [\"active_imp_total\", \"passive_imp_total\", \"active_click_total\", \"passive_click_total\", \"passive_share\", \"age\"]\n", "cat_cols = [\"gender_cd\", \"device_platform_cd\"]\n", "\n", "preprocess = ColumnTransformer(\n", " [\n", " (\"num\", Pipeline([(\"scaler\", StandardScaler())]), numeric_cols),\n", " (\"cat\", OneHotEncoder(handle_unknown=\"ignore\"), cat_cols),\n", " ]\n", ")\n", "\n", "model = Pipeline([(\"pre\", preprocess), (\"clf\", LogisticRegression(max_iter=1000))])\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n", "model.fit(X_train, y_train)\n", "proba = model.predict_proba(X_test)[:, 1]\n", "auc = roc_auc_score(y_test, proba)\n", "coef = model.named_steps[\"clf\"].coef_[0]\n", "features = model.named_steps[\"pre\"].get_feature_names_out()\n", "coef_series = pd.Series(coef, index=features).sort_values(key=abs, ascending=False)\n", "auc, coef_series.head(10)\n" ], "outputs": [ { "data": { "text/plain": [ "(0.6804173758429694,\n", " num__passive_click_total 0.638861\n", " num__passive_share 0.303223\n", " num__active_imp_total 0.216964\n", " cat__device_platform_cd_Android 0.186635\n", " num__active_click_total -0.150704\n", " cat__gender_cd_M 0.130234\n", " cat__device_platform_cd_iPadOS -0.105558\n", " num__passive_imp_total -0.087140\n", " num__age -0.072639\n", " cat__device_platform_cd_iOS 0.038500\n", " dtype: float64)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 4 }, { "cell_type": "markdown", "id": "7df5ccb7", "metadata": {}, "source": [ "## Вывод по гипотезе\n", "- Линейный рост доли клиентов с заказом при увеличении `passive_share`.\n", "- В модели коэффициент при `passive_share` положительный и по модулю в топ‑фичах; AUC ~0.68. Гипотеза подтверждается: высокая доля пассивных показов ассоциирована с большей вероятностью заказа при контроле объёма и кликов." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.5" } }, "nbformat": 4, "nbformat_minor": 5 }