{ "cells": [ { "cell_type": "markdown", "id": "9806d9ba", "metadata": {}, "source": [ "# Перегрузка контактами снижает CTR\n", "\n", "**Вопрос:** падает ли CTR/CR при росте средней плотности показов на контактный день?\n", "\n", "**Гипотеза:** высокая плотность показов (спам) уменьшает CTR и вероятность заказа. Проверяем через ML-классификацию высокого CTR." ] }, { "cell_type": "code", "id": "0891ca2a", "metadata": { "execution": { "iopub.execute_input": "2025-12-12T19:11:23.062332Z", "iopub.status.busy": "2025-12-12T19:11:23.062008Z", "iopub.status.idle": "2025-12-12T19:11:29.703049Z", "shell.execute_reply": "2025-12-12T19:11:29.700852Z" }, "ExecuteTime": { "end_time": "2025-12-12T19:27:48.305598Z", "start_time": "2025-12-12T19:27:47.155254Z" } }, "source": [ "import sqlite3\n", "from pathlib import Path\n", "import sys\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import roc_auc_score\n", "\n", "sns.set_theme(style=\"whitegrid\")\n", "plt.rcParams[\"figure.figsize\"] = (10, 5)\n", "\n", "project_root = Path.cwd().resolve()\n", "while not (project_root / \"preanalysis\").exists() and project_root.parent != project_root:\n", " project_root = project_root.parent\n", " project_root = project_root.parent\n", "sys.path.append(str(project_root / \"preanalysis\"))\n", "import eda_utils as eda\n", "\n", "db_path = project_root / \"dataset\" / \"ds.sqlite\"\n", "conn = sqlite3.connect(db_path)\n", "df = pd.read_sql_query(\"select * from communications\", conn, parse_dates=[\"business_dt\"])\n", "conn.close()\n" ], "outputs": [], "execution_count": 1 }, { "cell_type": "code", "id": "9f0e5ca7", "metadata": { "execution": { "iopub.execute_input": "2025-12-12T19:11:29.710292Z", "iopub.status.busy": "2025-12-12T19:11:29.709769Z", "iopub.status.idle": "2025-12-12T19:11:32.169479Z", "shell.execute_reply": "2025-12-12T19:11:32.167853Z" }, "ExecuteTime": { "end_time": "2025-12-12T19:27:48.938590Z", "start_time": "2025-12-12T19:27:48.314667Z" } }, "source": [ "for cols, name in [\n", " (eda.ACTIVE_IMP_COLS, \"active_imp_total\"),\n", " (eda.PASSIVE_IMP_COLS, \"passive_imp_total\"),\n", " (eda.ACTIVE_CLICK_COLS, \"active_click_total\"),\n", " (eda.PASSIVE_CLICK_COLS, \"passive_click_total\"),\n", " (eda.ORDER_COLS, \"orders_amt_total\"),\n", "]:\n", " df[name] = df[cols].sum(axis=1)\n", "\n", "df[\"imp_total\"] = df[\"active_imp_total\"] + df[\"passive_imp_total\"]\n", "df[\"click_total\"] = df[\"active_click_total\"] + df[\"passive_click_total\"]\n", "\n", "client = df.groupby(\"id\").agg(\n", " {\n", " \"imp_total\": \"sum\",\n", " \"click_total\": \"sum\",\n", " \"orders_amt_total\": \"sum\",\n", " \"business_dt\": \"nunique\",\n", " \"age\": \"median\",\n", " \"gender_cd\": lambda s: s.mode().iat[0],\n", " \"device_platform_cd\": lambda s: s.mode().iat[0],\n", " }\n", ").rename(columns={\"business_dt\": \"contact_days\"})\n", "\n", "client[\"ctr_all\"] = eda.safe_divide(client[\"click_total\"], client[\"imp_total\"])\n", "client[\"cr_click2order\"] = eda.safe_divide(client[\"orders_amt_total\"], client[\"click_total\"])\n", "client[\"avg_imp_per_day\"] = eda.safe_divide(client[\"imp_total\"], client[\"contact_days\"])\n", "client.head()\n" ], "outputs": [ { "data": { "text/plain": [ " imp_total click_total orders_amt_total contact_days age gender_cd \\\n", "id \n", "1 68.0 17.0 0 13 58.0 M \n", "2 116.0 23.0 3 15 54.0 M \n", "3 293.0 37.0 2 31 70.0 F \n", "4 57.0 15.0 0 12 43.0 F \n", "5 43.0 16.0 1 10 46.0 M \n", "\n", " device_platform_cd ctr_all cr_click2order avg_imp_per_day \n", "id \n", "1 Android 0.250000 0.000000 5.230769 \n", "2 Android 0.198276 0.130435 7.733333 \n", "3 Android 0.126280 0.054054 9.451613 \n", "4 Android 0.263158 0.000000 4.750000 \n", "5 Android 0.372093 0.062500 4.300000 " ], "text/html": [ "
| \n", " | imp_total | \n", "click_total | \n", "orders_amt_total | \n", "contact_days | \n", "age | \n", "gender_cd | \n", "device_platform_cd | \n", "ctr_all | \n", "cr_click2order | \n", "avg_imp_per_day | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 1 | \n", "68.0 | \n", "17.0 | \n", "0 | \n", "13 | \n", "58.0 | \n", "M | \n", "Android | \n", "0.250000 | \n", "0.000000 | \n", "5.230769 | \n", "
| 2 | \n", "116.0 | \n", "23.0 | \n", "3 | \n", "15 | \n", "54.0 | \n", "M | \n", "Android | \n", "0.198276 | \n", "0.130435 | \n", "7.733333 | \n", "
| 3 | \n", "293.0 | \n", "37.0 | \n", "2 | \n", "31 | \n", "70.0 | \n", "F | \n", "Android | \n", "0.126280 | \n", "0.054054 | \n", "9.451613 | \n", "
| 4 | \n", "57.0 | \n", "15.0 | \n", "0 | \n", "12 | \n", "43.0 | \n", "F | \n", "Android | \n", "0.263158 | \n", "0.000000 | \n", "4.750000 | \n", "
| 5 | \n", "43.0 | \n", "16.0 | \n", "1 | \n", "10 | \n", "46.0 | \n", "M | \n", "Android | \n", "0.372093 | \n", "0.062500 | \n", "4.300000 | \n", "