You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
DroneDetector/logs/analysis.ipynb

674 lines
22 KiB
Plaintext

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "markdown",
"id": "215a6c3a",
"metadata": {},
"source": [
"# NN inference analysis\n",
"\n",
"Анализ CSV с результатами инференса: доля класса `drone`, частоты срабатываний, уверенность модели и интервалы между `drone`-классификациями."
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "4e8cff32",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CSV path: /home/sibscience-4/from_ssh/DroneDetector/logs/nn_results_live_6gb.csv\n",
"Rows: 27258\n",
"Time range: 2026-05-04 17:35:21.019627763+07:00 -> 2026-05-05 12:17:19.369858371+07:00\n",
"Missing freq rows: 0\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>docker_timestamp</th>\n",
" <th>event_time_iso</th>\n",
" <th>event_time_epoch</th>\n",
" <th>freq</th>\n",
" <th>model_id</th>\n",
" <th>model_type</th>\n",
" <th>prediction</th>\n",
" <th>probability</th>\n",
" <th>ts</th>\n",
" <th>local_time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2026-05-04T10:35:21.019627763Z</td>\n",
" <td>2026-05-04T17:35:21+07:00</td>\n",
" <td>1.777891e+09</td>\n",
" <td>2400</td>\n",
" <td>2</td>\n",
" <td>ensemble_2400_v44</td>\n",
" <td>drone</td>\n",
" <td>0.99</td>\n",
" <td>2026-05-04 10:35:21.019627763+00:00</td>\n",
" <td>2026-05-04 17:35:21.019627763+07:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2026-05-04T10:35:21.019631281Z</td>\n",
" <td>2026-05-04T17:35:21+07:00</td>\n",
" <td>1.777891e+09</td>\n",
" <td>1200</td>\n",
" <td>1</td>\n",
" <td>ensemble_1200_v44</td>\n",
" <td>noise</td>\n",
" <td>1.00</td>\n",
" <td>2026-05-04 10:35:21.019631281+00:00</td>\n",
" <td>2026-05-04 17:35:21.019631281+07:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2026-05-04T10:35:27.048188525Z</td>\n",
" <td>2026-05-04T17:35:27+07:00</td>\n",
" <td>1.777891e+09</td>\n",
" <td>2400</td>\n",
" <td>2</td>\n",
" <td>ensemble_2400_v44</td>\n",
" <td>drone</td>\n",
" <td>0.99</td>\n",
" <td>2026-05-04 10:35:27.048188525+00:00</td>\n",
" <td>2026-05-04 17:35:27.048188525+07:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2026-05-04T10:35:29.238925690Z</td>\n",
" <td>2026-05-04T17:35:29+07:00</td>\n",
" <td>1.777891e+09</td>\n",
" <td>1200</td>\n",
" <td>1</td>\n",
" <td>ensemble_1200_v44</td>\n",
" <td>noise</td>\n",
" <td>1.00</td>\n",
" <td>2026-05-04 10:35:29.238925690+00:00</td>\n",
" <td>2026-05-04 17:35:29.238925690+07:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2026-05-04T10:35:32.842234116Z</td>\n",
" <td>2026-05-04T17:35:32+07:00</td>\n",
" <td>1.777891e+09</td>\n",
" <td>2400</td>\n",
" <td>2</td>\n",
" <td>ensemble_2400_v44</td>\n",
" <td>drone</td>\n",
" <td>0.92</td>\n",
" <td>2026-05-04 10:35:32.842234116+00:00</td>\n",
" <td>2026-05-04 17:35:32.842234116+07:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" docker_timestamp event_time_iso \\\n",
"0 2026-05-04T10:35:21.019627763Z 2026-05-04T17:35:21+07:00 \n",
"1 2026-05-04T10:35:21.019631281Z 2026-05-04T17:35:21+07:00 \n",
"2 2026-05-04T10:35:27.048188525Z 2026-05-04T17:35:27+07:00 \n",
"3 2026-05-04T10:35:29.238925690Z 2026-05-04T17:35:29+07:00 \n",
"4 2026-05-04T10:35:32.842234116Z 2026-05-04T17:35:32+07:00 \n",
"\n",
" event_time_epoch freq model_id model_type prediction \\\n",
"0 1.777891e+09 2400 2 ensemble_2400_v44 drone \n",
"1 1.777891e+09 1200 1 ensemble_1200_v44 noise \n",
"2 1.777891e+09 2400 2 ensemble_2400_v44 drone \n",
"3 1.777891e+09 1200 1 ensemble_1200_v44 noise \n",
"4 1.777891e+09 2400 2 ensemble_2400_v44 drone \n",
"\n",
" probability ts \\\n",
"0 0.99 2026-05-04 10:35:21.019627763+00:00 \n",
"1 1.00 2026-05-04 10:35:21.019631281+00:00 \n",
"2 0.99 2026-05-04 10:35:27.048188525+00:00 \n",
"3 1.00 2026-05-04 10:35:29.238925690+00:00 \n",
"4 0.92 2026-05-04 10:35:32.842234116+00:00 \n",
"\n",
" local_time \n",
"0 2026-05-04 17:35:21.019627763+07:00 \n",
"1 2026-05-04 17:35:21.019631281+07:00 \n",
"2 2026-05-04 17:35:27.048188525+07:00 \n",
"3 2026-05-04 17:35:29.238925690+07:00 \n",
"4 2026-05-04 17:35:32.842234116+07:00 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import os\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"\n",
"# Location of the inference log. Set the NN_RESULTS_CSV environment variable to\n",
"# analyse a different file; when it is unset or empty the original path is used,\n",
"# so existing runs behave exactly as before.\n",
"DEFAULT_CSV_PATH = '/home/sibscience-4/from_ssh/DroneDetector/logs/nn_results_live_6gb.csv'\n",
"csv_path = Path(os.environ.get('NN_RESULTS_CSV') or DEFAULT_CSV_PATH).expanduser()\n",
"df = pd.read_csv(csv_path)\n",
"\n",
"# Parse the docker_timestamp column as UTC and derive an Asia/Novosibirsk view of it.\n",
"df['ts'] = pd.to_datetime(df['docker_timestamp'], utc=True)\n",
"df['local_time'] = df['ts'].dt.tz_convert('Asia/Novosibirsk')\n",
"# errors='coerce' turns unparseable values into missing ones; the nullable Int64\n",
"# dtype keeps freq integral even when some rows are missing.\n",
"df['freq'] = pd.to_numeric(df['freq'], errors='coerce').astype('Int64')\n",
"df['probability'] = pd.to_numeric(df['probability'], errors='coerce')\n",
"# Stable sort: rows that share a timestamp keep the order they had in the CSV.\n",
"df = df.sort_values('ts', kind='stable').reset_index(drop=True)\n",
"\n",
"first_seen = df['local_time'].min()\n",
"last_seen = df['local_time'].max()\n",
"missing_freq_rows = df['freq'].isna().sum()\n",
"\n",
"print(f'CSV path: {csv_path}')\n",
"print(f'Rows: {len(df)}')\n",
"print(f'Time range: {first_seen} -> {last_seen}')\n",
"print(f'Missing freq rows: {missing_freq_rows}')\n",
"display(df.head())"
]
},
{
"cell_type": "markdown",
"id": "0fcc6dd6",
"metadata": {},
"source": [
"## Общая сводка по частотам и классам"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "7bf0fc3f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>freq</th>\n",
" <th>prediction</th>\n",
" <th>count</th>\n",
" <th>avg_probability</th>\n",
" <th>min_probability</th>\n",
" <th>max_probability</th>\n",
" <th>freq_total</th>\n",
" <th>class_rate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1200</td>\n",
" <td>drone</td>\n",
" <td>3</td>\n",
" <td>0.840000</td>\n",
" <td>0.78</td>\n",
" <td>0.91</td>\n",
" <td>13632</td>\n",
" <td>0.000220</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1200</td>\n",
" <td>noise</td>\n",
" <td>13629</td>\n",
" <td>0.997436</td>\n",
" <td>0.91</td>\n",
" <td>1.00</td>\n",
" <td>13632</td>\n",
" <td>0.999780</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2400</td>\n",
" <td>drone</td>\n",
" <td>11921</td>\n",
" <td>0.868013</td>\n",
" <td>0.50</td>\n",
" <td>1.00</td>\n",
" <td>13626</td>\n",
" <td>0.874872</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2400</td>\n",
" <td>noise</td>\n",
" <td>1705</td>\n",
" <td>0.649185</td>\n",
" <td>0.50</td>\n",
" <td>1.00</td>\n",
" <td>13626</td>\n",
" <td>0.125128</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" freq prediction count avg_probability min_probability max_probability \\\n",
"0 1200 drone 3 0.840000 0.78 0.91 \n",
"1 1200 noise 13629 0.997436 0.91 1.00 \n",
"2 2400 drone 11921 0.868013 0.50 1.00 \n",
"3 2400 noise 1705 0.649185 0.50 1.00 \n",
"\n",
" freq_total class_rate \n",
"0 13632 0.000220 \n",
"1 13632 0.999780 \n",
"2 13626 0.874872 \n",
"3 13626 0.125128 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Row count and probability statistics for every (freq, prediction) pair.\n",
"probability_aggs = {\n",
"    'count': ('prediction', 'size'),\n",
"    'avg_probability': ('probability', 'mean'),\n",
"    'min_probability': ('probability', 'min'),\n",
"    'max_probability': ('probability', 'max'),\n",
"}\n",
"class_summary = df.groupby(['freq', 'prediction'], dropna=False).agg(**probability_aggs).reset_index()\n",
"\n",
"# Total number of rows per frequency: the denominator of class_rate.\n",
"freq_total = df.groupby('freq', dropna=False).size().reset_index(name='freq_total')\n",
"\n",
"class_summary = pd.merge(class_summary, freq_total, on='freq', how='left')\n",
"class_summary = class_summary.assign(\n",
"    class_rate=class_summary['count'] / class_summary['freq_total']\n",
")\n",
"class_summary = class_summary.sort_values(by=['freq', 'prediction'], ignore_index=True)\n",
"\n",
"display(class_summary)"
]
},
{
"cell_type": "markdown",
"id": "039fcfa6",
"metadata": {},
"source": [
"## Статистика по классу drone"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "c1ed2bc4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>freq</th>\n",
" <th>total_inferences</th>\n",
" <th>drone_count</th>\n",
" <th>avg_drone_probability</th>\n",
" <th>median_drone_probability</th>\n",
" <th>min_drone_probability</th>\n",
" <th>max_drone_probability</th>\n",
" <th>first_drone_time</th>\n",
" <th>last_drone_time</th>\n",
" <th>drone_rate</th>\n",
" <th>dataset_duration_min</th>\n",
" <th>drone_per_min</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1200</td>\n",
" <td>13632</td>\n",
" <td>3</td>\n",
" <td>0.840000</td>\n",
" <td>0.83</td>\n",
" <td>0.78</td>\n",
" <td>0.91</td>\n",
" <td>2026-05-05 03:18:55.374394280+07:00</td>\n",
" <td>2026-05-05 08:39:23.676669045+07:00</td>\n",
" <td>0.000220</td>\n",
" <td>1121.972504</td>\n",
" <td>0.002674</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2400</td>\n",
" <td>13626</td>\n",
" <td>11921</td>\n",
" <td>0.868013</td>\n",
" <td>0.93</td>\n",
" <td>0.50</td>\n",
" <td>1.00</td>\n",
" <td>2026-05-04 17:35:21.019627763+07:00</td>\n",
" <td>2026-05-05 12:17:19.369858371+07:00</td>\n",
" <td>0.874872</td>\n",
" <td>1121.972504</td>\n",
" <td>10.625038</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" freq total_inferences drone_count avg_drone_probability \\\n",
"0 1200 13632 3 0.840000 \n",
"1 2400 13626 11921 0.868013 \n",
"\n",
" median_drone_probability min_drone_probability max_drone_probability \\\n",
"0 0.83 0.78 0.91 \n",
"1 0.93 0.50 1.00 \n",
"\n",
" first_drone_time last_drone_time \\\n",
"0 2026-05-05 03:18:55.374394280+07:00 2026-05-05 08:39:23.676669045+07:00 \n",
"1 2026-05-04 17:35:21.019627763+07:00 2026-05-05 12:17:19.369858371+07:00 \n",
"\n",
" drone_rate dataset_duration_min drone_per_min \n",
"0 0.000220 1121.972504 0.002674 \n",
"1 0.874872 1121.972504 10.625038 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Rows classified as drone; the cells below reuse this frame.\n",
"is_drone = df['prediction'] == 'drone'\n",
"drone = df.loc[is_drone].copy()\n",
"\n",
"# Per-frequency totals over all rows, and statistics over drone rows only.\n",
"total_by_freq = df.groupby('freq', dropna=False).size().rename('total_inferences')\n",
"drone_aggs = {\n",
"    'drone_count': ('prediction', 'size'),\n",
"    'avg_drone_probability': ('probability', 'mean'),\n",
"    'median_drone_probability': ('probability', 'median'),\n",
"    'min_drone_probability': ('probability', 'min'),\n",
"    'max_drone_probability': ('probability', 'max'),\n",
"    'first_drone_time': ('local_time', 'min'),\n",
"    'last_drone_time': ('local_time', 'max'),\n",
"}\n",
"drone_by_freq = drone.groupby('freq', dropna=False).agg(**drone_aggs)\n",
"\n",
"# Left join keeps frequencies without any drone row; their count becomes 0.\n",
"drone_stats = total_by_freq.to_frame().join(drone_by_freq, how='left')\n",
"drone_stats['drone_count'] = drone_stats['drone_count'].fillna(0).astype(int)\n",
"drone_stats['drone_rate'] = drone_stats['drone_count'].div(drone_stats['total_inferences'])\n",
"\n",
"# Time span of the whole dataset in minutes, shared by every frequency.\n",
"duration_min = (df['ts'].max() - df['ts'].min()).total_seconds() / 60 if len(df) > 1 else 0\n",
"\n",
"drone_stats['dataset_duration_min'] = duration_min\n",
"if duration_min > 0:\n",
"    drone_stats['drone_per_min'] = drone_stats['drone_count'] / duration_min\n",
"else:\n",
"    # A zero-length dataset has no meaningful rate.\n",
"    drone_stats['drone_per_min'] = 0\n",
"drone_stats = drone_stats.reset_index().sort_values('freq', ignore_index=True)\n",
"\n",
"display(drone_stats)"
]
},
{
"cell_type": "markdown",
"id": "56ae5e2b",
"metadata": {},
"source": [
"## Интервалы между drone-классификациями"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "0c43eb07",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>freq</th>\n",
" <th>interval_count</th>\n",
" <th>avg_interval_sec</th>\n",
" <th>median_interval_sec</th>\n",
" <th>min_interval_sec</th>\n",
" <th>max_interval_sec</th>\n",
" <th>p90_interval_sec</th>\n",
" <th>p95_interval_sec</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1200</td>\n",
" <td>2</td>\n",
" <td>9614.151137</td>\n",
" <td>9614.151137</td>\n",
" <td>619.196112</td>\n",
" <td>18609.106163</td>\n",
" <td>16810.115158</td>\n",
" <td>17709.610661</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2400</td>\n",
" <td>11920</td>\n",
" <td>5.647513</td>\n",
" <td>4.974157</td>\n",
" <td>0.000000</td>\n",
" <td>1210.107950</td>\n",
" <td>9.203728</td>\n",
" <td>10.079097</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" freq interval_count avg_interval_sec median_interval_sec \\\n",
"0 1200 2 9614.151137 9614.151137 \n",
"1 2400 11920 5.647513 4.974157 \n",
"\n",
" min_interval_sec max_interval_sec p90_interval_sec p95_interval_sec \n",
"0 619.196112 18609.106163 16810.115158 17709.610661 \n",
"1 0.000000 1210.107950 9.203728 10.079097 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"if drone.empty:\n",
"    print('No drone predictions found')\n",
"    drone_interval_stats = pd.DataFrame()\n",
"else:\n",
"    # Order drone rows by time within each frequency so that diff() gives the gap\n",
"    # to the previous drone classification on the same frequency.\n",
"    drone = drone.sort_values(['freq', 'ts']).copy()\n",
"    # dropna=False matches the aggregation below: rows with a missing freq form\n",
"    # their own group instead of silently receiving NaN intervals.\n",
"    drone['dt_drone_freq_sec'] = (\n",
"        drone.groupby('freq', dropna=False)['ts'].diff().dt.total_seconds()\n",
"    )\n",
"    # The first row of every group has no predecessor, so its interval is NaN and\n",
"    # 'count' (which skips NaN) reports one interval fewer than there are rows.\n",
"    drone_interval_stats = (\n",
"        drone.groupby('freq', dropna=False)['dt_drone_freq_sec']\n",
"        .agg(\n",
"            interval_count='count',\n",
"            avg_interval_sec='mean',\n",
"            median_interval_sec='median',\n",
"            min_interval_sec='min',\n",
"            max_interval_sec='max',\n",
"            p90_interval_sec=lambda s: s.quantile(0.90),\n",
"            p95_interval_sec=lambda s: s.quantile(0.95),\n",
"        )\n",
"        .reset_index()\n",
"    )\n",
"    display(drone_interval_stats)"
]
},
{
"cell_type": "markdown",
"id": "2ecf96f0",
"metadata": {},
"source": [
"## Drone-события"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff14a339",
"metadata": {},
"outputs": [],
"source": [
"# Columns shown in the chronological listing of drone classifications.\n",
"event_columns = [\n",
"    'local_time',\n",
"    'docker_timestamp',\n",
"    'freq',\n",
"    'model_id',\n",
"    'model_type',\n",
"    'prediction',\n",
"    'probability',\n",
"]\n",
"drone_events = drone.loc[:, event_columns].sort_values(by='local_time', ignore_index=True)\n",
"\n",
"# Show the 50 earliest and the 50 latest events.\n",
"for preview in (drone_events.head(50), drone_events.tail(50)):\n",
"    display(preview)"
]
},
{
"cell_type": "markdown",
"id": "376b84d0",
"metadata": {},
"source": [
"## Частота drone-классификаций по минутам"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5f82f5c1",
"metadata": {},
"outputs": [],
"source": [
"if drone.empty:\n",
"    print('No drone predictions found')\n",
"else:\n",
"    # Count drone classifications per frequency in one-minute bins of local time.\n",
"    drone_per_minute = (\n",
"        drone.set_index('local_time')\n",
"        .groupby('freq')\n",
"        .resample('1min')\n",
"        .size()\n",
"        .rename('drone_count')\n",
"        .reset_index()\n",
"    )\n",
"    display(drone_per_minute.tail(100))\n",
"\n",
"    # Wide view: one row per minute, one column per frequency.\n",
"    # aggfunc='sum' keeps the cells as counts; the pivot_table default ('mean')\n",
"    # would average them. Minutes absent for a frequency are shown as 0.\n",
"    pivot = drone_per_minute.pivot_table(\n",
"        index='local_time',\n",
"        columns='freq',\n",
"        values='drone_count',\n",
"        aggfunc='sum',\n",
"        fill_value=0,\n",
"    )\n",
"    display(pivot.tail(50))"
]
},
{
"cell_type": "markdown",
"id": "ad570d05",
"metadata": {},
"source": [
"## Быстрые графики"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "895550a1",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"if not drone.empty:\n",
"    # Bar chart: share of drone classifications among all inferences, per frequency.\n",
"    drone_rate_by_freq = drone_stats.set_index('freq')['drone_rate']\n",
"    ax = drone_rate_by_freq.plot.bar(figsize=(8, 4), title='Drone rate by frequency')\n",
"    ax.set_ylabel('drone_count / total_inferences')\n",
"    plt.show()\n",
"\n",
"    # Box plot: spread of the probability values of drone rows, per frequency.\n",
"    ax = drone.boxplot(column='probability', by='freq', figsize=(8, 4))\n",
"    ax.set(title='Drone probability by frequency', xlabel='freq', ylabel='probability')\n",
"    # Blank the figure-level suptitle so only the axes title remains.\n",
"    plt.suptitle('')\n",
"    plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}