diff --git a/log-analysis/HTTP.ipynb b/log-analysis/HTTP.ipynb index 35942bd..a2d100b 100644 --- a/log-analysis/HTTP.ipynb +++ b/log-analysis/HTTP.ipynb @@ -9,13 +9,20 @@ "\n", "Because as far as I can tell, there isn't a good one that exists?\n", "\n", - "This Notebook ingests a given log file and produces useful statistics about the recorded traffic." + "This Notebook ingests a given log file and produces useful statistics about the recorded traffic.\n", + "\n", + "## Instructions\n", + "\n", + "1. Run the first and second cells\n", + "2. Upload the file you want to parse\n", + "3. Click on the third cell, then click `Run -> Run Selected Cell and All Below`\n", + "4. Enjoy your data!" ] }, { "cell_type": "code", "execution_count": 1, - "id": "9e51fd41-fb2d-45d4-a625-08fd60fd52d6", + "id": "1ce288a4-d9d8-4b51-9a3d-9194161e37da", "metadata": {}, "outputs": [ { @@ -34,26 +41,67 @@ "import pandas as pd\n", "import plotly.express as px\n", "import altair as alt\n", - "import numpy\n", + "import numpy as np\n", "import re\n", + "import ipywidgets as widgets\n", + "from IPython.display import display\n", "# Allow Altair to handle large datasets\n", "alt.data_transformers.disable_max_rows()" ] }, + { + "cell_type": "markdown", + "id": "bc020c2d-0ba9-4cff-a500-9735fb425eb8", + "metadata": {}, + "source": [ + "The first thing we need to do is get a file to analyze! Drop that file!" + ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "id": "04021b75-fa2c-422a-8ad6-6dff420a44d2", "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f5768708231741a5af91402cb7749b26", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FileUpload(value={}, accept='.log,.txt', description='Upload', multiple=True)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "upload = widgets.FileUpload(\n", + " accept=\".log,.txt\", # Accepted file extension e.g. 
'.txt', '.pdf', 'image/*', 'image/*,.pdf'\n", + " multiple=True # True to accept multiple files upload else False\n", + ")\n", + "display(upload)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "32b0c18f-a554-4716-b71d-ce440a91c7df", + "metadata": {}, "outputs": [], "source": [ - "with open(\"samples/access.log\") as f:\n", - " raw_rows = [l.strip() for l in f.readlines()]" + "# Extract values\n", + "raw_rows = []\n", + "for f in upload.value:\n", + " raw_rows += upload.value[f][\"content\"].decode().split(\"\\n\")" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 19, "id": "a2faa922-0df2-4f6f-af5f-a2c25314e4e2", "metadata": {}, "outputs": [], @@ -66,7 +114,18 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 20, + "id": "22b268a1-535e-4208-b8e1-98b60a0b2aa5", + "metadata": {}, + "outputs": [], + "source": [ + "# Parser for log lines\n", + "row_parser = r\"(?P<ip>[0-9\\.]+) (?P<remote_log_name>\\w+|-) (?P<userid>\\w+|-) \\[(?P<date>.*?)\\] \\\"(?P<request_method>.*?) (?P<path>\\/.*) (?P<request_version>HTTP/[\\d\\.]+)\\\" (?P<status>\\d{3}) (?P<length>\\d+) \\\"(?P<referrer>.*)\\\" \\\"(?P<user_agent>.*)\\\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 21, "id": "27edf20b-07fd-436c-bf2c-f972a0a6566d", "metadata": {}, "outputs": [], @@ -75,32 +134,14 @@ " \"\"\"\n", " Ingests a row of HTTP log and returns a dict with structured data\n", " \"\"\"\n", - " src_ip = re.search(\"^([0-9]{1,3}\\.){3}[0-9]{1,3}\", row)\n", - " timestamp = re.search(\"\\[(.+)\\]\", row)\n", - " http_method = re.search(\"\\] \\\"([A-Z]+) /\", row)\n", - " uri = re.search(\"[A-Z]+ (/.+) HTTP\", row)\n", - " http_status = re.search(\"HTTP/[12.]*\\\" ([0-9]{3}) \", row)\n", - " response_size = re.search(\"[0-9]{3} ([0-9]+) \\\"\", row)\n", - " referrer = re.search(\"[0-9]+ \\\"(.*)\\\" \\\"\", row)\n", - " user_agent = re.search(\"\\\" \\\"(.+)\\\"\", row)\n", + " matches = re.search(row_parser, row)\n", " \n", - "\n", - " res = {\n", - " \"src_ip\": src_ip.group(0) if src_ip else \"\",\n", - " \"timestamp\": 
timestamp.group(1) if timestamp else \"\",\n", - " \"http_method\": http_method.group(1) if http_method else \"\",\n", - " \"uri\": uri.group(1) if uri else \"\",\n", - " \"http_status\": http_status.group(1) if http_status else \"\",\n", - " \"response_size\": response_size.group(1) if response_size else \"\",\n", - " \"referrer\": referrer.group(1) if referrer else \"\",\n", - " \"user_agent\": user_agent.group(1) if user_agent else \"\",\n", - " }\n", - " return res" + " return matches.groupdict() if matches else {}" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 22, "id": "e6281ae6-40de-4bf7-a287-53885c176ba6", "metadata": {}, "outputs": [], @@ -111,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 23, "id": "f5028f7a-1631-43b5-9a0c-6ab0d09e7dde", "metadata": {}, "outputs": [], @@ -122,14 +163,15 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 24, "id": "68f34c95-0657-47e5-b1da-6400bf2c88b7", "metadata": {}, "outputs": [], "source": [ "# Make a proper datetime object\n", "# Reference: 17/May/2015:10:05:03 +0000\n", - "df[\"_time\"] = pd.to_datetime(df[\"timestamp\"], format=\"%d/%b/%Y:%H:%M:%S %z\")" + "df[\"_time\"] = pd.to_datetime(df[\"date\"], format=\"%d/%b/%Y:%H:%M:%S %z\")\n", + "df.sort_values(by=\"_time\", ascending=False, inplace=True)" ] }, { @@ -144,17 +186,17 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 25, "id": "ee02d32b-11ca-4c3e-8edf-1d91507509fe", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(10000, 9)" + "(10001, 12)" ] }, - "execution_count": 8, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -166,19 +208,20 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 26, "id": "de52d82a-aab1-4b5f-9d9b-16339d2af940", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Index(['src_ip', 'timestamp', 'http_method', 'uri', 'http_status',\n", - " 'response_size', 
'referrer', 'user_agent', '_time'],\n", + "Index(['ip', 'remote_log_name', 'userid', 'date', 'request_method', 'path',\n", + " 'request_version', 'status', 'length', 'referrer', 'user_agent',\n", + " '_time'],\n", " dtype='object')" ] }, - "execution_count": 9, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -198,7 +241,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 27, "id": "d0f40868-dd2c-4301-86f0-93473fe5ef27", "metadata": {}, "outputs": [ @@ -223,17 +266,23 @@ " \n", " \n", " \n", - " timestamp\n", - " http_method\n", - " uri\n", - " http_status\n", - " response_size\n", + " remote_log_name\n", + " userid\n", + " date\n", + " request_method\n", + " path\n", + " request_version\n", + " status\n", + " length\n", " referrer\n", " user_agent\n", " _time\n", " \n", " \n", - " src_ip\n", + " ip\n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -247,14 +296,17 @@ " \n", " \n", " 66.249.73.135\n", - " 482\n", - " 482\n", - " 482\n", - " 482\n", - " 482\n", - " 482\n", - " 482\n", - " 482\n", + " 432\n", + " 432\n", + " 432\n", + " 432\n", + " 432\n", + " 432\n", + " 432\n", + " 432\n", + " 432\n", + " 432\n", + " 432\n", " \n", " \n", " 46.105.14.53\n", @@ -266,28 +318,23 @@ " 364\n", " 364\n", " 364\n", + " 364\n", + " 364\n", + " 364\n", " \n", " \n", " 130.237.218.86\n", - " 357\n", - " 357\n", - " 357\n", - " 357\n", - " 357\n", - " 357\n", - " 357\n", - " 357\n", - " \n", - " \n", - " 75.97.9.59\n", - " 273\n", - " 273\n", - " 273\n", - " 273\n", - " 273\n", - " 273\n", - " 273\n", - " 273\n", + " 293\n", + " 293\n", + " 293\n", + " 293\n", + " 293\n", + " 293\n", + " 293\n", + " 293\n", + " 293\n", + " 293\n", + " 293\n", " \n", " \n", " 50.16.19.13\n", @@ -299,6 +346,23 @@ " 113\n", " 113\n", " 113\n", + " 113\n", + " 113\n", + " 113\n", + " \n", + " \n", + " 209.85.238.199\n", + " 102\n", + " 102\n", + " 102\n", + " 102\n", + " 102\n", + " 102\n", + " 102\n", + " 102\n", + " 102\n", + " 102\n", + 
" 102\n", " \n", " \n", " ...\n", @@ -310,9 +374,15 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 41.249.219.90\n", + " 67.225.29.201\n", + " 1\n", + " 1\n", + " 1\n", " 1\n", " 1\n", " 1\n", @@ -323,7 +393,10 @@ " 1\n", " \n", " \n", - " 41.99.29.14\n", + " 206.161.220.42\n", + " 1\n", + " 1\n", + " 1\n", " 1\n", " 1\n", " 1\n", @@ -334,7 +407,10 @@ " 1\n", " \n", " \n", - " 42.156.136.43\n", + " 157.56.92.142\n", + " 1\n", + " 1\n", + " 1\n", " 1\n", " 1\n", " 1\n", @@ -345,7 +421,10 @@ " 1\n", " \n", " \n", - " 46.105.125.31\n", + " 157.56.92.141\n", + " 1\n", + " 1\n", + " 1\n", " 1\n", " 1\n", " 1\n", @@ -356,7 +435,10 @@ " 1\n", " \n", " \n", - " 54.215.54.10\n", + " 54.242.167.99\n", + " 1\n", + " 1\n", + " 1\n", " 1\n", " 1\n", " 1\n", @@ -368,54 +450,54 @@ " \n", " \n", "\n", - "

1753 rows × 8 columns

\n", + "

1674 rows × 11 columns

\n", "" ], "text/plain": [ - " timestamp http_method uri http_status response_size \\\n", - "src_ip \n", - "66.249.73.135 482 482 482 482 482 \n", - "46.105.14.53 364 364 364 364 364 \n", - "130.237.218.86 357 357 357 357 357 \n", - "75.97.9.59 273 273 273 273 273 \n", - "50.16.19.13 113 113 113 113 113 \n", - "... ... ... ... ... ... \n", - "41.249.219.90 1 1 1 1 1 \n", - "41.99.29.14 1 1 1 1 1 \n", - "42.156.136.43 1 1 1 1 1 \n", - "46.105.125.31 1 1 1 1 1 \n", - "54.215.54.10 1 1 1 1 1 \n", + " remote_log_name userid date request_method path \\\n", + "ip \n", + "66.249.73.135 432 432 432 432 432 \n", + "46.105.14.53 364 364 364 364 364 \n", + "130.237.218.86 293 293 293 293 293 \n", + "50.16.19.13 113 113 113 113 113 \n", + "209.85.238.199 102 102 102 102 102 \n", + "... ... ... ... ... ... \n", + "67.225.29.201 1 1 1 1 1 \n", + "206.161.220.42 1 1 1 1 1 \n", + "157.56.92.142 1 1 1 1 1 \n", + "157.56.92.141 1 1 1 1 1 \n", + "54.242.167.99 1 1 1 1 1 \n", "\n", - " referrer user_agent _time \n", - "src_ip \n", - "66.249.73.135 482 482 482 \n", - "46.105.14.53 364 364 364 \n", - "130.237.218.86 357 357 357 \n", - "75.97.9.59 273 273 273 \n", - "50.16.19.13 113 113 113 \n", - "... ... ... ... \n", - "41.249.219.90 1 1 1 \n", - "41.99.29.14 1 1 1 \n", - "42.156.136.43 1 1 1 \n", - "46.105.125.31 1 1 1 \n", - "54.215.54.10 1 1 1 \n", + " request_version status length referrer user_agent _time \n", + "ip \n", + "66.249.73.135 432 432 432 432 432 432 \n", + "46.105.14.53 364 364 364 364 364 364 \n", + "130.237.218.86 293 293 293 293 293 293 \n", + "50.16.19.13 113 113 113 113 113 113 \n", + "209.85.238.199 102 102 102 102 102 102 \n", + "... ... ... ... ... ... ... 
\n", + "67.225.29.201 1 1 1 1 1 1 \n", + "206.161.220.42 1 1 1 1 1 1 \n", + "157.56.92.142 1 1 1 1 1 1 \n", + "157.56.92.141 1 1 1 1 1 1 \n", + "54.242.167.99 1 1 1 1 1 1 \n", "\n", - "[1753 rows x 8 columns]" + "[1674 rows x 11 columns]" ] }, - "execution_count": 10, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Group data by src_ip\n", - "df.groupby(\"src_ip\").count().sort_values(by=\"timestamp\", ascending=False)" + "df.groupby(\"ip\").count().sort_values(by=\"_time\", ascending=False)" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 28, "id": "9abacfd7-23a1-424c-afce-cbc83446d8da", "metadata": {}, "outputs": [ @@ -507,7 +589,7 @@ "data": [ { "alignmentgroup": "True", - "hovertemplate": "src_ip=%{x}
http_status=%{y}", + "hovertemplate": "ip=%{x}
status=%{y}", "legendgroup": "", "marker": { "color": "#636efa", @@ -525,26 +607,26 @@ "66.249.73.135", "46.105.14.53", "130.237.218.86", - "75.97.9.59", "50.16.19.13", "209.85.238.199", + "75.97.9.59", "68.180.224.225", - "100.43.83.137", + "198.46.149.143", "208.115.111.72", - "198.46.149.143" + "208.115.113.88" ], "xaxis": "x", "y": [ - 482, + 432, 364, - 357, - 273, + 293, 113, 102, 99, - 84, - 83, - 82 + 95, + 82, + 73, + 66 ], "yaxis": "y" } @@ -1387,7 +1469,7 @@ 9.5 ], "title": { - "text": "src_ip" + "text": "ip" }, "type": "category" }, @@ -1400,20 +1482,20 @@ ], "range": [ 0, - 507.36842105263156 + 454.7368421052632 ], "title": { - "text": "http_status" + "text": "status" }, "type": "linear" } } }, - "image/png": "", + "image/png": "", "text/html": [ - "
" ], "text/plain": [ "alt.Chart(...)" ] }, - "execution_count": 21, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -3012,58 +3450,258 @@ "source": [ "alt.Chart(top_10_visits) \\\n", ".mark_line() \\\n", - ".encode(x=\"_time:T\", y=\"count(http_status):Q\", color=\"src_ip:N\")" + ".encode(x=\"_time:T\", y=\"count(status):Q\", color=\"ip:N\")" ] }, { "cell_type": "markdown", - "id": "ef7fee9f-721e-4146-9de3-f01115c6b6a5", + "id": "3dc84a01-9a58-4115-bad3-cf940017b57d", "metadata": {}, "source": [ - "## Indicators of Attack" + "### Significantly Noisy IPs" ] }, { "cell_type": "markdown", - "id": "b0a8d73c-9047-4b8d-8f6a-78c143faa743", + "id": "8d510d7d-b407-456a-a80d-f8f789728c50", "metadata": {}, "source": [ - "### Big Missers\n", + "Is there any reason to be concerned about any of these IPs? How can we tell?\n", "\n", - "Source IPs where the response was `401`, `403`, or `404`." + "Through the power of ***MATH***" ] }, { "cell_type": "code", - "execution_count": 16, - "id": "6f76b08a-2aaa-4be4-a681-c0c6b525c1c3", - "metadata": {}, + "execution_count": 34, + "id": "37b968a5-1a45-474a-9c59-9387dbd015eb", + "metadata": { + "jupyter": { + "source_hidden": true + }, + "tags": [] + }, "outputs": [], "source": [ - "fails = [\"404\", \"403\", \"401\"]\n", - "fail_visits = df[df.http_status.apply(lambda s: s in fails)]" + "# Aggregate top 10 visitors\n", + "top_10_counts = top_10_visits.groupby(\"ip\").count()" ] }, { "cell_type": "code", - "execution_count": 17, - "id": "62cc2a50-9c21-417d-ba5b-8fee635611c2", - "metadata": {}, + "execution_count": 35, + "id": "b75fee34-3b5e-4ac3-b18c-818a966a391b", + "metadata": { + "jupyter": { + "source_hidden": true + }, + "tags": [] + }, "outputs": [ { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "alignmentgroup": "True", - "hovertemplate": "src_ip=%{x}
http_status=%{y}", - "legendgroup": "", - "marker": { - "color": "#636efa", - "pattern": { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average visits per IP: 171.9\n", + "Standard Deviation: 129.57\n" + ] + } + ], + "source": [ + "# Calculate the average visits\n", + "# counts_df = df.groupby(\"ip\").count()\n", + "counts_df = top_10_counts\n", + "avg_visits = round(np.mean(counts_df._time), 2)\n", + "# Calculate the standard deviation\n", + "visits_std = round(np.std(counts_df._time), 2)\n", + "\n", + "print(f\"Average visits per IP: {avg_visits}\")\n", + "print(f\"Standard Deviation: {visits_std}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "5d930017-a553-49bd-8a35-e8b91bd7d72e", + "metadata": { + "jupyter": { + "source_hidden": true + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Calculate significance threshold\n", + "n_sigmas = 1.2\n", + "alpha = avg_visits + (visits_std * n_sigmas)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "f8edabe4-7728-40e8-bce3-e70e47d33542", + "metadata": { + "jupyter": { + "source_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
remote_log_nameuseriddaterequest_methodpathrequest_versionstatuslengthreferreruser_agent_time
ip
46.105.14.53364364364364364364364364364364364
66.249.73.135432432432432432432432432432432432
\n", + "
" + ], + "text/plain": [ + " remote_log_name userid date request_method path \\\n", + "ip \n", + "46.105.14.53 364 364 364 364 364 \n", + "66.249.73.135 432 432 432 432 432 \n", + "\n", + " request_version status length referrer user_agent _time \n", + "ip \n", + "46.105.14.53 364 364 364 364 364 364 \n", + "66.249.73.135 432 432 432 432 432 432 " + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Look for any values that exceed our alpha threshold\n", + "significant = top_10_counts[top_10_counts._time >= alpha]\n", + "significant" + ] + }, + { + "cell_type": "markdown", + "id": "ef7fee9f-721e-4146-9de3-f01115c6b6a5", + "metadata": { + "tags": [] + }, + "source": [ + "## Indicators of Attack" + ] + }, + { + "cell_type": "markdown", + "id": "b0a8d73c-9047-4b8d-8f6a-78c143faa743", + "metadata": {}, + "source": [ + "### Big Missers\n", + "\n", + "Source IPs where the response was `401`, `403`, or `404`. This can indicate brute force browsing with tools like Gobuster or ZAP/Burp." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "6f76b08a-2aaa-4be4-a681-c0c6b525c1c3", + "metadata": {}, + "outputs": [], + "source": [ + "fails = [\"404\", \"403\", \"401\"]\n", + "fail_visits = df[df.status.apply(lambda s: s in fails)]" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "62cc2a50-9c21-417d-ba5b-8fee635611c2", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "alignmentgroup": "True", + "hovertemplate": "ip=%{x}
status=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { "shape": "" } }, @@ -3086,22 +3724,32 @@ "131.107.160.94", "14.140.163.52", "14.160.65.22", + "144.76.194.187", + "144.76.95.39", "157.55.34.93", + "173.236.32.219", "176.92.75.62", "183.91.14.219", + "184.154.137.213", + "185.38.249.133", "188.165.243.45", "190.250.238.82", "192.185.81.134", "192.185.83.181", "193.244.33.47", "194.186.207.105", + "195.211.162.22", "195.250.34.144", + "198.143.145.210", "198.245.61.43", "199.102.67.16", "199.116.117.212", + "199.168.96.66", "199.189.248.95", "200.31.173.106", "204.62.56.3", + "207.241.237.104", + "207.241.237.220", "208.115.113.88", "208.43.251.181", "208.50.255.30", @@ -3120,6 +3768,7 @@ "50.87.144.128", "59.163.27.11", "61.140.183.41", + "62.225.70.202", "62.24.122.25", "63.140.98.80", "66.147.244.126", @@ -3128,10 +3777,13 @@ "67.215.172.14", "69.171.237.10", "69.171.237.9", + "69.175.14.230", + "69.175.87.242", "74.208.16.115", "74.208.180.23", "75.67.42.229", "75.97.9.59", + "76.164.234.106", "78.173.140.106", "79.171.127.34", "80.108.25.232", @@ -3145,9 +3797,10 @@ "89.107.177.18", "89.107.180.34", "90.175.31.133", - "91.236.75.25", + "94.153.9.168", "94.242.255.188", "95.78.54.93", + "96.127.149.186", "98.130.2.118" ], "xaxis": "x", @@ -3164,22 +3817,32 @@ 1, 1, 1, + 2, + 14, + 1, 1, 5, 1, + 1, + 1, 3, 1, 1, 1, 2, 1, + 1, 3, + 1, 3, 1, 1, + 2, 1, 1, 2, + 1, + 1, 3, 1, 1, @@ -3201,6 +3864,7 @@ 1, 1, 1, + 1, 8, 2, 1, @@ -3209,7 +3873,10 @@ 1, 1, 1, + 1, + 1, 6, + 1, 3, 1, 1, @@ -3223,9 +3890,10 @@ 2, 1, 1, - 8, + 1, 1, 3, + 1, 1 ], "yaxis": "y" @@ -3237,9 +3905,6 @@ "legend": { "tracegroupgap": 0 }, - "margin": { - "t": 60 - }, "template": { "data": { "bar": [ @@ -4057,6 +4722,9 @@ } } }, + "title": { + "text": "Failed browsers" + }, "xaxis": { "anchor": "y", "autorange": true, @@ -4066,10 +4734,10 @@ ], "range": [ -0.5, - 74.5 + 89.5 ], "title": { - "text": "src_ip" + "text": "ip" }, "type": "category" }, @@ -4085,17 
+4753,17 @@ 63.1578947368421 ], "title": { - "text": "http_status" + "text": "status" }, "type": "linear" } } }, - "image/png": "", + "image/png": "", "text/html": [ - "
" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "alt.Chart(post_df) \\\n", + ".mark_bar() \\\n", + ".encode(x=\"_time:T\", y=\"count(status):Q\", color=\"ip:N\")" + ] + }, + { + "cell_type": "markdown", + "id": "677688de-a1d3-42f0-9c8b-9f60d5f5c187", + "metadata": {}, + "source": [ + "## User Agents\n", + "\n", + "User Agents may indicate scanning data, or uncommon agents in your org." + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "016a6f3e-dc68-481d-b569-edbc279e35cb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ipremote_log_nameuseriddaterequest_methodpathrequest_versionstatuslengthreferrer_time
user_agent
&as_qdr=all11111111111
Mozilla/5.0 (Linux; U; Android 4.3; en-gb; GT-I9300 Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30 GSA/3.2.17.1009776.arm11111111111
Mozilla/5.0 (Linux; U; Android 4.3; en-nz; GT-I9505 Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.3011111111111
Mozilla/5.0 (Linux; U; Android 4.3; en-us; GT-I9300 Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.3011111111111
Mozilla/5.0 (Linux; U; Android 4.3; en-us; HTC_One Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.3011111111111
Mozilla/5.0 (iPad; U; CPU OS 4_3_5 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8L1 Safari/6533.18.511111111111
Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) CriOS/32.0.1700.21 Mobile/11B554a Safari/9537.5311111111111
Mozilla/5.0 (Linux;u;Android 2.3.7;zh-cn;HTC Desire Build) AppleWebKit/533.1 (KHTML,like Gecko) Version/4.0 Mobile Safari/533.111111111111
Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) GSA/3.2.1.25875 Mobile/11B554a Safari/8536.2511111111111
Mozilla/5.0 (iPad; CPU OS 6_1_3 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10B329 Safari/8536.2511111111111
Mozilla/5.0 (iPad; CPU OS 6_0_1 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A523 Safari/8536.2511111111111
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:11.0) Gecko/20100101 Firefox/11.011111111111
Mozilla/5.0 (compatible; theoldreader.com; 2 subscribers; feed-id=2e78188bde2b643b822a26d6)11111111111
Mozilla/5.0 (Linux; U; Android 4.2.2; fa-ir; GT-I8190 Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.3011111111111
Mozilla/5.0 (compatible; theoldreader.com)11111111111
Mozilla/5.0 (compatible; MojeekBot/0.6; http://www.mojeek.com/bot.html)11111111111
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.50.2 (KHTML, like Gecko) Version/5.0.6 Safari/533.22.311111111111
Mozilla/5.0 (compatible; MSIE or Firefox mutant; not on Windows server; + http://tab.search.daum.net/aboutWebSearch.html) Daumoa/3.011111111111
Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; MALC)11111111111
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.47 Safari/536.1111111111111
\n", + "
" + ], + "text/plain": [ + " ip remote_log_name \\\n", + "user_agent \n", + "&as_qdr=all 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-gb; GT-I... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-nz; GT-I... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-us; GT-I... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-us; HTC_... 1 1 \n", + "Mozilla/5.0 (iPad; U; CPU OS 4_3_5 like Mac OS ... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (Linux;u;Android 2.3.7;zh-cn;HTC De... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 6_1_3 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 6_0_1 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv... 1 1 \n", + "Mozilla/5.0 (compatible; theoldreader.com; 2 su... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.2.2; fa-ir; GT... 1 1 \n", + "Mozilla/5.0 (compatible; theoldreader.com) 1 1 \n", + "Mozilla/5.0 (compatible; MojeekBot/0.6; http://... 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) ... 1 1 \n", + "Mozilla/5.0 (compatible; MSIE or Firefox mutant... 1 1 \n", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6... 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) ... 1 1 \n", + "\n", + " userid date \\\n", + "user_agent \n", + "&as_qdr=all 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-gb; GT-I... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-nz; GT-I... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-us; GT-I... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-us; HTC_... 1 1 \n", + "Mozilla/5.0 (iPad; U; CPU OS 4_3_5 like Mac OS ... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (Linux;u;Android 2.3.7;zh-cn;HTC De... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 6_1_3 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 6_0_1 like Mac OS X) ... 
1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv... 1 1 \n", + "Mozilla/5.0 (compatible; theoldreader.com; 2 su... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.2.2; fa-ir; GT... 1 1 \n", + "Mozilla/5.0 (compatible; theoldreader.com) 1 1 \n", + "Mozilla/5.0 (compatible; MojeekBot/0.6; http://... 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) ... 1 1 \n", + "Mozilla/5.0 (compatible; MSIE or Firefox mutant... 1 1 \n", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6... 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) ... 1 1 \n", + "\n", + " request_method path \\\n", + "user_agent \n", + "&as_qdr=all 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-gb; GT-I... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-nz; GT-I... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-us; GT-I... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-us; HTC_... 1 1 \n", + "Mozilla/5.0 (iPad; U; CPU OS 4_3_5 like Mac OS ... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (Linux;u;Android 2.3.7;zh-cn;HTC De... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 6_1_3 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 6_0_1 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv... 1 1 \n", + "Mozilla/5.0 (compatible; theoldreader.com; 2 su... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.2.2; fa-ir; GT... 1 1 \n", + "Mozilla/5.0 (compatible; theoldreader.com) 1 1 \n", + "Mozilla/5.0 (compatible; MojeekBot/0.6; http://... 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) ... 1 1 \n", + "Mozilla/5.0 (compatible; MSIE or Firefox mutant... 1 1 \n", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6... 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) ... 1 1 \n", + "\n", + " request_version status \\\n", + "user_agent \n", + "&as_qdr=all 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-gb; GT-I... 
1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-nz; GT-I... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-us; GT-I... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-us; HTC_... 1 1 \n", + "Mozilla/5.0 (iPad; U; CPU OS 4_3_5 like Mac OS ... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (Linux;u;Android 2.3.7;zh-cn;HTC De... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 6_1_3 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 6_0_1 like Mac OS X) ... 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv... 1 1 \n", + "Mozilla/5.0 (compatible; theoldreader.com; 2 su... 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.2.2; fa-ir; GT... 1 1 \n", + "Mozilla/5.0 (compatible; theoldreader.com) 1 1 \n", + "Mozilla/5.0 (compatible; MojeekBot/0.6; http://... 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) ... 1 1 \n", + "Mozilla/5.0 (compatible; MSIE or Firefox mutant... 1 1 \n", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6... 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) ... 1 1 \n", + "\n", + " length referrer _time \n", + "user_agent \n", + "&as_qdr=all 1 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-gb; GT-I... 1 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-nz; GT-I... 1 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-us; GT-I... 1 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.3; en-us; HTC_... 1 1 1 \n", + "Mozilla/5.0 (iPad; U; CPU OS 4_3_5 like Mac OS ... 1 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) ... 1 1 1 \n", + "Mozilla/5.0 (Linux;u;Android 2.3.7;zh-cn;HTC De... 1 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) ... 1 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 6_1_3 like Mac OS X) ... 1 1 1 \n", + "Mozilla/5.0 (iPad; CPU OS 6_0_1 like Mac OS X) ... 1 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv... 1 1 1 \n", + "Mozilla/5.0 (compatible; theoldreader.com; 2 su... 
1 1 1 \n", + "Mozilla/5.0 (Linux; U; Android 4.2.2; fa-ir; GT... 1 1 1 \n", + "Mozilla/5.0 (compatible; theoldreader.com) 1 1 1 \n", + "Mozilla/5.0 (compatible; MojeekBot/0.6; http://... 1 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) ... 1 1 1 \n", + "Mozilla/5.0 (compatible; MSIE or Firefox mutant... 1 1 1 \n", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6... 1 1 1 \n", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) ... 1 1 1 " + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Group by User Agent\n", + "agents = df.groupby(\"user_agent\").count()\n", + "# Look at rare UAs\n", + "agents.sort_values(by=\"ip\", ascending=True).head(20)" + ] + }, + { + "cell_type": "markdown", + "id": "b6938024-ff6e-45a6-a653-0669268b9476", + "metadata": {}, + "source": [ + "### Short UAS\n", + "\n", + "Short User Agents may indicate programmatic access, like by scanners." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "3c1b9825-07d4-40f8-bb97-307b413f24ae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ipremote_log_nameuseriddaterequest_methodpathrequest_versionstatuslengthreferrer_time
user_agent
-175175175175175175175175175175175
Feedbin - 1 subscribers5050505050505050505050
Twitterbot/1.01414141414141414141414
portscout/0.8.177777777777
Embedly +support@embed.ly55555555555
Python-urllib/2.744444444444
Wget/1.14 (linux-gnu)44444444444
fetch libfetch/2.044444444444
MAXX_MAUI WAP Browser33333333333
Ruby33333333333
Googlebot-Image/1.022222222222
binlar_2.6.3 test@mgmt.mic22222222222
Opera 9.611111111111
Robosourcer/1.011111111111
Wget/1.12 (linux-gnu)11111111111
Wget/1.13.4 (linux-gnu)11111111111
YisouSpider11111111111
ZDM/4.0; Windows Mobile 8.011111111111
nutch-1.4/Nutch-1.411111111111
&as_qdr=all11111111111
\n", + "
" + ], + "text/plain": [ + " ip remote_log_name userid date \\\n", + "user_agent \n", + "- 175 175 175 175 \n", + "Feedbin - 1 subscribers 50 50 50 50 \n", + "Twitterbot/1.0 14 14 14 14 \n", + "portscout/0.8.1 7 7 7 7 \n", + "Embedly +support@embed.ly 5 5 5 5 \n", + "Python-urllib/2.7 4 4 4 4 \n", + "Wget/1.14 (linux-gnu) 4 4 4 4 \n", + "fetch libfetch/2.0 4 4 4 4 \n", + "MAXX_MAUI WAP Browser 3 3 3 3 \n", + "Ruby 3 3 3 3 \n", + "Googlebot-Image/1.0 2 2 2 2 \n", + "binlar_2.6.3 test@mgmt.mic 2 2 2 2 \n", + "Opera 9.6 1 1 1 1 \n", + "Robosourcer/1.0 1 1 1 1 \n", + "Wget/1.12 (linux-gnu) 1 1 1 1 \n", + "Wget/1.13.4 (linux-gnu) 1 1 1 1 \n", + "YisouSpider 1 1 1 1 \n", + "ZDM/4.0; Windows Mobile 8.0 1 1 1 1 \n", + "nutch-1.4/Nutch-1.4 1 1 1 1 \n", + "&as_qdr=all 1 1 1 1 \n", "\n", - " }) }; });
" + " request_method path request_version status \\\n", + "user_agent \n", + "- 175 175 175 175 \n", + "Feedbin - 1 subscribers 50 50 50 50 \n", + "Twitterbot/1.0 14 14 14 14 \n", + "portscout/0.8.1 7 7 7 7 \n", + "Embedly +support@embed.ly 5 5 5 5 \n", + "Python-urllib/2.7 4 4 4 4 \n", + "Wget/1.14 (linux-gnu) 4 4 4 4 \n", + "fetch libfetch/2.0 4 4 4 4 \n", + "MAXX_MAUI WAP Browser 3 3 3 3 \n", + "Ruby 3 3 3 3 \n", + "Googlebot-Image/1.0 2 2 2 2 \n", + "binlar_2.6.3 test@mgmt.mic 2 2 2 2 \n", + "Opera 9.6 1 1 1 1 \n", + "Robosourcer/1.0 1 1 1 1 \n", + "Wget/1.12 (linux-gnu) 1 1 1 1 \n", + "Wget/1.13.4 (linux-gnu) 1 1 1 1 \n", + "YisouSpider 1 1 1 1 \n", + "ZDM/4.0; Windows Mobile 8.0 1 1 1 1 \n", + "nutch-1.4/Nutch-1.4 1 1 1 1 \n", + "&as_qdr=all 1 1 1 1 \n", + "\n", + " length referrer _time \n", + "user_agent \n", + "- 175 175 175 \n", + "Feedbin - 1 subscribers 50 50 50 \n", + "Twitterbot/1.0 14 14 14 \n", + "portscout/0.8.1 7 7 7 \n", + "Embedly +support@embed.ly 5 5 5 \n", + "Python-urllib/2.7 4 4 4 \n", + "Wget/1.14 (linux-gnu) 4 4 4 \n", + "fetch libfetch/2.0 4 4 4 \n", + "MAXX_MAUI WAP Browser 3 3 3 \n", + "Ruby 3 3 3 \n", + "Googlebot-Image/1.0 2 2 2 \n", + "binlar_2.6.3 test@mgmt.mic 2 2 2 \n", + "Opera 9.6 1 1 1 \n", + "Robosourcer/1.0 1 1 1 \n", + "Wget/1.12 (linux-gnu) 1 1 1 \n", + "Wget/1.13.4 (linux-gnu) 1 1 1 \n", + "YisouSpider 1 1 1 \n", + "ZDM/4.0; Windows Mobile 8.0 1 1 1 \n", + "nutch-1.4/Nutch-1.4 1 1 1 \n", + "&as_qdr=all 1 1 1 " ] }, + "execution_count": 45, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ - "px.scatter(visits_208_91_156_11, x=\"_time\", y=\"uri\")" + "pd.set_option(\"display.max_rows\",100)\n", + "agent_clean = df.dropna(subset=[\"user_agent\"])\n", + "not_moz = agent_clean[~agent_clean.user_agent.str.contains(\"Mozilla\")]\n", + "short_agents = not_moz[not_moz.user_agent.str.len() <= 30]\n", + "short_agents.groupby(\"user_agent\").count().sort_values(by=\"ip\", 
ascending=False).head(100)" ] } ],