Skip to content

Commit

Permalink
Add geo OOI type and Maxmind geoip boefje (#3238)
Browse files Browse the repository at this point in the history
Co-authored-by: Jeroen Dekkers <[email protected]>
Co-authored-by: ammar92 <[email protected]>
Co-authored-by: Jan Klopper <[email protected]>
Co-authored-by: stephanie0x00 <[email protected]>
  • Loading branch information
5 people authored Aug 12, 2024
1 parent 0cf9a5a commit 1e6dab2
Show file tree
Hide file tree
Showing 14 changed files with 2,757 additions and 1,666 deletions.
Empty file.
14 changes: 14 additions & 0 deletions boefjes/boefjes/plugins/kat_maxmind_geoip/boefje.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"id": "maxmind",
"name": "Maxmind",
"description": "Fetch geolocation information for an IP address from Maxmind",
"consumes": [
"IPAddressV4",
"IPAddressV6"
],
"scan_level": 1,
"environment_keys": [
"MAXMIND_USER_ID",
"MAXMIND_LICENCE_KEY"
]
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
122 changes: 122 additions & 0 deletions boefjes/boefjes/plugins/kat_maxmind_geoip/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import hashlib
import io
import json
import os
import re
import shutil
import tarfile
from datetime import datetime, timezone
from os import getenv
from pathlib import Path

import maxminddb
import requests

from boefjes.job_models import BoefjeMeta

# Directory that holds the extracted database and the metadata file. Taken from the
# OPENKAT_CACHE_PATH environment variable, falling back to this module's own directory.
BASE_PATH = Path(getenv("OPENKAT_CACHE_PATH", Path(__file__).parent))
# Regular expression matched against tar member names to locate the database file.
GEOIP_PATH_PATTERN = r"GeoLite2-City_\d+/GeoLite2-City.mmdb"
# JSON file in which refresh_geoip() records timestamp, source and hash of the last download.
GEOIP_META_PATH = BASE_PATH / "geoip-meta.json"
# Default download URL; the GEOIP_SOURCE_URL environment variable overrides it.
GEOIP_SOURCE_URL = "https://download.maxmind.com/geoip/databases/GeoLite2-City/download?suffix=tar.gz"
# Default maximum age of the cached download; the GEOIP_CACHE_TIMEOUT environment variable overrides it.
GEOIP_CACHE_TIMEOUT = 86400 # in seconds
# Default hashlib algorithm name used to fingerprint the download; the HASHFUNC environment variable overrides it.
HASHFUNC = "sha256"
# Default timeout (seconds) passed to requests.get(); the REQUEST_TIMEOUT environment variable overrides it.
REQUEST_TIMEOUT = 30


def run(boefje_meta: BoefjeMeta) -> list[tuple[set, bytes | str]]:
    """Look up the input address in the cached GeoLite2 database.

    The database is downloaded again when no database file is found or when the
    stored metadata says the cache is too old; otherwise the metadata written by
    the previous download is read back from disk.

    Returns two raw outputs: the JSON-encoded lookup result tagged
    ``maxmind-geoip/geo_data`` and the JSON-encoded cache metadata tagged
    ``maxmind-geoip/cache-meta``.
    """
    ooi = boefje_meta.arguments["input"]
    algo = getenv("HASHFUNC", HASHFUNC)

    # Short-circuit keeps the metadata file untouched when no database exists yet.
    needs_refresh = not geoip_file_exists() or cache_out_of_date()
    if needs_refresh:
        cache_meta = refresh_geoip(algo)
    else:
        with GEOIP_META_PATH.open() as stored_meta:
            cache_meta = json.load(stored_meta)

    database_path = find_geoip_path()
    with maxminddb.open_database(database_path) as reader:
        record = reader.get(ooi["address"])

    geo_output = ({"maxmind-geoip/geo_data"}, json.dumps(record))
    meta_output = ({"maxmind-geoip/cache-meta"}, json.dumps(cache_meta))
    return [geo_output, meta_output]


def create_hash(data: bytes, algo: str) -> str:
    """Return the hexadecimal digest of ``data`` for the named hash algorithm.

    The algorithm is resolved through ``hashlib.new`` instead of ``getattr`` on the
    module: the name can come from the environment, and attribute lookup would also
    accept non-hash attributes of ``hashlib`` (``new``, ``pbkdf2_hmac``, ...) and
    fail with a confusing error, while missing algorithms that are only available
    through OpenSSL.

    Args:
        data: The bytes to hash.
        algo: Name of a hash algorithm known to ``hashlib`` (for example ``sha256``).

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``algo`` does not name a supported hash algorithm.
    """
    return hashlib.new(algo, data).hexdigest()


def cache_out_of_date() -> bool:
    """Return True if the cached download is older than the allowed cache timeout.

    The age is derived from the ``timestamp`` entry of the metadata file and
    compared with ``GEOIP_CACHE_TIMEOUT`` (environment variable, falling back to the
    module default), both in seconds.

    A metadata file that is missing, is not valid JSON or lacks a parseable
    ``timestamp`` is reported as out of date. Raising here would leave the plugin
    permanently broken whenever a database file exists without usable metadata,
    whereas reporting the cache as stale lets the caller download a fresh copy.
    """
    max_age = int(getenv("GEOIP_CACHE_TIMEOUT", GEOIP_CACHE_TIMEOUT))
    try:
        with GEOIP_META_PATH.open() as meta_file:
            meta = json.load(meta_file)
        cached_file_timestamp = datetime.fromisoformat(meta["timestamp"])
    except (FileNotFoundError, KeyError, TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-dict payloads
        # and non-string timestamps.
        return True

    now = datetime.now(timezone.utc)
    return (now - cached_file_timestamp).total_seconds() > max_age


def refresh_geoip(algo: str) -> dict:
    """Download the GeoLite2-City archive, unpack the database and store cache metadata.

    Credentials, source URL and request timeout are read from the environment
    (``MAXMIND_USER_ID``, ``MAXMIND_LICENCE_KEY``, ``GEOIP_SOURCE_URL``,
    ``REQUEST_TIMEOUT``), falling back to the module defaults where one exists.

    The previously extracted data is removed only after the new archive has been
    opened and found to contain the database, so a failed or corrupt download does
    not destroy a working cache.

    Args:
        algo: Name of the hash algorithm used to fingerprint the downloaded archive.

    Returns:
        The metadata that was written to the metadata file: ``timestamp`` (ISO 8601,
        UTC), ``source``, ``hash`` and ``hash_algorithm``.

    Raises:
        requests.HTTPError: If the download responds with an error status.
        tarfile.TarError: If the response body is not a readable gzip'ed tar archive.
        FileNotFoundError: If the archive does not contain the database file.
    """
    maxmind_user_id = str(getenv("MAXMIND_USER_ID", ""))
    maxmind_licence_key = getenv("MAXMIND_LICENCE_KEY", "")
    source_url = getenv("GEOIP_SOURCE_URL", GEOIP_SOURCE_URL)
    request_timeout = float(getenv("REQUEST_TIMEOUT", REQUEST_TIMEOUT))

    response = requests.get(
        source_url,
        allow_redirects=True,
        timeout=request_timeout,
        auth=(maxmind_user_id, maxmind_licence_key),
    )
    response.raise_for_status()

    # The mode must be given by keyword: the first positional argument of
    # tarfile.open() is the archive *name*, not the mode.
    with tarfile.open(fileobj=io.BytesIO(response.content), mode="r:gz") as tf:
        # fullmatch + isfile(): accept only a regular file whose whole name matches,
        # not a link or a longer path that merely starts with the expected prefix.
        geoip_file = next(
            (
                member
                for member in tf.getmembers()
                if member.isfile() and re.fullmatch(GEOIP_PATH_PATTERN, member.name)
            ),
            None,
        )
        if geoip_file is None:
            raise FileNotFoundError("GeoLite2-City.mmdb not found in the tar archive")

        # The new archive is known to be usable; only now discard the old data.
        remove_old_geolite_data()

        # Use the "data" extraction filter on interpreters that provide it; older
        # patch releases do not accept the ``filter`` argument.
        extract_options = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
        tf.extract(geoip_file, BASE_PATH, **extract_options)

    metadata = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "source": source_url,
        "hash": create_hash(response.content, algo),
        "hash_algorithm": algo,
    }
    with open(GEOIP_META_PATH, "w") as meta_file:
        json.dump(metadata, meta_file)
    return metadata


def find_geoip_path() -> str:
"""Find the GeoLite2-City.mmdb file in the BASE_PATH"""
for path in BASE_PATH.glob("GeoLite2-City_*/GeoLite2-City.mmdb"):
return str(path)
raise FileNotFoundError("GeoLite2-City.mmdb file not found in BASE_PATH")


def geoip_file_exists() -> bool:
    """Report whether ``find_geoip_path`` can locate a database file."""
    try:
        find_geoip_path()
    except FileNotFoundError:
        # The lookup signals "nothing found" by raising; translate that to a boolean.
        return False
    else:
        return True


def remove_old_geolite_data():
"""Removes old GeoLite2 directory"""
for root, dirs, files in os.walk(BASE_PATH, topdown=False):
for directory in dirs:
dir_path = os.path.join(root, directory)
shutil.rmtree(dir_path)
19 changes: 19 additions & 0 deletions boefjes/boefjes/plugins/kat_maxmind_geoip/normalize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import json
from collections.abc import Iterable

from boefjes.job_models import NormalizerOutput
from octopoes.models import Reference
from octopoes.models.ooi.geography import GeographicPoint


def run(input_ooi: dict, raw: bytes) -> Iterable[NormalizerOutput]:
    """Yield a GeographicPoint for the geolocation record in ``raw``.

    ``raw`` is parsed as JSON; an empty or null record yields nothing. Otherwise a
    single GeographicPoint is produced that refers to the input OOI and carries the
    ``longitude`` and ``latitude`` found under the record's ``location`` entry
    (``None`` for whichever is absent).
    """
    record = json.loads(raw)
    if record:
        ooi_reference = Reference.from_str(input_ooi["primary_key"])
        location = record.get("location", {})
        yield GeographicPoint(
            ooi=ooi_reference,
            longitude=location.get("longitude"),
            latitude=location.get("latitude"),
        )
9 changes: 9 additions & 0 deletions boefjes/boefjes/plugins/kat_maxmind_geoip/normalizer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"id": "kat_maxmind_geoip_normalize",
"consumes": [
"maxmind-geoip/geo_data"
],
"produces": [
"GeographicPoint"
]
}
40 changes: 40 additions & 0 deletions boefjes/boefjes/plugins/kat_maxmind_geoip/schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"title": "Arguments",
"type": "object",
"properties": {
"MAXMIND_USER_ID": {
"title": "User ID",
"type": "integer",
"maxLength": 48,
"description": "Numeric user id for MaxMind"
},
"MAXMIND_LICENCE_KEY": {
"title": "Licence Key",
"maxLength": 48,
"type": "string",
"description": "Licence key for MaxMind"
},
"GEOIP_CACHE_TIMEOUT": {
"title": "Cache Timeout",
"type": "integer",
"description": "Cache timeout in seconds"
},
"GEOIP_SOURCE_URL": {
"title": "Source URL",
"type": "string",
"description": "URL to download the GeoIP database from"
},
"REQUEST_TIMEOUT": {
"title": "Request Timeout",
"type": "integer",
"description": "Request timeout in seconds"
}
},
"required": [
"MAXMIND_USER_ID",
"MAXMIND_LICENCE_KEY"
],
"secret": [
"MAXMIND_LICENCE_KEY"
]
}
Loading

0 comments on commit 1e6dab2

Please sign in to comment.