Skip to content

Commit

Permalink
RM: Geolocation data collection (#55)
Browse files Browse the repository at this point in the history
  • Loading branch information
mgxd authored Dec 1, 2022
1 parent 669ee71 commit b031ec3
Show file tree
Hide file tree
Showing 5 changed files with 2 additions and 102 deletions.
2 changes: 1 addition & 1 deletion migas_server/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ async def get_redis_connection() -> redis.Redis:
return MEM_CACHE


# GH / IPStack requests
# GH requests
async def get_requests_session() -> aiohttp.ClientSession:
"""Initialize within an async function, since sync initialization is deprecated."""
global REQUESTS_SESSION
Expand Down
69 changes: 0 additions & 69 deletions migas_server/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
from sqlalchemy import distinct, func, select
from sqlalchemy.dialects.postgresql import insert

from migas_server.fetchers import fetch_ipstack_data
from migas_server.models import (
Table,
gen_session,
geolocs,
get_project_tables,
projects,
)
Expand Down Expand Up @@ -112,73 +110,6 @@ async def ingest_project(project: Project) -> None:
)


async def insert_geoloc(
    ip: str,
    *,
    continent: str,
    country: str,
    region: str,
    city: str,
    postal_code: str,
    latitude: float,
    longitude: float,
) -> None:
    """
    Insert a single geolocation record into the ``geolocs`` table.

    ``ip`` is written to the ``id`` column as given; ``geoloc_request`` passes
    the SHA-256 hex digest of the address rather than the raw address.
    All remaining arguments are keyword-only and map one-to-one onto the
    table columns of the same name.

    The row is committed before returning. Errors raised by the session
    (e.g. on a duplicate ``id``) are not caught here.
    """
    row = {
        "id": ip,
        "continent": continent,
        "country": country,
        "region": region,
        "city": city,
        "postal_code": postal_code,
        "latitude": latitude,
        "longitude": longitude,
    }
    async with gen_session() as session:
        # The execute() result is not needed for a plain insert.
        await session.execute(geolocs.insert(), row)
        await session.commit()


# Table query
async def geoloc_request(ip: str) -> None:
    """
    Geolocate an address at most once and persist the result.

    The address is identified by the SHA-256 hex digest of ``ip``; the raw
    address is only used for the IPStack lookup itself.

    1) If a ``geolocs`` row with that digest already exists, do nothing.
    2) Otherwise spend one `ipstack` API call.
    3) If the call reports failure, print the response and store nothing.
    4) Otherwise insert the returned fields under the digest.

    Results are stored so that the limited number of IPStack API calls
    available on the free tier is not wasted on repeat addresses.
    """
    from hashlib import sha256

    # (geolocs column, IPStack response key) pairs, in insertion order
    field_map = (
        ("continent", 'continent_name'),
        ("country", 'country_name'),
        ("region", 'region_name'),
        ("city", 'city'),
        ("postal_code", 'zip'),
        ("latitude", 'latitude'),
        ("longitude", 'longitude'),
    )

    hip = sha256(ip.encode()).hexdigest()
    already_stored = geolocs.select().where(geolocs.c.id == hip)
    async with gen_session() as session:
        lookup = await session.execute(already_stored)
        if lookup.one_or_none():
            return

    # Address not seen before: ask IPStack
    data = await fetch_ipstack_data(ip)
    # An explicit ``"success": False`` marks a failed call; a missing key
    # is treated as success.
    if data.get("success", True) is False:
        print(f"Unable to fetch geoloc data: {data}")
        return

    await insert_geoloc(hip, **{column: data[key] for column, key in field_map})


async def query_usage_by_datetimes(
project: Table,
start: DateTime,
Expand Down
14 changes: 0 additions & 14 deletions migas_server/fetchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from migas_server.connections import get_redis_connection, get_requests_session

IPSTACK_API_URL = "http://api.ipstack.com/{ip}?access_key={ipstack_secret}"
GITHUB_RELEASE_URL = "https://api.github.com/repos/{project}/releases/latest"
GITHUB_TAG_URL = "https://api.github.com/repos/{project}/tags"
GITHUB_ET_FILE_URL = "https://raw.githubusercontent.com/{project}/{version}/.migas.json"
Expand Down Expand Up @@ -69,16 +68,3 @@ async def fetch_project_info(project: str) -> dict:
"success": latest_version not in ('unknown', 'forbidden'),
"version": latest_version.lstrip('v'),
}


async def fetch_ipstack_data(ip: str) -> dict:
    """
    Request geolocation data for ``ip`` from IPStack.

    The access key is read from the ``IPSTACK_API_KEY`` environment variable.
    When ``fetch_response`` reports a status of 200 its response is returned
    unchanged; for any other status a message is printed and
    ``{"success": False}`` is returned instead.
    """
    url = IPSTACK_API_URL.format(ip=ip, ipstack_secret=os.getenv("IPSTACK_API_KEY"))
    status, res = await fetch_response(url)
    if status != 200:
        print("IPSTACK: Something went wrong.")
        return {"success": False}
    # NOTE(review): the response is passed through without validation;
    # callers check its "success" key themselves.
    return res
17 changes: 1 addition & 16 deletions migas_server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,6 @@ class Projects(Base):
project = Column(String(140), primary_key=True) # 39 owner + "/" + 100 repository


class Geolocs(Base):
    """Geolocation record for one address, stored in the ``geolocs`` table."""

    __tablename__ = "geolocs"
    __mapper_args__ = {"eager_defaults": True}

    # Fixed-width key; 64 characters matches a SHA-256 hex digest
    id = Column(CHAR(64), primary_key=True)

    # Place names, from broadest to narrowest; all required
    continent = Column(String(13), nullable=False)
    country = Column(String(56), nullable=False)
    region = Column(String(58), nullable=False)
    city = Column(String(58), nullable=False)
    postal_code = Column(String(10), nullable=False)

    # Coordinates; both required
    latitude = Column(FLOAT(), nullable=False)
    longitude = Column(FLOAT(), nullable=False)


class Project(Base):
__abstract__ = True
__mapper_args__ = {"eager_defaults": True}
Expand Down Expand Up @@ -62,7 +48,6 @@ class ProjectUsers(Base):
container = Column(String(length=9), nullable=False)


geolocs = Geolocs.__table__
projects = Projects.__table__


Expand Down Expand Up @@ -149,7 +134,7 @@ async def init_db(engine: AsyncEngine) -> None:
This method ensures the following are created (if they do not already exist):
1) migas schema
2) primary tables (projects, geoloc)
2) project tables
3) If projects table exists, ensure all tracked projects have Project/ProjectUsers tables.
"""
async with engine.begin() as conn:
Expand Down
2 changes: 0 additions & 2 deletions migas_server/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

from migas_server.connections import get_redis_connection
from migas_server.database import (
geoloc_request,
ingest_project,
project_exists,
query_projects,
Expand Down Expand Up @@ -105,7 +104,6 @@ async def add_project(self, p: ProjectInput, info: Info) -> JSON:
# return project info ASAP, assign data ingestion as background tasks
request = info.context['request']
bg_tasks = info.context['background_tasks']
bg_tasks.add_task(geoloc_request, request.client.host)
bg_tasks.add_task(ingest_project, project)

return {
Expand Down

0 comments on commit b031ec3

Please sign in to comment.