Skip to content

Commit

Permalink
⬆️ ftmq 0.5.2
Browse files Browse the repository at this point in the history
  • Loading branch information
simonwoerpel committed Mar 14, 2024
1 parent ec49e1b commit 98650c9
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 62 deletions.
53 changes: 44 additions & 9 deletions catalog.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,45 @@
datasets:
- include: https://data.ftm.store/eu_transparency_register/index.json
- include: https://data.ftm.store/ec_meetings/index.json
- include: https://data.ftm.store/eu_fts/index.json
- include: https://data.ftm.store/eu_authorities/index.json
- include: https://data.ftm.store/eu_horizon_europe/index.json
- include: https://data.ftm.store/eu_fp7/index.json
- include: https://data.opensanctions.org/datasets/latest/eu_meps/index.json
- include: https://data.opensanctions.org/datasets/latest/eu_cor_members/index.json
- include: https://data.opensanctions.org/datasets/latest/eu_fsf/index.json
- from_uri: https://data.ftm.store/eu_transparency_register/index.json
- from_uri: https://data.ftm.store/ec_meetings/index.json
- from_uri: https://data.ftm.store/eu_fts/index.json
git_repo: https://github.com/investigativedata/investigraph-eu
maintainer:
name: investigativedata.io
url: https://investigativedata.io
logo_url: https://cdn.investigativedata.org/style/logo_square_light.svg
- from_uri: https://data.ftm.store/eu_authorities/index.json
git_repo: https://github.com/investigativedata/investigraph-eu
maintainer:
name: investigativedata.io
url: https://investigativedata.io
logo_url: https://cdn.investigativedata.org/style/logo_square_light.svg
- from_uri: https://data.ftm.store/eu_horizon_europe/index.json
git_repo: https://github.com/investigativedata/investigraph-eu
maintainer:
name: investigativedata.io
url: https://investigativedata.io
logo_url: https://cdn.investigativedata.org/style/logo_square_light.svg
- from_uri: https://data.ftm.store/eu_fp7/index.json
git_repo: https://github.com/investigativedata/investigraph-eu
maintainer:
name: investigativedata.io
url: https://investigativedata.io
logo_url: https://cdn.investigativedata.org/style/logo_square_light.svg
- from_uri: https://data.opensanctions.org/datasets/latest/eu_meps/index.json
git_repo: https://github.com/opensanctions/opensanctions
maintainer:
name: OpenSanctions
url: https://opensanctions.org
logo_url: https://assets.opensanctions.org/images/ura/logo_text.svg
- from_uri: https://data.opensanctions.org/datasets/latest/eu_cor_members/index.json
git_repo: https://github.com/opensanctions/opensanctions
maintainer:
name: OpenSanctions
url: https://opensanctions.org
logo_url: https://assets.opensanctions.org/images/ura/logo_text.svg
- from_uri: https://data.opensanctions.org/datasets/latest/eu_fsf/index.json
git_repo: https://github.com/opensanctions/opensanctions
maintainer:
name: OpenSanctions
url: https://opensanctions.org
logo_url: https://assets.opensanctions.org/images/ura/logo_text.svg
5 changes: 5 additions & 0 deletions datasets/ec_meetings/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ publisher:
Commission’s work – both in shaping new policies, and in steering them
through the other EU institutions. It supports the whole Commission.
url: https://commission.europa.eu/about-european-commission/departments-and-executive-agencies/secretariat-general_en
maintainer:
name: investigativedata.io
url: https://investigativedata.io
logo_url: https://cdn.investigativedata.org/style/logo_square_light.svg
git_repo: https://github.com/investigativedata/investigraph-eu

extract:
pandas:
Expand Down
60 changes: 26 additions & 34 deletions datasets/ec_meetings/transform.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,30 @@
from typing import Generator

from ftmq.util import fingerprint as fp
from ftmq.util import make_fingerprint_id as fp
from ftmq.util import make_entity_id
from investigraph.model import Context
from investigraph.types import CE, CEGenerator, Record
from investigraph.util import join_text
from investigraph.util import clean_name, join_text


def make_address(ctx: Context, data: Record) -> CE | None:
    """Build an ``Address`` proxy from the record's ``Location`` value.

    The ``Location`` entry is popped from ``data``, so the record is
    mutated by this call.

    Args:
        ctx: Context used to derive the id and to create the proxy.
        data: Source record; must contain a ``Location`` key.

    Returns:
        The ``Address`` proxy with ``full`` set to the raw location, or
        ``None`` when the location yields no fingerprint.
    """
    location = data.pop("Location")
    # Fingerprint once and reuse the result for both the emptiness check
    # and the id, instead of normalising the same string twice.
    location_fp = fp(location)
    if not location_fp:
        return None
    proxy_id = ctx.make_id(location_fp, prefix="addr")
    return ctx.make_proxy("Address", proxy_id, full=location)


def make_person(ctx: Context, name: str, role: str, body: CE) -> CE:
    """Create a ``Person`` proxy for ``name`` in the scope of ``body``.

    The id is a ``person`` slug over an entity id derived from the body's
    id and the fingerprinted name; ``role`` is stored as ``description``.
    """
    person_key = make_entity_id(body.id, fp(name))
    person_id = ctx.make_slug("person", person_key)
    return ctx.make_proxy(
        "Person",
        person_id,
        name=name,
        description=role,
    )


def make_organization(ctx: Context, regId: str, name: str | None = None) -> CE:
    """Create an ``Organization`` proxy keyed by its register id.

    The id is a slug of ``regId`` with the ``eu-tr`` prefix and ``regId``
    is stored as ``idNumber``. ``name`` is only added when it produces a
    truthy fingerprint.
    """
    org = ctx.make_proxy(
        "Organization",
        ctx.make_slug(regId, prefix="eu-tr"),
        idNumber=regId,
    )
    if not fp(name):
        return org
    org.add("name", name)
    return org


Expand All @@ -56,17 +50,14 @@ def make_organizations(ctx: Context, data: Record) -> CEGenerator:
data.pop("Name of interest representative"),
regIds,
):
org = make_organization(ctx, regId, name)
if org.id:
if clean_name(regId):
orgs = True
yield org
yield make_organization(ctx, regId, name)
if not orgs:
# yield only via id
for regId in regIds.split(","):
regId = regId.strip()
org = make_organization(ctx, regId)
if org.id:
yield org
if clean_name(regId):
yield make_organization(ctx, regId)


def make_persons(ctx: Context, data: Record, body: CE) -> CEGenerator:
Expand All @@ -83,12 +74,12 @@ def make_event(
) -> CEGenerator:
date = data.pop("Date of meeting")
participants = [o for o in make_organizations(ctx, data)]
proxy = ctx.make_proxy("Event")
proxy.id = ctx.make_slug(
proxy_id = ctx.make_slug(
"meeting",
date,
make_entity_id(organizer.id, *sorted([p.id for p in participants])),
)
proxy = ctx.make_proxy("Event", proxy_id)
label = join_text(*[p.first("name") for p in participants])
name = f"{date} - {organizer.caption} x {label}"
proxy.add("name", name)
Expand Down Expand Up @@ -119,8 +110,8 @@ def parse_record(ctx: Context, data: Record, body: CE):
yield from involved

for member in involved:
rel = ctx.make_proxy("Membership")
rel.id = ctx.make_slug("membership", make_entity_id(body.id, member.id)) # noqa
rel_id = ctx.make_slug("membership", make_entity_id(body.id, member.id)) # noqa
rel = ctx.make_proxy("Membership", rel_id)
rel.add("organization", body)
rel.add("member", member)
rel.add("role", member.get("description"))
Expand All @@ -129,11 +120,9 @@ def parse_record(ctx: Context, data: Record, body: CE):

def parse_record_ec(ctx: Context, data: Record):
    """Parse one record of meetings of EC representatives.

    Pops ``Name of cabinet`` from ``data``, yields a ``PublicBody`` proxy
    for that cabinet (jurisdiction ``eu``) and then everything produced by
    ``parse_record`` for the remaining record.
    """
    cabinet = data.pop("Name of cabinet")
    body = ctx.make_proxy(
        "PublicBody",
        ctx.make_slug(fp(cabinet)),
        name=cabinet,
        jurisdiction="eu",
    )

    yield body
    yield from parse_record(ctx, data, body)
Expand All @@ -142,11 +131,14 @@ def parse_record_ec(ctx: Context, data: Record):
def parse_record_dg(ctx: Context, data: Record):
    """Parse one record of meetings of EC Directors-General.

    Pops the DG acronym and full name from ``data``, yields a
    ``PublicBody`` proxy (id slug ``dg`` + acronym, acronym kept as
    ``weakAlias``, jurisdiction ``eu``) and then everything produced by
    ``parse_record`` for the remaining record.
    """
    acronym = data.pop("Name of DG - acronym")
    dg_id = ctx.make_slug("dg", acronym)
    # Popped after the id is built, matching the original evaluation order.
    full_name = data.pop("Name of DG - full name")
    body = ctx.make_proxy(
        "PublicBody",
        dg_id,
        name=full_name,
        weakAlias=acronym,
        jurisdiction="eu",
    )

    yield body
    yield from parse_record(ctx, data, body)
Expand Down
5 changes: 5 additions & 0 deletions datasets/eu_transparency_register/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ publisher:
Commission’s work – both in shaping new policies, and in steering them
through the other EU institutions. It supports the whole Commission.
url: https://commission.europa.eu/about-european-commission/departments-and-executive-agencies/secretariat-general_en
maintainer:
name: investigativedata.io
url: https://investigativedata.io
logo_url: https://cdn.investigativedata.org/style/logo_square_light.svg
git_repo: https://github.com/investigativedata/investigraph-eu

extract:
sources:
Expand Down
49 changes: 31 additions & 18 deletions datasets/eu_transparency_register/transform.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,30 @@
from typing import Any

from followthemoney.util import join_text, make_entity_id
from ftmq.util import fingerprint as fp
from ftmq.util import make_fingerprint as fp
from investigraph import Context
from investigraph.types import CE, CEGenerator, Record


def make_address(ctx: Context, prefix: str, data: dict[str, Any]) -> CE | None:
    """Build and emit an ``Address`` proxy for one office of a registrant.

    Args:
        ctx: Context used to create and emit the proxy.
        prefix: Column-name prefix selecting which office to read; the
            looked-up keys are ``"<prefix> office address"``,
            ``"<prefix> office post code"``, ``"<prefix> office post box"``,
            ``"<prefix> office city"`` and ``"<prefix> office country"``.
        data: Source record; missing keys are treated as ``None``.

    Returns:
        The emitted ``Address`` proxy, or ``None`` when no id could be
        derived from the joined address parts.
    """
    # These keys must be f-strings: as plain strings the lookups used the
    # literal text "{prefix}", so ``prefix`` was never interpolated.
    street = data.get(f"{prefix} office address")
    postalCode = data.get(f"{prefix} office post code")
    postBox = data.get(f"{prefix} office post box")
    city = data.get(f"{prefix} office city")
    country = data.get(f"{prefix} office country")
    full = join_text(street, postalCode, postBox, city, country, sep=", ")
    proxy_id = ctx.make_slug(make_entity_id(fp(full)), prefix="addr")
    if proxy_id is None:
        return None
    proxy = ctx.make(
        "Address",
        proxy_id,
        full=full,
        street=street,
        postalCode=postalCode,
        city=city,
        country=country,
        postOfficeBox=postBox,
    )
    ctx.emit(proxy)
    return proxy

Expand All @@ -24,9 +33,9 @@ def parse_record(ctx: Context, record: dict[str, Any]):
schema = "Organization"
if "company" in record["Form of the entity"]:
schema = "Company"
proxy = ctx.make(schema)
ident = record["Identification code"]
proxy.id = ctx.make_slug(ident)
proxy_id = ctx.make_slug(ident)
proxy = ctx.make(schema, proxy_id)
proxy.add("idNumber", ident)
proxy.add("name", record["Name"])
proxy.add("alias", record["Acronym"])
Expand Down Expand Up @@ -78,34 +87,38 @@ def parse_record(ctx: Context, record: dict[str, Any]):

def parse_agents(ctx: Context, record: dict[str, Any]):
    """Emit client, agent and their ``Representation`` link for one record.

    Consumes (pops) the organisation, person and accreditation fields from
    ``record`` and emits, in order: the client ``Organization``, the agent
    ``Person`` and the ``Representation`` connecting them.
    """
    reg_id = record.pop("orgIdentificationCode")

    # Client organisation, identified by its register code.
    org = ctx.make("Organization", ctx.make_slug(reg_id))
    org.add("name", record.pop("orgName"))
    org.add("idNumber", reg_id)
    ctx.emit(org)

    # Agent person; the id is scoped to the client's register code.
    title = record.pop("title", None)
    first_name = record.pop("firstName")
    last_name = record.pop("lastName")
    full_name = join_text(title, first_name, last_name)
    person_id = ctx.make_slug("agent", reg_id, make_entity_id(fp(full_name)))
    person = ctx.make(
        "Person",
        person_id,
        title=title,
        firstName=first_name,
        lastName=last_name,
        name=full_name,
    )
    ctx.emit(person)

    # Relationship between the two, keyed by both entity ids.
    link_key = make_entity_id(org.id, person.id)
    link = ctx.make("Representation", ctx.make_slug("representation", link_key))
    link.add("agent", person)
    link.add("client", org)
    link.add("role", "Accredited lobbyist to access the european parliament")
    link.add("startDate", record.pop("accreditationStartDate"))
    link.add("endDate", record.pop("accreditationEndDate"))
    ctx.emit(link)


Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
followthemoney @ git+https://github.com/investigativedata/followthemoney.git@schema/science-identifiers
investigraph>=0.3.2
investigraph>=0.5.2
mkdocs-material
jinja-cli
html5lib
psycopg2-binary

0 comments on commit 98650c9

Please sign in to comment.