Skip to content

Commit

Permalink
organization node schema and fixes for lint and tests
Browse files Browse the repository at this point in the history
Signed-off-by: Daniel Brauer <[email protected]>
  • Loading branch information
danbrauer committed Nov 12, 2024
1 parent 564951f commit 9874a4a
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 31 deletions.
43 changes: 36 additions & 7 deletions cartography/intel/github/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
from cartography.client.core.tx import load
from cartography.intel.github.util import fetch_all
from cartography.models.core.nodes import CartographyNodeSchema
from cartography.models.github.users import GitHubOrganizationUserSchema, GitHubUnaffiliatedUserSchema
from cartography.models.github.orgs import GitHubOrganizationSchema
from cartography.models.github.users import GitHubOrganizationUserSchema
from cartography.models.github.users import GitHubUnaffiliatedUserSchema
from cartography.stats import get_stats_client
from cartography.util import merge_module_sync_metadata
from cartography.util import run_cleanup_job
Expand Down Expand Up @@ -120,6 +122,7 @@ def _get_enterprise_owners_raw(token: str, api_url: str, organization: str) -> T
)
return owners.edges, org


@timeit
def get_users(token: str, api_url: str, organization: str) -> Tuple[List[Dict], List[Dict], Dict]:
"""
Expand Down Expand Up @@ -158,12 +161,12 @@ def get_users(token: str, api_url: str, organization: str) -> Tuple[List[Dict],
processed_owner['MEMBER_OF'] = org['url']
owners_dict[processed_owner['url']] = processed_owner

affiliated_users = [] # users affiliated with the target org
affiliated_users = [] # users affiliated with the target org
for url, user in users_dict.items():
user['isEnterpriseOwner'] = url in owners_dict
affiliated_users.append(user)

unaffiliated_users = [] # users not affiliated with the target org
unaffiliated_users = [] # users not affiliated with the target org
for url, owner in owners_dict.items():
if url not in users_dict:
unaffiliated_users.append(owner)
Expand All @@ -189,6 +192,22 @@ def load_users(
)


@timeit
def load_organization(
    neo4j_session: neo4j.Session,
    node_schema: CartographyNodeSchema,
    org_data: List[Dict[str, Any]],
    update_tag: int,
) -> None:
    """
    Write GitHub organization records to the graph.

    :param neo4j_session: Neo4j session handed through to `load`.
    :param node_schema: Node schema handed through to `load`; `sync` passes `GitHubOrganizationSchema()`.
    :param org_data: List of organization dicts to load.
    :param update_tag: Forwarded to `load` as the `lastupdated` keyword argument.
    :return: None
    """
    org_count = len(org_data)
    logger.info(f"Loading {org_count} GitHub organization to the graph")
    load(neo4j_session, node_schema, org_data, lastupdated=update_tag)


@timeit
def sync(
neo4j_session: neo4j.Session,
Expand All @@ -199,10 +218,20 @@ def sync(
) -> None:
logger.info("Syncing GitHub users")
affiliated_user_data, unaffiliated_user_data, org_data = get_users(github_api_key, github_url, organization)
load_users(neo4j_session, GitHubOrganizationUserSchema(), affiliated_user_data, org_data, common_job_parameters['UPDATE_TAG'])
load_users(neo4j_session, GitHubUnaffiliatedUserSchema(), unaffiliated_user_data, org_data, common_job_parameters['UPDATE_TAG'])
# no automated cleanup job because user has no sub_resource_relationship
run_cleanup_job('github_users_cleanup.json', neo4j_session, common_job_parameters)
load_organization(
neo4j_session, GitHubOrganizationSchema(), [org_data],
common_job_parameters['UPDATE_TAG'],
)
load_users(
neo4j_session, GitHubOrganizationUserSchema(), affiliated_user_data, org_data,
common_job_parameters['UPDATE_TAG'],
)
load_users(
neo4j_session, GitHubUnaffiliatedUserSchema(), unaffiliated_user_data, org_data,
common_job_parameters['UPDATE_TAG'],
)
# no automated cleanup job for users because user node has no sub_resource_relationship
run_cleanup_job('github_org_and_users_cleanup.json', neo4j_session, common_job_parameters)
merge_module_sync_metadata(
neo4j_session,
group_type='GitHubOrganization',
Expand Down
32 changes: 32 additions & 0 deletions cartography/models/github/orgs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
This schema does not handle the org's relationships. Those are handled by other schemas, for example:
* GitHubTeamSchema defines (GitHubOrganization)-[RESOURCE]->(GitHubTeam)
* GitHubOrganizationUserSchema and GitHubUnaffiliatedUserSchema define
  (GitHubUser)-[MEMBER_OF|UNAFFILIATED]->(GitHubOrganization)
(There may be others, these are just two examples.)
"""
from dataclasses import dataclass

from cartography.models.core.common import PropertyRef
from cartography.models.core.nodes import CartographyNodeProperties
from cartography.models.core.nodes import CartographyNodeSchema
from cartography.models.core.relationships import CartographyRelProperties


@dataclass(frozen=True)
class GitHubOrganizationNodeProperties(CartographyNodeProperties):
    """
    Properties set on a GitHubOrganization node (used by GitHubOrganizationSchema).
    """
    # The node id is taken from the 'url' field of the organization record.
    id: PropertyRef = PropertyRef('url')
    # The 'login' field of the record is stored as `username`.
    # NOTE(review): extra_index=True presumably requests an additional index on this property — confirm
    # against PropertyRef.
    username: PropertyRef = PropertyRef('login', extra_index=True)
    # set_in_kwargs=True: presumably taken from a keyword argument supplied at load time rather than from
    # the record — confirm against PropertyRef.
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)


@dataclass(frozen=True)
class GitHubUserToOrganizationRelProperties(CartographyRelProperties):
    """
    Relationship properties carrying only a `lastupdated` value.

    NOTE(review): nothing visible in this module references this class, and a class with the same name and
    body is declared in cartography/models/github/users.py — confirm whether this copy is needed.
    """
    # set_in_kwargs=True: presumably taken from a keyword argument supplied at load time rather than from
    # the record — confirm against PropertyRef.
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)


@dataclass(frozen=True)
class GitHubOrganizationSchema(CartographyNodeSchema):
    """
    Node schema for GitHubOrganization: declares the node label and its properties, and no relationships.
    """
    label: str = 'GitHubOrganization'
    properties: GitHubOrganizationNodeProperties = GitHubOrganizationNodeProperties()
    # No relationships are declared on the organization node itself; per the module docstring, the org's
    # relationships are defined by other schemas.
    other_relationships = None
    sub_resource_relationship = None
41 changes: 19 additions & 22 deletions cartography/models/github/users.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,18 @@
from dataclasses import dataclass
from typing import Optional

from cartography.models.core.common import PropertyRef
from cartography.models.core.nodes import CartographyNodeProperties
from cartography.models.core.nodes import CartographyNodeSchema
from cartography.models.core.relationships import CartographyRelProperties
from cartography.models.core.relationships import CartographyRelSchema
from cartography.models.core.relationships import LinkDirection
from cartography.models.core.relationships import make_target_node_matcher
from cartography.models.core.relationships import OtherRelationships
from cartography.models.core.relationships import TargetNodeMatcher

"""
RE: Tenant relationship between GitHubUser and GitHubOrganization
Note this relationship is implemented via 'other_relationships' and not via the 'sub_resource_relationship'
Note this relationship is implemented via 'other_relationships' and not via the 'sub_resource_relationship'
as might be expected.
The 'sub_resource_relationship' typically describes the relationship of a node to its tenant (the org, project, or
other resource to which other nodes belong). An assumption of that relationship is that if the tenant goes
away, all nodes related to it should be cleaned up.
In GitHub, though the GitHubUser's tenant seems to be GitHubOrganization, users actually exist independently. There
is a concept of 'UNAFFILIATED' users, like Enterprise Owners who are related to an org even if they are not direct
members of it. You would not want them to be cleaned up, if an org goes away, and you could want them in your graph
even if they are not members of any org in the enterprise.
is a concept of 'UNAFFILIATED' users (https://docs.github.com/en/graphql/reference/enums#roleinorganization) like
Enterprise Owners who are related to an org even if they are not direct members of it. You would not want them to be
cleaned up if an org goes away, and you may want them in your graph even if they are not members of any org in
the enterprise.
To allow for this in the schema, this relationship is treated as any other node-to-node relationship, via
'other_relationships', instead of as the typical 'sub_resource_relationship'.
Expand All @@ -39,11 +27,18 @@
The main importance of having two schemas is to allow the two sets of users to be loaded separately. If we are loading
an unaffiliated user, but the user already exists in the graph (perhaps they are a member of another GitHub org, for
example), then loading the unaffiliated user will not blank out the 'role' and 'has_2fa_enabled' properties.
See:
* https://docs.github.com/en/graphql/reference/enums#roleinorganization
* https://docs.github.com/en/enterprise-cloud@latest/admin/managing-accounts-and-repositories/managing-users-in-your-enterprise/roles-in-an-enterprise#enterprise-owners
"""
from dataclasses import dataclass

from cartography.models.core.common import PropertyRef
from cartography.models.core.nodes import CartographyNodeProperties
from cartography.models.core.nodes import CartographyNodeSchema
from cartography.models.core.relationships import CartographyRelProperties
from cartography.models.core.relationships import CartographyRelSchema
from cartography.models.core.relationships import LinkDirection
from cartography.models.core.relationships import make_target_node_matcher
from cartography.models.core.relationships import OtherRelationships
from cartography.models.core.relationships import TargetNodeMatcher


@dataclass(frozen=True)
Expand All @@ -59,6 +54,7 @@ class GitHubOrganizationUserNodeProperties(CartographyNodeProperties):
has_2fa_enabled: PropertyRef = PropertyRef('hasTwoFactorEnabled')
role: PropertyRef = PropertyRef('role')


@dataclass(frozen=True)
class GitHubUnaffiliatedUserNodeProperties(CartographyNodeProperties):
id: PropertyRef = PropertyRef('url')
Expand All @@ -72,6 +68,7 @@ class GitHubUnaffiliatedUserNodeProperties(CartographyNodeProperties):
# 'has_2fa_enabled' not specified for unaffiliated; GitHub api does not return this property for them
# 'role' not specified for unaffiliated; they do not have a role in the target organization


@dataclass(frozen=True)
class GitHubUserToOrganizationRelProperties(CartographyRelProperties):
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
Expand Down Expand Up @@ -120,4 +117,4 @@ class GitHubUnaffiliatedUserSchema(CartographyNodeSchema):
GitHubUserUnaffiliatedOrganizationRel(),
],
)
sub_resource_relationship = None
sub_resource_relationship = None
6 changes: 6 additions & 0 deletions docs/root/modules/github/schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ Representation of a single GitHubOrganization [organization object](https://deve
(GitHubOrganization)-[RESOURCE]->(GitHubTeam)
```

- GitHubUsers are members of an organization. In some cases a user may be "unaffiliated" with an org, for example when the user is an enterprise owner but not a member of the org.

```
(GitHubUser)-[MEMBER_OF|UNAFFILIATED]->(GitHubOrganization)
```


### GitHubTeam

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from cartography.client.core.tx import load
from cartography.intel.github.users import load_organization_users
from cartography.intel.github.users import load_users
from cartography.models.core.nodes import CartographyNodeSchema
from tests.data.graph.querybuilder.sample_data.case_insensitive_prop_ref import FAKE_EMPLOYEE_DATA
from tests.data.graph.querybuilder.sample_data.case_insensitive_prop_ref import FAKE_GITHUB_ORG_DATA
from tests.data.graph.querybuilder.sample_data.case_insensitive_prop_ref import FAKE_GITHUB_USER_DATA
Expand All @@ -11,8 +12,9 @@

def test_load_team_members_data(neo4j_session):
# Arrange: Load some fake GitHubUser nodes to the graph
load_organization_users(
load_users(
neo4j_session,
CartographyNodeSchema(),
FAKE_GITHUB_USER_DATA,
FAKE_GITHUB_ORG_DATA,
TEST_UPDATE_TAG,
Expand Down

0 comments on commit 9874a4a

Please sign in to comment.