Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: use database to store and retrieve found repos #263

Open
wants to merge 5 commits into
base: staging
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/macaron/config/defaults.ini
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ parent_limit = 10
# Disables repo finding for specific artifacts based on their group and artifact IDs. Format: {groupId}:{artifactId}
# E.g. com.oracle.coherence.ce:coherence
artifact_ignore_list =
use_database = True

[git]
# The list of allowed git hosts.
Expand Down
23 changes: 20 additions & 3 deletions src/macaron/database/database_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,9 @@ def insert(self, table: Table, values: dict) -> None:
Parameters
----------
table: Table
The Table to insert to
The Table to insert to.
values: dict
The mapping from column names to values to insert into the Table
The mapping from column names to values to insert into the Table.
"""
try:
self.execute(insert(table).values(**values))
Expand All @@ -112,12 +112,29 @@ def execute(self, query: Any) -> None:
Parameters
----------
query: Any
The SQLalchemy query to execute
The SQLAlchemy query to execute.
"""
with self.engine.connect() as conn:
conn.execute(query)
conn.commit()

def execute_and_return(self, query: Any) -> sqlalchemy.engine.cursor.CursorResult:
"""
Execute a SQLAlchemy Core API query using a short-lived engine connection and return the result.

Parameters
----------
query: Any
The SQLAlchemy query to execute.

Returns
-------
sqlalchemy.engine.cursor.CursorResult
The result of the query.
"""
# NOTE(review): the result object is handed back as the ``with`` block exits, i.e. once the
# short-lived connection has presumably been released. Whether rows can still be fetched from
# it afterwards looks driver-dependent — confirm, or consume the rows inside the block.
with self.engine.connect() as conn:
return conn.execute(query)

def create_tables(self) -> None:
"""
Automatically create views for all tables known to _base.metadata.
Expand Down
2 changes: 2 additions & 0 deletions src/macaron/database/table_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ class RepositoryTable(ORMBase):
release_tag: Mapped[str] = mapped_column(String, nullable=True)
commit_sha: Mapped[str] = mapped_column(String, nullable=False)
commit_date: Mapped[str] = mapped_column(String, nullable=False)
namespace: Mapped[str] = mapped_column(String, nullable=True)
name: Mapped[str] = mapped_column(String, nullable=True)


class SLSALevelTable(ORMBase):
Expand Down
19 changes: 15 additions & 4 deletions src/macaron/dependency_analyzer/cyclonedx.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from macaron.config.defaults import defaults
from macaron.config.global_config import global_config
from macaron.database.database_manager import DatabaseManager
from macaron.dependency_analyzer.dependency_resolver import DependencyAnalyzer, DependencyInfo
from macaron.errors import MacaronError
from macaron.output_reporter.scm import SCMStatus
Expand Down Expand Up @@ -139,12 +140,14 @@ def get_dep_components(


def convert_components_to_artifacts(
components: Iterable[dict], root_component: Optional[dict | None] = None
db_man: DatabaseManager | None, components: Iterable[dict], root_component: Optional[dict | None] = None
) -> dict[str, DependencyInfo]:
"""Convert CycloneDX components using internal artifact representation.

Parameters
----------
db_man : DatabaseManager
The database manager for accessing the database (optional).
components : list[dict]
The dependency components.
root_component: Optional[dict|None]
Expand Down Expand Up @@ -196,7 +199,12 @@ def convert_components_to_artifacts(
)

DependencyAnalyzer.add_latest_version(
item=item, key=key, all_versions=all_versions, latest_deps=latest_deps, url_to_artifact=url_to_artifact
db_man=db_man,
item=item,
key=key,
all_versions=all_versions,
latest_deps=latest_deps,
url_to_artifact=url_to_artifact,
)
except KeyError as error:
logger.debug(error)
Expand All @@ -210,11 +218,13 @@ def convert_components_to_artifacts(
return latest_deps


def get_deps_from_sbom(sbom_path: str | Path) -> dict[str, DependencyInfo]:
def get_deps_from_sbom(db_man: DatabaseManager | None, sbom_path: str | Path) -> dict[str, DependencyInfo]:
"""Get the dependencies from a provided SBOM.

Parameters
----------
db_man : DatabaseManager
The database manager for accessing the database (optional).
sbom_path : str | Path
The path to the SBOM file.

Expand All @@ -223,12 +233,13 @@ def get_deps_from_sbom(sbom_path: str | Path) -> dict[str, DependencyInfo]:
A dictionary where dependency artifacts are grouped based on "artifactId:groupId".
"""
return convert_components_to_artifacts(
db_man,
get_dep_components(
root_bom_path=Path(sbom_path),
recursive=defaults.getboolean(
"dependency.resolver",
"recursive",
fallback=False,
),
)
),
)
2 changes: 1 addition & 1 deletion src/macaron/dependency_analyzer/cyclonedx_gradle.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def collect_dependencies(self, dir_path: str) -> dict[str, DependencyInfo]:
fallback=False,
),
)
return convert_components_to_artifacts(components, root_component)
return convert_components_to_artifacts(None, components, root_component)

def remove_sboms(self, dir_path: str) -> bool:
"""Remove all the SBOM files in the provided directory recursively.
Expand Down
2 changes: 1 addition & 1 deletion src/macaron/dependency_analyzer/cyclonedx_mvn.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def collect_dependencies(self, dir_path: str) -> dict[str, DependencyInfo]:
fallback=False,
),
)
return convert_components_to_artifacts(components, root_component)
return convert_components_to_artifacts(None, components, root_component)

def remove_sboms(self, dir_path: str) -> bool:
"""Remove all the SBOM files in the provided directory recursively.
Expand Down
28 changes: 23 additions & 5 deletions src/macaron/dependency_analyzer/dependency_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
from enum import Enum
from typing import TypedDict

import sqlalchemy
from packaging import version

from macaron.config.defaults import defaults
from macaron.config.target_config import Configuration
from macaron.database.database_manager import DatabaseManager
from macaron.dependency_analyzer.java_repo_finder import find_java_repo
from macaron.errors import MacaronError
from macaron.output_reporter.scm import SCMStatus
Expand Down Expand Up @@ -111,6 +113,7 @@ def get_cmd(self) -> list:

@staticmethod
def add_latest_version(
db_man: DatabaseManager | None,
item: DependencyInfo,
key: str,
all_versions: dict[str, list[DependencyInfo]],
Expand All @@ -121,6 +124,8 @@ def add_latest_version(

Parameters
----------
db_man : DatabaseManager | None
The database manager for accessing the database (optional).
item : DependencyInfo
The dictionary containing info about the dependency to be added.
key : str
Expand All @@ -133,7 +138,7 @@ def add_latest_version(
Used to detect artifacts that have similar repos.
"""
if defaults.getboolean("repofinder.java", "find_repos"):
DependencyAnalyzer._find_repo(item)
DependencyAnalyzer._find_repo(db_man, item)

# Check if the URL is already seen for a different artifact.
if item["url"] != "":
Expand Down Expand Up @@ -173,16 +178,29 @@ def add_latest_version(
logger.error("Could not parse dependency version number: %s", error)

@staticmethod
def _find_repo(item: DependencyInfo) -> None:
def _find_repo(db_man: DatabaseManager | None, item: DependencyInfo) -> None:
"""Find the repo for the current item, if the criteria are met."""
if item["url"] != "" or item["version"] == "unspecified" or not item["group"] or not item["name"]:
logger.debug("Item URL already exists, or item is missing information: %s", item)
return
gav = f"{item['group']}:{item['name']}:{item['version']}"
artifact = f"{item['group']}:{item['name']}"
if f"{item['group']}:{item['name']}" in defaults.get_list("repofinder.java", "artifact_ignore_list"):
logger.debug("Skipping GAV: %s", gav)
logger.debug("Skipping artifact: %s", artifact)
return

if db_man and defaults.getboolean("repofinder.java", "use_database"):
# Perform database lookup
query = sqlalchemy.text(
"SELECT remote_path FROM _repository WHERE namespace = :group and name = :artifact"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of raw queries, we should use SQLAlchemy's ORM queries, which perform proper validation.

).bindparams(group=item["group"], artifact=item["name"])
result: sqlalchemy.engine.cursor.CursorResult = db_man.execute_and_return(query)
row = result.first()
if row and row.remote_path:
logger.debug("Found database url: %s for artifact: %s", row.remote_path, artifact)
item["url"] = row.remote_path
return
logger.debug("No database url found for GAV: %s", artifact)

urls = find_java_repo(
item["group"],
item["name"],
Expand All @@ -191,7 +209,7 @@ def _find_repo(item: DependencyInfo) -> None:
)
item["url"] = DependencyAnalyzer.find_valid_url(list(urls))
if item["url"] == "":
logger.debug("Failed to find url for GAV: %s", gav)
logger.debug("Failed to find url for artifact: %s", artifact)

@staticmethod
def find_valid_url(urls: Iterable[str]) -> str:
Expand Down
22 changes: 11 additions & 11 deletions src/macaron/policy_engine/prelude/aggregate_rules.dl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ agg_levels(n+1) :- n <= 4, agg_levels(n).
* Everything has a repository and uses a scripted build service.
*/
aggregate_level_requirement(1, repo) :-
repository(repo, _,_,_,_,_,_),
repository(repo, _,_,_,_,_,_,_,_),
check_passed(repo, "mcn_build_service_1"),
check_passed(repo, "mcn_version_control_system_1").

Expand All @@ -44,7 +44,7 @@ aggregate_level_requirement(1, repo) :-
* The build is verifiably automated and deployable.
*/
aggregate_level_requirement(2, repo) :-
repository(repo, _,_,_,_,_,_),
repository(repo, _,_,_,_,_,_,_,_),
aggregate_level_requirement(1, repo),
check_passed(repo, "mcn_build_script_1"),
check_passed(repo, "mcn_build_service_1"),
Expand All @@ -55,15 +55,15 @@ aggregate_level_requirement(2, repo) :-
* provenance information.
*/
aggregate_level_requirement(3, repo) :-
repository(repo, _,_,_,_,_,_),
repository(repo, _,_,_,_,_,_,_,_),
check_passed(repo, "mcn_provenance_level_three_1"),
aggregate_level_requirement(2, repo).

/**
* The release provenance passes verification.
*/
aggregate_level_requirement(4, repo) :-
repository(repo, _,_,_,_,_,_),
repository(repo, _,_,_,_,_,_,_,_),
aggregate_level_requirement(3, repo),
check_passed(repo, "mcn_provenance_level_three_1"),
check_passed(repo, "mcn_trusted_builder_level_three_1"),
Expand Down Expand Up @@ -94,9 +94,9 @@ aggregate_level_min_dependency_level(level, repo) <= aggregate_level_min_depende
/**
* The aggregate level for each repository that does not have any dependencies asserts the requirements are met.
*/
aggregate_level(0, repo) :- repository(repo, _,_,_,_,_,_).
aggregate_level(0, repo) :- repository(repo, _,_,_,_,_,_,_,_).
aggregate_level(level, repo) :-
repository(repo, name,_,_,_,_,_),
repository(repo, name,_,_,_,_,_,_,_),
agg_levels(level),
// this level's requirements
aggregate_level_requirement(level, repo),
Expand All @@ -108,7 +108,7 @@ aggregate_level(level, repo) :-
* reach the required minimum level.
*/
aggregate_level(level, repo) :-
repository(repo, name,_,_,_,_,_),
repository(repo, name,_,_,_,_,_,_,_),
agg_levels(level),
// this level's requirements
aggregate_level_requirement(level, repo),
Expand All @@ -130,17 +130,17 @@ aggregate_level(level, repo) <= aggregate_level(higher_level, repo) :-
meets_aggregate_level(level, repo) :- aggregate_level(real_level, repo), agg_levels(level), level <= real_level.

Policy("aggregate_level_4", repo, reponame) :-
repository(repo, reponame,_,_,_,_,_),
repository(repo, reponame,_,_,_,_,_,_,_),
meets_aggregate_level(4, repo).

Policy("aggregate_level_3", repo, reponame) :-
repository(repo, reponame,_,_,_,_,_),
repository(repo, reponame,_,_,_,_,_,_,_),
meets_aggregate_level(3, repo).

Policy("aggregate_level_2", repo, reponame) :-
repository(repo, reponame,_,_,_,_,_),
repository(repo, reponame,_,_,_,_,_,_,_),
meets_aggregate_level(2, repo).

Policy("aggregate_level_1", repo, reponame) :-
repository(repo, reponame,_,_,_,_,_),
repository(repo, reponame,_,_,_,_,_,_,_),
meets_aggregate_level(1, repo).
4 changes: 2 additions & 2 deletions src/macaron/policy_engine/prelude/helper_rules.dl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ is_check(check_name) :- check_result(_, check_name, _, _, _).
* This fact exists iff a repository is hosted on a trusted public platform.
*/
.decl not_self_hosted_git(repo:number, message:symbol)
not_self_hosted_git(repo, message) :- repository(repo, name, remote, branch, release, commit_sha, commit_date),
not_self_hosted_git(repo, message) :- repository(repo, name, remote, branch, release, commit_sha, commit_date,_,_),
match("^.*(github.com|gitlab.com).*$", remote), message=remote.

/**
Expand All @@ -40,7 +40,7 @@ transitive_dependency(repo, dependency) :-
* Extract the id and full name from the repository relation.
*/
.decl is_repo(repo: number, repo_full_name: symbol)
is_repo(repo, name) :- repository(repo, name,_,_,_,_,_).
is_repo(repo, name) :- repository(repo, name,_,_,_,_,_,_,_).

/**
* ADT recursively describing a JSON object.
Expand Down
15 changes: 14 additions & 1 deletion src/macaron/slsa_analyzer/analyze_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def __init__(
output_dir: str = "",
remote_path: str = "",
current_date: str = "",
namespace: str = "",
name: str = "",
):
"""Initialize instance.

Expand All @@ -74,11 +76,17 @@ def __init__(
commit_date : str
The commit date of the target repo.
macaron_path : str
The Macaron's root path.
The Macaron root path.
output_dir : str
The output dir.
remote_path : str
The remote path for the target repo.
current_date: str
The current date.
namespace : str
The purl namespace element, a.k.a group id.
name : str
The purl name element, a.k.a artifact id.
"""
# <owner>/<repo_name>
self.repo_full_name = full_name
Expand Down Expand Up @@ -123,6 +131,9 @@ def __init__(
policy=None,
)

self.namespace = namespace
self.name = name

self.repository_table = RepositoryTable(**self.get_repository_data())

@property
Expand Down Expand Up @@ -208,6 +219,8 @@ def get_repository_data(self) -> dict:
"branch_name": self.branch_name,
"commit_sha": self.commit_sha,
"remote_path": self.remote_path,
"namespace": self.namespace,
"name": self.name,
}

def get_analysis_result_data(self) -> dict:
Expand Down
Loading