Update ruff requirement from <0.9 to <0.10 #335

Merged · 3 commits · Feb 6, 2025
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
@@ -45,11 +45,11 @@ jobs:
coverage report -m --skip-covered
- name: Generate coverage report
if: github.ref != 'refs/heads/main'
-uses: orgoro/coverage@v3.1
+uses: orgoro/coverage@v3.2
with:
coverageFile: coverage.xml
token: ${{ secrets.GITHUB_TOKEN }}
-thresholdAll: .9
+thresholdAll: 1
thresholdNew: 1
thresholdModified: 1

2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,7 +1,7 @@
default_install_hook_types: [pre-commit, pre-push]
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
-rev: v0.8.1 # if you update this, also update pyproject.toml
+rev: v0.9.4 # if you update this, also update pyproject.toml
hooks:
- name: Ruff formatting
id: ruff-format
2 changes: 1 addition & 1 deletion cumulus_library/actions/builder.py
@@ -450,7 +450,7 @@ def _query_error(
exit_message: str,
) -> None:
print(
"An error occured executing the following query in ",
"An error occurred executing the following query in ",
f"{query_and_filename[1]}:",
file=sys.stderr,
)
4 changes: 1 addition & 3 deletions cumulus_library/actions/uploader.py
@@ -64,9 +64,7 @@ def upload_data(
def upload_files(args: dict):
"""Wrapper to prep files & console output"""
if args["data_path"] is None:
-sys.exit(
-    "No data directory provided - please provide a path to your" "study export folder."
-)
+sys.exit("No data directory provided - please provide a path to your study export folder.")
file_paths = list(args["data_path"].glob("**/*.parquet"))
if args["target"]:
filtered_paths = []
2 changes: 1 addition & 1 deletion cumulus_library/apis/umls.py
@@ -175,7 +175,7 @@ def download_umls_files(
progress.update(
task,
description=(
f"Downloading {file_meta['fileName']}: " f"{chunks_read/1000} MB"
f"Downloading {file_meta['fileName']}: {chunks_read / 1000} MB"
),
)
if unzip:
4 changes: 1 addition & 3 deletions cumulus_library/builders/base_table_builder.py
@@ -80,9 +80,7 @@ def execute_queries(
with base_utils.query_console_output(config.verbose, query, progress, task):
cursor.execute(query)
except Exception as e: # pylint: disable=broad-exception-caught
-sys.exit(
-    "An error occured executing this query:\n----\n" f"{query}\n----\n" f"{e}"
-)
+sys.exit(f"An error occurred executing this query:\n----\n{query}\n----\n{e}")

self.post_execution(config, *args, **kwargs)

2 changes: 1 addition & 1 deletion cumulus_library/builders/counts.py
@@ -79,7 +79,7 @@ def get_count_query(
"""
if not table_name or not source_table or not table_cols:
raise errors.CountsBuilderError(
"count_query missing required arguments. " f"output table: {table_name}"
f"count_query missing required arguments. output table: {table_name}"
)
for key in kwargs:
if key not in [
6 changes: 2 additions & 4 deletions cumulus_library/builders/protected_table_builder.py
@@ -54,11 +54,9 @@ def prepare_queries(
else:
db_schema = config.schema
transactions = (
f"{manifest.get_study_prefix()}" f"__{enums.ProtectedTables.TRANSACTIONS.value}"
)
statistics = (
f"{manifest.get_study_prefix()}" f"__{enums.ProtectedTables.STATISTICS.value}"
f"{manifest.get_study_prefix()}__{enums.ProtectedTables.TRANSACTIONS.value}"
)
statistics = f"{manifest.get_study_prefix()}__{enums.ProtectedTables.STATISTICS.value}"
self.queries.append(
base_templates.get_ctas_empty_query(
db_schema,
9 changes: 5 additions & 4 deletions cumulus_library/builders/psm_builder.py
@@ -67,8 +67,9 @@ def __init__(self, toml_config_path: str, data_path: pathlib.Path, **kwargs):
except OSError:
sys.exit(f"PSM configuration not found at {self.toml_path}")
try:
+toml_dir = pathlib.Path(self.toml_path).parent
self.config = PsmConfig(
-classification_json=f"{pathlib.Path(self.toml_path).parent}/{toml_config['classification_json']}",
+classification_json=str(toml_dir.joinpath(toml_config["classification_json"])),
@mikix (Contributor) commented on lines -71 to +72, Feb 6, 2025:

Small behavior change here - before, if the toml specified an absolute path, we'd still prepend the toml dir path (which wouldn't find the resulting path, naturally). Now we support both absolute and relative filenames for this toml field. (I used this in my test, which is why I hit it)
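
As a minimal sketch of the behavior change described in this comment (the paths below are hypothetical; the joinpath semantics are standard pathlib):

import pathlib

toml_dir = pathlib.Path("/studies/my_study")  # hypothetical directory containing the PSM toml

# Relative value: resolved under the toml's directory, as before.
print(toml_dir.joinpath("dsm5_classifications.json"))
# -> /studies/my_study/dsm5_classifications.json

# Absolute value: joinpath() discards the base, so the path is used as-is.
# The old f"{toml_dir}/{value}" form would instead have produced the broken
# path /studies/my_study//tmp/classifications.json.
print(toml_dir.joinpath("/tmp/classifications.json"))
# -> /tmp/classifications.json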

pos_source_table=toml_config["pos_source_table"],
neg_source_table=toml_config["neg_source_table"],
target_table=toml_config["target_table"],
@@ -256,15 +257,15 @@ def psm_effect_size_plot(
def generate_psm_analysis(
self, cursor: databases.DatabaseCursor, schema: str, table_suffix: str
):
-stats_table = f"{self.config.target_table}_{table_suffix}"
"""Runs PSM statistics on generated tables"""
+stats_table = f"{self.config.target_table}_{table_suffix}"
cursor.execute(base_templates.get_alias_table_query(stats_table, self.config.target_table))
df = cursor.execute(
base_templates.get_select_all_query(self.config.target_table)
).as_pandas()
symptoms_dict = self._get_symptoms_dict(self.config.classification_json)
for dependent_variable, codes in symptoms_dict.items():
-df[dependent_variable] = df["code"].apply(lambda x, codes=codes: 1 if x in codes else 0)
+df[dependent_variable] = df["code"].apply(lambda x: 1 if x in codes else 0)
df = df.drop(columns="code")
# instance_count present but unused for PSM if table contains a count_ref input
# (it's intended for manual review)
@@ -336,7 +337,7 @@
)
except ValueError:
sys.exit(
"Encountered a value error during KNN matching. Try increasing " "your sample size."
"Encountered a value error during KNN matching. Try increasing your sample size."
)

def prepare_queries(self, config: base_utils.StudyConfig, *args, table_suffix: str, **kwargs):
2 changes: 1 addition & 1 deletion cumulus_library/cli.py
@@ -441,7 +441,7 @@ def main(cli_args=None):
sys.exit(1)

if args["action"] == "version":
print(f"cumulus-library version: {__version__}\n" "Installed studies:")
print(f"cumulus-library version: {__version__}\nInstalled studies:")
studies = get_study_dict(args.get("study_dir"))
for study in sorted(studies.keys()):
try:
2 changes: 1 addition & 1 deletion cumulus_library/databases/athena.py
@@ -142,7 +142,7 @@ def upload_file(
"AWS KMS encryption is expected for Cumulus buckets"
)
kms_arn = wg_conf.get("EncryptionConfiguration", {}).get("KmsKey", None)
-s3_key = f"{key_prefix}cumulus_user_uploads/{self.schema_name}/" f"{study}/{topic}"
+s3_key = f"{key_prefix}cumulus_user_uploads/{self.schema_name}/{study}/{topic}"
if not remote_filename:
remote_filename = file.name

2 changes: 1 addition & 1 deletion cumulus_library/databases/duckdb.py
@@ -191,7 +191,7 @@ def col_pyarrow_types_from_sql(self, columns: list[tuple]) -> list:
output.append((column[0], pyarrow.timestamp("s")))
case _:
raise errors.CumulusLibraryError(
f"{column[0],column[1]} does not have a conversion type"
f"{column[0], column[1]} does not have a conversion type"
)
return output

6 changes: 2 additions & 4 deletions cumulus_library/log_utils.py
@@ -109,12 +109,10 @@ def _log_table(
alter_query = ""
if isinstance(config.db, databases.AthenaDatabaseBackend):
alter_query = (
f"ALTER TABLE {db_schema}.{table_name} " "ADD COLUMNS(message string)"
f"ALTER TABLE {db_schema}.{table_name} ADD COLUMNS(message string)"
)
elif isinstance(config.db, databases.DuckDatabaseBackend):
-alter_query = (
-    f"ALTER TABLE {db_schema}.{table_name} " "ADD COLUMN message varchar"
-)
+alter_query = f"ALTER TABLE {db_schema}.{table_name} ADD COLUMN message varchar"
cursor.execute(alter_query)
cursor.execute(query)
else:
2 changes: 1 addition & 1 deletion cumulus_library/studies/discovery/code_detection.py
@@ -49,7 +49,7 @@ def prepare_queries(
for code_definition in code_definitions.code_list:
if not required_keys.issubset(code_definition):
raise KeyError(
"Expected table_name and column_hierarchy keys in " f"{code_definition!s}"
f"Expected table_name and column_hierarchy keys in {code_definition!s}"
)
code_source = {
"has_data": False,
3 changes: 1 addition & 2 deletions cumulus_library/template_sql/sql_utils.py
@@ -319,8 +319,7 @@ def is_field_populated(
source_field.append(element[0])
else:
raise ValueError(
"sql_utils.is_field_populated: Unexpected type "
f"{element[1]} for field {element[0]}"
f"sql_utils.is_field_populated: Unexpected type {element[1]} for field {element[0]}"
)
query = base_templates.get_is_table_not_empty_query(
source_table=source_table, field=".".join(source_field), unnests=unnests
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -29,7 +29,7 @@ dynamic=["version"]
[project.optional-dependencies]
dev = [
# if you update the ruff version, also update .pre-commit-config.yaml
"ruff < 0.9",
"ruff < 0.10",
"pre-commit",
]
test = [
5 changes: 2 additions & 3 deletions tests/conftest.py
@@ -47,7 +47,7 @@ def get_sorted_table_data(cursor, table):
if num_cols == 0:
return [], []
data = cursor.execute(
f"SELECT * FROM '{table}' ORDER BY " f"{','.join(map(str, range(1,num_cols+1)))}"
f"SELECT * FROM '{table}' ORDER BY {','.join(map(str, range(1, num_cols + 1)))}"
).fetchall()
return data, cursor.description

@@ -158,8 +158,7 @@ def update_nested_obj(id_path, obj, i):

def debug_table_schema(cursor, table):
table_schema = cursor.execute(
"select column_name, data_type from information_schema.columns "
f"where table_name='{table}'"
f"select column_name, data_type from information_schema.columns where table_name='{table}'"
).fetchall()
for line in table_schema:
print(line)
8 changes: 4 additions & 4 deletions tests/core/test_core_observation.py
@@ -185,7 +185,7 @@ def test_core_observation_component(tmp_path):
] == rows

df = con.sql(
"SELECT * FROM core__observation_component_dataabsentreason " "ORDER BY id, row, code"
"SELECT * FROM core__observation_component_dataabsentreason ORDER BY id, row, code"
).df()
rows = json.loads(df.to_json(orient="records"))
assert [
@@ -216,7 +216,7 @@ def test_core_observation_component(tmp_path):
] == rows

df = con.sql(
"SELECT * FROM core__observation_component_interpretation " "ORDER BY id, row, code"
"SELECT * FROM core__observation_component_interpretation ORDER BY id, row, code"
).df()
rows = json.loads(df.to_json(orient="records"))
assert [
@@ -263,7 +263,7 @@ def test_core_observation_component(tmp_path):
] == rows

df = con.sql(
"SELECT * FROM core__observation_component_valuecodeableconcept " "ORDER BY id, row, code"
"SELECT * FROM core__observation_component_valuecodeableconcept ORDER BY id, row, code"
).df()
rows = json.loads(df.to_json(orient="records"))
assert [
@@ -286,7 +286,7 @@
] == rows

df = con.sql(
"SELECT * FROM core__observation_component_valuequantity " "ORDER BY id, row, code"
"SELECT * FROM core__observation_component_valuequantity ORDER BY id, row, code"
).df()
rows = json.loads(df.to_json(orient="records"))
assert [
16 changes: 15 additions & 1 deletion tests/test_logging.py
@@ -3,6 +3,7 @@
from datetime import datetime
from unittest import mock

+import duckdb
import pyathena
import pytest
from freezegun import freeze_time
@@ -160,11 +161,24 @@ def test_migrate_transactions_athena(mock_pyathena):
status="debug",
message="message",
)
-expected = "ALTER TABLE test.study_valid__lib_transactions" " ADD COLUMNS(message string)"
+expected = "ALTER TABLE test.study_valid__lib_transactions ADD COLUMNS(message string)"
call_args = mock_pyathena.return_value.cursor.return_value.execute.call_args_list
assert expected == call_args[2][0][0]


+def test_statistics_failure_gets_raised(mock_db_config):
+    mock_db_config.db.cursor = mock.MagicMock()
+    mock_db_config.db.cursor.return_value.execute.side_effect = duckdb.OperationalError
+    with pytest.raises(duckdb.OperationalError):
+        log_utils.log_statistics(
+            config=mock_db_config,
+            manifest=study_manifest.StudyManifest(),
+            table_type="test1",
+            table_name="test2",
+            view_name="test3",
+        )


@freeze_time("2024-01-01")
@pytest.mark.parametrize(
"schema,study,table_type,table_name,view_type,expects,raises",
47 changes: 47 additions & 0 deletions tests/test_psm.py
@@ -132,3 +132,50 @@ def test_psm_error_handling(mock_psm, error, tmp_path, mock_db_stats_config):
drop_table=True,
table_suffix=safe_timestamp,
)


+def test_psm_missing_file(tmp_path):
+    with pytest.raises(SystemExit, match="PSM configuration not found"):
+        psm_builder.PsmBuilder(f"{tmp_path}/does-not-exist.toml", pathlib.Path(tmp_path))
+
+
+def test_psm_missing_keys(tmp_path):
+    toml_file = pathlib.Path(f"{tmp_path}/empty.toml")
+    toml_file.touch()
+    with pytest.raises(SystemExit, match="contains missing/invalid keys"):
+        psm_builder.PsmBuilder(str(toml_file), pathlib.Path(tmp_path))
+
+
+def test_psm_bad_include_cols(tmp_path, mock_db_stats_config):
+    """Provide too many include_cols"""
+    psm_root = f"{pathlib.Path(__file__).parent}/test_data/psm/"
+    with open(f"{tmp_path}/psm.toml", "w", encoding="utf8") as f:
+        f.write(f"""config_type = "psm"
+classification_json = "{psm_root}/dsm5_classifications.json"
+pos_source_table = "psm_test__psm_cohort"
+neg_source_table = "core__condition"
+target_table = "psm_test__psm_encounter_covariate"
+primary_ref = 'encounter_ref'
+dependent_variable = "example_diagnosis"
+pos_sample_size = 20
+neg_sample_size = 100
+[join_cols_by_table.core__encounter]
+join_id = "encounter_ref"
+included_cols = [
+    ["race_display", "race", "age_at_visit"], # too many columns
+]
+""")
+    builder = cli.StudyRunner(mock_db_stats_config, data_path=tmp_path)
+    manifest = study_manifest.StudyManifest(study_path=psm_root)
+    psmbuilder = psm_builder.PsmBuilder(f"{tmp_path}/psm.toml", tmp_path)
+    builder.config.db.cursor().execute(
+        "create table psm_test__psm_cohort as (select * from core__condition "
+        f"ORDER BY {psmbuilder.config.primary_ref} limit 100)"
+    )
+    with pytest.raises(SystemExit, match="unexpected SQL column definition"):
+        psmbuilder.execute_queries(
+            builder.config,
+            manifest,
+            drop_table=True,
+            table_suffix="test",
+        )
2 changes: 1 addition & 1 deletion tests/valueset/test_additional_rules_builder.py
@@ -181,7 +181,7 @@ def test_additional_rules(mock_api, mock_db_config_rxnorm, prefix):
]:
res = cursor.execute(
f"Select * from {table_conf['name']} order by "
f"{','.join([str(x+1) for x in range(table_conf['columns'])])}"
f"{','.join([str(x + 1) for x in range(table_conf['columns'])])}"
)
data = res.fetchall()
assert len(data) == table_conf["count"]
Expand Down