Skip to content

Commit

Permalink
try something
Browse files (browse the repository at this point in the history)
  • Loading branch information
mcovarr committed Jan 23, 2025
1 parent 078edeb commit 2eb184d
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions scripts/variantstore/scripts/import_gvs_ploidy.py
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@ def patched_determine_file_length(self) -> int:
DataFileReader.determine_file_length = patched_determine_file_length

fs = hl.current_backend().fs
- ploidy_table = defaultdict(dict)
+ avro_ploidy_table = defaultdict(dict)
print(f"avros is {', '.join(avros)}")
for file in avros:
print(f"reading avro file {file}")
@@ -43,22 +43,22 @@ def patched_determine_file_length(self) -> int:
print(f"record is {record}")
location, sample_name, ploidy = PloidyRecord(**record)
print(f"location is {location}, sample_name is {sample_name}, ploidy is {ploidy}")
- if sample_name in ploidy_table[location]:
+ if sample_name in avro_ploidy_table[location]:
raise ValueError(
f"duplicate key `{sample_name}` for location {location}"
)
- ploidy_table[location][sample_name] = ploidy
+ avro_ploidy_table[location][sample_name] = ploidy
print(f"read {records_read} records from avro file {file}")
- print(f"first ploidy table is {ploidy_table}")
+ print(f"avro ploidy table is {avro_ploidy_table}")

# undo our monkey patch
DataFileReader.determine_file_length = original_determine_file_length

hg38 = hl.get_reference("GRCh38")
xy_contigs = set(hg38.x_contigs + hg38.y_contigs)
ploidy_table = {
- contig: ploidy_table[key]
- for contig, key in zip(hg38.contigs, sorted(ploidy_table))
+ contig: avro_ploidy_table[key]
+ for contig, key in zip(hg38.contigs, sorted(avro_ploidy_table))
if contig in xy_contigs
}
print(f"second ploidy table is {ploidy_table}")
Expand Down

0 comments on commit 2eb184d

Please sign in to comment.