Skip to content

Commit

Permalink
Made sure that the parquet data is actually exported
Browse files Browse the repository at this point in the history
  • Loading branch information
CommanderStorm committed Jul 29, 2024
1 parent fcd5dc3 commit e99bbfd
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
4 changes: 4 additions & 0 deletions data/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@ COPY *.py ./
COPY translations.yaml translations.yaml
RUN mkdir output \
&& python3 compile.py \
&& test -f "./output/status_data.json" \
&& test -f "./output/status_data.parquet" \
&& test -f "./output/search_data.json" \
&& test -f "./output/search_data.parquet" \
&& test -f "./output/api_data.json" \
&& test -f "./output/api_data.parquet" \
&& cp -r sources/img/* output
COPY output/openapi.yaml output/openapi.yaml

Expand Down
8 changes: 5 additions & 3 deletions data/processors/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,12 @@ def export_for_status() -> None:
"""Generate hashes for the contents of data"""
with open("output/api_data.json", encoding="utf-8") as file:
export_data = json.load(file)
export_data = [(d["id"], d["hash"]) for d in export_data]
export_json_data = [(d["id"], d["hash"]) for d in export_data]
with open("output/status_data.json", "w", encoding="utf-8") as file:
json.dump(export_data, file)
df = pl.read_json("output/status_data.json")
json.dump(export_json_data, file)

export_polars_data = [{'id': d["id"], 'hash': d["hash"]} for d in export_data]
df = pl.DataFrame(export_polars_data)
df.write_parquet("output/status_data.parquet", use_pyarrow=True, compression_level=22)


Expand Down

0 comments on commit e99bbfd

Please sign in to comment.