-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add graph of compile time vs schema size (#128)
- Loading branch information
1 parent
fd2e5aa
commit 35285bb
Showing
6 changed files
with
154 additions
and
70 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -128,6 +128,16 @@ jobs: | |
python-version: '3.12.5' | ||
- name: Install uv | ||
run: pipx install uv | ||
- name: Get jsonschema-strip binary | ||
run: | | ||
git clone --depth 1 --branch main https://github.com/sourcemeta-research/jsonschema-strip | ||
cmake -S jsonschema-strip -B ./jsonschema-strip/build -DCMAKE_BUILD_TYPE:STRING=Release | ||
cmake --build ./jsonschema-strip/build --config Release --target strip | ||
mv ./jsonschema-strip/build/strip ./jsonschema-strip/build/jsonschema-strip | ||
echo "$(pwd)/jsonschema-strip/build" >> $GITHUB_PATH | ||
- uses: actions/setup-go@v5 | ||
- name: Install gron | ||
run: go install github.com/tomnomnom/gron@latest | ||
|
||
- name: Process CSV | ||
run: uv run python .github/csv_min.py > dist/report-min.csv | ||
|
@@ -147,17 +157,29 @@ jobs: | |
run: echo "$MARKDOWN_TABLE" >> $GITHUB_STEP_SUMMARY | ||
|
||
- name: Generate plots | ||
run: mkdir -p dist/results/plots && uv run python plot.py | ||
run: | | ||
./dataset_summary.sh csv >> dist/summary.csv | ||
mkdir -p dist/results/plots && uv run python plot.py && uv run python plot_compile.py | ||
- name: Upload plots | ||
id: imgur | ||
uses: devicons/[email protected] | ||
with: | ||
path: dist/results/plots/*.png | ||
client_id: ${{secrets.IMGUR_CLIENT_ID}} | ||
- name: Upload compile plot | ||
id: imgur_compile | ||
uses: devicons/[email protected] | ||
with: | ||
path: dist/results/compile.png | ||
client_id: ${{secrets.IMGUR_CLIENT_ID}} | ||
- name: Add plots to summary | ||
env: | ||
IMG_URLS: ${{ steps.imgur.outputs.imgur_urls }} | ||
run: python .github/plot_markdown.py >> $GITHUB_STEP_SUMMARY | ||
run: python .github/plot_markdown.py dist/results/plots >> $GITHUB_STEP_SUMMARY | ||
- name: Add plots to summary | ||
env: | ||
IMG_URLS: ${{ steps.imgur_compile.outputs.imgur_urls }} | ||
run: python .github/plot_markdown.py dist/results >> $GITHUB_STEP_SUMMARY | ||
- uses: actions/upload-artifact@v4 | ||
if: always() | ||
with: | ||
|
@@ -177,12 +199,8 @@ jobs: | |
cmake --build ./jsonschema-strip/build --config Release --target strip | ||
mv ./jsonschema-strip/build/strip ./jsonschema-strip/build/jsonschema-strip | ||
echo "$(pwd)/jsonschema-strip/build" >> $GITHUB_PATH | ||
- name: Install moreutils | ||
run: sudo apt-get install moreutils | ||
- name: Strip schemas | ||
run: | | ||
for f in schemas/*/schema.json; do | ||
./jsonschema-strip/build/jsonschema-strip "$f" | sponge "$f" | ||
done | ||
- uses: actions/setup-go@v5 | ||
- name: Install gron | ||
run: go install github.com/tomnomnom/gron@latest | ||
- name: Create summary | ||
run: ./.github/dataset_summary_table.sh >> $GITHUB_STEP_SUMMARY | ||
run: ./dataset_summary.sh md >> $GITHUB_STEP_SUMMARY |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#!/bin/bash
#
# Print a per-dataset summary of everything under schemas/.
#
# Usage: dataset_summary.sh csv|md
#
#   csv  one header line (name,docs,size_kb,avg_doc_size) followed by one
#        row per dataset
#   md   a Markdown table followed by a collapsible LaTeX version of the
#        same table
#
# For each schemas/<name>/ directory the script reports the number of lines
# in instances.jsonl, the size in KB of the jsonschema-strip output for
# schema-noformat.json, and the average size in bytes of one instance line
# (newline included).
#
# Requires make, bc, awk and jsonschema-strip on PATH.

# Usage errors go to stderr with a non-zero status so that callers which
# redirect stdout (e.g. ">> summary.csv") neither capture the message nor
# mistake the failure for success.
if [ $# -ne 1 ] || { [ "$1" != "csv" ] && [ "$1" != "md" ]; }; then
  echo "Usage: $0 csv|md" >&2
  exit 1
fi

FORMAT=$1

# Start every accumulator empty so nothing leaks in from the environment.
CSV_ROWS=""
MARKDOWN_ROWS=""
LATEX_ROWS=""

# Build one table row per dataset directory. A glob (rather than parsing
# `ls`) keeps names with whitespace intact and skips stray regular files.
for dir in schemas/*/; do
  [ -d "$dir" ] || continue  # unmatched glob stays literal; nothing to do

  schema=${dir%/}
  schema=${schema#schemas/}
  instances="schemas/$schema/instances.jsonl"
  stripped_schema="schemas/$schema/schema-noformat.json"

  make "$stripped_schema" > /dev/null \
    || echo "warning: could not build $stripped_schema" >&2

  docs=$(wc -l < "$instances")
  size=$(jsonschema-strip "$stripped_schema" 2> /dev/null | wc -c)
  size_kb=$(bc <<<"scale=1; $size / 1024")

  # Average bytes per line, newline included. LC_ALL=C makes length() count
  # bytes; doing this in awk also avoids `read` without -r, which drops
  # backslashes from the JSON and under-counts.
  avg_doc_size=$(LC_ALL=C awk '{ sum += length($0) + 1; n++ } END { if (n > 0) print sum / n; }' "$instances")
  avg_doc_size=${avg_doc_size:-0}  # empty instances file

  # printf -v keeps the trailing newline (command substitution would strip
  # it), so rows can be emitted verbatim later without `echo -e`.
  if [ "$FORMAT" = "csv" ]; then
    printf -v row '%s,%d,%.1f,%.0f\n' "$schema" "$docs" "$size_kb" "$avg_doc_size"
    CSV_ROWS+=$row
  elif [ "$FORMAT" = "md" ]; then
    printf -v row ' %s & %d & %.1f & %.0f \\\\\n' "$schema" "$docs" "$size_kb" "$avg_doc_size"
    LATEX_ROWS+=$row
    printf -v row '| %s | %d | %.1f | %.0f |\n' "$schema" "$docs" "$size_kb" "$avg_doc_size"
    MARKDOWN_ROWS+=$row
  fi
done

if [ "$FORMAT" = "csv" ]; then
  cat << 'EOF'
name,docs,size_kb,avg_doc_size
EOF

  printf '%s' "$CSV_ROWS"
fi

if [ "$FORMAT" = "md" ]; then
  # Print the table header
  cat << 'EOF'
|Dataset name|# Docs|Schema Size (KB)|Avg. Doc. Size (B)|
|---|---|---|---|
EOF

  # The extra newline leaves a blank line between the table and <details>.
  printf '%s\n' "$MARKDOWN_ROWS"

  cat << 'EOF'
<details>
<summary>LaTeX table</summary>
EOF

  echo '```'

  # Quoted delimiter: the LaTeX below is emitted exactly as written.
  cat << 'EOF'
\begin{table}[h]
{\small
\centering
\begin{tabular}{l r r r}
\hline
Name & \# Docs & Schema Size (KB) & Avg. Doc. Size (B) \\
\hline
EOF

  printf '%s' "$LATEX_ROWS"

  # Print the table footer
  cat << 'EOF'
\end{tabular}
}
\caption{Dataset used for validator evaluation}\label{tab:datasets}
\end{table}
EOF

  echo '```'

  cat << 'EOF'
</details>
EOF
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import matplotlib.pyplot as plt | ||
import pandas as pd | ||
import seaborn as sns | ||
|
||
|
||
def main() -> None:
    """Plot compile time against schema size for the blaze implementation.

    Reads per-run measurements from ``dist/report.csv`` and per-dataset
    statistics from ``dist/summary.csv``, joins them on the dataset
    ``name``, and writes a log-log scatter plot to
    ``dist/results/compile.png`` and ``dist/results/compile.svg``.

    The joined data must provide ``size_kb`` and ``compile_ns`` columns.
    NOTE(review): presumably ``compile_ns`` comes from the report and
    ``size_kb`` from the summary -- confirm against the files' producers.
    """
    # Average out runtime across runs
    runtime = (
        pd.read_csv("dist/report.csv")
        .groupby(["implementation", "version", "name"])
        .mean()
        .astype("int")
        .reset_index()
    )
    # Only the blaze rows are plotted.
    runtime = runtime[runtime["implementation"] == "blaze"].set_index("name")

    data = pd.read_csv("dist/summary.csv").set_index("name")
    joined = runtime.join(data, on="name")

    fig, ax = plt.subplots()
    ax.set(xscale="log", yscale="log")
    ax.set_xlabel("Schema size (KB)")
    ax.set_ylabel("Compile time (ns)")
    # Draw on the axes configured above rather than on whichever axes
    # pyplot currently considers active.
    sns.scatterplot(data=joined, x="size_kb", y="compile_ns", ax=ax)

    for extension in ("png", "svg"):
        fig.savefig(
            f"dist/results/compile.{extension}", dpi=96, bbox_inches="tight"
        )


if __name__ == "__main__":
    main()