-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdataset_summary.sh
executable file
·80 lines (62 loc) · 1.75 KB
/
dataset_summary.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/bin/bash
#
# Prints a summary table of the datasets found under schemas/.
#
# Usage: <script> csv|md
#   csv - a CSV header line followed by one row per dataset
#   md  - a Markdown table followed by a LaTeX version of the same table
#
# A missing, extra, or unsupported argument prints the usage text on stderr
# and exits with status 1.

# Prints the usage text on stderr and terminates the script with status 1.
usage() {
  echo "Usage: $0 csv|md" >&2
  exit 1
}

if [ $# -ne 1 ]; then
  usage
fi

FORMAT=$1

# Reject unsupported formats up front; otherwise every dataset would be
# processed and then nothing would be printed.
case "$FORMAT" in
  csv | md) ;;
  *) usage ;;
esac
# Prints "<lines> <avg>" for the file $1: the number of lines and the mean
# line size in bytes, each line counted together with its newline.
# An empty file yields "0 0.000000".
jsonl_stats() {
  # LC_ALL=C makes length() count bytes instead of characters, and a single
  # awk pass keeps backslashes and surrounding blanks intact.
  LC_ALL=C awk '
    { bytes += length($0) + 1 }
    END { printf "%d %.6f\n", NR, (NR > 0 ? bytes / NR : 0) }
  ' "$1"
}

# Prints the byte count $1 expressed in KB, truncated (not rounded) to one
# decimal place.
bytes_to_kb() {
  local tenths=$(( ${1:-0} * 10 / 1024 ))
  printf '%d.%d\n' "$(( tenths / 10 ))" "$(( tenths % 10 ))"
}

# Row accumulators. Each row ends in a literal "\n" escape (and, for LaTeX,
# an escaped "\\") that is expanded later when the rows are printed with
# echo -e. Start empty so nothing leaks in from the environment.
CSV_ROWS=""
MARKDOWN_ROWS=""
LATEX_ROWS=""

# Collect one table row per directory under schemas/
for schema_dir in schemas/*/; do
  # Without a match the pattern stays literal; skip it.
  [ -d "$schema_dir" ] || continue
  schema=${schema_dir%/}
  schema=${schema##*/}

  # Ask make for the schema-noformat.json target; its stdout is discarded.
  make "schemas/$schema/schema-noformat.json" > /dev/null

  read -r docs avg_doc_size < <(jsonl_stats "schemas/$schema/instances.jsonl")

  # Schema size is the byte count of jsonschema-strip's output
  # (its stderr is discarded).
  size=$(jsonschema-strip "schemas/$schema/schema-noformat.json" 2> /dev/null | wc -c)
  size_kb=$(bytes_to_kb "$size")

  case "$FORMAT" in
    csv)
      printf -v row '%s,%d,%.1f,%.0f' \
        "$schema" "$docs" "$size_kb" "$avg_doc_size"
      CSV_ROWS+="${row}"'\n'
      ;;
    md)
      printf -v row ' %s & %d & %.1f & %.0f ' \
        "$schema" "$docs" "$size_kb" "$avg_doc_size"
      LATEX_ROWS+="${row}"'\\\\\n'
      printf -v row '| %s | %d | %.1f | %.0f |' \
        "$schema" "$docs" "$size_kb" "$avg_doc_size"
      MARKDOWN_ROWS+="${row}"'\n'
      ;;
  esac
done
# CSV mode: print the header line, then the collected rows. The rows carry
# literal "\n" escapes, which echo -e turns into newlines.
case "$FORMAT" in
  csv)
    printf '%s\n' 'name,docs,size_kb,avg_doc_size'
    echo -ne "$CSV_ROWS"
    ;;
esac
# Prints the Markdown table built from $MARKDOWN_ROWS, then a <details>
# section holding a fenced LaTeX table built from $LATEX_ROWS.
# Both variables hold rows with literal backslash escapes ("\n", "\\") that
# echo -e expands while printing.
print_markdown_report() {
  # Print the table header
  cat << 'EOF'
|Dataset name|# Docs|Schema Size (KB)|Avg. Doc. Size (B)|
|---|---|---|---|
EOF
  # Quoted so the rows are neither word-split nor glob-expanded. Without -n,
  # echo appends one more newline, leaving a blank line after the table.
  echo -e "$MARKDOWN_ROWS"

  cat << 'EOF'
<details>
<summary>LaTeX table</summary>
```
\begin{table}[h]
{\small
\centering
\begin{tabular}{l r r r}
\hline
Name & \# Docs & Schema Size (KB) & Avg. Doc. Size (B) \\
\hline
EOF
  echo -ne "$LATEX_ROWS"

  # Print the table footer
  cat << 'EOF'
\end{tabular}
}
\caption{Datasets used for validator evaluation}\label{tab:datasets}
\end{table}
```
</details>
EOF
}

if [ "$FORMAT" = "md" ]; then
  print_markdown_report
fi