-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathjson-to-csv.rb
64 lines (58 loc) · 1.29 KB
/
json-to-csv.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
require "json"
require "csv"
require 'optparse'
require_relative "pingyam-rb/lib_pingyam.rb"
# Command-line configuration. The script works in :detail mode unless
# -s/--summaries is given, which switches it to :summary mode.
@options = { mode: :detail }

option_parser = OptionParser.new do |parser|
  parser.banner = "Usage: #{$0} [-s] [glob]"

  parser.on("-s", "--summaries", "[glob] represents cantodict summary pages scrapres, not detail page scrapes") do
    @options[:mode] = :summary
  end

  # Print the generated usage text and stop without doing any conversion.
  parser.on("-h", "--help", "Prints this help") do
    puts parser
    exit
  end
end

# With no explicit argument, parse! reads (and strips switches from) ARGV.
option_parser.parse!
# Header row of the generated CSV, in output order. Each name is also the
# string key used to look a value up in an entry hash when a row is built.
# Frozen so this shared list cannot be modified by accident at runtime.
Columns = %w[
  entry_type
  cantodict_id
  incomplete
  chinese
  definition
  notes
  jyutping
  yale
  pinyin
  radical
  radical_number
  stroke_count
  dialect
  similar
  variants
  pos
  flag
  addedby
  created
  modified
  views
  level
  compound_cantodictids
  sentence_cantodictids
  character_cantodictids
  definition_raw_html
  google_frequency
].freeze
# NOTE(review): Converter is presumably provided by pingyam-rb/lib_pingyam.rb;
# the meaning of the constructor argument 6 and of the 1 passed to
# convert_line below is defined there — confirm against that library.
romanizer = Converter.new(6)

# Write one CSV that combines the characters, compounds and sentences entries
# read from the per-type JSON files of the selected mode.
CSV.open("output/cantodict.csv", "wb") do |out|
  out << Columns

  %i[characters compounds sentences].each do |kind|
    File.open("./output/#{@options[:mode]}-#{kind}.json") do |json_file|
      records = JSON.load(json_file)

      # Rows are emitted in sorted-key order.
      records.keys.sort.each do |key|
        record = records[key]
        # Set "yale" from the entry's "jyutping" value, replacing any existing one.
        record["yale"] = romanizer.convert_line(record["jyutping"], 1)
        # One cell per column, in header order; missing keys yield nil.
        out << record.values_at(*Columns)
      end
    end
  end
end