This repository has been archived by the owner on Aug 6, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparse
executable file
·103 lines (93 loc) · 3.33 KB
/
parse
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env ruby
require 'json'
# Event types that parse_conversation ignores: an event whose 'event_type'
# appears in this list is never turned into an exported message.
SKIP_EVENTS = [
  'RENAME_CONVERSATION',
  'ADD_USER',
  'REMOVE_USER',
  'HANGOUT_EVENT',
  'START_HANGOUT',
  'END_HANGOUT',
  'GROUP_LINK_SHARING_MODIFICATION'
].freeze
# Builds a lookup of every participant found in any conversation of @data.
#
# Reads @data['conversations'] and, for each entry, the list stored under
# ['conversation']['conversation']['participant_data'] (entries without
# that list are ignored).
#
# A participant may appear many times. The first name seen wins, except that
# the 'no name' placeholder is replaced as soon as a real fallback_name shows
# up. A later entry with a missing name never erases what is already stored.
#
# @return [Hash] chat_id => display name ('no name' when none was ever given)
def all_chat_users
  placeholder = 'no name'

  participants = @data['conversations'].flat_map do |entry|
    entry['conversation']['conversation']['participant_data']
  end.compact

  participants.each_with_object({}) do |participant, names|
    id = participant['id']['chat_id']
    name = participant['fallback_name']

    if !names.key?(id)
      names[id] = name || placeholder
    elsif names[id] == placeholder && name
      # Upgrade the placeholder only when an actual name is available.
      names[id] = name
    end
  end
end
# Converts the events of one conversation into a flat list of messages.
#
# Events whose 'event_type' is listed in SKIP_EVENTS are ignored, and so is
# any other event that carries no 'chat_message' (previously such an event
# raised NoMethodError on nil).
#
# Content is taken from the message's 'segment' list when present, otherwise
# from its 'attachment' list; when neither exists the content is nil.
# NOTE(review): when both are present only the segments are exported.
#
# @param conversation [Hash] must contain an 'events' array
# @return [Array<Hash>] one hash per message with the keys :timestamp
#   (Integer), :friendly_timestamp (String, local time), :user (name looked
#   up in @users, nil if unknown) and :content (String, Array or nil)
def parse_conversation(conversation)
  conversation['events'].each_with_object([]) do |event, messages|
    next if SKIP_EVENTS.include?(event['event_type'])

    chat_message = event['chat_message']
    next if chat_message.nil?

    message_content = chat_message['message_content'] || {}
    content = if message_content['segment']
                combine_message(message_content['segment'])
              elsif message_content['attachment']
                parse_attachments(message_content['attachment'])
              end

    # The raw timestamp is divided by 1_000_000 before being handed to
    # Time.at, i.e. it is treated as microseconds since the epoch.
    micros = event['timestamp'].to_i
    messages.push(
      timestamp: micros,
      friendly_timestamp: Time.at(micros / 1_000_000).strftime('%H:%M %D'),
      user: @users[event['sender_id']['chat_id']],
      content: content
    )
  end
end
# Flattens the segments of one message into a single string.
#
# Segments of type TEXT or LINK contribute their 'text' value; every other
# segment type contributes a single space. Missing (nil) texts are dropped
# and the remaining pieces are joined with one space between them.
#
# @param segments [Array<Hash>] segment hashes with 'type' and 'text' keys
# @return [String]
def combine_message(segments)
  textual_types = %w[TEXT LINK]
  pieces = []

  segments.each do |seg|
    if textual_types.include?(seg['type'])
      pieces << seg['text']
    else
      pieces << ' '
    end
  end

  pieces.flatten.compact.join(' ')
end
# Reduces each attachment to a small { type:, url: } hash.
#
# The first entry of embed_item['type'] selects the handling:
# * PLUS_PHOTO with media_type PHOTO -> type 'image', the photo url
# * PLUS_PHOTO with media_type VIDEO -> type 'video', the thumbnail url
# * PLUS_PHOTO with any other media_type -> type 'unknown', the photo url
#   (previously these were dropped from the result without a trace)
# * PLUS_AUDIO_V2 -> type 'audio', the embed url
# * anything else, including a missing embed_item -> type 'unknown', nil url
#
# Every input attachment yields exactly one output entry, in input order.
#
# @param attachments [Array<Hash>]
# @return [Array<Hash>] hashes with the keys :type and :url
def parse_attachments(attachments)
  attachments.map do |att|
    item = att['embed_item'] || {}

    case Array(item['type']).first
    when 'PLUS_PHOTO'
      photo = item['plus_photo'] || {}
      case photo['media_type']
      when 'PHOTO'
        { type: 'image', url: photo['url'] }
      when 'VIDEO'
        # this isn't curl-able as it requires JS to redirect to the real video
        { type: 'video', url: photo.dig('thumbnail', 'url') }
      else
        { type: 'unknown', url: photo['url'] }
      end
    when 'PLUS_AUDIO_V2'
      { type: 'audio', url: item.dig('plus_audio_v2', 'embed_url') }
    else
      { type: 'unknown', url: nil }
    end
  end
end
# Script driver: loads the JSON file named by the first command-line
# argument and writes one pretty-printed JSON file per conversation into
# the ./exports directory (created if missing).
#
# Each output file is named after the conversation id and contains the
# participant names plus the parsed messages sorted by timestamp.
# Conversations without an 'events' entry are skipped. Progress is printed
# to stdout.
#
# Sets @data (the parsed input) and @users (chat_id => name lookup), which
# the other functions in this file read.
#
# Exits with a usage message when no input path is given.
def main
  input_path = ARGV[0]
  abort "usage: #{$PROGRAM_NAME} <export.json>" if input_path.nil?

  Dir.mkdir('exports') unless File.directory?('exports')
  puts "loading #{input_path} file..."
  @data = JSON.parse(File.read(input_path))
  total = @data['conversations'].length
  puts "found #{total} conversations"
  @users = all_chat_users

  current_index = 0
  @data['conversations'].each do |conversation|
    next if conversation['events'].nil?

    # Array() tolerates a conversation without participant_data, just as
    # all_chat_users does.
    participants = Array(conversation['conversation']['conversation']['participant_data'])
    chat_ids = participants.map { |p| p['id']['chat_id'] }.compact
    users = chat_ids.map { |id| @users[id] }.compact

    puts "(#{current_index + 1}/#{total}) parsing conversation with #{users.join(', ')} (#{conversation['events'].length} messages)"
    sorted = parse_conversation(conversation).sort_by { |m| m[:timestamp] }
    output = { users: users, conversation: sorted }
    File.write("exports/#{conversation['conversation']['conversation_id']['id']}.json", JSON.pretty_generate(output))
    current_index += 1
  end
end
# Script entry point: run the export as soon as the file is executed.
main