-
Notifications
You must be signed in to change notification settings - Fork 0
/
db_scheme.sql
215 lines (197 loc) · 5.49 KB
/
db_scheme.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
CREATE TABLE archiveorg_indices (
id SERIAL,
date timestamp without time zone,
actual_date timestamp without time zone,
url text,
final_url text,
status text,
error text,
pos integer
);
CREATE TABLE cdx_archive_headers (
id SERIAL,
cdx_responses_id integer,
url_date date,
start_url character varying(128),
end_url character varying(1024),
headers jsonb,
"timestamp" timestamp without time zone DEFAULT now(),
content_hash character varying(64) DEFAULT NULL::character varying,
status_code integer DEFAULT '-1'::integer
);
CREATE TABLE cdx_responses (
id SERIAL,
tranco_id integer,
domain character varying(128),
"timestamp" timestamp without time zone DEFAULT now(),
content_hash character varying(64) DEFAULT NULL::character varying,
status_code integer DEFAULT 200,
parsed boolean DEFAULT false,
error text
);
CREATE TABLE dataset (
rank integer,
url character varying(256)
);
CREATE TABLE duplicates (
arch character varying(10),
url character varying(64),
date timestamp without time zone,
max integer
);
CREATE TABLE dynamic_script_inclusions_2022 (
id integer,
year integer,
url text,
request_url text,
result jsonb,
status_code integer,
request_site text,
response_status_code integer,
response_headers jsonb,
trackers jsonb
);
CREATE TABLE dynamic_script_inclusions_2016 (
id integer,
year integer,
url text,
request_url text,
result jsonb,
status_code integer,
request_site text,
response_status_code integer,
response_headers jsonb,
trackers jsonb
);
CREATE TABLE historical_data (
id SERIAL,
tranco_id integer,
domain character varying(128),
start_url character varying(128),
end_url character varying(2048),
headers jsonb,
"timestamp" timestamp without time zone DEFAULT now(),
content_hash character varying(64) DEFAULT NULL::character varying,
status_code integer DEFAULT '-1'::integer,
duration integer
);
CREATE TABLE live_headers (
id SERIAL,
tranco_id integer,
domain character varying(64),
start_url character varying(64),
end_url character varying(1024),
headers jsonb,
"timestamp" timestamp without time zone DEFAULT now(),
content_hash character varying(64) DEFAULT NULL::character varying,
status_code integer DEFAULT '-1'::integer,
script_info jsonb,
security_headers jsonb,
trackers text[],
duration integer
);
CREATE TABLE random_dataset (
rank integer,
url character varying(256)
);
CREATE TABLE responses (
id SERIAL,
arch character varying(10),
date timestamp without time zone,
url character varying(64),
status integer,
headers jsonb,
final_url text,
error text,
runtime double precision,
content_hash character varying(64),
valid boolean DEFAULT false,
valid_html boolean DEFAULT false,
valid_headers boolean DEFAULT false,
actual_date timestamp without time zone,
script_info jsonb,
security_headers jsonb,
archived_url character varying(2048) DEFAULT NULL::character varying,
trackers text[],
length integer
);
CREATE TABLE responses_for_comparison (
id SERIAL,
arch character varying(10),
date timestamp without time zone,
url character varying(64),
status integer,
headers jsonb,
final_url text,
error text,
runtime double precision,
content_hash character varying(64),
valid boolean,
valid_html boolean,
valid_headers boolean,
actual_date timestamp without time zone,
script_info jsonb,
security_headers jsonb,
archived_url character varying(2048),
trackers text[],
length integer
);
CREATE TABLE responses_initial (
id SERIAL,
arch character varying(256),
date timestamp without time zone,
url character varying(64),
status integer,
headers jsonb,
final_url text,
error text,
runtime double precision,
content_hash character varying(64) DEFAULT NULL::character varying,
valid boolean DEFAULT false,
valid_html boolean DEFAULT false,
valid_headers boolean DEFAULT false,
actual_date timestamp without time zone,
archived_url character varying(2048)
);
CREATE TABLE responses_neighbors (
id SERIAL,
arch character varying(10),
date timestamp without time zone,
url character varying(64),
status integer,
headers jsonb,
final_url text,
error text,
runtime double precision,
content_hash character varying(64),
valid boolean DEFAULT false,
valid_html boolean DEFAULT false,
valid_headers boolean DEFAULT false,
actual_date timestamp without time zone,
script_info jsonb,
security_headers jsonb,
archived_url character varying(2048) DEFAULT NULL::character varying,
trackers text[],
length integer,
pos integer,
final_date timestamp without time zone,
datasource jsonb,
interesting_null text[],
iframe_info jsonb,
trackers_iframe text[],
src_inclusion_info jsonb,
trackers_src_inclusion text[],
trackers_easyprivacy text[],
security_headers_ext jsonb
);
CREATE TABLE web_archive_headers (
id SERIAL,
tranco_id integer,
domain character varying(128),
start_url character varying(128),
end_url character varying(1024),
headers jsonb,
"timestamp" timestamp without time zone DEFAULT now(),
content_hash character varying(64) DEFAULT NULL::character varying,
status_code integer DEFAULT '-1'::integer
);