-
Notifications
You must be signed in to change notification settings - Fork 4
/
runme.sql
274 lines (243 loc) · 8.46 KB
/
runme.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
\echo Schema generation for the Texas Ethics Commission
\echo Copyright (C) 2024 Evan Carroll
\echo
\echo This program is free software: you can redistribute it and/or modify
\echo it under the terms of the GNU Affero General Public License as
\echo published by the Free Software Foundation, either version 3 of the
\echo License, or (at your option) any later version.
\echo
\echo This program is distributed in the hope that it will be useful,
\echo but WITHOUT ANY WARRANTY; without even the implied warranty of
\echo MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\echo GNU Affero General Public License for more details.
\echo
\echo You should have received a copy of the GNU Affero General Public License
\echo along with this program. If not, see <https://www.gnu.org/licenses/>.
\prompt agree_eula Do you agree to those terms? [Yes] or [No]
SET SEARCH_PATH = 'tec';
DROP SCHEMA IF EXISTS tec CASCADE;
CREATE SCHEMA tec;
COMMENT ON SCHEMA tec IS $$Texas Ethics Comission dataset$$;
\echo LOADING CODES GENERATED FROM PDF
\i sql/cf_new/1_filertype.sql
\i sql/cf_new/02_data/1_1_name_prefixes.sql
\i sql/cf_new/02_data/1_2_name_suffixes.sql
\i sql/cf_new/02_data/3_schedules.sql
\i sql/cf_new/02_data/4_countries.sql
\i sql/cf_new/02_data/5_offices.sql
\i sql/cf_new/02_data/6_reports.sql
\i sql/cf_new/02_data/7_forms.sql
\i sql/cf_new/02_data/8_counties.sql
\i sql/cf_new/02_data/9_totals.sql
\echo LOADING FORM 1295 [1295Certificates.csv]
\i sql/form1295.sql
\echo LOADING CAMPAIGN FINANCE REPORTS [TEC_CF_CSV.zip]
\i sql/gen/TEC_CF_CSV/12_c_FilerData.sql
\i sql/gen/TEC_CF_CSV/04_c_CoverSheet1Data.sql
\i sql/gen/TEC_CF_CSV/11_c_ExpendCategory.sql
\i sql/gen/TEC_CF_CSV/13_c_FinalData.sql
\i sql/gen/TEC_CF_CSV/14_c_LoanData.sql
\i sql/gen/TEC_CF_CSV/15_c_PledgeData.sql
\i sql/gen/TEC_CF_CSV/16_c_SpacData.sql
\i sql/gen/TEC_CF_CSV/17_c_TravelData.sql
\i sql/gen/TEC_CF_CSV/01_c_AssetData.sql
\i sql/gen/TEC_CF_CSV/02_c_CandidateData.sql
\i sql/gen/TEC_CF_CSV/03_c_ContributionData.sql
\i sql/gen/TEC_CF_CSV/05_c_CoverSheet2Data.sql
\i sql/gen/TEC_CF_CSV/06_c_CoverSheet3Data.sql
\i sql/gen/TEC_CF_CSV/07_c_CreditData.sql
\i sql/gen/TEC_CF_CSV/08_c_DebtData.sql
\i sql/gen/TEC_CF_CSV/09_c_ExpendData.sql
\i sql/gen/TEC_CF_CSV/10_c_ExpendRepayment.sql
\i sql/lobby.sql
UPDATE tec.c_contributiondata
SET contributornamesuffixcd = NULL
WHERE contributornamesuffixcd IN ( 'MR', 'MS', 'PA-C', 'DMIN' );
UPDATE tec.c_contributiondata
SET contributornameprefixcd = NULL
WHERE contributornameprefixcd IN ( 'AMB', 'AMBASSADOR', 'TITLE', 'ME' );
UPDATE tec.tec.c_expenddata
SET payeenamesuffixcd = NULL
WHERE payeenamesuffixcd IN ( 'MR', 'MS' );
UPDATE tec.tec.c_expenddata SET payeenameprefixcd = NULL WHERE payeenameprefixcd IN ( 'TITLE', 'ME', 'RADM', 'AMBASSADOR' );
UPDATE tec.c_coversheet1data SET filernamesuffixcd = NULL WHERE filernamesuffixcd = 'MR';
UPDATE tec.c_coversheet1data SET filernameprefixcd = NULL WHERE filernameprefixcd = 'TITLE';
UPDATE tec.c_coversheet1data SET treasnamesuffixcd = NULL WHERE treasnamesuffixcd = 'MR';
UPDATE tec.c_coversheet1data SET chairnamesuffixcd = NULL WHERE chairnamesuffixcd = 'MR';
UPDATE tec.c_coversheet1data SET treasnameprefixcd = 'MR' WHERE treasnameprefixcd = 'Mr.';
UPDATE tec.c_coversheet2data SET treasnameprefixcd = NULL WHERE treasnameprefixcd IN ( 'ME' );
UPDATE tec.c_loandata SET lendernamesuffixcd = NULL WHERE lendernamesuffixcd = 'MR';
UPDATE tec.c_pledgedata SET pledgernamesuffixcd = NULL WHERE pledgernamesuffixcd = 'MR';
UPDATE tec.c_pledgedata SET pledgernameprefixcd = NULL WHERE pledgernameprefixcd = 'AMBASSADOR';
UPDATE tec.c_traveldata SET travellernameprefixcd = NULL WHERE travellernameprefixcd = 'AMBASSADOR';
UPDATE tec.c_candidatedata SET candidatenamesuffixcd = NULL WHERE candidatenamesuffixcd IN ( 'MR', 'MS' );
UPDATE tec.c_candidatedata SET candidatenameprefixcd = NULL WHERE candidatenameprefixcd = 'ME';
;
UPDATE c_coversheet1data
SET formtypecd = NULL
WHERE formtypecd = 'UNK';
-- data integrity problems
UPDATE c_CandidateData
SET CandidateHoldOfficeCd = NULL
WHERE CandidateHoldOfficeCd = 'T';
UPDATE c_CandidateData
SET CandidateSeekOfficeCd = NULL
WHERE CandidateSeekOfficeCd = 'T';
UPDATE c_CoverSheet1Data
SET filerholdofficecd = NULL
WHERE filerholdofficecd = 'T';
UPDATE c_traveldata
SET schedformtypecd = NULL
WHERE schedformtypecd = 'T';
UPDATE c_filerdata
SET contestseekofficecd = trim(contestseekofficecd)
WHERE contestseekofficecd like ' %'
OR contestseekofficecd like '% ';
UPDATE c_CandidateData
SET CandidateHoldOfficeCd = 'COMPTROLLER'
WHERE CandidateHoldOfficeCd = 'COMPTROLLR';
UPDATE c_CandidateData
SET CandidateSeekOfficeCd = 'COMPTROLLER'
WHERE CandidateSeekOfficeCd = 'COMPTROLLR';
UPDATE c_CoverSheet1Data
SET filerholdofficecd = 'COMPTROLLER'
WHERE filerholdofficecd = 'COMPTROLLR';
UPDATE c_CoverSheet1Data
SET filerseekofficecd = 'COMPTROLLER'
WHERE filerseekofficecd = 'COMPTROLLR';
UPDATE c_CoverSheet1Data
SET filerholdofficecd = 'CRIMINAL_DISTATTY'
WHERE filerholdofficecd = 'CRIMAL_DISTATTY';
UPDATE c_CoverSheet1Data
SET filerseekofficecd = 'CRIMINAL_DISTATTY'
WHERE filerseekofficecd = 'CRIMAL_DISTATTY';
UPDATE c_coversheet3data
SET activityseekofficecd = NULL
WHERE activityseekofficecd = 'T';
DO $$
DECLARE
_sql text;
rows_affected INT;
BEGIN
--SET CONSTRAINTS ALL DEFERRED;
-- officecd supports other, these do not.
FOR _sql IN SELECT FORMAT(
$sql$ UPDATE %I.%I.%I SET %I = NULL WHERE %I IN ( 'UNKNOWN', '', 'OTHER' ); $sql$,
table_catalog,
table_schema,
table_name,
column_name,
column_name
)
FROM information_schema.columns
WHERE table_schema = 'tec'
AND column_name LIKE ANY(ARRAY['%countycd', '%expendcatcd'])
ORDER BY table_catalog, table_schema, table_name
LOOP
RAISE NOTICE '%', _sql;
EXECUTE _sql;
GET DIAGNOSTICS rows_affected = ROW_COUNT;
RAISE NOTICE ' Rows Affected: %', rows_affected;
COMMIT;
END LOOP;
END
$$ LANGUAGE plpgsql;
-- Having the two UPDATES modify the same row in the same transaction is problematic
DO $$
DECLARE
_sql text;
rows_affected INT;
BEGIN
--SET CONSTRAINTS ALL DEFERRED;
FOR _sql IN SELECT FORMAT(
$sql$ UPDATE %I.%I.%I SET %I = NULL WHERE %I IN ( 'UNKNOWN', '' ); $sql$,
table_catalog,
table_schema,
table_name,
column_name,
column_name
)
FROM information_schema.columns
WHERE table_schema = 'tec'
AND column_name LIKE ANY(ARRAY['%officecd'])
ORDER BY table_catalog, table_schema, table_name
LOOP
RAISE NOTICE '%', _sql;
EXECUTE _sql;
GET DIAGNOSTICS rows_affected = ROW_COUNT;
RAISE NOTICE ' Rows Affected: %', rows_affected;
COMMIT;
END LOOP;
END
$$ LANGUAGE plpgsql;
DO $$
DECLARE
_sql text;
rows_affected INT;
BEGIN
--SET CONSTRAINTS ALL DEFERRED;
FOR _sql IN SELECT FORMAT(
$sql$ UPDATE %I.%I.%I SET %I = NULL WHERE %I IN ( 'UNKNOWN' ); $sql$,
table_catalog,
table_schema,
table_name,
column_name,
column_name
)
FROM information_schema.columns
WHERE table_schema = 'tec'
AND column_name LIKE ANY(ARRAY['schedformtypecd'])
ORDER BY table_catalog, table_schema, table_name
LOOP
RAISE NOTICE '%', _sql;
EXECUTE _sql;
GET DIAGNOSTICS rows_affected = ROW_COUNT;
RAISE NOTICE ' Rows Affected: %', rows_affected;
COMMIT;
END LOOP;
END
$$ LANGUAGE plpgsql;
-- The CSV schema dumps as seperate columns
-- But the readme shows it as an array
-- So we convert back to array
BEGIN;
ALTER TABLE c_coversheet1data
ADD COLUMN reporttype text[];
UPDATE c_coversheet1data
SET reporttype = array_remove(ARRAY[
reportTypeCd1,reportTypeCd2,reportTypeCd3,reportTypeCd4,reportTypeCd5,
reportTypeCd6,reportTypeCd7,reportTypeCd8,reportTypeCd9,reportTypeCd10
],null);
ALTER TABLE c_coversheet1data
DROP COLUMN reportTypeCd1, DROP COLUMN reportTypeCd2, DROP COLUMN reportTypeCd3, DROP COLUMN reportTypeCd4, DROP COLUMN reportTypeCd5,
DROP COLUMN reportTypeCd6, DROP COLUMN reportTypeCd7, DROP COLUMN reportTypeCd8, DROP COLUMN reportTypeCd9, DROP COLUMN reportTypeCd10;
COMMIT;
--
-- Now let's try to validate all the constriants.
--
DO $$
DECLARE
_sql text;
rows_affected INT;
BEGIN
FOR _sql IN SELECT FORMAT(
'ALTER TABLE %I.%I.%I VALIDATE CONSTRAINT %I;',
current_database(),
nsp.nspname,
cls.relname,
con.conname
)
FROM pg_constraint AS con
JOIN pg_class AS cls
ON con.conrelid = cls.oid
JOIN pg_namespace AS nsp
ON cls.relnamespace = nsp.oid
WHERE convalidated IS FALSE
LOOP
EXECUTE _sql;
GET DIAGNOSTICS rows_affected = ROW_COUNT;
RAISE NOTICE '[%] %', rows_affected, _sql;
END LOOP;
END
$$ LANGUAGE plpgsql;
VACUUM FULL ANALYZE;