diff --git a/doc/HOWTO_MASSAGE_MA_GUN_DATA.md b/doc/HOWTO_MASSAGE_MA_GUN_DATA.md new file mode 100644 index 0000000000000..b6bd51a3dc517 --- /dev/null +++ b/doc/HOWTO_MASSAGE_MA_GUN_DATA.md @@ -0,0 +1,52 @@ +These instructions are built around pretty standard linux command line tools, plus sqlite3 and moreutils. + +First, retrieve the .csv files from https://www.mass.gov/info-details/data-about-firearms-licensing-and-transactions +You want both 'Download: Firearms Dealer Transactions ' and 'Download: Personal Transfers and Registrations - ' + +Translate the date fields to ISO-8601 like so `sed "s_\([^/,]*\)/\([^/,]*\)/\([^/,]*\),\(.*\)_20\3-\1-\2,\4_g" 01.01.23-12.31.23.csv` + +Remove first line from all but the first sales csv like so +tail -n +2 01.01.09-12.31.21.iso.date.csv | sponge 01.01.09-12.31.21.iso.date.csv +tail -n +2 01.01.22-12.31.22.iso.date.csv | sponge 01.01.22-12.31.22.iso.date.csv +tail -n +2 01.01.23-12.31.23.iso.date.csv | sponge 01.01.23-12.31.23.iso.date.csv + +Create a database, add a table like so: + +Import each file into the database like so: +.mode csv +.import personal_transactions.iso.date.csv personal_transactions +.import 01.01.04-12.31.08.iso.date.csv sales +.import 01.01.09-12.31.21.iso.date.csv sales +.import 01.01.22-12.31.22.iso.date.csv sales +.import 01.01.23-12.31.23.iso.date.csv sales + +Filter and rename the personal transactions columns while copying into a new table like so: +CREATE TABLE gun_data AS SELECT "TRANSACTION DATE" as date, "BUYER STATE" as state, MAKE as make, MODEL as model, TYPE as type, "SERIAL NUMBER" as serial, "WEAPON SIZE" as ammo_type FROM personal_transactions; + +Filter and rename the sales columns while copying into the just created table like so: +INSERT INTO gun_data SELECT "DATE OF TRANSACTION" as date, "BUYER STATE" as state, MAKE as make, MODEL as model, "WEAPON TYPE" as type, "SERIAL NUMBER" as serial, "WEAPON SIZE" as ammo_type FROM sales; + +Drop the intermediate tables. 
+DROP TABLE sales; +DROP TABLE personal_transactions; +VACUUM; +Possibly save and reload the database here? + +Run "script" to deduplicate the "not available" values for serial number. +.read fix_serial_numbers.sql +Run "script" to remove empty lines +.read remove_empty_entries.sql + +Deduplicate rows with matching serial fields, with latest date winning. +CREATE INDEX gun_data_idx_deduplication_query ON gun_data(serial, date); +DELETE FROM gun_data WHERE rowid IN (SELECT q1.rowid from gun_data q1 JOIN gun_data q2 on q1.serial = q2.serial WHERE q1.serial != 'NONE' AND q1.date < q2.date); +DROP INDEX IF EXISTS gun_data_idx_deduplication_query; + +Remove rows with a state field not equal to MA +DELETE FROM gun_data WHERE state != 'MA' AND state != ''; + +Now you're ready to start fixing data entry errors. +From this point on we don't use the date and state fields. + +Basically we have a big list of incorrectly spelled values for particular fields along with the correct spelling. +Then we have a script that will spit out SQL statements to make these substitutions happen. 
diff --git a/doc/firearm_stats/fix_serial_numbers.sql b/doc/firearm_stats/fix_serial_numbers.sql
new file mode 100644
index 0000000000000..94b07fcc75db3
--- /dev/null
+++ b/doc/firearm_stats/fix_serial_numbers.sql
@@ -0,0 +1,30 @@
+-- Collapse the assorted "no serial number" placeholders into the single
+-- sentinel value 'NONE'.
+-- String literals are single-quoted: double-quoted strings are a legacy SQLite
+-- quirk that newer sqlite3 shells disable by default.
+-- COLLATE NOCASE keeps the ASCII case-insensitive matching of the
+-- wildcard-free LIKE comparisons this statement replaces.
+UPDATE gun_data
+SET serial = 'NONE'
+WHERE serial COLLATE NOCASE IN (
+    'NSN',
+    'N/A',
+    'NA',
+    '0',
+    '',
+    '1',
+    '2',
+    'NO SERIAL NUMBER',
+    'UNKNOWN',
+    'NO SERIAL',
+    'NOT AVAILABLE',
+    'NO NUMBER',
+    'NONE FOUND',
+    'NONE VISIBLE',
+    '-',
+    'N.S.N',
+    'UNKNOW',
+    'NOT AVAIL',
+    'NVS',
+    'NO SERIAL #'
+);
diff --git a/doc/firearm_stats/make_renames.in b/doc/firearm_stats/make_renames.in
new file mode 100644
index 0000000000000..41e9e03e1d707
--- /dev/null
+++ b/doc/firearm_stats/make_renames.in
@@ -0,0 +1,85 @@
+"ZASTAVA" "%ZASTAVA%"
+"YUGOSLAVIAN IMPORT" "%YUGO%"
+"YANKEE HILL MACHINE" "%YANKEE%HILL%"
+"YANKEE HILL MACHINE" "YHM"
+"WINDHAM WEAPONRY" "%WINDHAM%"
+"JAPANESE IMPORT" "%JAPANESE%"
+"JAPANESE IMPORT" "%ARISAKA%"
+"WINCHESTER" "%WINC%"
+"WILSON COMBAT" "%WILSON C%"
+"WEATHERBY" "%WEATHER%"
+"WALTHER" "%WALT%ER%"
+"SPRINGFIELD ARMORY" "%SPRINGFIELD%"
+"SPIKE'S TACTICAL" "%SPIKE%"
+"UNIVERSAL" "%UNIVERSAL%"
+"UBERTI" "%UBERTI"
+"TULA" "%TULA%"
+"TROY INDUSTRIES" "%TROY%"
+"TRISTAR" "%TRISTAR%"
+"TIKKA" "%TIKKA%"
+"THOMPSON CENTER" "%THOMPSON%"
+"THOMPSON CENTER" "%AUTO%ORDINANCE%"
+"TAURUS" "%TAURUS%"
+"TANFOGLIO" "%TANFOGLIO%"
+"SMITH & WESSON" "SW"
+"SMITH & WESSON" "S&W"
+"SMITH & WESSON" "SMITH%"
+"SMITH & WESSON" "%SMITH%WESSON%"
+"STOEGER" "%STOEGER%"
+"STACCATO" "%STI %" AND make not like "%FAUSTI%"
+"STACCATO" "%STACCATO%"
+"STEYR" "%STEYR"
+"STEVENS" "%STEVENS%"
+"STERLING ARMS" "STERLING"
+"STERLING ARMS" "%STERLING %"
+"STERLINGWORTH" "%STERLINGWORTH%"
+"STAG ARMS" "%STAG%"
+"SONS OF LIBERTY" "%SONS OF LIBERTY%"
+"SIG SAUER" "%SIG %"
+"SIG SAUER" "%SAUER%"
+"SHADOW SYSTEMS" "%SHADOW SYSTEM%"
+"SEECAMP" "%SEECAMP%"
+"SEARS" "%SEARS%"
+"SAVAGE ARMS" "%SAVAGE%"
+"RUGER" "%RUGER%"
+"ROSSI" "%ROSSI%"
+"ROMANIAN IMPORT" "%ROMANIAN%"
+"REMINGTON" "%REMINGTON%"
+"PARA" "%PARA%"
+"NORINCO" "%NORINCO%"
+"MOSSBERG" "%MOSSBERG%"
+"MOSIN NAGANT" "%MOSIN%"
+"MAUSER" "%MAUSER%"
+"MARLIN" "%MARLIN%"
+"MAGNUM RESEARCH" "%MAGNUM R%"
+"KIMBER" "%KIMBER%"
+"KELTEC" "%KEL%TEC%"
+"KAHR" "%KAHR%"
+"JC ARMS" "%JC%ARMS%"
+"IWI" "%IWI%"
+"IVER JOHNSON" "%IVER%J%"
+"ITHACA" "%ITHACA%"
+"HECKLER & KOCH" "HK%"
+"HECKLER & KOCH" "%KOCH%"
+"HECKLER & KOCH" "H_K"
+"HIGH STANDARD" "HIGH%ST%"
+"HENRY REPEATING ARMS" "%HENRY%"
+"HARRINGTON & RICHARDSON" "%HARRINGTON%"
+"HARRINGTON & RICHARDSON" "%RICHARDSON%"
+"HARRINGTON & RICHARDSON" "%H&R%"
+"GLOCK" "%GLOCK%"
+"LEFEVER" "LEFNER"
+"FN" "%FN%"
+"DPMS" "%DPMS%"
+"DANIEL DEFENCE" "%DANIEL D%"
+"DAN WESSON" "%DAN%WESSON%"
+"CZ" "CZ%USA"
+"COLT" "%COLT%"
+"CHARTER ARMS" "%CHARTER%"
+"CENTURY ARMS" "%CENTURY%"
+"CANIK" "%CANIK%"
+"BROWNING" "%BROWNING%"
+"BERETTA" "%BERETTA%"
+"BENELLI" "%BENELLI%"
+"ANDERSON" "%ANDERSON%"
+"AERO PRECISION" "%AERO P%"
diff --git 
a/doc/firearm_stats/make_updates.sql b/doc/firearm_stats/make_updates.sql
new file mode 100644
index 0000000000000..28148890e4b35
--- /dev/null
+++ b/doc/firearm_stats/make_updates.sql
@@ -0,0 +1,74 @@
+-- Manufacturer clean-up: each statement writes a canonical name into OG_MAKE
+-- for rows whose MAKE matches a known variant or misspelling.
+-- Order matters: when several predicates match the same row, the last
+-- matching statement decides the final OG_MAKE.
+-- NOTE(review): the gun_data table built in HOWTO_MASSAGE_MA_GUN_DATA.md has
+-- no OG_MAKE column; confirm where it is added before running this script.
+UPDATE GUN_DATA SET OG_MAKE = 'HECKLER & KOCH' WHERE MAKE = 'HK';
+UPDATE GUN_DATA SET OG_MAKE = 'HECKLER & KOCH' WHERE MAKE = 'H&K';
+UPDATE GUN_DATA SET OG_MAKE = 'SPRINGFIELD ARMORY' WHERE MAKE = 'SPRINGFIELD';
+UPDATE GUN_DATA SET OG_MAKE = 'AERO PRECISION' WHERE MAKE = 'AERO';
+UPDATE GUN_DATA SET OG_MAKE = 'SMITH & WESSON' WHERE MAKE = 'SW';
+UPDATE GUN_DATA SET OG_MAKE = 'HARRINGTON & RICHARDSON' WHERE MAKE = 'H&R';
+UPDATE GUN_DATA SET OG_MAKE = 'HENRY REPEATING ARMS' WHERE MAKE = 'HENRY';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE = 'SAVAGE / ANSCHUTZ';
+UPDATE GUN_DATA SET OG_MAKE = 'STAG ARMS' WHERE MAKE = 'STAG';
+UPDATE GUN_DATA SET OG_MAKE = 'CZ' WHERE MAKE = 'CZ-USA';
+UPDATE GUN_DATA SET OG_MAKE = 'ANDERSON MANUFACTURING' WHERE MAKE = 'ANDERSON';
+UPDATE GUN_DATA SET OG_MAKE = 'ANDERSON MANUFACTURING' WHERE MAKE = 'ANDERSON MFG';
+UPDATE GUN_DATA SET OG_MAKE = 'ROCK ISLAND ARMORY' WHERE MAKE = 'ROCK ISLAND';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE = 'SAVAGE ARMS';
+UPDATE GUN_DATA SET OG_MAKE = 'FN' WHERE MAKE = 'FN AMERICA';
+UPDATE GUN_DATA SET OG_MAKE = 'WINDHAM WEAPONRY' WHERE MAKE = 'WINDHAM';
+UPDATE GUN_DATA SET OG_MAKE = 'WINDHAM WEAPONRY' WHERE MAKE = 'WINDHAM ARMS';
+UPDATE GUN_DATA SET OG_MAKE = 'WINDHAM WEAPONRY' WHERE MAKE = 'WINDHAM ARMAMENT';
+UPDATE GUN_DATA SET OG_MAKE = 'WINDHAM WEAPONRY' WHERE MAKE = 'WINDHAM WEAPONRY -USA';
+UPDATE GUN_DATA SET OG_MAKE = 'WINDHAM WEAPONRY' WHERE MAKE = 'WINDHAM WEAPONRY (LE SALE';
+UPDATE GUN_DATA SET OG_MAKE = 'WINDHAM WEAPONRY' WHERE MAKE = 'WINDHAM ARMS WEAPONRY';
+UPDATE GUN_DATA SET OG_MAKE = 'WINDHAM WEAPONRY' WHERE MAKE = 'WINDHAM ARMORY';
+UPDATE GUN_DATA SET OG_MAKE = 'SMITH & WESSON' WHERE MAKE LIKE '%SMITH&%';
+UPDATE GUN_DATA SET OG_MAKE = 'SMITH & WESSON' WHERE MAKE LIKE '%SMITH &%';
+UPDATE GUN_DATA SET OG_MAKE = 'SMITH & WESSON' WHERE MAKE LIKE '%SMITH W%';
+UPDATE GUN_DATA SET OG_MAKE = 'SMITH & WESSON' WHERE MAKE LIKE '%SMITH+%';
+UPDATE GUN_DATA SET OG_MAKE = 'SMITH & WESSON' WHERE MAKE LIKE '%SMITHA%';
+UPDATE GUN_DATA SET OG_MAKE = 'RUGER' WHERE MAKE LIKE '%RUGER%';
+UPDATE GUN_DATA SET OG_MAKE = 'SIG SAUER' WHERE MAKE LIKE '%SIG SAUER%';
+UPDATE GUN_DATA SET OG_MAKE = 'SIG SAUER' WHERE MAKE LIKE 'SIG ARMS';
+UPDATE GUN_DATA SET OG_MAKE = 'SIG SAUER' WHERE MAKE LIKE 'SIG';
+UPDATE GUN_DATA SET OG_MAKE = 'SIG SAUER' WHERE MAKE LIKE 'SIG SWISS';
+UPDATE GUN_DATA SET OG_MAKE = 'SIG SAUER' WHERE MAKE LIKE 'SAUER / SIG ARMS';
+UPDATE GUN_DATA SET OG_MAKE = 'GLOCK' WHERE MAKE LIKE '%GLOCK%';
+UPDATE GUN_DATA SET OG_MAKE = 'REMINGTON' WHERE MAKE LIKE '%REMINGTON%';
+UPDATE GUN_DATA SET OG_MAKE = 'MOSSBERG' WHERE MAKE LIKE '%MOSSBERG%';
+UPDATE GUN_DATA SET OG_MAKE = 'BERETTA' WHERE MAKE LIKE '%BERETTA%';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE LIKE 'SAVAGE ARMS CANADA';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE LIKE 'SAVAGE ARMS ONT CAN / SAVAGE ARM';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE LIKE 'SAVAGE ARMS / SAVAGE ARMS';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE LIKE 'SAVAGE ARMS CANADA, INC / SAVAGE';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE LIKE 'SAVAGE ARMS CORP';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE LIKE 'AVAGE ARMS ONT CAN';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE LIKE 'SAVAGE ARMS ONT CAN';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE LIKE 'SAVAGE ARMS INCORPORATED';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE LIKE 'SAVAGE ARMS INC. OF CANADA';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE LIKE 'SAVAGE ARMS CANADA / SAVAGE ARMS';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE LIKE 'SAVAGE ARMS,INC';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE LIKE 'SAVAGE ARMS USA';
+UPDATE GUN_DATA SET OG_MAKE = 'SAVAGE' WHERE MAKE LIKE 'SAVAGE ARMS INC / NONE';
+UPDATE GUN_DATA SET OG_MAKE = 'HENRY REPEATING ARMS' WHERE MAKE LIKE 'HENRY %';
+UPDATE GUN_DATA SET OG_MAKE = 'UBERTI' WHERE MAKE LIKE '%UBERTI%';
+UPDATE GUN_DATA SET OG_MAKE = 'SPIKES TACTICAL' WHERE MAKE LIKE '%SPIKE%TACTICAL%';
+UPDATE GUN_DATA SET OG_MAKE = 'ROCK ISLAND ARMORY' WHERE MAKE LIKE '%ROCK%IS%';
+UPDATE GUN_DATA SET OG_MAKE = 'ROCK RIVER ARMS' WHERE MAKE LIKE 'ROCK RIVER%';
+UPDATE GUN_DATA SET OG_MAKE = 'ZASTAVA' WHERE MAKE LIKE 'ZASTAVA%';
+UPDATE GUN_DATA SET OG_MAKE = 'FN' WHERE MAKE LIKE 'FN%';
+UPDATE GUN_DATA SET OG_MAKE = 'SPIKES TACTICAL' WHERE MAKE LIKE 'SPIKE%';
+UPDATE GUN_DATA SET OG_MAKE = 'SPRINGFIELD ARMORY' WHERE MAKE LIKE 'SPRINGFIELD%';
+UPDATE GUN_DATA SET OG_MAKE = 'CENTURY ARMS' WHERE MAKE LIKE 'CENTURY%';
+UPDATE GUN_DATA SET OG_MAKE = 'YANKEE HILL MACHINE' WHERE MAKE LIKE 'YANKEE HILL%';
+UPDATE GUN_DATA SET OG_MAKE = 'YANKEE HILL MACHINE' WHERE MAKE LIKE 'YHM%';
+UPDATE GUN_DATA SET OG_MAKE = 'YANKEE HILL MACHINE' WHERE MAKE LIKE 'YMH';
+UPDATE GUN_DATA SET OG_MAKE = 'YANKEE HILL MACHINE' WHERE MAKE LIKE 'YNAKEE HILL MACHINE CO YHM';
+UPDATE GUN_DATA SET OG_MAKE = 'WINDHAM WEAPONRY' WHERE MAKE LIKE 'WINDOM%';
+UPDATE GUN_DATA SET OG_MAKE = 'WEATHERBY' WHERE MAKE LIKE 'WEATHERBY%';
+UPDATE GUN_DATA SET OG_MAKE = 'WEATHERBY' WHERE MAKE LIKE 'WEATHBY';
+UPDATE GUN_DATA SET OG_MAKE = 'CESKA ZBROJOVKA' WHERE MAKE LIKE 'CESK%';
diff --git a/doc/firearm_stats/remove_empty_entries.sql b/doc/firearm_stats/remove_empty_entries.sql
new file mode 100644
index 0000000000000..88d6c41734ce4
--- /dev/null
+++ b/doc/firearm_stats/remove_empty_entries.sql
@@ -0,0 +1,5 @@
+-- Delete rows where both make and model are empty strings.
+-- String literals are single-quoted; double quotes denote identifiers in SQL.
+DELETE FROM gun_data
+WHERE make = ''
+  AND model = '';