diff --git a/scripts/prepare-data-files.ts b/scripts/prepare-data-files.ts index 364b8ef..04e8474 100644 --- a/scripts/prepare-data-files.ts +++ b/scripts/prepare-data-files.ts @@ -3,6 +3,10 @@ import { parse, ParseConfig, unparse } from 'papaparse'; const axios = require('axios').default; +const MAX_HEIGHT = 40; +const MAX_CROWN = 40; +const MAX_PLANTED = 2021; + const LAT_LON_ACCURACY_FACTOR = 10000000; const IGNORED_ADDRESS_WORDS = [ @@ -149,8 +153,8 @@ type Classification = { wikipediaPageUrl: string; }; - // Load and parse original csv data file +console.log('Reading and parsing "./data/Baumkataster-Magdeburg-2021.csv"...'); const csv = fs.readFileSync('./data/Baumkataster-Magdeburg-2021.csv', 'utf-8'); const parseOptions: ParseConfig = { dynamicTyping: true, @@ -160,17 +164,21 @@ const parseOptions: ParseConfig = { }; const originalCsvRecords = parse(csv, parseOptions).data as OriginalCsvRecord[]; + // Save locations to json file +console.log('Writing locations to "./src/assets/data/Baumkataster-Magdeburg-2021-Typen.json"...'); const locations = [...new Set(originalCsvRecords.map(r => r.Typ))].sort(); const locationsJson = JSON.stringify(locations, null, 2); fs.writeFileSync('./src/assets/data/Baumkataster-Magdeburg-2021-Typen.json', locationsJson); // Save addresses to json file +console.log('Writing addresses to "./src/assets/data/Baumkataster-Magdeburg-2021-Gebiete.json"...'); const addresses = [...new Set(originalCsvRecords.map(r => cleanAddress(r.Gebiet)))].sort(); const adressesJson = JSON.stringify(addresses, null, 2); fs.writeFileSync('./src/assets/data/Baumkataster-Magdeburg-2021-Gebiete.json', adressesJson); // Save classification to json file +console.log('Writing classifications to "./src/assets/data/Baumkataster-Magdeburg-2021-Gattungen.json"...'); const genii = [...new Set(originalCsvRecords.map(r => r.Gattung))].sort(); const geniiPromises = genii.map(mapToClassification); Promise.all(geniiPromises) @@ -180,11 +188,18 @@ 
Promise.all(geniiPromises) fs.writeFileSync('./src/assets/data/Baumkataster-Magdeburg-2021-Gattungen.json', classificationsJson); // Save tree records to csv file + console.log('Writing tree data to "./src/assets/data/Baumkataster-Magdeburg-2021.txt"...'); + console.log('When importing a new dataset check if the following hints still hold true'); + console.log(`- Height > ${MAX_HEIGHT} is considered invalid data`); + console.log(`- Crown > ${MAX_CROWN} is considered invalid data`); + console.log(`- Planted > ${MAX_PLANTED} is considered invalid data`); const targetRecords = originalCsvRecords .map(r => mapToStandardTreeRecord(r, classifications)) .sort((a, b) => a.internal_ref < b.internal_ref ? -1 : 0); fs.writeFileSync('./src/assets/data/Baumkataster-Magdeburg-2021.txt', unparse(targetRecords)); + console.log('Finished.'); + }) .catch(console.error); @@ -198,10 +213,10 @@ function mapToStandardTreeRecord(original: OriginalCsvRecord, classifications: C lat: Math.trunc(original.latitude * LAT_LON_ACCURACY_FACTOR) / LAT_LON_ACCURACY_FACTOR, lon: Math.trunc(original.longitude * LAT_LON_ACCURACY_FACTOR) / LAT_LON_ACCURACY_FACTOR, genusIndex: classifications.map(g => g.fullname).indexOf(original.Gattung), - height: original.Baumhoehe, - crown: original.Kronendurc, + height: original.Baumhoehe > 40 ? null : original.Baumhoehe, // height > 40 meters is considered invalid data (see #41) + crown: original.Kronendurc > 40 ? null : original.Kronendurc, // crown > 40 meters is considered invalid data (see #41) dbh: original.Stammumfan, - planted: original.Pflanzjahr + planted: original.Pflanzjahr > 2021 ? 
null : original.Pflanzjahr // planted > 2021 is considered invalid data (see #41) }; } diff --git a/src/assets/data/Baumkataster-Magdeburg-2021.txt b/src/assets/data/Baumkataster-Magdeburg-2021.txt index be9a887..a65a981 100644 --- a/src/assets/data/Baumkataster-Magdeburg-2021.txt +++ b/src/assets/data/Baumkataster-Magdeburg-2021.txt @@ -5341,7 +5341,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 5917,S5346,2,456,52.1491359,11.6772464,340,30,12,210,0 5919,S5342,2,456,52.1493035,11.6773557,340,30,10,200,0 5920,S5340,2,456,52.1493835,11.6774113,340,30,12,220,0 -5921,S5338,2,456,52.1494673,11.6774686,340,59,5,59,0 +5921,S5338,2,456,52.1494673,11.6774686,340,,5,59,0 5922,S5336,2,456,52.1495171,11.6774945,49,12,6,86,0 5923,S5332,2,456,52.1496153,11.67756,49,8,3,55,0 5924,S5330,2,456,52.1496652,11.6775928,49,11,6,100,0 @@ -9662,7 +9662,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 10967,S31659,2,131,52.0920524,11.5630881,124,8,6,65,0 10968,S31661,2,131,52.0921019,11.5628234,124,7,3,47,0 10969,S31663,2,131,52.0921603,11.5625139,124,8,3,48,0 -10970,S31664,2,131,52.092192,11.5623421,124,8,64,40,0 +10970,S31664,2,131,52.092192,11.5623421,124,8,,40,0 10971,S31665,2,131,52.0922114,11.5622394,124,8,3,45,0 10972,S31666,2,131,52.0922334,11.5621221,124,7,5,45,0 10974,S31668,2,131,52.0923918,11.561777,31,9,6,79,0 @@ -9829,7 +9829,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 11136,S32661,2,576,52.0854086,11.5734039,351,12,5,120,0 11137,S7915,2,576,52.0853717,11.5733327,46,7,5,52,0 11138,S32662,2,576,52.085447,11.5733661,288,15,11,210,0 -11139,S7916,2,576,52.0854064,11.5732118,46,68,6,72,0 +11139,S7916,2,576,52.0854064,11.5732118,46,,6,72,0 11140,S32663,2,576,52.0854842,11.5731559,288,16,8,145,0 11141,S17673,2,576,52.0854368,11.5730949,46,8,7,85,0 11142,S7918,2,576,52.085536,11.5727003,46,7,6,65,0 @@ -18014,7 +18014,7 @@ 
internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 23298,S24148,2,254,52.1026977,11.6251239,309,6,5,90,0 23299,S24147,2,254,52.1026245,11.6250402,31,5,2,47,0 23300,S24149,2,254,52.102776,11.6251393,309,7,7,95,0 -23301,S24150,2,254,52.1028418,11.6251516,309,79,5,92,0 +23301,S24150,2,254,52.1028418,11.6251516,309,,5,92,0 23302,S24145,2,254,52.1025392,11.6250971,309,6,6,70,0 23303,S24151,2,254,52.1028471,11.6250813,31,6,5,63,0 23304,S24152,2,254,52.1029155,11.6251671,309,6,5,94,0 @@ -19404,7 +19404,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 25077,S19512,2,964,52.1570227,11.6312017,340,15,10,99,1996 25078,S19513,2,964,52.1568553,11.6312632,340,8,5,55,1996 25079,S19514,2,964,52.1567938,11.6312851,340,8,6,55,1996 -25080,S19515,2,964,52.1567564,11.6312965,340,87,7,78,1996 +25080,S19515,2,964,52.1567564,11.6312965,340,,7,78,1996 25081,S11392,2,964,52.1563926,11.6314283,124,6,5.5,45,2005 25082,S19517,2,964,52.1562882,11.6314686,340,14,10,101,1996 25083,S19518,2,964,52.1562106,11.6314967,340,17,10,83,1996 @@ -20156,7 +20156,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 26078,G29145,0,913,52.1112024,11.6478543,340,29,12,163,0 26079,G29146,0,913,52.1111454,11.6479739,340,27,12,155,0 26080,G29147,0,913,52.1110921,11.6480861,340,23,8,135,0 -26081,G29148,0,913,52.1110402,11.6481977,340,4,2,18,3013 +26081,G29148,0,913,52.1110402,11.6481977,340,4,2,18, 26082,G29149,0,913,52.1109883,11.6483194,340,30,10,134,0 26083,G29150,0,913,52.1109273,11.6484362,340,25,8,120,0 26084,G29151,0,913,52.1108721,11.648556,340,32,10,150,0 @@ -25483,7 +25483,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 31941,S9582,2,789,52.170575,11.6354779,340,13,7,130,0 31942,S9583,2,789,52.1706128,11.6352514,31,12,7,100,0 31943,S9584,2,789,52.1706363,11.6352996,274,9,10,140,0 -31944,S22492,2,332,52.0732018,11.6585263,340,57,4,63,0 
+31944,S22492,2,332,52.0732018,11.6585263,340,,4,63,0 31945,S33965,2,240,52.0853474,11.6232084,49,20,9,260,0 31946,S33966,2,240,52.0853871,11.6233013,49,20,13,245,0 31947,S10403,2,96,52.1097548,11.5983403,102,6,5,60,2006 @@ -31634,7 +31634,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 38859,S16050,2,637,52.0961368,11.6047241,243,11,6,240,1960 38860,S36128,2,642,52.0886114,11.5985957,37,10,7,152,1980 38861,S16049,2,637,52.096144,11.6047626,243,11,6,88,1960 -38862,S16048,2,637,52.0961666,11.6047759,243,61,7,140,1970 +38862,S16048,2,637,52.0961666,11.6047759,243,,7,140,1970 38863,S16047,2,637,52.0961868,11.6047622,243,6,4,69,1975 38864,G27553,0,638,52.0961918,11.6048259,243,12,6,145,1960 38865,G27554,0,638,52.0962258,11.6048439,243,9,4,70,1975 @@ -47870,7 +47870,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 56378,G34707,0,46,52.0932443,11.5562823,232,9,5,80,0 56379,G34708,0,46,52.0931581,11.5563217,77,8,5,87,0 56380,G34709,0,46,52.0931635,11.5562415,77,5,4,66,0 -56381,G34710,0,46,52.0931235,11.5562095,31,82,4,82,0 +56381,G34710,0,46,52.0931235,11.5562095,31,,4,82,0 56382,G34711,0,46,52.0930744,11.556284,343,8,4,66,0 56383,G34712,0,46,52.093113,11.5564515,31,9,5,88,0 56384,G34714,0,46,52.094195,11.5561819,343,5,3,35,0 @@ -49331,7 +49331,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 57839,G40495,0,913,52.1135883,11.641803,37,18,16,240,1960 57840,G40496,0,913,52.1136261,11.641737,37,25,12,160,1930 57841,G40497,0,913,52.1136644,11.6417907,37,25,12,145,1950 -57842,G40498,0,913,52.1136546,11.6417097,37,25,14,200,2930 +57842,G40498,0,913,52.1136546,11.6417097,37,25,14,200, 57843,G40500,0,913,52.1137132,11.6414351,37,12,8,380,1960 57844,G40501,0,913,52.1137464,11.6413962,37,25,15,375,1960 57845,G40502,0,913,52.1137648,11.6412145,348,18,10,167,1950 @@ -61674,7 +61674,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 
72390,S1624,2,24,52.0875674,11.6600004,282,3,1,22,2017 72391,S1623,2,24,52.0874959,11.6600725,282,3,1,22,2017 72397,G26501,0,188,52.1343286,11.6619277,8,7,5,55,0 -72398,G26502,0,188,52.1343543,11.6619183,8,60,6,74,0 +72398,G26502,0,188,52.1343543,11.6619183,8,,6,74,0 72399,G26503,0,188,52.1343803,11.6620572,8,7,6,115,0 72400,G26504,0,188,52.134443,11.6620104,243,6,8,120,0 72401,G26505,0,188,52.134583,11.6620726,243,6,6,120,0 @@ -62551,7 +62551,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 73382,G8198,0,1049,52.1388605,11.5776682,16,7,5,80,0 73384,S7300,2,755,52.077688,11.6568374,71,12,8,200,0 73385,S7301,2,755,52.0776844,11.6570721,71,14,8,140,0 -73387,S16979,2,412,52.1555616,11.5904272,31,60,4,49,0 +73387,S16979,2,412,52.1555616,11.5904272,31,,4,49,0 73388,S16980,2,412,52.1547683,11.5935906,16,10,7,180,0 73389,G18566,0,360,52.1229714,11.6206579,219,17,8,165,0 73390,G18567,0,360,52.1229571,11.6206753,219,20,9,424,0 @@ -63367,7 +63367,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 75392,S14983,2,204,52.1455715,11.580673,31,7,4,50,2005 75393,S20428,2,674,52.1619549,11.6228571,354,9,6,90,1997 75394,S20429,2,674,52.1619559,11.622962,354,9,6,80,1997 -75395,S20430,2,674,52.1619562,11.6230623,354,80,4,90,1997 +75395,S20430,2,674,52.1619562,11.6230623,354,,4,90,1997 75396,S20431,2,674,52.1619691,11.6231672,354,11,5,80,1997 75398,G20861,0,53,52.1560069,11.5711446,272,22,10,200,0 75399,G20862,0,53,52.1560081,11.5712597,273,22,12,210,0 @@ -71085,7 +71085,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 90666,G30919,0,737,52.1423252,11.5648497,221,20,10,250,0 90667,G30921,0,737,52.1423563,11.5650941,57,8,4,103,0 90668,G30922,0,737,52.1424025,11.5655542,57,8,7,98,0 -90669,G30923,0,737,52.1424327,11.5658626,57,55,0,66,0 +90669,G30923,0,737,52.1424327,11.5658626,57,,0,66,0 90670,G30924,0,737,52.1424704,11.5662166,57,5,4,42,0 
90671,G30925,0,737,52.1424892,11.5662502,57,4,4,78,0 90672,G30926,0,737,52.1425071,11.5663073,57,5,2,38,0 @@ -83152,7 +83152,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 108458,S19131,2,586,52.0688106,11.6156199,231,4,1,18,2018 108459,S19132,2,586,52.0681456,11.6153237,232,4,1,18,2018 108460,S19133,2,586,52.0680063,11.6152574,232,4,1,18,2018 -108461,S19134,2,586,52.0679342,11.6152262,231,51,1,18,2018 +108461,S19134,2,586,52.0679342,11.6152262,231,,1,18,2018 108462,S19135,2,586,52.0678669,11.6151959,232,5,1,20,2018 108463,S19136,2,586,52.0677925,11.6151648,231,4,1,19,2018 108464,S19137,2,586,52.0677253,11.6151345,232,4,1,18,2018 @@ -83161,7 +83161,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 108467,S19140,2,586,52.0675121,11.6150379,231,4,1,18,2018 108468,S19141,2,586,52.0674448,11.6150076,232,5,1,18,2018 108469,S19142,2,586,52.0673716,11.6149715,231,4,1,18,2018 -108470,S19143,2,586,52.0673014,11.6149433,232,41,1,18,2018 +108470,S19143,2,586,52.0673014,11.6149433,232,,1,18,2018 108471,S19144,2,586,52.0672299,11.614912,231,4,1,17,2018 108472,S19145,2,586,52.0671591,11.6148759,232,5,1,18,2018 108473,S19146,2,586,52.06709,11.6148447,231,5,1,17,2018 @@ -83175,7 +83175,7 @@ internal_ref,ref,locationIndex,addressIndex,lat,lon,genusIndex,height,crown,dbh, 108481,S19154,2,586,52.0665244,11.6145882,231,5,1,18,2018 108482,S19155,2,586,52.0664542,11.614558,232,4,1,17,2018 108483,S19156,2,586,52.0663875,11.6145238,231,5,1,19,2018 -108484,S19157,2,586,52.0663155,11.6144964,232,41,1,18,2018 +108484,S19157,2,586,52.0663155,11.6144964,232,,1,18,2018 108485,S19158,2,586,52.0661821,11.6144301,231,5,1,18,2018 108486,S19159,2,586,52.0657243,11.6142305,232,14,1,18,2018 108487,S19160,2,586,52.065657,11.6141973,231,4,1,18,2018