Skip to content

Commit

Permalink
implementando issue #8
Browse files Browse the repository at this point in the history
  • Loading branch information
ppKrauss committed Jun 27, 2018
1 parent 34a01f9 commit 3f68a5e
Show file tree
Hide file tree
Showing 59 changed files with 1,080,093 additions and 41 deletions.
Binary file added assets/plygon-bugExample01.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
69,427 changes: 69,426 additions & 1 deletion data/dump_osm/AC.geojson

Large diffs are not rendered by default.

114,150 changes: 114,150 additions & 0 deletions data/dump_osm/AL.geojson

Large diffs are not rendered by default.

14,902 changes: 14,902 additions & 0 deletions data/dump_osm/AP.geojson

Large diffs are not rendered by default.

79,546 changes: 79,546 additions & 0 deletions data/dump_osm/BA.geojson

Large diffs are not rendered by default.

11,454 changes: 11,454 additions & 0 deletions data/dump_osm/CE.geojson

Large diffs are not rendered by default.

24,550 changes: 24,550 additions & 0 deletions data/dump_osm/ES.geojson

Large diffs are not rendered by default.

12,794 changes: 12,794 additions & 0 deletions data/dump_osm/GO.geojson

Large diffs are not rendered by default.

119,082 changes: 119,082 additions & 0 deletions data/dump_osm/MG.geojson

Large diffs are not rendered by default.

62,358 changes: 62,358 additions & 0 deletions data/dump_osm/MS.geojson

Large diffs are not rendered by default.

70,838 changes: 70,838 additions & 0 deletions data/dump_osm/MT.geojson

Large diffs are not rendered by default.

34,602 changes: 34,602 additions & 0 deletions data/dump_osm/PA.geojson

Large diffs are not rendered by default.

20,794 changes: 20,794 additions & 0 deletions data/dump_osm/PB.geojson

Large diffs are not rendered by default.

26,578 changes: 26,578 additions & 0 deletions data/dump_osm/PE.geojson

Large diffs are not rendered by default.

17,146 changes: 17,146 additions & 0 deletions data/dump_osm/PI.geojson

Large diffs are not rendered by default.

98,706 changes: 98,706 additions & 0 deletions data/dump_osm/PR.geojson

Large diffs are not rendered by default.

31,354 changes: 31,354 additions & 0 deletions data/dump_osm/RJ.geojson

Large diffs are not rendered by default.

11,526 changes: 11,526 additions & 0 deletions data/dump_osm/RN.geojson

Large diffs are not rendered by default.

24,362 changes: 24,362 additions & 0 deletions data/dump_osm/RO.geojson

Large diffs are not rendered by default.

30,414 changes: 30,414 additions & 0 deletions data/dump_osm/RR.geojson

Large diffs are not rendered by default.

59,770 changes: 59,770 additions & 0 deletions data/dump_osm/RS.geojson

Large diffs are not rendered by default.

87,770 changes: 87,770 additions & 0 deletions data/dump_osm/SC.geojson

Large diffs are not rendered by default.

16,858 changes: 16,858 additions & 0 deletions data/dump_osm/SE.geojson

Large diffs are not rendered by default.

13,986 changes: 13,986 additions & 0 deletions data/dump_osm/SP.geojson

Large diffs are not rendered by default.

27,050 changes: 27,050 additions & 0 deletions data/dump_osm/TO.geojson

Large diffs are not rendered by default.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
117 changes: 77 additions & 40 deletions src/dumpWikidata.php
Original file line number Diff line number Diff line change
@@ -1,56 +1,91 @@
-- Generating backups of JSON-Wikidata --

<?php
// usage: php dumpWikidata.php flagOpcionalQuandoFixErr
// usage: php dumpWikidata.php [geo][err]

// CONFIGS
$url_tpl = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids=';
// cols 0=subdivision, 1=name_prefix, 2=name, 3=id, 4=idIBGE, 5=wdId, 6=lexLabel
$uf_idx=0; $wdId_idx = 5; $lexLabel_idx = 6;
$UF=''; $localCsv = false; $stopAt=0;
$urlWd_tpl = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids=';
$urlOsm_tpl = 'http://polygons.openstreetmap.fr/get_geojson.py?id=';
$UF=''; $localCsv = false; $stopAt=0;

$saveFolder = realpath( dirname(__FILE__)."/../data/wikidata" );
$saveFolder = realpath( dirname(__FILE__)."/../data" );
$url = $localCsv
? "$saveFolder/../br-state-codes.csv"
? "$saveFolder/br-state-codes.csv"
: 'https://github.com/datasets-br/state-codes/raw/master/data/br-state-codes.csv'
;
$fixErr = ($argc>=2)? 'MODO FIX-ERR': '';
print "\n USANDO $fixErr $url";
// cols 0=subdivision, 1=name_prefix, 2=name, 3=id, 4=idIBGE, 5=wdId, 6=lexLabel
$uf_idx=0; $wdId_idx = 5; $lexLabel_idx = 6;


$modo = ($argc>=2)? ( ($argv[1]=='geo')? 'GEO': 'FIX-ERR' ): '';
$ext = ($modo=='GEO')? 'geojson': 'json';
print "\n USANDO $modo $url";


// LOAD DATA:
$R = []; // [fname]= wdId
if (($handle = fopen($url, "r")) !== FALSE) {
for($i=0; ($row=fgetcsv($handle)) && (!$stopAt || $i<$stopAt); $i++)
if ( $i && isset($row[1]) )
$R[ lex2filename($row[$lexLabel_idx]) ] = $row[$wdId_idx];
$R[ $row[$uf_idx] ] = $row[$wdId_idx];
} else
exit("\nERRO ao abrir planilha das cidades em \n\t$url\n");


if ($fixErr) foreach($R as $fname=>$wdId) {
$fs = splitFilename($fname,true);
if ($fs[2]>50) unset($R[$fname]);
if ($modo=='FIX-ERR') foreach($R as $fname=>$wdId) {
if ( filesize("$saveFolder/dump_wikidata/$fname.$ext")>50 ) unset($R[$fname]);
}

// WGET AND SAVE JSON:
$i=1;
$n=count($R);
$ERR=[];
foreach($R as $fname=>$wdId) {
print "\n\t($i of $n) $fname: $wdId ";
$json = file_get_contents("$url_tpl$wdId");
if ($json) {
$out = json_stdWikidata($json);
if ($out) {
$savedBytes = file_put_contents( "$saveFolder/$fname.json", $out );
print "saved ($savedBytes bytes) with fresh $wdId";
} else
ERRset($fname,"invalid Wikidata structure");
} else
ERRset($fname,"empty json");
$i++;
}

// Download one document per entry of $R and save it under $saveFolder,
// according to the selected mode:
//   ''  / 'FIX-ERR' : fetch the Wikidata entity into dump_wikidata/
//   'GEO'           : fetch the OSM polygon (GeoJSON) into dump_osm/
// Every failure is recorded through ERRset(); each file produces at most one error.
switch($modo) {

case '':
case 'FIX-ERR':
    foreach($R as $fname=>$wdId) {
        print "\n\t($i of $n) $fname: $wdId ";
        $json = file_get_contents("$urlWd_tpl$wdId");
        if (!$json) {
            ERRset($fname,"empty json");
        } else {
            $out = json_stdWikidata($json);
            if ($out) {
                $savedBytes = file_put_contents( "$saveFolder/dump_wikidata/$fname.$ext", $out );
                print "saved ($savedBytes bytes) with fresh $wdId";
            } else {
                ERRset($fname,"invalid Wikidata structure");
            }
        }
        $i++;
    }
    break;

case 'GEO':
    foreach($R as $fname=>$wdId) {
        print "\n\t($i of $n) $fname: $wdId ";
        // The OSM id is looked up by file name; $wdId is passed along but the
        // visible getOsmId() signature only declares $fname.
        $osmId = getOsmId($fname,$wdId);
        if (!$osmId) {
            // Without an id there is nothing to download: report this cause only,
            // instead of also reporting a misleading "empty json" afterwards.
            ERRset($fname,"no osmId or P402");
        } else {
            $json = file_get_contents("$urlOsm_tpl$osmId");
            if (!$json) {
                ERRset($fname,"empty json");
            } else {
                $out = json_stdOsm($json);
                if ($out) {
                    $savedBytes = file_put_contents( "$saveFolder/dump_osm/$fname.$ext", $out );
                    print "saved ($savedBytes bytes) with fresh OSM/$osmId";
                } else {
                    ERRset($fname,"invalid OSM structure");
                }
            }
        }
        $i++;
    }
    break;

default:
    // Defensive guard for a mode value that none of the cases above handles.
    die("\n Modo $modo DESCONHECIDO.\n");

} // end switch


if (count($ERR)) { print "\n ----------- ERRORS ---------\n"; foreach($ERR as $msg) print "\n * $msg"; }

Expand All @@ -64,6 +99,13 @@ function ERRset($fname,$msg) {
$ERR[] = $msg;
}

/**
 * Validates a JSON document and re-serializes it in a standard layout.
 *
 * The text is decoded into PHP arrays, checked for a top-level "type"
 * member, and encoded again pretty-printed with Unicode characters and
 * slashes left unescaped.
 *
 * @param string $jstr Raw JSON text.
 * @return string The re-encoded JSON, or '' when the input is blank, does not
 *                decode to an array, has no top-level "type" member, or cannot
 *                be encoded again.
 */
function json_stdOsm($jstr) {
    if (!trim($jstr)) return '';
    // json_decode() takes its flags as the 4th argument. Passing them as the 2nd
    // one only acts as a truthy "associative" switch, so JSON_BIGINT_AS_STRING
    // was never applied and large integers were turned into lossy floats.
    $j = json_decode($jstr, true, 512, JSON_BIGINT_AS_STRING);
    if ( !is_array($j) || !isset($j['type']) ) return '';
    $out = json_encode($j, JSON_PRETTY_PRINT|JSON_UNESCAPED_UNICODE|JSON_UNESCAPED_SLASHES);
    // json_encode() returns false on failure; keep the "'' means invalid" contract.
    return ($out === false) ? '' : $out;
}

function json_stdWikidata($jstr) {
if (!trim($jstr)) return '';
$j = json_decode($jstr,JSON_BIGINT_AS_STRING|JSON_OBJECT_AS_ARRAY);
Expand All @@ -82,19 +124,14 @@ function json_stdWikidata($jstr) {
return json_encode($j,JSON_PRETTY_PRINT|JSON_UNESCAPED_UNICODE|JSON_UNESCAPED_SLASHES);
}

/**
 * Builds a file name from a dot-separated lexical label.
 *
 * Dots become spaces, each word gets an upper-case initial, and then the
 * spaces are removed so the words are joined together.
 *
 * @param string $s Label with words separated by dots.
 * @return string The joined, capitalized words.
 */
function lex2filename($s) {
    $spaced = str_replace('.', ' ', $s);
    $capitalized = ucwords($spaced);
    // Removes the contracted preposition " D " (a LexML label quirk) together
    // with every remaining space.
    $pattern = '/ D | /';
    return preg_replace($pattern, '', $capitalized);
}

function splitFilename($f,$checkSize=false) {
global $saveFolder;
$uf = substr($f,0,2);
$fname2 = substr($f,3);
$saveFolder2 = "$saveFolder/$uf";
$fp = "$saveFolder2/$fname2.json";
$size = $checkSize? (file_exists($fp)? filesize($fp): 0): null;
return [$fp,$saveFolder2,$size];
/**
 * Looks up the P402 claim (the OSM id) in a previously saved Wikidata dump.
 *
 * Reads "$saveFolder/dump_wikidata/$fname.json" and returns the value stored
 * at claims -> P402 -> 0 -> value.
 *
 * @param string $fname Base name (without extension) of the dump file.
 * @return mixed The stored P402 value, or 0 when the dump file is missing,
 *               unreadable, empty, not valid JSON, or has no such claim.
 */
function getOsmId($fname) {
    global $saveFolder;
    $f = "$saveFolder/dump_wikidata/$fname.json";
    // A dump that was never downloaded means "no id", not a PHP warning.
    if (!is_file($f) || !is_readable($f)) return 0;
    $raw = file_get_contents($f);
    if ($raw === false || !trim($raw)) return 0;
    // Flags go in the 4th argument of json_decode(); as the 2nd argument they
    // only acted as a truthy "associative" switch and were otherwise ignored.
    $j = json_decode($raw, true, 512, JSON_BIGINT_AS_STRING);
    if (is_array($j) && isset($j['claims']['P402'][0]['value']))
        return $j['claims']['P402'][0]['value'];
    return 0;
}

?>
Expand Down

0 comments on commit 3f68a5e

Please sign in to comment.