From 2948dac5f2cecccd7f495487ed8a49531ba79e44 Mon Sep 17 00:00:00 2001 From: kiselev-dv Date: Thu, 16 Mar 2017 11:44:47 -0300 Subject: [PATCH] Dump-rotate --- dump-rotate/.gitignore | 1 + dump-rotate/pom.xml | 94 ++++++++ .../src/main/java/me/osm/gzetteer/App.java | 213 ++++++++++++++++++ .../test/java/me/osm/gzetteer/AppTest.java | 38 ++++ 4 files changed, 346 insertions(+) create mode 100644 dump-rotate/.gitignore create mode 100644 dump-rotate/pom.xml create mode 100644 dump-rotate/src/main/java/me/osm/gzetteer/App.java create mode 100644 dump-rotate/src/test/java/me/osm/gzetteer/AppTest.java diff --git a/dump-rotate/.gitignore b/dump-rotate/.gitignore new file mode 100644 index 00000000..b83d2226 --- /dev/null +++ b/dump-rotate/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/dump-rotate/pom.xml b/dump-rotate/pom.xml new file mode 100644 index 00000000..1ccb7c96 --- /dev/null +++ b/dump-rotate/pom.xml @@ -0,0 +1,94 @@ + + 4.0.0 + me.osm.gzetteer + dump-rotate + jar + 1.0-SNAPSHOT + dump-rotate + http://maven.apache.org + + + 1.8 + 1.8 + + + + + junit + junit + 3.8.1 + test + + + + + commons-io + commons-io + 2.5 + + + + + org.apache.commons + commons-lang3 + 3.0 + + + + + joda-time + joda-time + 2.9.7 + + + + + org.slf4j + slf4j-api + 1.7.22 + + + + + org.slf4j + slf4j-simple + 1.7.22 + + + + + + + + + maven-assembly-plugin + 2.6 + + + jar-with-dependencies + + + + + me.osm.gzetteer.App + + + + ${project.artifactId} + false + + + + make-assembly + + package + + single + + + + + + + diff --git a/dump-rotate/src/main/java/me/osm/gzetteer/App.java b/dump-rotate/src/main/java/me/osm/gzetteer/App.java new file mode 100644 index 00000000..a4db5c50 --- /dev/null +++ b/dump-rotate/src/main/java/me/osm/gzetteer/App.java @@ -0,0 +1,213 @@ +package me.osm.gzetteer; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; 
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.joda.time.Days;
+import org.joda.time.LocalDate;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Command-line tool that rotates gazetteer dumps and generates diffs between
+ * dumps taken on consecutive days.
+ */
+public class App
+{
+    /** Extension of a dump file; the rest of the file name is the dump date. */
+    private static final String DUMP_EXTENSION = "json.gz";
+
+    /** {@link #DUMP_EXTENSION} with its leading dot, as it appears in file names. */
+    private static final String DUMP_SUFFIX = "." + DUMP_EXTENSION;
+
+    /** Dumps not taken on a Monday are deleted once they are older than this. */
+    private static final int DAILY_RETENTION_DAYS = 3;
+
+    /** Every dump, Monday or not, is deleted once it is older than this. */
+    private static final int WEEKLY_RETENTION_DAYS = 30;
+
+    /** Day-of-week number Joda-Time reports for Monday. */
+    private static final int MONDAY = 1;
+
+    private static final Logger log = LoggerFactory.getLogger(App.class);
+
+    private static File base;
+
+    /**
+     * Deletes expired dumps under {@code <base>/dumps}, then generates diffs
+     * for the dumps that were kept.
+     *
+     * @param args optional; {@code args[0]} is the base directory, the
+     *             working directory is used when it is absent
+     */
+    public static void main( String[] args )
+    {
+        try {
+            base = new File(args.length > 0 ? args[0] : System.getProperty("user.dir"));
+
+            File dumps = new File(base, "dumps");
+            log.info("Looking for dumps in {}", dumps.getAbsolutePath());
+            if (!dumps.isDirectory()) {
+                log.error("{} is not a directory", dumps.getAbsolutePath());
+                return;
+            }
+
+            for (Map.Entry<String, List<File>> entry : rotate(dumps).entrySet()) {
+                generateDiffs(entry.getKey(), entry.getValue());
+            }
+        }
+        catch (Throwable t) {
+            // Goes through the logger so the failure lands next to the other output.
+            log.error("Dump rotation failed", t);
+        }
+    }
+
+    /**
+     * Deletes expired dumps and groups the remaining ones by region.
+     *
+     * @param dumps directory that is searched recursively for dump files
+     * @return kept dumps keyed by the name of the directory holding them
+     */
+    private static Map<String, List<File>> rotate(File dumps) {
+        LocalDate today = new LocalDate();
+        Map<String, List<File>> dumpsByRegion = new HashMap<String, List<File>>();
+
+        Iterator<File> it = FileUtils.iterateFiles(dumps, new String[]{DUMP_EXTENSION}, true);
+        while (it.hasNext()) {
+            File f = it.next();
+            LocalDate dumpDate = dateFromName(f);
+
+            if (dumpDate == null) {
+                log.info("Skip {}", f);
+            }
+            else if (isExpired(dumpDate, today)) {
+                log.info("Delete {}", f);
+                // File.delete() signals failure only through its return value.
+                if (!f.delete()) {
+                    log.warn("Failed to delete {}", f);
+                }
+            }
+            else {
+                String region = f.getParentFile().getName();
+                dumpsByRegion.computeIfAbsent(region, r -> new ArrayList<File>()).add(f);
+            }
+        }
+
+        return dumpsByRegion;
+    }
+
+    /** Tells whether a dump taken on {@code dumpDate} has outlived its retention. */
+    private static boolean isExpired(LocalDate dumpDate, LocalDate today) {
+        int age = Days.daysBetween(dumpDate, today).getDays();
+        boolean monday = dumpDate.getDayOfWeek() == MONDAY;
+
+        return age > WEEKLY_RETENTION_DAYS || (!monday && age > DAILY_RETENTION_DAYS);
+    }
+
+    /**
+     * Generates a diff for every two dumps of a region taken one day apart.
+     *
+     * @param region region name, also used as the output sub-folder name
+     * @param dumps  dumps of that region; sorted in place, oldest first
+     * @throws IOException if the output folder cannot be created or the diff
+     *                     command cannot be run
+     */
+    private static void generateDiffs(String region, List<File> dumps) throws IOException {
+        Comparator<File> byDate = (a, b) -> dateFromName(a).compareTo(dateFromName(b));
+        Collections.sort(dumps, byDate);
+
+        log.info("Files in {} region: [{}]", region,
+                dumps.stream().map(File::getName).collect(Collectors.joining(", ")));
+
+        File previous = null;
+        for (File current : dumps) {
+            if (previous != null && daysBetween(previous, current) == 1) {
+                File outFolder = new File(new File(base, "diffs"), region);
+                if (!outFolder.isDirectory() && !outFolder.mkdirs()) {
+                    throw new IOException("Cannot create " + outFolder);
+                }
+                generateDiff(previous, current, outFolder, region);
+            }
+            previous = current;
+        }
+    }
+
+    /**
+     * Runs the gazetteer diff command for two dumps, unless the diff file
+     * already exists.
+     *
+     * @param fold      the older dump
+     * @param fnew      the newer dump
+     * @param outFolder folder the diff file is written to
+     * @param logPrefix value passed to the command as {@code --log-prefix}
+     * @throws IOException if the diff command cannot be run
+     */
+    private static void generateDiff(File fold, File fnew, File outFolder, String logPrefix)
+            throws IOException {
+        log.info("Generate diff between {} and {}", fold.getName(), fnew.getName());
+
+        String diffName = StringUtils.removeEnd(fold.getName(), DUMP_SUFFIX) + "_"
+                + StringUtils.removeEnd(fnew.getName(), DUMP_SUFFIX) + ".diff.gz";
+        File diffFile = new File(outFolder, diffName);
+
+        if (diffFile.exists()) {
+            log.info("Diff {} already exists", diffName);
+        }
+        else {
+            File binFile = new File(new File(base, "bin"), "gazetteer.jar");
+            callCmd("java", "-jar", binFile.getAbsolutePath(),
+                    "--log-prefix", logPrefix,
+                    "diff",
+                    "--old", fold.getAbsolutePath(),
+                    "--new", fnew.getAbsolutePath(),
+                    "--out-file", diffFile.getAbsolutePath());
+        }
+
+        postProcessDiff(diffFile);
+    }
+
+    /**
+     * Runs a command, echoes its output to stdout and waits for it to exit.
+     * A non-zero exit code is logged as an error.
+     *
+     * @param cmd program followed by its arguments, one element each
+     * @throws IOException if the command cannot be started, its output cannot
+     *                     be read, or the wait for it is interrupted
+     */
+    private static void callCmd(String... cmd) throws IOException {
+        log.info("Call {}", String.join(" ", cmd));
+
+        // One element per argument keeps paths with spaces intact; stderr is merged
+        // into stdout so the child cannot block on a pipe nobody is reading.
+        Process process = new ProcessBuilder(cmd).redirectErrorStream(true).start();
+
+        try (BufferedReader output = new BufferedReader(
+                new InputStreamReader(process.getInputStream()))) {
+            String line;
+            while ((line = output.readLine()) != null) {
+                System.out.println(line);
+            }
+
+            int exitCode = process.waitFor();
+            if (exitCode != 0) {
+                log.error("{} exited with code {}", cmd[0], exitCode);
+            }
+        }
+        catch (InterruptedException e) {
+            // Restore the flag so callers further up can see the interruption.
+            Thread.currentThread().interrupt();
+            throw new IOException("Interrupted while waiting for " + cmd[0], e);
+        }
+    }
+
+    /** Placeholder for post-processing of a diff file; does nothing yet. */
+    private static void postProcessDiff(File diffFile) {
+
+    }
+
+    /** Returns the number of days from the date of {@code fl} to the date of {@code f}. */
+    private static int daysBetween(File fl, File f) {
+        return Days.daysBetween(dateFromName(fl), dateFromName(f)).getDays();
+    }
+
+    /**
+     * Parses the date of a dump from its file name.
+     *
+     * @param f dump file
+     * @return the date, or {@code null} when the name without the dump suffix
+     *         is rejected by {@code LocalDate} with an IllegalArgumentException
+     */
+    private static LocalDate dateFromName(File f) {
+        try {
+            return new LocalDate(StringUtils.removeEnd(f.getName(), DUMP_SUFFIX));
+        }
+        catch (IllegalArgumentException e) {
+            return null;
+        }
+    }
+}
diff --git a/dump-rotate/src/test/java/me/osm/gzetteer/AppTest.java b/dump-rotate/src/test/java/me/osm/gzetteer/AppTest.java
new file mode 100644
index 00000000..009917a0
--- /dev/null
+++ b/dump-rotate/src/test/java/me/osm/gzetteer/AppTest.java
@@ -0,0 +1,38 @@
+package me.osm.gzetteer;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Unit test for simple App.
+ */
+public class AppTest
+    extends TestCase
+{
+    /**
+     * Create the test case
+     *
+     * @param testName name of the test case
+     */
+    public AppTest( String testName )
+    {
+        super( testName );
+    }
+
+    /**
+     * @return the suite of tests being tested
+     */
+    public static Test suite()
+    {
+        return new TestSuite( AppTest.class );
+    }
+
+    /**
+     * Rigourous Test :-)
+     */
+    public void testApp()
+    {
+        assertTrue( true );
+    }
+}