Skip to content

Commit

Permalink
Dump-rotate
Browse files Browse the repository at this point in the history
  • Loading branch information
kiselev-dv committed Mar 16, 2017
1 parent 68a36cb commit 2948dac
Show file tree
Hide file tree
Showing 4 changed files with 346 additions and 0 deletions.
1 change: 1 addition & 0 deletions dump-rotate/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/target/
94 changes: 94 additions & 0 deletions dump-rotate/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>me.osm.gzetteer</groupId>
<artifactId>dump-rotate</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<name>dump-rotate</name>
<url>http://maven.apache.org</url>

<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>

<!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.0</version>
</dependency>

<!-- https://mvnrepository.com/artifact/joda-time/joda-time -->
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>2.9.7</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-api -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.22</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-simple -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.22</version>
</dependency>


</dependencies>

<build>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.6</version>
<configuration>
<!-- Make single jar with dependencies -->
<descriptorId>jar-with-dependencies</descriptorId>

<!-- Make executable via java -jar -->
<archive>
<manifest>
<mainClass>me.osm.gzetteer.App</mainClass>
</manifest>
</archive>

<finalName>${project.artifactId}</finalName>
<appendAssemblyId>false</appendAssemblyId>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<!-- bind to the packaging phase -->
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
213 changes: 213 additions & 0 deletions dump-rotate/src/main/java/me/osm/gzetteer/App.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
package me.osm.gzetteer;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.joda.time.Days;
import org.joda.time.LocalDate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command line tool that rotates gazetteer dumps and generates diffs between
 * dumps taken on consecutive days.
 *
 * <p>Expected layout below the base directory (first command line argument,
 * or the current working directory when no argument is given):
 * <ul>
 * <li>{@code dumps/<region>/<date>.json.gz} - input dumps, the date is parsed
 * from the file name</li>
 * <li>{@code diffs/<region>/<old>_<new>.diff.gz} - generated diffs</li>
 * <li>{@code bin/gazetteer.jar} - jar that is invoked to compute a diff</li>
 * </ul>
 *
 * <p>Rotation policy: dumps older than {@value #KEEP_DAILY_DAYS} days are
 * deleted unless they were taken on a Monday; dumps older than
 * {@value #KEEP_WEEKLY_DAYS} days are always deleted.
 */
public class App
{
    private static final Logger log = LoggerFactory.getLogger(App.class);

    /** File name suffix of a dump; the rest of the name is the dump date. */
    private static final String DUMP_SUFFIX = ".json.gz";

    /** Joda-Time (ISO) day-of-week value for Monday. */
    private static final int MONDAY = 1;

    /** Dumps of any weekday are kept for this many days. */
    private static final int KEEP_DAILY_DAYS = 3;

    /** Monday dumps are kept for this many days. */
    private static final int KEEP_WEEKLY_DAYS = 30;

    /** Base directory that contains the dumps, diffs and bin folders. */
    private static File base;

    /**
     * Entry point: deletes expired dumps, then generates missing diffs for
     * every region.
     *
     * @param args optional; {@code args[0]} overrides the base directory
     */
    public static void main( String[] args )
    {
        try {
            base = new File(args.length > 0 ? args[0] : System.getProperty("user.dir"));

            File dumps = new File(base, "dumps");
            log.info("Looking for dumps in {}", dumps.getAbsolutePath());

            // FileUtils.iterateFiles rejects a missing directory with a generic
            // IllegalArgumentException; report the actual problem instead.
            if (!dumps.isDirectory()) {
                log.error("Dumps directory {} does not exist or is not a directory",
                        dumps.getAbsolutePath());
                return;
            }

            LocalDate today = new LocalDate();
            Map<String, List<File>> dumpsByRegion = new HashMap<String, List<File>>();

            Iterator<File> it = FileUtils.iterateFiles(dumps, new String[]{"json.gz"}, true);
            while (it.hasNext()) {
                File f = it.next();
                LocalDate dumpDate = dateFromName(f);

                if (dumpDate == null) {
                    log.info("Skip {}", f.toString());
                    continue;
                }

                if (isExpired(dumpDate, today)) {
                    log.info("Delete {}", f.toString());
                    if (!f.delete()) {
                        log.warn("Failed to delete {}", f.toString());
                    }
                    continue;
                }

                // The region is the name of the folder that holds the dump.
                String region = f.getParentFile().getName();
                dumpsByRegion.computeIfAbsent(region, k -> new ArrayList<File>()).add(f);
            }

            for (Map.Entry<String, List<File>> entry : dumpsByRegion.entrySet()) {
                generateDiffs(entry.getKey(), entry.getValue());
            }
        }
        catch (Throwable t) {
            // Top level boundary: report through the logger instead of stderr.
            log.error("Dump rotation failed", t);
        }
    }

    /**
     * Tells whether a dump should be removed according to the rotation policy.
     *
     * @param dumpDate date the dump was taken on
     * @param today current date
     * @return {@code true} if the dump is too old to be kept
     */
    private static boolean isExpired(LocalDate dumpDate, LocalDate today) {
        int ageInDays = Days.daysBetween(dumpDate, today).getDays();
        boolean takenOnMonday = dumpDate.dayOfWeek().get() == MONDAY;

        if (ageInDays > KEEP_WEEKLY_DAYS) {
            return true;
        }
        return !takenOnMonday && ageInDays > KEEP_DAILY_DAYS;
    }

    /**
     * Generates a diff for every pair of dumps of one region that were taken
     * on consecutive days.
     *
     * @param region region name, used as the output sub folder and log prefix
     * @param dumps dumps of the region; every file must have a parsable date
     *        in its name. The list is sorted in place by date.
     */
    private static void generateDiffs(String region, List<File> dumps) {

        Comparator<File> byDate = (f1, f2) -> dateFromName(f1).compareTo(dateFromName(f2));
        Collections.sort(dumps, byDate);

        String dumpNames = dumps.stream()
                .map(File::getName)
                .collect(Collectors.joining(", "));
        log.info("Files in {} region: [{}]", region, dumpNames);

        File previous = null;
        for (File current : dumps) {

            // Only adjacent days are diffed; gaps in the history are skipped.
            if (previous != null && daysBetween(previous, current) == 1) {
                File outFolder = new File(new File(base, "diffs"), region);
                if (outFolder.isDirectory() || outFolder.mkdirs()) {
                    generateDiff(previous, current, outFolder, region);
                }
                else {
                    log.error("Can't create output folder {}", outFolder.getAbsolutePath());
                }
            }

            previous = current;
        }
    }

    /**
     * Generates the diff between two dumps by calling the gazetteer jar,
     * unless the diff file already exists.
     *
     * @param fold the older dump
     * @param fnew the newer dump
     * @param outFolder folder the diff file is written to
     * @param logPrefix value passed to the gazetteer {@code --log-prefix} option
     * @throws IllegalStateException if the gazetteer process can't be run
     */
    private static void generateDiff(File fold, File fnew, File outFolder, String logPrefix) {

        // Defensive: make sure "old" really is the earlier of the two dumps.
        if (dateFromName(fnew).isBefore(dateFromName(fold))) {
            File swap = fold;
            fold = fnew;
            fnew = swap;
        }

        log.info("Generate diff between {} and {}", fold.getName(), fnew.getName());

        String oldDateString = StringUtils.removeEnd(fold.getName(), DUMP_SUFFIX);
        String newDateString = StringUtils.removeEnd(fnew.getName(), DUMP_SUFFIX);
        String diffName = oldDateString + "_" + newDateString + ".diff.gz";

        File diffFile = new File(outFolder, diffName);
        File binFile = new File(new File(base, "bin"), "gazetteer.jar");

        if (diffFile.exists()) {
            log.info("Diff {} already exists", diffName);
        }
        else {
            // One list element per argument, so paths with spaces stay intact.
            List<String> cmd = new ArrayList<String>();
            Collections.addAll(cmd,
                    "java", "-jar", binFile.getAbsolutePath(),
                    "--log-prefix", logPrefix,
                    "diff",
                    "--old", fold.getAbsolutePath(),
                    "--new", fnew.getAbsolutePath(),
                    "--out-file", diffFile.getAbsolutePath());

            log.info("Call {}", StringUtils.join(cmd, " "));

            int exitCode;
            try {
                exitCode = callCmd(cmd);
            }
            catch (IOException e) {
                throw new IllegalStateException("Failed to generate diff " + diffName, e);
            }

            if (exitCode != 0) {
                log.error("Diff generation for {} exited with code {}", diffName, exitCode);
                return;
            }
        }

        postProcessDiff(diffFile);
    }

    /**
     * Runs an external command, echoes its output to {@code System.out} and
     * waits for it to finish.
     *
     * @param cmd program and its arguments, one list element each
     * @return exit code of the process
     * @throws IOException if the process can't be started, its output can't
     *         be read, or the current thread is interrupted while waiting
     */
    private static int callCmd(List<String> cmd) throws IOException {
        ProcessBuilder builder = new ProcessBuilder(cmd);

        // Merge stderr into stdout: reading the two pipes one after another
        // can block forever once the unread pipe's buffer fills up.
        builder.redirectErrorStream(true);

        Process process = builder.start();

        try (BufferedReader output = new BufferedReader(
                new InputStreamReader(process.getInputStream()))) {

            String line;
            while ((line = output.readLine()) != null) {
                System.out.println(line);
            }
        }

        try {
            return process.waitFor();
        }
        catch (InterruptedException e) {
            // Restore the interrupt flag and don't leave the child running.
            Thread.currentThread().interrupt();
            process.destroy();
            throw new IOException("Interrupted while waiting for " + cmd.get(0), e);
        }
    }

    /**
     * Hook for processing a diff after it was generated or found on disk.
     * Currently does nothing.
     *
     * @param diffFile the diff file
     */
    private static void postProcessDiff(File diffFile) {

    }

    /**
     * Counts the whole days between the dates encoded in two dump file names.
     *
     * @param fl the earlier dump
     * @param f the later dump
     * @return number of days from {@code fl} to {@code f}
     */
    private static int daysBetween(File fl, File f) {
        LocalDate d1 = dateFromName(fl);
        LocalDate d2 = dateFromName(f);

        return Days.daysBetween(d1, d2).getDays();
    }

    /**
     * Parses the dump date from a file name of the form {@code <date>.json.gz}.
     *
     * @param f dump file
     * @return the parsed date, or {@code null} if the name is not a valid date
     */
    private static LocalDate dateFromName(File f) {
        try {
            return new LocalDate(StringUtils.removeEnd(f.getName(), DUMP_SUFFIX));
        }
        catch (IllegalArgumentException e) {
            return null;
        }
    }

}
38 changes: 38 additions & 0 deletions dump-rotate/src/test/java/me/osm/gzetteer/AppTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package me.osm.gzetteer;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

/**
 * Placeholder JUnit 3 test case for the dump-rotate application.
 */
public class AppTest extends TestCase {

    /**
     * Creates the test case.
     *
     * @param testName name of the test case
     */
    public AppTest(String testName) {
        super(testName);
    }

    /**
     * Builds a suite from the test methods declared in this class.
     *
     * @return the suite of tests being tested
     */
    public static Test suite() {
        final TestSuite suite = new TestSuite(AppTest.class);
        return suite;
    }

    /**
     * Trivial check that always passes.
     */
    public void testApp() {
        assertTrue(true);
    }
}

0 comments on commit 2948dac

Please sign in to comment.