Skip to content

Commit

Permalink
Enable VcfFileReader and other classes to use Path (#1018)
Browse files Browse the repository at this point in the history
* Enables the use of Path, and therefore of NIO implementations such as google-cloud-nio, when opening VCF readers with VcfFileReader. (The google-cloud-nio library is not part of htsjdk; downstream projects must include it themselves.)

* Cleans up unused test files and some white-space/java8 issues
  • Loading branch information
Yossi Farjoun authored and lbergelson committed Dec 21, 2017
1 parent 335cd06 commit 80122b9
Show file tree
Hide file tree
Showing 29 changed files with 825 additions and 203 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ dependencies {
testRuntime 'org.pegdown:pegdown:1.4.2' // Necessary for generating HTML reports with ScalaTest
testCompile "org.testng:testng:6.9.9"
testCompile "com.google.jimfs:jimfs:1.1"
}
}

sourceCompatibility = 1.8
targetCompatibility = 1.8
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/htsjdk/samtools/SamReaderFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ public SamReader open(final Path path,
/**
 * Utility method that opens the file, reads its header, and closes the file again.
 *
 * @param samFile the file whose header should be read
 * @return the header read from {@code samFile}
 */
abstract public SAMFileHeader getFileHeader(File samFile);

/**
 * Utility method that opens the path, reads its header, and closes it again.
 *
 * @param samFile the path whose header should be read
 * @return the header read from {@code samFile}
 */
abstract public SAMFileHeader getFileHeader(Path samFile);

/** Reapplies any changed options to the reader * */
abstract public void reapplyOptions(SamReader reader);

Expand Down Expand Up @@ -277,6 +280,14 @@ public SAMFileHeader getFileHeader(final File samFile) {
return header;
}

/**
 * Opens a reader on the given path, reads its header, and closes the reader again.
 *
 * @param samFile the path whose header should be read
 * @return the header obtained from the reader opened on {@code samFile}
 */
@Override
public SAMFileHeader getFileHeader(final Path samFile) {
    final SamReader reader = open(samFile);
    try {
        return reader.getFileHeader();
    } finally {
        // Close in finally so the reader is released even when reading the header throws.
        CloserUtil.close(reader);
    }
}

@Override
public void reapplyOptions(final SamReader reader) {
for (final Option option : enabledOptions) {
Expand Down
93 changes: 75 additions & 18 deletions src/main/java/htsjdk/samtools/util/IOUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import java.util.Scanner;
import java.util.Stack;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.Deflater;
import java.util.zip.GZIPInputStream;

Expand Down Expand Up @@ -437,13 +438,13 @@ public static void assertFileIsReadable(final Path path) {
if (path == null) {
throw new IllegalArgumentException("Cannot check readability of null file.");
} else if (!Files.exists(path)) {
throw new SAMException("Cannot read non-existent file: " + path.toAbsolutePath());
throw new SAMException("Cannot read non-existent file: " + path.toUri().toString());
}
else if (Files.isDirectory(path)) {
throw new SAMException("Cannot read file because it is a directory: " + path.toAbsolutePath());
throw new SAMException("Cannot read file because it is a directory: " + path.toUri().toString());
}
else if (!Files.isReadable(path)) {
throw new SAMException("File exists but is not readable: " + path.toAbsolutePath());
throw new SAMException("File exists but is not readable: " + path.toUri().toString());
}
}

Expand All @@ -456,7 +457,18 @@ else if (!Files.isReadable(path)) {
public static void assertFilesAreReadable(final List<File> files) {
    // Apply the single-file check to every entry, in list order; the first failure propagates.
    files.forEach(candidate -> assertFileIsReadable(candidate));
}


/**
 * Runs the single-path readability assertion on every element of the list, in order.
 * Each path must be non-null, exist, not be a directory and be readable; the first
 * path that fails a condition causes a runtime exception to be thrown.
 *
 * @param paths the list of paths to check for readability
 */
public static void assertPathsAreReadable(final List<Path> paths) {
    paths.forEach(candidate -> assertFileIsReadable(candidate));
}


/**
* Checks that each string is non-null, exists or is a URL,
* and if it is a file then not a directory and is readable. If any
Expand All @@ -477,8 +489,8 @@ public static void assertInputsAreValid(final List<String> inputs) {
*/
public static void assertFileIsWritable(final File file) {
if (file == null) {
throw new IllegalArgumentException("Cannot check readability of null file.");
} else if (!file.exists()) {
throw new IllegalArgumentException("Cannot check readability of null file.");
} else if (!file.exists()) {
// If the file doesn't exist, check that it's parent directory does and is writable
final File parent = file.getAbsoluteFile().getParentFile();
if (!parent.exists()) {
Expand Down Expand Up @@ -886,7 +898,7 @@ public static File createTempDir(final String prefix, final String suffix) {
/**
 * Checks that a file exists and is readable, and then returns a buffered reader for it.
 * <p>
 * This overload only converts the file with {@link File#toPath()} and delegates to the
 * {@link Path} overload; the checks themselves are performed there.
 *
 * @param file the file to open
 * @return the reader returned by the {@link Path} overload
 */
public static BufferedReader openFileForBufferedReading(final File file) {
return openFileForBufferedReading(file.toPath());
}
}

/** Checks that a path exists and is readable, and then returns a buffered reader for it. */
public static BufferedReader openFileForBufferedReading(final Path path) {
Expand Down Expand Up @@ -1014,7 +1026,7 @@ public static String slurp(final InputStream is, final Charset charSet) {
private static List<String> tokenSlurp(final InputStream is, final Charset charSet, final String delimiterPattern) {
try {
final Scanner s = new Scanner(is, charSet.toString()).useDelimiter(delimiterPattern);
final LinkedList<String> tokens = new LinkedList<String>();
final LinkedList<String> tokens = new LinkedList<>();
while (s.hasNext()) {
tokens.add(s.next());
}
Expand All @@ -1029,40 +1041,65 @@ private static List<String> tokenSlurp(final InputStream is, final Charset charS
* otherwise assume that file is a list of filenames and unfold it into the output.
*/
public static List<File> unrollFiles(final Collection<File> inputs, final String... extensions) {
    // Delegate the unrolling to the Path-based implementation, then map each result back to a File.
    final List<File> unrolled = new ArrayList<>();
    for (final Path unrolledPath : unrollPaths(filesToPaths(inputs), extensions)) {
        unrolled.add(unrolledPath.toFile());
    }
    return unrolled;
}

/**
 * Go through the paths provided and if they have one of the provided file extensions pass the path to the output,
 * otherwise assume that the path is a list of filenames and unfold it into the output (recursively).
 * Blank lines in a list file are ignored and surrounding whitespace is trimmed from each entry.
 *
 * @param inputs     the paths to unroll
 * @param extensions the extensions that mark a path as a final result; at least one is required
 * @return the unrolled paths, in the order implied by the inputs
 * @throws IllegalArgumentException if no extension is given, if a list file cannot be read, or if one of
 *                                  its entries cannot be converted to a Path
 */
public static List<Path> unrollPaths(final Collection<Path> inputs, final String... extensions) {
    if (extensions.length < 1) throw new IllegalArgumentException("Must provide at least one extension.");

    final Stack<Path> stack = new Stack<>();
    final List<Path> output = new ArrayList<>();
    stack.addAll(inputs);

    while (!stack.empty()) {
        final Path p = stack.pop();
        final String name = p.toString();
        boolean matched = false;

        for (final String ext : extensions) {
            if (name.endsWith(ext)) {
                output.add(p);
                matched = true;
                break;
            }
        }

        // If the file didn't match a given extension, treat it as a list of files
        if (!matched) {
            IOUtil.assertFileIsReadable(p);

            // Files.lines keeps the file open until the stream is closed, so scope it with try-with-resources.
            try (final java.util.stream.Stream<String> lines = Files.lines(p)) {
                lines.map(String::trim)
                        .filter(s -> !s.isEmpty())
                        .forEach(s -> {
                            try {
                                stack.push(getPath(s));
                            } catch (IOException e) {
                                // Keep the cause so the underlying failure is not lost.
                                throw new IllegalArgumentException("cannot convert " + s + " to a Path.", e);
                            }
                        });
            } catch (IOException e) {
                throw new IllegalArgumentException("had trouble reading from " + p.toUri().toString(), e);
            }
        }
    }

    // Preserve input order (since we're using a stack above) for things that care
    Collections.reverse(output);

    return output;
}


/**
* Check if the given URI has a scheme.
*
Expand Down Expand Up @@ -1101,6 +1138,26 @@ public static Path getPath(String uriString) throws IOException {
}
}

/**
 * Converts each URI string to a {@link Path} by calling {@code IOUtil.getPath} on it, preserving order.
 *
 * @param uriStrings the strings to convert
 * @return a new list holding one Path per input string
 * @throws RuntimeIOException wrapping the IOException raised while converting any element
 */
public static List<Path> getPaths(List<String> uriStrings) throws RuntimeIOException {
    final List<Path> resolved = new ArrayList<>();
    for (final String uriString : uriStrings) {
        try {
            resolved.add(IOUtil.getPath(uriString));
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }
    return resolved;
}

/**
 * Converts a collection of Files to a list of Paths by calling {@link File#toPath()} on each element,
 * in the iteration order of the input.
 *
 * @param files a {@link Collection} of {@link File}s to convert to {@link Path}s
 * @return a new List containing the results of running toPath on the elements of the input
 */
public static List<Path> filesToPaths(Collection<File> files){
    final List<Path> converted = new ArrayList<>();
    for (final File file : files) {
        converted.add(file.toPath());
    }
    return converted;
}

/**
* Adds the extension to the given path.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,6 @@ public QueryIterator(final String chr, final int start, final int end, final Lis

}


@Override
public boolean hasNext() {
return currentRecord != null;
Expand Down
49 changes: 20 additions & 29 deletions src/main/java/htsjdk/tribble/util/ParsingUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,26 +32,19 @@
import java.io.InputStream;
import java.lang.reflect.Constructor;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.WeakHashMap;
import java.util.*;
import java.util.function.Function;

/**
* @author jrobinso
*/
public class ParsingUtils {

public static Map<Object, Color> colorCache = new WeakHashMap<Object, Color>(100);
public static Map<Object, Color> colorCache = new WeakHashMap<>(100);

// HTML 4.1 color table, + orange and magenta
static Map<String, String> colorSymbols = new HashMap();
Expand Down Expand Up @@ -80,7 +73,6 @@ public class ParsingUtils {
colorSymbols.put("magenta", "FF00FF");
}


/**
* @return an input stream from the given path
* @throws IOException
Expand All @@ -90,35 +82,34 @@ public static InputStream openInputStream(String path)
return openInputStream(path, null);
}

static private final Set<String> URL_SCHEMES = new HashSet<>(Arrays.asList("http", "ftp", "https"));

/**
* open an input stream from the given path and wrap the raw byte stream with a wrapper if given
*
* the wrapper will only be applied to paths that are not http, https, ftp, or file, i.e. any {@link java.nio.file.Path}
* using a custom filesystem plugin
* @param path a uri like string
* @param uri a uri like string
* @param wrapper to wrap the input stream in, may be used to implement caching or prefetching, etc
* @return
* @throws IOException
* @return An inputStream appropriately created from uri and conditionally wrapped with wrapper (only in certain cases)
* @throws IOException when stream cannot be opened against uri
*/
public static InputStream openInputStream(String path, Function<SeekableByteChannel, SeekableByteChannel> wrapper)
public static InputStream openInputStream(final String uri, final Function<SeekableByteChannel, SeekableByteChannel> wrapper)
throws IOException {

final InputStream inputStream;
if (path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:")) {
inputStream = getURLHelper(new URL(path)).openInputStream();
} else if (IOUtil.hasScheme(path)) {
inputStream = new SeekablePathStream(IOUtil.getPath(path), wrapper);

if (URL_SCHEMES.stream().anyMatch(uri::startsWith)) {
inputStream = getURLHelper(new URL(uri)).openInputStream();
} else if (!IOUtil.hasScheme(uri)) {
File file = new File(uri);
inputStream = Files.newInputStream(file.toPath());
} else {
File file = new File(path);
inputStream = new FileInputStream(file);
inputStream = new SeekablePathStream(IOUtil.getPath(uri), wrapper);
}
return inputStream;
}

//public static String join(String separator, Collection<String> strings) {
// return join( separator, strings.toArray(new String[0]) );
//}

public static <T> String join(String separator, Collection<T> objects) {
if (objects.isEmpty()) {
return "";
Expand All @@ -141,17 +132,17 @@ public static <T> String join(String separator, Collection<T> objects) {
* @return
*/
public static <T extends Comparable> List<T> sortList(Collection<T> list) {
    // Sort a copy so the caller's collection is never reordered.
    final List<T> ret = new ArrayList<>(list);
    Collections.sort(ret);
    return ret;
}

public static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
List<T> t = new ArrayList<T>(c.keySet());
List<T> t = new ArrayList<>(c.keySet());
Collections.sort(t);

List<String> pairs = new ArrayList<String>();
List<String> pairs = new ArrayList<>();
for (T k : t) {
pairs.add(k + "=" + c.get(k));
}
Expand Down Expand Up @@ -206,7 +197,7 @@ public static String join(String separator, String[] strings, int start, int end
*/
public static List<String> split(String input, char delim) {
if (input.isEmpty()) return Arrays.asList("");
final ArrayList<String> output = new ArrayList<String>(1+input.length()/2);
final ArrayList<String> output = new ArrayList<>(1 + input.length() / 2);
int from = -1, to;
for (to = input.indexOf(delim);
to >= 0;
Expand Down
Loading

0 comments on commit 80122b9

Please sign in to comment.