Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RasterRef should not read HDFS scheme with GDAL reader. #319

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ addons:
- pandoc

install:
- pip install rasterio shapely pandas numpy pweave
# Quote each version specifier: unquoted, bash parses '>' as an output redirection,
# so the constraint is dropped and a stray file such as '=1.6.0' is created instead.
- pip install "shapely>=1.6.0" "pandas>=0.25.0" "numpy>=1.7" pweave "rasterio>=1.0.0"
- wget -O - https://piccolo.link/sbt-1.2.8.tgz | tar xzf -

script:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,18 +101,21 @@ object RasterSource extends LazyLogging {
ExpressionEncoder()
}

/** Convenience overload: parses `source` as a URI and delegates to `apply(URI)`. */
def apply(source: String): RasterSource = {
  val uri = new URI(source)
  apply(uri)
}

/**
 * Resolves the reader for `source`, going through `rsCache` keyed by the URI's ASCII form
 * (presumably compute-if-absent; confirm against the cache implementation).
 *
 * Extractors are tried in order: GDAL, Hadoop GeoTiff, then the default JVM GeoTiff reader.
 *
 * @param source location of the raster to read
 * @return the `RasterSource` selected for `source`
 * @throws UnsupportedOperationException when no extractor accepts `source`
 */
def apply(source: URI): RasterSource =
  rsCache.get(
    source.toASCIIString,
    _ =>
      source match {
        case IsGDAL() => GDALRasterSource(source)
        case IsHadoopGeoTiff() =>
          // TODO: How can we get the active hadoop configuration
          // TODO: without having to pass it through?
          val config = () => new Configuration()
          HadoopGeoTiffRasterSource(source, config)
        case IsDefaultGeoTiff() => JVMGeoTiffRasterSource(source)
        case s => throw new UnsupportedOperationException(s"Reading '$s' not supported")
      }
  )

object IsGDAL {
Expand All @@ -122,6 +125,8 @@ object RasterSource extends LazyLogging {

/** Lower-case file extensions (with leading dot) that `gdalOnly` treats as GDAL-only. */
val gdalOnlyExtensions = Seq(".jp2", ".mrf", ".hdf", ".vrt")

/**
 * URI schemes that must never be routed to the GDAL reader.
 *
 * Kept identical to the schemes accepted by `IsHadoopGeoTiff`, so that every
 * Hadoop-backed location (including plain `hdfs` and `wasb`) bypasses GDAL.
 */
val blacklistedSchemes = Seq("hdfs", "s3n", "s3a", "wasb", "wasbs")

def gdalOnly(source: URI): Boolean =
if (gdalOnlyExtensions.exists(source.getPath.toLowerCase.endsWith)) {
require(GDALRasterSource.hasGDAL, s"Can only read $source if GDAL is available")
Expand All @@ -130,26 +135,43 @@ object RasterSource extends LazyLogging {

/**
 * Extractor for determining if a URI should be read with GDAL.
 *
 * Matches when the scheme is not one of `blacklistedSchemes` and either
 *  - the file extension is GDAL-only (see `gdalOnly`, which itself requires GDAL), or
 *  - GDAL is available and the scheme is explicitly `gdal` or GDAL is the preferred reader.
 *
 * @param source location being classified
 * @return true when `source` should be handed to the GDAL reader
 */
def unapply(source: URI): Boolean = {
  // getScheme returns null for scheme-less URIs; Option(...) turns that into None.
  val scheme = Option(source.getScheme)

  lazy val schemeIsNotHadoop = !scheme.exists(blacklistedSchemes.contains)
  lazy val schemeIsGdal = scheme.contains("gdal")

  // gdalOnly is evaluated first so its GDAL-availability requirement is always enforced.
  // The second alternative is fully parenthesized: `&&` binds tighter than `||`, and the
  // hasGDAL guard must cover both the explicit-scheme and the preference case.
  (gdalOnly(source) && schemeIsNotHadoop) ||
    (GDALRasterSource.hasGDAL && (schemeIsGdal || (preferGdal && schemeIsNotHadoop)))
}
}

/**
 * Extractor selecting the default JVM GeoTiff reader.
 *
 * Matches `file`, `http`, `https` and `s3` URIs, as well as URIs with no scheme,
 * unless the file extension is GDAL-only (in which case this reader is never chosen).
 */
object IsDefaultGeoTiff {
  import IsGDAL.gdalOnly

  def unapply(source: URI): Boolean =
    !gdalOnly(source) && (source.getScheme match {
      case "file" | "http" | "https" | "s3" => true
      // A missing scheme is reported by java.net.URI as null.
      case null | "" => true
      case _ => false
    })
}

/**
 * Extractor selecting the Hadoop GeoTiff reader.
 *
 * Matches `hdfs`, `s3n`, `s3a`, `wasb` and `wasbs` URIs, unless the file extension is
 * GDAL-only (in which case this reader is never chosen).
 */
object IsHadoopGeoTiff {
  import IsGDAL.gdalOnly

  def unapply(source: URI): Boolean =
    !gdalOnly(source) && (source.getScheme match {
      case "hdfs" | "s3n" | "s3a" | "wasb" | "wasbs" => true
      case _ => false
    })
}

trait URIRasterSource { _: RasterSource =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ class RasterSourceSpec extends TestEnvironment with TestData {
gdal.layoutExtents(dims) should contain allElementsOf jvm.layoutExtents(dims)
}


it("should support vsi file paths") {
val archivePath = geotiffDir.resolve("L8-archive.zip")
val archiveURI = URI.create("gdal://vsizip/" + archivePath.toString + "/L8-RGB-VA.tiff")
Expand All @@ -173,6 +172,16 @@ class RasterSourceSpec extends TestEnvironment with TestData {
gdal.extent should be (jvm.extent)
gdal.cellSize should be(jvm.cellSize)
}

it("should choose correct delegate for scheme and file") {
  // Hadoop-only scheme with a GeoTiff payload: expected to land on the Hadoop reader.
  val viaHadoop = RasterSource("s3n://bucket/prefix/raster.tif")
  // Plain s3 scheme: expected to be handed to GDAL (interpreted as /vsis3/).
  val viaGdal = RasterSource("s3://bucket/prefix/raster.tif")
  // Deferred on purpose: a JP2 behind a Hadoop scheme can't be read with the hadoop reader,
  // so the failure must surface inside assertThrows rather than here.
  lazy val unreadableJp2 = RasterSource("s3n://s22s-test-geotiffs/luray_snp/B04.jp2")

  viaHadoop should matchPattern { case HadoopGeoTiffRasterSource(_, _) => }
  viaGdal should matchPattern { case GDALRasterSource(_) => }
  assertThrows[UnsupportedOperationException](unreadableJp2.bandCount)
}
}
}

Expand Down
3 changes: 0 additions & 3 deletions pyrasterframes/src/main/python/tests/RasterFunctionsTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,9 +300,6 @@ def test_render_composite(self):
# Look for the PNG magic cookie
self.assertEqual(png_bytes[0:8], bytearray([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]))




def test_rf_interpret_cell_type_as(self):
from pyspark.sql import Row
from pyrasterframes.rf_types import Tile
Expand Down