From 25e23878d59d49e5e1ee729827d0b814ef9ba02b Mon Sep 17 00:00:00 2001 From: "Jason T. Brown" Date: Fri, 23 Aug 2019 15:24:47 -0400 Subject: [PATCH 1/5] Fix for issue 314 should not read HDFS scheme with GDAL reader. Signed-off-by: Jason T. Brown --- core/src/main/resources/reference.conf | 2 +- .../rasterframes/ref/RasterSource.scala | 25 +++++++++++++++---- .../rasterframes/ref/RasterSourceSpec.scala | 10 ++++++++ .../main/python/tests/RasterFunctionsTests.py | 3 --- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index e8df05866..fb71db26d 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -4,7 +4,7 @@ rasterframes { showable-tiles = true showable-max-cells = 20 max-truncate-row-element-length = 40 - raster-source-cache-timeout = 120 seconds + raster-source-cache-timeout = 1 seconds } vlm.gdal { diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala index 2d8f1bac8..094cdf379 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala @@ -101,9 +101,12 @@ object RasterSource extends LazyLogging { ExpressionEncoder() } + def apply(source: String): RasterSource = apply(new URI(source)) + def apply(source: URI): RasterSource = - rsCache.get( - source.toASCIIString, _ => source match { +// rsCache.get( +// source.toASCIIString, _ => + source match { case IsGDAL() => GDALRasterSource(source) case IsHadoopGeoTiff() => // TODO: How can we get the active hadoop configuration @@ -113,7 +116,7 @@ object RasterSource extends LazyLogging { case IsDefaultGeoTiff() => JVMGeoTiffRasterSource(source) case s => throw new UnsupportedOperationException(s"Reading '$s' not supported") } - ) +// ) object IsGDAL { @@ -122,6 +125,8 @@ object RasterSource extends LazyLogging { val gdalOnlyExtensions = Seq(".jp2", ".mrf", ".hdf", ".vrt") + val blacklistedSchemes = Seq("s3a", "s3n", "wasbs") + def gdalOnly(source: URI): Boolean = if (gdalOnlyExtensions.exists(source.getPath.toLowerCase.endsWith)) { require(GDALRasterSource.hasGDAL, s"Can only read $source if GDAL is available") @@ -130,10 +135,20 @@ object RasterSource extends LazyLogging { /** Extractor for determining if a scheme indicates GDAL preference. */ def unapply(source: URI): Boolean = { + + lazy val schemeIsNotHadoop = Option(source.getScheme()) + .filter(blacklistedSchemes.contains) + .isEmpty + lazy val schemeIsGdal = Option(source.getScheme()) - .exists(_.startsWith("gdal")) + .exists(_ == "gdal") && schemeIsNotHadoop + + (gdalOnly(source) && schemeIsNotHadoop) || + (GDALRasterSource.hasGDAL && + (preferGdal && schemeIsGdal) || + (preferGdal && schemeIsNotHadoop) + ) - gdalOnly(source) || ((preferGdal || schemeIsGdal) && GDALRasterSource.hasGDAL) } } diff --git a/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala index 6b3371ea3..0ab1c1a74 100644 --- a/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala @@ -141,6 +141,16 @@ class RasterSourceSpec extends TestEnvironment with TestData { } if(GDALRasterSource.hasGDAL) { + it("should choose correct delegate for scheme and file"){ + val hdfsSchemeTif = RasterSource("s3n://bucket/prefix/raster.tif") + val easySchemeTif = RasterSource("s3://bucket/prefix/raster.tif") // should interpret as /vsis3/ + lazy val hdfsSchemeJp2 = RasterSource("s3n://s22s-test-geotiffs/luray_snp/B04.jp2") // can't read with hadoop reader + + hdfsSchemeTif should matchPattern {case HadoopGeoTiffRasterSource(_, _) ⇒} + easySchemeTif should matchPattern {case GDALRasterSource(_) ⇒} + assertThrows[UnsupportedOperationException](hdfsSchemeJp2.bandCount) + + } describe("GDAL Rastersource") { val gdal = GDALRasterSource(cogPath) val jvm = JVMGeoTiffRasterSource(cogPath) diff --git a/pyrasterframes/src/main/python/tests/RasterFunctionsTests.py b/pyrasterframes/src/main/python/tests/RasterFunctionsTests.py index 2a57cf356..fe2909a32 100644 --- a/pyrasterframes/src/main/python/tests/RasterFunctionsTests.py +++ b/pyrasterframes/src/main/python/tests/RasterFunctionsTests.py @@ -300,9 +300,6 @@ def test_render_composite(self): # Look for the PNG magic cookie self.assertEqual(png_bytes[0:8], bytearray([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])) - - - def test_rf_interpret_cell_type_as(self): from pyspark.sql import Row from pyrasterframes.rf_types import Tile From 73135c0dd8fedcbe55b00d2eeb6ac9f9dcb35fab Mon Sep 17 00:00:00 2001 From: "Jason T. Brown" Date: Mon, 26 Aug 2019 10:26:02 -0400 Subject: [PATCH 2/5] Check for gdal only URI in non-gdal schemes Signed-off-by: Jason T. Brown --- .../rasterframes/ref/RasterSource.scala | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala index 094cdf379..be1a2503d 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala @@ -153,18 +153,25 @@ object RasterSource extends LazyLogging { } object IsDefaultGeoTiff { - def unapply(source: URI): Boolean = source.getScheme match { - case "file" | "http" | "https" | "s3" => true - case null | "" ⇒ true - case _ => false + import IsGDAL.gdalOnly + def unapply(source: URI): Boolean = { + if (gdalOnly(source)) false + else source.getScheme match { + case "file" | "http" | "https" | "s3" => true + case null | "" ⇒ true + case _ => false + } } } object IsHadoopGeoTiff { - def unapply(source: URI): Boolean = source.getScheme match { - case "hdfs" | "s3n" | "s3a" | "wasb" | "wasbs" => true - case _ => false - } + import IsGDAL.gdalOnly + def unapply(source: URI): Boolean = + if (gdalOnly(source)) false + else source.getScheme match { + case "hdfs" | "s3n" | "s3a" | "wasb" | "wasbs" => true + case _ => false + } } trait URIRasterSource { _: RasterSource => From b4ca509a6846e9dbacccf4a4200b7e78ee25b556 Mon Sep 17 00:00:00 2001 From: "Jason T. Brown" Date: Mon, 2 Sep 2019 11:19:57 -0400 Subject: [PATCH 3/5] Remove local testing cruft bypassing cache Signed-off-by: Jason T. Brown --- core/src/main/resources/reference.conf | 2 +- .../rasterframes/ref/RasterSource.scala | 26 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index fb71db26d..e8df05866 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -4,7 +4,7 @@ rasterframes { showable-tiles = true showable-max-cells = 20 max-truncate-row-element-length = 40 - raster-source-cache-timeout = 1 seconds + raster-source-cache-timeout = 120 seconds } vlm.gdal { diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala index be1a2503d..0fc0691ce 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala @@ -104,19 +104,19 @@ object RasterSource extends LazyLogging { def apply(source: String): RasterSource = apply(new URI(source)) def apply(source: URI): RasterSource = -// rsCache.get( -// source.toASCIIString, _ => - source match { - case IsGDAL() => GDALRasterSource(source) - case IsHadoopGeoTiff() => - // TODO: How can we get the active hadoop configuration - // TODO: without having to pass it through? - val config = () => new Configuration() - HadoopGeoTiffRasterSource(source, config) - case IsDefaultGeoTiff() => JVMGeoTiffRasterSource(source) - case s => throw new UnsupportedOperationException(s"Reading '$s' not supported") - } -// ) + rsCache.get( + source.toASCIIString, _ => + source match { + case IsGDAL() => GDALRasterSource(source) + case IsHadoopGeoTiff() => + // TODO: How can we get the active hadoop configuration + // TODO: without having to pass it through? + val config = () => new Configuration() + HadoopGeoTiffRasterSource(source, config) + case IsDefaultGeoTiff() => JVMGeoTiffRasterSource(source) + case s => throw new UnsupportedOperationException(s"Reading '$s' not supported") + } + ) object IsGDAL { From 3c94a3b501cffb06bc73279eb8c076742a7a2632 Mon Sep 17 00:00:00 2001 From: "Simeon H.K. Fitch" Date: Tue, 3 Sep 2019 15:31:12 -0400 Subject: [PATCH 4/5] Attempt at fixing Travis build. --- .travis.yml | 2 +- .../rasterframes/ref/RasterSourceSpec.scala | 21 +++++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 12cad75b7..9fa8c6ee6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,7 +25,7 @@ addons: - pandoc install: - - pip install rasterio shapely pandas numpy pweave + - pip install shapely>=1.6.0 pandas>=0.25.0 numpy>=1.7 pweave rasterio>=1.0.0 - wget -O - https://piccolo.link/sbt-1.2.8.tgz | tar xzf - script: diff --git a/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala index 0ab1c1a74..de0aa1cb3 100644 --- a/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala @@ -141,16 +141,6 @@ class RasterSourceSpec extends TestEnvironment with TestData { } if(GDALRasterSource.hasGDAL) { - it("should choose correct delegate for scheme and file"){ - val hdfsSchemeTif = RasterSource("s3n://bucket/prefix/raster.tif") - val easySchemeTif = RasterSource("s3://bucket/prefix/raster.tif") // should interpret as /vsis3/ - lazy val hdfsSchemeJp2 = RasterSource("s3n://s22s-test-geotiffs/luray_snp/B04.jp2") // can't read with hadoop reader - - hdfsSchemeTif should matchPattern {case HadoopGeoTiffRasterSource(_, _) ⇒} - easySchemeTif should matchPattern {case GDALRasterSource(_) ⇒} - assertThrows[UnsupportedOperationException](hdfsSchemeJp2.bandCount) - - } describe("GDAL Rastersource") { val gdal = GDALRasterSource(cogPath) val jvm = JVMGeoTiffRasterSource(cogPath) @@ -166,7 +156,6 @@ class RasterSourceSpec extends TestEnvironment with TestData { gdal.layoutExtents(dims) should contain allElementsOf jvm.layoutExtents(dims) } - it("should support vsi file paths") { val archivePath = geotiffDir.resolve("L8-archive.zip") val archiveURI = URI.create("gdal://vsizip/" + archivePath.toString + "/L8-RGB-VA.tiff") @@ -183,6 +172,16 @@ class RasterSourceSpec extends TestEnvironment with TestData { gdal.extent should be (jvm.extent) gdal.cellSize should be(jvm.cellSize) } + + it("should choose correct delegate for scheme and file"){ + val hdfsSchemeTif = RasterSource("s3n://bucket/prefix/raster.tif") + val easySchemeTif = RasterSource("s3://bucket/prefix/raster.tif") // should interpret as /vsis3/ + lazy val hdfsSchemeJp2 = RasterSource("s3n://s22s-test-geotiffs/luray_snp/B04.jp2") // can't read with hadoop reader + + hdfsSchemeTif should matchPattern {case HadoopGeoTiffRasterSource(_, _) ⇒} + easySchemeTif should matchPattern {case GDALRasterSource(_) ⇒} + assertThrows[UnsupportedOperationException](hdfsSchemeJp2.bandCount) + } } } From fc030e05273229b175b64323191ecd831fd5636d Mon Sep 17 00:00:00 2001 From: "Jason T. Brown" Date: Thu, 5 Sep 2019 17:19:55 -0400 Subject: [PATCH 5/5] Tweak to GDALRasterSource.hasGDAL Signed-off-by: Jason T. Brown --- .../org/locationtech/rasterframes/ref/GDALRasterSource.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala index 481155f24..62e55313c 100644 --- a/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala @@ -78,7 +78,7 @@ object GDALRasterSource extends LazyLogging { val _ = new GDALWarp() true } catch { - case _: UnsatisfiedLinkError => + case _:UnsatisfiedLinkError | _:NoClassDefFoundError => logger.warn("GDAL native bindings are not available. Falling back to JVM-based reader for GeoTIFF format.") false }