diff --git a/case-studies/ancillary_data/Base Map/Boundary_administration.cpg b/case-studies/ancillary_data/Base Map/Boundary_administration.cpg new file mode 100644 index 0000000..3ad133c --- /dev/null +++ b/case-studies/ancillary_data/Base Map/Boundary_administration.cpg @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file
diff --git a/case-studies/ancillary_data/Base Map/Boundary_administration.dbf b/case-studies/ancillary_data/Base Map/Boundary_administration.dbf new file mode 100644 index 0000000..496b118 Binary files /dev/null and b/case-studies/ancillary_data/Base Map/Boundary_administration.dbf differ
diff --git a/case-studies/ancillary_data/Base Map/Boundary_administration.prj b/case-studies/ancillary_data/Base Map/Boundary_administration.prj new file mode 100644 index 0000000..f45cbad --- /dev/null +++ b/case-studies/ancillary_data/Base Map/Boundary_administration.prj @@ -0,0 +1 @@ +GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]] \ No newline at end of file
diff --git a/case-studies/ancillary_data/Base Map/Boundary_administration.sbn b/case-studies/ancillary_data/Base Map/Boundary_administration.sbn new file mode 100644 index 0000000..a0c7780 Binary files /dev/null and b/case-studies/ancillary_data/Base Map/Boundary_administration.sbn differ
diff --git a/case-studies/ancillary_data/Base Map/Boundary_administration.sbx b/case-studies/ancillary_data/Base Map/Boundary_administration.sbx new file mode 100644 index 0000000..72ac7a6 Binary files /dev/null and b/case-studies/ancillary_data/Base Map/Boundary_administration.sbx differ
diff --git a/case-studies/ancillary_data/Base Map/Boundary_administration.shp b/case-studies/ancillary_data/Base Map/Boundary_administration.shp new file mode 100644 index 0000000..be5639c Binary files /dev/null and b/case-studies/ancillary_data/Base Map/Boundary_administration.shp differ
diff --git a/case-studies/ancillary_data/Base Map/Boundary_administration.shp.xml b/case-studies/ancillary_data/Base Map/Boundary_administration.shp.xml new file mode 100644 index 0000000..d80f7ca --- /dev/null +++ b/case-studies/ancillary_data/Base Map/Boundary_administration.shp.xml @@ -0,0 +1,3 @@
+[ArcGIS metadata for the Boundary_administration layer: geoprocessing lineage (CalculateField / Identity / Dissolve steps), GCS_WGS_1984 (EPSG:4326) spatial reference, and attribute field definitions (PROVINSI, KABKOT, KECAMATAN, DESA, etc.); machine-generated XML not reproduced here]
diff --git a/case-studies/ancillary_data/Base Map/Boundary_administration.shx b/case-studies/ancillary_data/Base Map/Boundary_administration.shx new file mode 100644 index 0000000..7b5c8cb Binary files /dev/null and b/case-studies/ancillary_data/Base Map/Boundary_administration.shx differ
diff --git a/case-studies/measuring_TSS_in_Lake_Tempe.ipynb b/case-studies/measuring_TSS_in_Lake_Tempe.ipynb new file mode 100644 index 0000000..570c00f --- /dev/null +++ b/case-studies/measuring_TSS_in_Lake_Tempe.ipynb @@ -0,0 +1,1092 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Measuring sediments in Lake Tempe\n",
+ "\n",
+ "**Contents**\n",
+ "\n",
+ " - [Background](#Background)\n",
+ " - [Notebook overview](#Notebook-overview)\n",
+ " - [Suspended sediments](#Suspended-sediments)\n",
+ " - [Notebook setup](#Notebook-setup)\n",
+ " - [Analysis parameters](#Analysis-parameters)\n",
+ " - [Spatial and temporal window](#Spatial-and-temporal-window)\n",
+ " - [Datacube query](#Datacube-query)\n",
+ " - [Sentinel-2 dataset](#Sentinel-2-dataset)\n",
+ " - [Data load](#Data-load)\n",
+ " - [Data clean-up](#Data-clean-up)\n",
+ " - [Lake boundary](#Lake-boundary)\n",
+ " - [Loading up the shapefile](#Loading-up-the-shapefile)\n",
+ " - [Raster mask](#Raster-mask)\n",
+ " - [Masking the data](#Masking-the-data)\n",
+ " - [Filtering out land (i.e. 
non-water) pixels](#Filtering-out-land-(i.e.-non-water)-pixels)\n", + " - [Water index](#Water-index)\n", + " - [Removing non-water pixels](#Removing-non-water-pixels)\n", + " - [TSS analysis](#TSS-analysis)\n", + " - [TSS calculation](#TSS-calculation)\n", + " - [Selected displays](#Selected-displays)\n", + " - [Individual time slices](#Individual-time-slices)\n", + " - [Temporal aggregation (1)](#Temporal-aggregation-(1))\n", + " - [Temporal aggregation (2)](#Temporal-aggregation-(2))\n", + " - [Temporal statistics (standard)](#Temporal-statistics-(standard))\n", + " - [Robust statistics (using Dask)](#Robust-statistics-(using-Dask))\n", + " - [Approach](#Approach)\n", + " - [Robust functions](#Robust-functions)\n", + " - [Applying parallelised custom functions](#Applying-parallelised-custom-functions)\n", + " - [Discussion](#Discussion)\n", + " - [Extracting TSS data for further analysis (pixel drills)](#Extracting-TSS-data-for-further-analysis-(pixel-drills))\n", + " - [Time series indexing](#Time-series-indexing)\n", + " - [Extracting to Pandas data frame](#Extracting-to-Pandas-data-frame)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Background \n", + "\n", + "## Notebook overview\n", + "\n", + "In this notebook, we use a dataset of Sentinel-2 data to extract a time series of total suspended sediments (TSS) for the purpose of monitoring water quality. The region of interest here is centred over Lake Tempe in South Sulawesi, Indonesia. \n", + "\n", + "In addition to providing a general overview of TSS estimation, this notebook also demonstrates a number of technical and computational aspects related to working in an open data cube (ODC) environment such as EASI. In particular, this notebook will touch on the following aspects:\n", + "\n", + " - basic handling and processing of remote sensing datasets\n", + " - deriving a raster mask of lake pixels from a shape file\n", + " - filtering out non-water pixels using a normalised water index\n", + " - calculation of standard temporal statistics as well as robust statistics\n", + " - parallelised application of custom functions to a Dask array\n", + " - extracting a pixel-based time series of TSS values (pixel drill) and saving it to disk for further analysis.\n", + "\n", + "## Suspended sediments\n", + "\n", + "In order to derive an estimate of TSS concentration (in mg/L) from the remote sensing data, we need a specific formula (algorithm) that characterises the relationship between the Sentinel-2 spectral band values and the true TSS measurements. Such a formula needs to be derived and calibrated on the basis of, among others, TSS measurements sampled during one or more field campaigns on the lake of interest.\n", + "\n", + "In this demonstration notebook, we rely on pre-existing research published in the following manuscript:\n", + "\n", + " > E. Pandhadha et al., 2020. Total Suspended Solid (TSS) Estimation in Lake Tempe, South Sulawesi Using Sentinel-2B Imagery. \n", + "Journal of Engineering Technology and Applied Physics, Special Issue on Remote Sensing for Sustainable Environment, no. 1 (2020). 
[DOI:10.33093/jetap.2020.x1.4](http://dx.doi.org/10.33093/jetap.2020.x1.4)\n", + "\n", + "In this paper, the remote-sensing-based formula used to derive values for the $\\text{TSS}$ parameter of interest is as follows: \n", + "\n", + "$$\n", + "\\text{TSS} = \\alpha \\cdot \\text{NSMI} + \\beta \\\\\n", + "\\text{NSMI} = \\frac{ \\text{red}+\\text{green}-\\text{blue} }{ \\text{red}+\\text{green}+\\text{blue} } \\\\ \n", + "\\alpha = 775.98 \\\\\n", + "\\beta = -93.606\n", + "$$\n", + "\n", + "where $\\text{red}$, $\\text{green}$ and $\\text{blue}$ correspond to the respective Sentinel-2 bands, and $\\text{NSMI}$ represents the Normalised Suspended Material Index.\n", + "\n", + "
Caution – No attempt is made here to double-check or validate the above TSS algorithm in the context of this notebook and the corresponding Sentinel-2 dataset available on the current EASI / ODC deployment. The results provided in this notebook should thus only be considered as an overview of a possible approach to TSS measurement from remote sensing data, which would need to be further scrutinised and validated.
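+ "\n",
+ "To make the formula concrete, here is a quick numeric check using illustrative (made-up) surface-reflectance values, i.e. not values taken from the paper:\n",
+ "\n",
+ "```python\n",
+ "### Hypothetical reflectance values, for illustration only\n",
+ "red, green, blue = 0.15, 0.12, 0.08\n",
+ "nsmi = (red + green - blue) / (red + green + blue)   # ~0.543\n",
+ "tss = 775.98 * nsmi - 93.606                          # ~327.6 mg/L\n",
+ "print(round(nsmi, 3), round(tss, 1))\n",
+ "```\n",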
\n", + "\n", + "Note also that in the above paper, the authors implement a pre-processing step aiming to remove pixels affected by sun-glint in the time series of Sentinel-2 data. This step is _not_ implemented in this notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook setup\n", + "\n", + "First, let's import the key Python packages and supporting functions required in this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### System\n", + "import sys\n", + "\n", + "### Datacube \n", + "import datacube\n", + "from datacube.utils import masking\n", + "from odc.algo import enum_to_bool\n", + "\n", + "### Data tools\n", + "import numpy as np\n", + "import xarray as xr\n", + "import pandas as pd\n", + "from astropy.stats import sigma_clip\n", + "import geopandas as gpd\n", + "import rasterio.features\n", + "\n", + "### Plotting\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "\n", + "### EASI tools\n", + "sys.path.append('../tools/')\n", + "from datacube_utils import display_map, mostcommon_crs, xarray_object_size" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And let's now also connect to the EASI database:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dc = datacube.Datacube(app=\"Lake_Tempe_TSS\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Analysis parameters\n", + "\n", + "## Spatial and temporal window\n", + "\n", + "The region of interest for this demonstration notebook is centred over Lake Tempe, South Sulawesi. The utility function `display_map` provides a convenient overview of the selected latitude / longitude extents." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Region of interest\n", + "min_longitude, max_longitude = (119.87, 120.04) # Lake Tempe\n", + "min_latitude, max_latitude = (-4.03, -4.198)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "display_map( x = (min_longitude,max_longitude), \n", + " y = (min_latitude,max_latitude) )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A quick look at the Sentinel-2 dataset on the [ODC Explorer](https://explorer.sg-dev.easi-eo.solutions/products/s2_l2a/extents) for the current EASI deployment indicates that data is available from 2017 onwards. For reasonable loading times, we will here only use three years' worth of satellite data over the region of interest." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Dates of interest:\n", + "min_date = '2018-01-01'\n", + "max_date = '2021-01-01'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Datacube query\n", + "\n", + "The Sentinel-2 product used in this notebook to calculate TSS is labelled `s2_l2a`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "product = 's2_l2a' # Sentinel-2 product" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now initialise the main parameters of a datacube query, which we can then use to check the dataset's native projection (`mostcommon_crs`) – as we don't need to re-project the dataset to another coordinate reference system, we will simply load up the data in its native projection. \n", + "\n", + "We also set the `dask_chunks` query parameter to ensure that the loading process makes use of Dask, which will return a (lazy-loaded) dataset that can be processed in a parallelised manner. The use of the `.persist()` directive throughout this notebook essentially \"forces\" the loading / computation of the data contained in these Dask arrays." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This code cell may generate some 'SQLAlchemy' warnings, but they can be safely ignored.\n", + "\n", + "query = { 'product': product,\n", + " 'lat': (min_latitude, max_latitude),\n", + " 'lon': (min_longitude, max_longitude),\n", + " 'time': (min_date, max_date) }\n", + "\n", + "### Dataset's native projection\n", + "native_crs = mostcommon_crs(dc, query)\n", + "print(f\"The dataset's native CRS is \\\"{native_crs}\\\".\")\n", + "\n", + "query.update({ 'output_crs': native_crs,\n", + " 'resolution': (30, 30),\n", + " 'group_by': 'solar_day',\n", + " 'dask_chunks': {'x': 1024, 'y': 1024} })" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, loading up all Sentinel-2 bands would lead to excessive memory requirements and computational overheads. We will thus only select the bands relevant to this analysis.\n", + "\n", + "The list of measurements (i.e. satellite bands and derived products) for the current product of interest can be displayed as follows." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dc.list_measurements().loc[product]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "According to the TSS formula provided at the beginning of this notebook, we can select only those bands needed for the analysis. In addition, we also load the layer `SCL` (or its known alias `qa`) of pixel QA data, which will allow us to clean up the dataset, as well as the `swir_2` band, which will be used further below to filter out non-water pixels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query.update( {'measurements': ['red', 'green', 'blue', 'swir_2', 'qa']} )\n", + "query" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Sentinel-2 dataset\n", + "\n", + "## Data load\n", + "\n", + "In the next cell, we load up the Sentinel-2 dataset as directed by the `query` parameters. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "data = dc.load(**query)\n", + "data = data.persist() # Dask data processing\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data clean-up\n", + "\n", + "As usual, we need to filter out various pixels from the remote sensing dataset. 
This includes the removal of invalid (`nodata`) pixels, as well as those affected by various pixel quality issues. In the next cell, we create the various masks required for this clean-up operation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Valid mask (i.e. not 'nodata'), for each data layer\n", + "valid_mask = masking.valid_data_mask(data).persist()\n", + "\n", + "### Mask of clear pixels\n", + "bad_pixel_flags = {'no data', 'saturated or defective', 'cloud shadows', 'cloud high probability', 'cloud medium probability'}\n", + "good_pixel_mask = ~enum_to_bool(data['qa'], bad_pixel_flags).persist()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
Caution – Further work is required to investigate the resulting \"cleaned-up\" dataset in more detail. During the development of this notebook, several plots and results (not shown here) pointed to issues with the pixel QA information: among others, some cloud shadows were not identified and filtered out properly, and some water pixels in the lake were mis-classified as cloud medium probability or thin cirrus. A more in-depth analysis of the pixel QA information should be performed to ensure that such issues are fixed, or at least do not substantially bias the results further below.
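+ "\n",
+ "As a first step towards such an analysis, the pixel QA categories defined for this product can be listed directly from the loaded `qa` band, e.g. using the `masking` module already imported above (a quick, optional check):\n",
+ "\n",
+ "```python\n",
+ "### List the QA flag definitions attached to the 'qa' measurement\n",
+ "masking.describe_variable_flags(data['qa'])\n",
+ "```\n",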
\n", + "\n", + "The Sentinel-2 masking and scaling operations are subsequently applied on a band-by-band basis, as done in the next cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Scaling factor for Sentinel-2 data\n", + "scale = 0.0001 # divide by 10000\n", + "offset = 0.0\n", + "\n", + "### Apply valid mask, good pixel mask, and scaling to each layer\n", + "data['red'] = ( data['red'].where(valid_mask['red'] & good_pixel_mask) * scale + offset ).persist()\n", + "data['blue'] = ( data['blue'].where(valid_mask['blue'] & good_pixel_mask) * scale + offset ).persist()\n", + "data['green'] = ( data['green'].where(valid_mask['green'] & good_pixel_mask) * scale + offset ).persist()\n", + "data['swir_2'] = ( data['swir_2'].where(valid_mask['swir_2'] & good_pixel_mask) * scale + offset ).persist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Dimensions of dataset\n", + "print( xarray_object_size(data) )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And finally, we can remove any time slice from the dataset (if any) not containing at least one valid (non-`NaN`) pixel, as a result of the above operations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = data.dropna('time', how='all')\n", + "data = data.persist() # Dask data processing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now inspect the resulting data object further, e.g. by displaying the `Xarray.DataArray` for one of the bands:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "data.red" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From this, we can gather that the pre-processed Sentinel-2 data is available as a Dask array over a region of about 600-by-600 pixels, and with about 200 time steps. In this data object, each Dask chunk has a size `(1,623,632)` in the `time`, `x` and `y` dimensions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Lake boundary\n", + "\n", + "To improve the plots further below in this notebook, we will use a mask of the lake area in the region of interest. This mask can be derived, for instance, from an existing shape file.\n", + "\n", + "## Loading up the shapefile\n", + "\n", + "For this example, we use a polygon of the Lake Tempe boundary line, which can be accessed from the shapefile provided in the `ancillary_data` folder in this repository." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "shape_file = './ancillary_data/Base Map//Boundary_administration.shp'\n", + "\n", + "### Load the shapefile\n", + "shp = gpd.read_file(shape_file)\n", + "display(shp)\n", + "shp.crs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see here that the vector data within that shapefile is in the projection `EPSG:4326`, which is different from that of our main Sentinel-2 dataset (`EPSG:32750`). For compatibility, we can here re-project the shapefile data to the CRS of the Sentinel-2 dataset. \n", + "\n", + "Subsequently, we will also filter the shapefile contents to only select those polygons associated with Lake Tempe." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Reproject to current coordinate reference system\n", + "shp = shp.to_crs(native_crs)\n", + "\n", + "### Remove unwanted polygons\n", + "print(\"Selected polygons are:\")\n", + "drop_list = []\n", + "for ff in shp.iterrows():\n", + " tmp = ff[1].Village.lower()\n", + " if 'tempe' in tmp and 'danau' in tmp: \n", + " print(ff[0], ff[1].Village)\n", + " else: \n", + " drop_list.append(ff[0])\n", + " \n", + "shp.drop(drop_list, inplace=True)\n", + "\n", + "### Plot\n", + "shp.boundary.plot(figsize=(8,8))\n", + "plt.xlabel(\"x [metre]\"); plt.ylabel(\"y [metre]\")\n", + "plt.title(\"Lake Tempe boundary\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Raster mask\n", + "\n", + "We can now create a raster mask from the vector data. The code below iterates over the polygons in the shapefile (in case multiple polygons are available), setting the raster mask values to `1` for all the pixels located within the footprint of each polygon, and `0` otherwise." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-08-29T05:35:29.838006Z", + "start_time": "2018-08-29T05:35:29.833034Z" + } + }, + "outputs": [], + "source": [ + "### Rasterise\n", + "mask = rasterio.features.rasterize( ((feature['geometry'], 1) for feature in shp.iterfeatures()),\n", + " out_shape = (data.dims['y'],data.dims['x']),\n", + " transform = data.affine )\n", + "\n", + "### Convert the mask (numpy array) to an Xarray DataArray\n", + "mask = xr.DataArray(mask, coords=(data.y, data.x))\n", + "mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Plot\n", + "mask.plot(size=8).axes.set_aspect('equal')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Masking the data\n", + "\n", + "Finally, we can use the mask we just created, apply it to the time series of Sentinel-2 data, and plot the result." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Masking\n", + "data = data.where(mask).persist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "time_ind3 = np.linspace(1, data.sizes['time'], 3, dtype='int') - 1 # select some time slices to display\n", + "\n", + "### Plot the selected time slices (true-colour display)\n", + "image_array = data[['red', 'green', 'blue']].isel(time=time_ind3).to_array()\n", + "tmp = image_array.plot.imshow(robust=True, col='time', col_wrap=3, size=5)\n", + "for ax in tmp.axes.flatten(): \n", + " ax.set_aspect('equal')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now have a cropped data time series containing only the pixels of interest over Lake Tempe. \n", + "\n", + "However, due to the varying extents of the lake over time (during wet / dry conditions), some time slices in the time series will contain a certain number of non-water (i.e. land) pixels. Further below, the TSS algorithm would thus also be applied to these land / vegetation pixels, thereby leading to some bias in the results.\n", + "\n", + "# Filtering out land (i.e. non-water) pixels\n", + "\n", + "In order to address this issue, we could try to use the modified normalised difference water index (MNDWI) in order to filter out the non-water pixels. 
\n", + "\n", + "## Water index\n", + "\n", + "The MNDWI is calculated on the basis of the Sentinel-2 bands as per the following equation, with MNDWI values greater than 0 indicating water pixels:\n", + "\n", + "$$\n", + "\\text{MNDWI}= \\frac{ \\text{green}−\\text{SWIR} }{ \\text{green}+\\text{SWIR} }.\n", + "$$\n", + "\n", + "So let's apply this formula to the Sentinel-2 dataset, and save the resulting MNDWI data back into the `data` object as an additional band." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data['MNDWI'] = ( (data.green - data.swir_2) / (data.green + data.swir_2) ).persist()\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As shown above, we now have the MNDWI band integrated as part of the `data` (Dask) array. For insight, we can also plot a few MNDWI time slices to investigate the results further." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "time_ind9 = np.linspace(1, data.sizes['time'], 9, dtype='int') - 1 # select some time slices to display\n", + "tmp = data.MNDWI[time_ind9].plot(col='time', col_wrap=3, size=4)\n", + "for ax in tmp.axes.flatten(): \n", + " ax.set_aspect('equal')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This approach appears to provide good results for the current Sentinel-2 dataset, with various regions on the edge of the lake clearly identified as being non-water (MNDWI values below 0.0).\n", + "\n", + "## Removing non-water pixels\n", + "\n", + "We can now use the MNDWI information to remove the non-water pixels from the time series." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = data.where(data.MNDWI>0.0).persist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Plot some selected time slices (true-colour display)\n", + "image_array = data[['red', 'green', 'blue']].isel(time=time_ind3).to_array()\n", + "tmp = image_array.plot.imshow(robust=True, col='time', col_wrap=3, size=5);\n", + "for ax in tmp.axes.flatten(): \n", + " ax.set_aspect('equal')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TSS analysis\n", + "\n", + "## TSS calculation\n", + "\n", + "As per the formula provided at the start of this notebook, TSS values can be calculated for each pixel in the time series on the basis of the selected Sentinel-2 bands." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### TSS calculation\n", + "tmp = data.red + data.green\n", + "nsmi = (tmp - data.blue) / (tmp + data.blue)\n", + "data_tss = (775.98 * nsmi - 93.606).persist()\n", + "data_tss" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Selected displays\n", + "\n", + "### Individual time slices\n", + "\n", + "For insight, let's take a look at the TSS data at a few selected time points." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tmp = data_tss[time_ind9].plot( col='time', col_wrap=3, cmap='rainbow', size=4, robust=True,\n", + " cbar_kwargs = dict(label=\"TSS [mg/L]\") )\n", + "for ax in tmp.axes.flatten(): \n", + " ax.set_aspect('equal')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can here clearly see that some pixel quality issues are still affecting the datasets, e.g. with residual areas of cloud shadow not having been removed successfully during the data clean-up process.\n", + "\n", + "### Temporal aggregation (1)\n", + "\n", + "While these \"daily\" TSS plots can be insightful, the abundance of missing (and corrupt) data, as well as the many time slices in the time series, make for a difficult assessment of the results. One approach to circumvent this is to first aggregate the data over coarser time spans, and then display the average TSS over these periods.\n", + "\n", + "`Xarray` allows for a straightforward aggregation of the data according to [Pandas indexing](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects) using the `dateoffset` functionality. For instance, we could calculate the temporal mean of the data over yearly quarters (with the first quarter ending in January):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_tss_quarter = data_tss.resample(time=\"QS-JAN\").mean() # aggregation over each successive quarter\n", + "data_tss_quarter = data_tss_quarter.persist()\n", + "data_tss_quarter" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see from the resulting Xarray object, there is a total of 12 quarters in the current time series, leading to 12 \"time slices\" in the resulting array.\n", + "\n", + "Then, as done before for the daily time series, we can select a few of the resulting quarters and plot the respective (temporally averaged) TSS maps." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "time_ind = np.linspace(1, data_tss_quarter.sizes['time'], 9, dtype='int') - 1 # some selected time slices to display\n", + "\n", + "### Main plot\n", + "tmp = data_tss_quarter[time_ind].plot( col='time', col_wrap=3, cmap='rainbow', size=4, robust=True,\n", + " cbar_kwargs = dict(label=\"quarterly-averaged TSS [mg/L]\") )\n", + "for ax in tmp.axes.flatten(): \n", + " ax.set_aspect('equal')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we can see that a clearer picture of the temporal TSS dynamics / characteristics is starting to emerge for each averaging period (quarter) along the selected time series.\n", + "\n", + "### Temporal aggregation (2)\n", + "\n", + "Another potential way to temporally aggregate the time series data is to calculate the average TSS values for _all_ time slices in specific periods such as months, seasons, quarters, etc. In other words, while the previous plots present averaged results over successive quarters (resulting in a total of 12 quarters for the current time series), we could now average the data from _all_ the time slices within, e.g., a given month or season (regardless of the year). 
This would provide an overview of the average TSS concentration in various months or seasons in any given year.\n", + "\n", + "For illustration, let's apply this approach to calculate the average TSS maps for each individual month in our time series – this is done by essentially collecting _all_ the January time slices to calculate the January average, and so forth for all other months.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_tss_group = data_tss.groupby(\"time.month\").mean().persist() # aggregation over each month\n", + "data_tss_group = data_tss_group\n", + "data_tss_group" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As can be seen here, the result of this operation is a DataArray with a new dimension (`month`) and 12 coordinates along it – one \"slice\" for each individual month." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tmp = data_tss_group.plot( col='month', col_wrap=4, cmap='rainbow', size=4, robust=True,\n", + " cbar_kwargs = dict(label=\"monthly average TSS [mg/L]\") )\n", + "for ax in tmp.axes.flatten(): \n", + " ax.set_aspect('equal')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "This plot seems to indicate that, over the whole time series under current consideration, the months between January and April generally exhibit elevated levels of TSS over most of Lake Tempe. In contrast, lower values of TSS are recorded, on average, during the months of May, June and July.\n", + "\n", + "
Caution – Once again, this result should be treated with caution, and further analyses should be carried out to ensure that the underlying data time series is free of residual pixel quality issues and artefacts.
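+ "\n",
+ "One simple check in that direction is to count how many valid (non-NaN) observations actually contribute to each monthly average; months supported by only a handful of clear scenes should be interpreted carefully. A possible sketch:\n",
+ "\n",
+ "```python\n",
+ "### Number of valid TSS observations per pixel, for each calendar month\n",
+ "tss_counts = data_tss.groupby('time.month').count('time')\n",
+ "tmp = tss_counts.plot(col='month', col_wrap=4, size=3)\n",
+ "for ax in tmp.axes.flatten():\n",
+ "    ax.set_aspect('equal')\n",
+ "```\n",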
\n", + " \n", + "## Temporal statistics (standard)\n", + "\n", + "The overall TSS average across the _entire_ time series, for each pixel, can be easily calculated as follows." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_tss_mean = data_tss.mean('time') # standard mean over entire time series\n", + "data_tss_mean = data_tss_mean.persist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Plot\n", + "fig = plt.figure(figsize=(11,7))\n", + "data_tss_mean.plot(robust=True, cmap='rainbow', cbar_kwargs={'label':'mean TSS [mg/L]'})\n", + "plt.gca().set_aspect('equal','box');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This map provides an overview of the overall TSS concentrations for the region of interest and over the entire specified time span.\n", + "\n", + "From the previous results in this notebook, however, we should expect that the above plot integrates a number of pixels for which the pixel QA clean-up process has failed. For instance, some pixels affected by cloud shadows may not have been successfully masked out, leading to erroneously low TSS values in some time slices. These problematic TSS values have subsequently been used in the computation of the above average plot, leading to potentially biased results.\n", + "\n", + "## Robust statistics (using Dask)\n", + "\n", + "### Approach\n", + "\n", + "In order to minimise the impact from such outliers, we could make use of a more _robust_ metric of temporal averaging instead of the simple `.mean()` operation used earlier. For instance, one such metric would be to calculate the _median_ TSS instead.\n", + "\n", + "Here, we will use another approach, which is to define our own (custom) function and apply it to the Xarray / Dask array of TSS data (`data_tss`). The aim here is to instruct `Xarray` to take each pixel, apply the function to the corresponding TSS time series (along the `time` dimension), and aggregate the results to produce a map with two `x` and `y` dimensions. And given the Dask arrays at hand, we would also like this process to occur in a parallelised fashion on all CPUs available in this JupyterLab environment.\n", + "\n", + "### Robust functions\n", + "\n", + "In the next cell, we start by defining a `robust_mean()` function, which operates as follow on a Numpy vector `z` of input data:\n", + "\n", + "1. remove `Nan`s from the data\n", + "1. remove outliers from the data using the `sigma_clip` function\n", + "1. calculate and return the (standard) mean of the filtered data (`NaN`s and outliers removed) – if the filtered vector contains less than 10 data points, simply return `NaN` instead.\n", + "\n", + "Further below, this function will be applied, in a parallelised fashion, to the Dask array of TSS values – essentially, `robust_mean()` will receive as input `z` the time series of TSS values for each pixel in turn. The returned value of the pixel's (robust) average TSS will then be used to build the resulting map of average mean TSS, calculated in a robust way.\n", + "\n", + "In addition, we here also define another function (`robust_cv()`) to calculate the _coefficient of variation_ (CV) for the same time series of TSS data (at each pixel). As shown below, the CV is simply defined as the standard deviation of the input values, divided by the mean – here again, calculated in a robust manner on the basis of the filtered vector of data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def robust_mean(z):\n", + " zf = z[~np.isnan(z)] # filter out NaNs\n", + " if len(zf)<10: return np.nan # use at least 10 values to compute the mean\n", + " zf = sigma_clip( zf, masked=False ) # remove outliers\n", + " return np.mean(zf) # mean of data without outliers\n", + "\n", + "def robust_cv(z):\n", + " zf = z[~np.isnan(z)] # filter out NaNs\n", + " if len(zf)<10: return np.nan # use at least 10 values to compute the CV\n", + " zf = sigma_clip( zf, masked=False ) # remove outliers\n", + " return np.std(zf) / np.mean(zf) # CV of data without outliers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Applying parallelised custom functions\n", + "\n", + "In the following cell, the `robust_mean` function is applied in a parallelised fashion to the dataset by making use of the `apply_ufunc()` function in `Xarray` – another way to carry out this operation would be to use `xr.map_blocks()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Re-chunk Dask array for efficient time-series processing\n", + "data_tss = data_tss.chunk({'time':-1, 'x':32, 'y':32}).persist()\n", + "\n", + "### Parallelised processing\n", + "data_tss_robMean = xr.apply_ufunc( robust_mean, data_tss, input_core_dims=[[\"time\"]], \n", + " dask='parallelized', vectorize=True ) # robust mean, whole time series\n", + "data_tss_robMean = data_tss_robMean.persist()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our second custom function can now be applied in the same way to the TSS data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_tss_robCV = xr.apply_ufunc( robust_cv, data_tss, input_core_dims=[[\"time\"]], \n", + " dask='parallelized', vectorize=True ) # robust mean, whole time series\n", + "data_tss_robCV = data_tss_robCV.persist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "### Plots\n", + "fig, (ax1,ax2) = plt.subplots(1, 2, figsize=(20,7))\n", + "\n", + "data_tss_robMean.plot(robust=True, cmap='rainbow', cbar_kwargs={'label':'TSS mean'}, ax=ax1)\n", + "ax1.set_title(f\"Robust mean of TSS\")\n", + "ax1.set_aspect('equal','box')\n", + "\n", + "data_tss_robCV.plot(robust=True, cmap=\"rainbow\", cbar_kwargs={'label':'TSS C.V.'}, ax=ax2)\n", + "ax2.set_title(f\"Robust C.V. of TSS\")\n", + "ax2.set_aspect('equal','box');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Discussion\n", + "\n", + "The plot of robust mean values (left-hand side) has not changed significantly compared to the previous plot of the (standard) average values. 
However, the fact that we have used a robust metric for the temporal averaging operation gives us more confidence in the validity of the plotted results, in particular with respect to the residual pixel quality issues affecting the considered TSS dataset.\n", + "\n", + "On the right-hand side, the CV map provides further insight into the temporal dynamics in the TSS dataset, highlighting a number of \"hot spots\" with elevated TSS variability near the edge of the lake (perhaps as a result of regular sediment contribution from specific rivers / tributaries to the lake waters).\n", + "\n", + "A final note here is with regards to the range of (average) TSS values displayed in the above plot (left-hand side). In the Pandhadha _et al._ paper cited at the beginning of this notebook, the authors report that the field measurements of TSS in Lake Tempe are ranging between 115 and 203 mg/L, which is (roughly) in the same order of magnitude as the values displayed in the plot. We can thus have some confidence that the TSS algorithm used in this notebook at least provides TSS data that appears to be sensible, though further validation work is required to determine whether the TSS algorithm used here provides accurate results for the range of values experienced in this notebook. \n", + "\n", + "# Extracting TSS data for further analysis (pixel drills)\n", + "\n", + "## Time series indexing\n", + "\n", + "At this point, a practitioner might want to extract the time series of TSS values at selected locations, display them, and potentially write them to file to be used as input to further processing or modelling work.\n", + "\n", + "We demonstrate this by first selecting a few points of interest, e.g. along a transect line across the region of interest." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Some pixels along a transect\n", + "n_points = 5\n", + "pixloc_y = np.linspace(9547000, 9545000, n_points)\n", + "pixloc_x = np.linspace(825000, 832000, n_points)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Plot\n", + "fig = plt.figure(figsize=(8,8))\n", + "plt.plot(pixloc_x, pixloc_y, marker='o', color='black', linestyle='none')\n", + "shp.boundary.plot(ax=fig.axes[0], color='black');\n", + "[plt.text(x,y,f\"{p:4d}\") for p,(y,x) in enumerate(zip(pixloc_y,pixloc_x))];" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now extract the pixels' TSS data using `Xarray`'s vectorised indexing functionality, which will retrieve data at the grid cells nearest to the target `x` and `y` coordinates. For illustration purposes, here we make use of the time series of quarterly averaged TSS values." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "points_x = xr.DataArray(pixloc_x, dims=\"points\")\n", + "points_y = xr.DataArray(pixloc_y, dims=\"points\")\n", + "\n", + "### Extract data (quarterly dataset)\n", + "points_dat = data_tss_quarter.sel(x=points_x, y=points_y, method=\"nearest\")\n", + "points_dat = points_dat.dropna('time', how='all')\n", + "points_dat = points_dat.persist()\n", + "\n", + "### Plot\n", + "points_dat.plot.line(x='time', marker='.', figsize=(15,5));\n", + "plt.gca().set_title(\"Quarterly TSS values, selected pixels\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From this plot, we can clearly identify a period of decreased TSS levels at all point of interest, preceded by a series of higher TSS concentrations.\n", + "\n", + "## Extracting to Pandas data frame\n", + "\n", + "The following code cell will save the sampled (quarterly) TSS data into a Pandas data frame:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Extract to Pandas\n", + "tss_df = pd.DataFrame( data = points_dat.values, \n", + " index = points_dat.time.values, \n", + " columns = points_dat.points.values )\n", + "\n", + "### (Re)set DF index\n", + "tss_df['month'] = tss_df.index.month\n", + "tss_df['year'] = tss_df.index.year\n", + "tss_df = tss_df.set_index(['year','month'])\n", + "\n", + "tss_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If desired, we can also \"re-format\" the data frame into a long (as opposed to wide) format:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tss_df = tss_df.stack(dropna=False).rename_axis(['year','month','point'])\n", + "tss_df = pd.DataFrame(tss_df).rename(columns={0:'TSS'})\n", + "tss_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And finally, saving the data (e.g. to `.csv` or `.pkl` file) can be achieved with the following code if desired." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Uncomment if needed:\n", + "# tss_df.to_csv('./TSS_pixel_data.csv')\n", + "# tss_df.to_pickle(path='./TSS_pixel_data.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### End notebook" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/html/case-studies/measuring_TSS_in_Lake_Tempe.html b/html/case-studies/measuring_TSS_in_Lake_Tempe.html new file mode 100644 index 0000000..65a3e50 --- /dev/null +++ b/html/case-studies/measuring_TSS_in_Lake_Tempe.html @@ -0,0 +1,22265 @@ + + + + + +measuring_TSS_in_Lake_Tempe + + + + + + + + + + + + + + + + + + + + + +
[Rendered HTML body of the nbconvert export omitted; it duplicates the notebook content above]
diff --git a/tools/datacube_utils.py b/tools/datacube_utils.py
index 4c2e1cb..6829eb8 100644
--- a/tools/datacube_utils.py
+++ b/tools/datacube_utils.py
@@ -10,6 +10,7 @@ import math
 import folium
 from pyproj import Transformer
+from collections import Counter
 import geopandas as gpd
 import xarray as xr
 import rasterio.features
@@ -149,6 +150,13 @@ def _degree_to_zoom_level(l1, l2, margin=0.0):
     return zoom_level_int
 
+def xarray_object_size(data):
+    """Return a formatted string"""
+    val, unit = data.nbytes / (1024 ** 2), 'MB'
+    if val > 1024:
+        val, unit = data.nbytes / (1024 ** 3), 'GB'
+    return f'Dataset size: {val:.2f} {unit}'
+
 # Borrowed from https://github.com/GeoscienceAustralia/dea-notebooks/blob/develop/Tools/dea_tools/spatial.py
 def xr_vectorize(da,
                  attribute_col='attribute',
@@ -417,4 +425,5 @@ def xr_rasterize(gdf,
                     export_tiff, 
                     overwrite=True)
         
-    return xarr
\ No newline at end of file
+    return xarr
+