From 40b4d5ed68e68ab7662fb1fa50f8a8ea0c3ec2c4 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sun, 9 Feb 2025 23:06:22 +0100 Subject: [PATCH] Add 'gdal vector sql', as standalone or part of 'gdal vector pipeline' --- apps/CMakeLists.txt | 1 + apps/gdalalg_abstract_pipeline.h | 15 + apps/gdalalg_vector.cpp | 2 + apps/gdalalg_vector_pipeline.cpp | 22 +- apps/gdalalg_vector_sql.cpp | 328 ++++++++++++++++++ apps/gdalalg_vector_sql.h | 63 ++++ autotest/utilities/test_gdalalg_vector_sql.py | 166 +++++++++ doc/source/conf.py | 7 + doc/source/programs/gdal_vector.rst | 2 + doc/source/programs/gdal_vector_pipeline.rst | 17 + doc/source/programs/gdal_vector_sql.rst | 134 +++++++ doc/source/programs/index.rst | 2 + gcore/gdalalgorithm.cpp | 64 +++- gcore/gdalalgorithm.h | 2 + ogr/ogrsf_frmts/generic/ogrlayerpool.cpp | 34 +- ogr/ogrsf_frmts/generic/ogrlayerpool.h | 5 + 16 files changed, 835 insertions(+), 29 deletions(-) create mode 100644 apps/gdalalg_vector_sql.cpp create mode 100644 apps/gdalalg_vector_sql.h create mode 100644 autotest/utilities/test_gdalalg_vector_sql.py create mode 100644 doc/source/programs/gdal_vector_sql.rst diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 3b2c309994a4..9f48f3be08f1 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -31,6 +31,7 @@ add_library( gdalalg_vector_read.cpp gdalalg_vector_filter.cpp gdalalg_vector_reproject.cpp + gdalalg_vector_sql.cpp gdalalg_vector_write.cpp gdalinfo_lib.cpp gdalbuildvrt_lib.cpp diff --git a/apps/gdalalg_abstract_pipeline.h b/apps/gdalalg_abstract_pipeline.h index f2eb0589a8a4..5e198afbfe27 100644 --- a/apps/gdalalg_abstract_pipeline.h +++ b/apps/gdalalg_abstract_pipeline.h @@ -19,6 +19,8 @@ #include "cpl_json.h" #include "gdalalgorithm.h" +#include + template class GDALAbstractPipelineAlgorithm CPL_NON_FINAL : public StepAlgorithm { @@ -44,6 +46,19 @@ class GDALAbstractPipelineAlgorithm CPL_NON_FINAL : public StepAlgorithm { } + ~GDALAbstractPipelineAlgorithm() override + { + // 
Destroy steps in the reverse order they have been constructed, + // as a step can create object that depends on the validity of + // objects of previous steps, and while cleaning them it needs those + // prior objects to be still alive. + // Typically for "gdal vector pipeline read ... ! sql ..." + for (auto it = std::rbegin(m_steps); it != std::rend(m_steps); it++) + { + it->reset(); + } + } + virtual GDALArgDatasetValue &GetOutputDataset() = 0; std::string m_pipeline{}; diff --git a/apps/gdalalg_vector.cpp b/apps/gdalalg_vector.cpp index 75191f2c01ba..6445cd808577 100644 --- a/apps/gdalalg_vector.cpp +++ b/apps/gdalalg_vector.cpp @@ -18,6 +18,7 @@ #include "gdalalg_vector_pipeline.h" #include "gdalalg_vector_filter.h" #include "gdalalg_vector_reproject.h" +#include "gdalalg_vector_sql.h" /************************************************************************/ /* GDALVectorAlgorithm */ @@ -43,6 +44,7 @@ class GDALVectorAlgorithm final : public GDALAlgorithm RegisterSubAlgorithm(); RegisterSubAlgorithm(); RegisterSubAlgorithm(); + RegisterSubAlgorithm(); } private: diff --git a/apps/gdalalg_vector_pipeline.cpp b/apps/gdalalg_vector_pipeline.cpp index eba4c0ae10ab..82764aa8f934 100644 --- a/apps/gdalalg_vector_pipeline.cpp +++ b/apps/gdalalg_vector_pipeline.cpp @@ -15,6 +15,7 @@ #include "gdalalg_vector_clip.h" #include "gdalalg_vector_filter.h" #include "gdalalg_vector_reproject.h" +#include "gdalalg_vector_sql.h" #include "gdalalg_vector_write.h" #include "cpl_conv.h" @@ -60,9 +61,12 @@ void GDALVectorPipelineStepAlgorithm::AddInputArgs(bool hiddenForCLI) AddInputDatasetArg(&m_inputDataset, GDAL_OF_VECTOR, /* positionalAndRequired = */ !hiddenForCLI) .SetHiddenForCLI(hiddenForCLI); - AddArg("input-layer", 'l', _("Input layer name(s)"), &m_inputLayerNames) - .AddAlias("layer") - .SetHiddenForCLI(hiddenForCLI); + if (GetName() != "sql") + { + AddArg("input-layer", 'l', _("Input layer name(s)"), &m_inputLayerNames) + .AddAlias("layer") + 
.SetHiddenForCLI(hiddenForCLI); + } } /************************************************************************/ @@ -94,10 +98,13 @@ void GDALVectorPipelineStepAlgorithm::AddOutputArgs( &m_appendLayer) .SetDefault(false) .SetHiddenForCLI(hiddenForCLI); - AddArg("output-layer", shortNameOutputLayerAllowed ? 'l' : 0, - _("Output layer name"), &m_outputLayerName) - .AddHiddenAlias("nln") // For ogr2ogr nostalgic people - .SetHiddenForCLI(hiddenForCLI); + if (GetName() != "sql") + { + AddArg("output-layer", shortNameOutputLayerAllowed ? 'l' : 0, + _("Output layer name"), &m_outputLayerName) + .AddHiddenAlias("nln") // For ogr2ogr nostalgic people + .SetHiddenForCLI(hiddenForCLI); + } } /************************************************************************/ @@ -178,6 +185,7 @@ GDALVectorPipelineAlgorithm::GDALVectorPipelineAlgorithm() m_stepRegistry.Register(); m_stepRegistry.Register(); m_stepRegistry.Register(); + m_stepRegistry.Register(); } /************************************************************************/ diff --git a/apps/gdalalg_vector_sql.cpp b/apps/gdalalg_vector_sql.cpp new file mode 100644 index 000000000000..04cd5c1a9e25 --- /dev/null +++ b/apps/gdalalg_vector_sql.cpp @@ -0,0 +1,328 @@ +/****************************************************************************** + * + * Project: GDAL + * Purpose: "sql" step of "vector pipeline" + * Author: Even Rouault + * + ****************************************************************************** + * Copyright (c) 2025, Even Rouault + * + * SPDX-License-Identifier: MIT + ****************************************************************************/ + +#include "gdalalg_vector_sql.h" + +#include "gdal_priv.h" +#include "ogrsf_frmts.h" +#include "ogrlayerpool.h" + +#include + +//! 
@cond Doxygen_Suppress + +#ifndef _ +#define _(x) (x) +#endif + +/************************************************************************/ +/* GDALVectorSQLAlgorithm::GDALVectorSQLAlgorithm() */ +/************************************************************************/ + +GDALVectorSQLAlgorithm::GDALVectorSQLAlgorithm(bool standaloneStep) + : GDALVectorPipelineStepAlgorithm(NAME, DESCRIPTION, HELP_URL, + standaloneStep) +{ + AddArg("sql", 0, _("SQL statement(s)"), &m_sql) + .SetPositional() + .SetRequired() + .SetPackedValuesAllowed(false) + .SetReadFromFileAtSyntaxAllowed() + .SetMetaVar("|@") + .SetRemoveSQLCommentsEnabled(); + AddArg("output-layer", standaloneStep ? 0 : 'l', _("Output layer name(s)"), + &m_outputLayer); + AddArg("dialect", 0, _("SQL dialect (e.g. OGRSQL, SQLITE)"), &m_dialect); +} + +/************************************************************************/ +/* GDALVectorSQLAlgorithmDataset */ +/************************************************************************/ + +namespace +{ +class GDALVectorSQLAlgorithmDataset final : public GDALDataset +{ + GDALDataset &m_oSrcDS; + std::vector m_layers{}; + + CPL_DISALLOW_COPY_ASSIGN(GDALVectorSQLAlgorithmDataset) + + public: + explicit GDALVectorSQLAlgorithmDataset(GDALDataset &oSrcDS) + : m_oSrcDS(oSrcDS) + { + } + + ~GDALVectorSQLAlgorithmDataset() override + { + for (OGRLayer *poLayer : m_layers) + m_oSrcDS.ReleaseResultSet(poLayer); + } + + void AddLayer(OGRLayer *poLayer) + { + m_layers.push_back(poLayer); + } + + int GetLayerCount() override + { + return static_cast(m_layers.size()); + } + + OGRLayer *GetLayer(int idx) override + { + return idx >= 0 && idx < GetLayerCount() ? 
m_layers[idx] : nullptr; + } +}; +} // namespace + +/************************************************************************/ +/* GDALVectorSQLAlgorithmDatasetMultiLayer */ +/************************************************************************/ + +namespace +{ + +class ProxiedSQLLayer final : public OGRProxiedLayer +{ + OGRFeatureDefn *m_poLayerDefn = nullptr; + + CPL_DISALLOW_COPY_ASSIGN(ProxiedSQLLayer) + + public: + ProxiedSQLLayer(const std::string &osName, OGRLayerPool *poPoolIn, + OpenLayerFunc pfnOpenLayerIn, + ReleaseLayerFunc pfnReleaseLayerIn, + FreeUserDataFunc pfnFreeUserDataIn, void *pUserDataIn) + : OGRProxiedLayer(poPoolIn, pfnOpenLayerIn, pfnReleaseLayerIn, + pfnFreeUserDataIn, pUserDataIn) + { + SetDescription(osName.c_str()); + } + + ~ProxiedSQLLayer() + { + if (m_poLayerDefn) + m_poLayerDefn->Release(); + } + + const char *GetName() override + { + return GetDescription(); + } + + OGRFeatureDefn *GetLayerDefn() override + { + if (!m_poLayerDefn) + { + m_poLayerDefn = OGRProxiedLayer::GetLayerDefn()->Clone(); + m_poLayerDefn->SetName(GetDescription()); + } + return m_poLayerDefn; + } +}; + +class GDALVectorSQLAlgorithmDatasetMultiLayer final : public GDALDataset +{ + // We can't safely have 2 SQL layers active simultaneously on the same + // source dataset. So each time we access one, we must close the last + // active one. 
+ OGRLayerPool m_oPool{1}; + GDALDataset &m_oSrcDS; + std::vector> m_layers{}; + + struct UserData + { + GDALDataset &oSrcDS; + std::string osSQL{}; + std::string osDialect{}; + std::string osLayerName{}; + + UserData(GDALDataset &oSrcDSIn, const std::string &osSQLIn, + const std::string &osDialectIn, + const std::string &osLayerNameIn) + : oSrcDS(oSrcDSIn), osSQL(osSQLIn), osDialect(osDialectIn), + osLayerName(osLayerNameIn) + { + } + CPL_DISALLOW_COPY_ASSIGN(UserData) + }; + + CPL_DISALLOW_COPY_ASSIGN(GDALVectorSQLAlgorithmDatasetMultiLayer) + + public: + explicit GDALVectorSQLAlgorithmDatasetMultiLayer(GDALDataset &oSrcDS) + : m_oSrcDS(oSrcDS) + { + } + + void AddLayer(const std::string &osSQL, const std::string &osDialect, + const std::string &osLayerName) + { + const auto OpenLayer = [](void *pUserDataIn) + { + UserData *pUserData = static_cast(pUserDataIn); + return pUserData->oSrcDS.ExecuteSQL( + pUserData->osSQL.c_str(), nullptr, + pUserData->osDialect.empty() ? nullptr + : pUserData->osDialect.c_str()); + }; + + const auto CloseLayer = [](OGRLayer *poLayer, void *pUserDataIn) + { + UserData *pUserData = static_cast(pUserDataIn); + pUserData->oSrcDS.ReleaseResultSet(poLayer); + }; + + const auto DeleteUserData = [](void *pUserDataIn) + { delete static_cast(pUserDataIn); }; + + auto pUserData = new UserData(m_oSrcDS, osSQL, osDialect, osLayerName); + auto poLayer = std::make_unique( + osLayerName, &m_oPool, OpenLayer, CloseLayer, DeleteUserData, + pUserData); + m_layers.push_back(std::move(poLayer)); + } + + int GetLayerCount() override + { + return static_cast(m_layers.size()); + } + + OGRLayer *GetLayer(int idx) override + { + return idx >= 0 && idx < GetLayerCount() ? 
m_layers[idx].get() + : nullptr; + } +}; +} // namespace + +/************************************************************************/ +/* GDALVectorSQLAlgorithm::RunStep() */ +/************************************************************************/ + +bool GDALVectorSQLAlgorithm::RunStep(GDALProgressFunc, void *) +{ + CPLAssert(m_inputDataset.GetDatasetRef()); + CPLAssert(m_outputDataset.GetName().empty()); + CPLAssert(!m_outputDataset.GetDatasetRef()); + + if (!m_outputLayer.empty() && m_outputLayer.size() != m_sql.size()) + { + ReportError(CE_Failure, CPLE_AppDefined, + "There should be as many layer names in --output-layer as " + "in --statement"); + return false; + } + + auto poSrcDS = m_inputDataset.GetDatasetRef(); + + if (m_sql.size() == 1) + { + auto outDS = std::make_unique(*poSrcDS); + outDS->SetDescription(poSrcDS->GetDescription()); + + const auto nErrorCounter = CPLGetErrorCounter(); + OGRLayer *poLayer = poSrcDS->ExecuteSQL( + m_sql[0].c_str(), nullptr, + m_dialect.empty() ? nullptr : m_dialect.c_str()); + if (!poLayer) + { + if (nErrorCounter == CPLGetErrorCounter()) + { + ReportError(CE_Failure, CPLE_AppDefined, + "Execution of the SQL statement '%s' did not " + "result in a result layer.", + m_sql[0].c_str()); + } + return false; + } + + if (!m_outputLayer.empty()) + { + const std::string &osLayerName = m_outputLayer[0]; + poLayer->GetLayerDefn()->SetName(osLayerName.c_str()); + poLayer->SetDescription(osLayerName.c_str()); + } + outDS->AddLayer(poLayer); + m_outputDataset.Set(std::move(outDS)); + } + else + { + // First pass to check all statements are valid and figure out layer + // names + std::set setOutputLayerNames; + std::vector aosLayerNames; + for (const std::string &sql : m_sql) + { + const auto nErrorCounter = CPLGetErrorCounter(); + auto poLayer = poSrcDS->ExecuteSQL( + sql.c_str(), nullptr, + m_dialect.empty() ? 
nullptr : m_dialect.c_str()); + if (!poLayer) + { + if (nErrorCounter == CPLGetErrorCounter()) + { + ReportError(CE_Failure, CPLE_AppDefined, + "Execution of the SQL statement '%s' did not " + "result in a result layer.", + sql.c_str()); + } + return false; + } + + std::string osLayerName; + + if (!m_outputLayer.empty()) + { + osLayerName = m_outputLayer[aosLayerNames.size()]; + } + else if (cpl::contains(setOutputLayerNames, + poLayer->GetDescription())) + { + int num = 1; + do + { + osLayerName = poLayer->GetDescription(); + ++num; + osLayerName += std::to_string(num); + } while (cpl::contains(setOutputLayerNames, osLayerName)); + } + + if (!osLayerName.empty()) + { + poLayer->GetLayerDefn()->SetName(osLayerName.c_str()); + poLayer->SetDescription(osLayerName.c_str()); + } + setOutputLayerNames.insert(poLayer->GetDescription()); + aosLayerNames.push_back(poLayer->GetDescription()); + + poSrcDS->ReleaseResultSet(poLayer); + } + + auto outDS = + std::make_unique(*poSrcDS); + outDS->SetDescription(poSrcDS->GetDescription()); + + for (size_t i = 0; i < aosLayerNames.size(); ++i) + { + outDS->AddLayer(m_sql[i], m_dialect, aosLayerNames[i]); + } + + m_outputDataset.Set(std::move(outDS)); + } + + return true; +} + +//! @endcond diff --git a/apps/gdalalg_vector_sql.h b/apps/gdalalg_vector_sql.h new file mode 100644 index 000000000000..2192a850ee96 --- /dev/null +++ b/apps/gdalalg_vector_sql.h @@ -0,0 +1,63 @@ +/****************************************************************************** + * + * Project: GDAL + * Purpose: "sql" step of "vector pipeline" + * Author: Even Rouault + * + ****************************************************************************** + * Copyright (c) 2025, Even Rouault + * + * SPDX-License-Identifier: MIT + ****************************************************************************/ + +#ifndef GDALALG_VECTOR_SQL_INCLUDED +#define GDALALG_VECTOR_SQL_INCLUDED + +#include "gdalalg_vector_pipeline.h" + +//! 
@cond Doxygen_Suppress + +/************************************************************************/ +/* GDALVectorSQLAlgorithm */ +/************************************************************************/ + +class GDALVectorSQLAlgorithm /* non final */ + : public GDALVectorPipelineStepAlgorithm +{ + public: + static constexpr const char *NAME = "sql"; + static constexpr const char *DESCRIPTION = + "Apply SQL statement(s) to a dataset."; + static constexpr const char *HELP_URL = "/programs/gdal_vector_sql.html"; + + static std::vector GetAliases() + { + return {}; + } + + explicit GDALVectorSQLAlgorithm(bool standaloneStep = false); + + private: + bool RunStep(GDALProgressFunc pfnProgress, void *pProgressData) override; + + std::vector m_sql{}; + std::vector m_outputLayer{}; + std::string m_dialect{}; +}; + +/************************************************************************/ +/* GDALVectorSQLAlgorithmStandalone */ +/************************************************************************/ + +class GDALVectorSQLAlgorithmStandalone final : public GDALVectorSQLAlgorithm +{ + public: + GDALVectorSQLAlgorithmStandalone() + : GDALVectorSQLAlgorithm(/* standaloneStep = */ true) + { + } +}; + +//! 
@endcond + +#endif /* GDALALG_VECTOR_SQL_INCLUDED */ diff --git a/autotest/utilities/test_gdalalg_vector_sql.py b/autotest/utilities/test_gdalalg_vector_sql.py new file mode 100644 index 000000000000..e9ca4db73a2a --- /dev/null +++ b/autotest/utilities/test_gdalalg_vector_sql.py @@ -0,0 +1,166 @@ +#!/usr/bin/env pytest +# -*- coding: utf-8 -*- +############################################################################### +# Project: GDAL/OGR Test Suite +# Purpose: 'gdal vector sql' testing +# Author: Even Rouault +# +############################################################################### +# Copyright (c) 2025, Even Rouault +# +# SPDX-License-Identifier: MIT +############################################################################### + +import pytest + +from osgeo import gdal + + +def get_sql_alg(): + reg = gdal.GetGlobalAlgorithmRegistry() + vector = reg.InstantiateAlg("vector") + return vector.InstantiateSubAlgorithm("sql") + + +def test_gdalalg_vector_sql_base(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out.shp") + + sql_alg = get_sql_alg() + assert sql_alg.ParseRunAndFinalize( + ["../ogr/data/poly.shp", out_filename, "select * from poly limit 1"] + ) + + with gdal.OpenEx(out_filename) as ds: + assert ds.GetLayerCount() == 1 + assert ds.GetLayer(0).GetFeatureCount() == 1 + assert ds.GetLayer(-1) is None + assert ds.GetLayer(1) is None + + +def test_gdalalg_vector_sql_layer_name(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out") + + sql_alg = get_sql_alg() + assert sql_alg.ParseRunAndFinalize( + [ + "--output-layer=foo", + "../ogr/data/poly.shp", + out_filename, + "select * from poly limit 1", + ] + ) + + with gdal.OpenEx(out_filename) as ds: + assert ds.GetLayer(0).GetFeatureCount() == 1 + assert ds.GetLayer(0).GetName() == "foo" + + +def test_gdalalg_vector_sql_error(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out.shp") + + sql_alg = get_sql_alg() + with pytest.raises(Exception): + 
sql_alg.ParseRunAndFinalize(["../ogr/data/poly.shp", out_filename, "error"]) + + +def test_gdalalg_vector_sql_error_2_layers(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out.shp") + + sql_alg = get_sql_alg() + with pytest.raises(Exception): + sql_alg.ParseRunAndFinalize( + ["../ogr/data/poly.shp", out_filename, "select * from poly", "error"] + ) + + +def test_gdalalg_vector_sql_layer_name_inconsistent_number(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out") + + sql_alg = get_sql_alg() + with pytest.raises( + Exception, + match="sql: There should be as many layer names in --output-layer as in --statement", + ): + sql_alg.ParseRunAndFinalize( + [ + "--output-layer=foo,bar", + "../ogr/data/poly.shp", + out_filename, + "select * from poly limit 1", + ] + ) + + +def test_gdalalg_vector_sql_several(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out") + + sql_alg = get_sql_alg() + assert sql_alg.ParseRunAndFinalize( + [ + "../ogr/data/poly.shp", + out_filename, + "select * from poly limit 1", + "select * from poly limit 2", + ] + ) + + with gdal.OpenEx(out_filename) as ds: + assert ds.GetLayerCount() == 2 + assert ds.GetLayer(-1) is None + assert ds.GetLayer(2) is None + assert ds.GetLayer(0).GetFeatureCount() == 1 + assert ds.GetLayer(0).GetDescription() == "poly" + assert ds.GetLayer(1).GetFeatureCount() == 2 + assert ds.GetLayer(1).GetDescription() == "poly2" + assert ds.GetLayer(0).GetFeatureCount() == 1 + assert ds.GetLayer(0).GetDescription() == "poly" + + +@pytest.mark.require_driver("SQLite") +def test_gdalalg_vector_sql_dialect(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out.shp") + + sql_alg = get_sql_alg() + assert sql_alg.ParseRunAndFinalize( + [ + "--dialect", + "SQLite", + "../ogr/data/poly.shp", + out_filename, + "select *, sqlite_version() from poly limit 1", + ] + ) + + with gdal.OpenEx(out_filename) as ds: + assert ds.GetLayer(0).GetFeatureCount() == 1 + + +def test_gdalalg_vector_sql_layer_names(tmp_vsimem): + + out_filename = 
str(tmp_vsimem / "out") + + sql_alg = get_sql_alg() + assert sql_alg.ParseRunAndFinalize( + [ + "--output-layer", + "lyr1,lyr2", + "../ogr/data/poly.shp", + out_filename, + "select * from poly limit 1", + "select * from poly limit 2", + ] + ) + + with gdal.OpenEx(out_filename) as ds: + assert ds.GetLayerCount() == 2 + assert ds.GetLayer(0).GetFeatureCount() == 1 + assert ds.GetLayer(0).GetDescription() == "lyr1" + assert ds.GetLayer(1).GetFeatureCount() == 2 + assert ds.GetLayer(1).GetDescription() == "lyr2" diff --git a/doc/source/conf.py b/doc/source/conf.py index 952b1c723073..01247cffc559 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -320,6 +320,13 @@ [author_evenr], 1, ), + ( + "programs/gdal_vector_sql", + "gdal-vector-sql", + "Apply SQL statement(s) to a dataset", + [author_evenr], + 1, + ), # Traditional utilities ( "programs/gdalinfo", diff --git a/doc/source/programs/gdal_vector.rst b/doc/source/programs/gdal_vector.rst index 9b503db16b7a..1ba9017f8620 100644 --- a/doc/source/programs/gdal_vector.rst +++ b/doc/source/programs/gdal_vector.rst @@ -25,6 +25,7 @@ Synopsis - info: Return information on a vector dataset. - pipeline: Process a vector dataset. - reproject: Reproject a vector dataset. + - sql: Apply SQL statement(s) to a dataset. Available sub-commands ---------------------- @@ -33,6 +34,7 @@ Available sub-commands - :ref:`gdal_vector_convert_subcommand` - :ref:`gdal_vector_info_subcommand` - :ref:`gdal_vector_pipeline_subcommand` +- :ref:`gdal_vector_sql_subcommand` Examples -------- diff --git a/doc/source/programs/gdal_vector_pipeline.rst b/doc/source/programs/gdal_vector_pipeline.rst index 20d6b756fccd..a0eba8db1950 100644 --- a/doc/source/programs/gdal_vector_pipeline.rst +++ b/doc/source/programs/gdal_vector_pipeline.rst @@ -93,6 +93,23 @@ Details for options can be found in :ref:`gdal_vector_clip_subcommand`. -s, --src-crs Source CRS -d, --dst-crs Destination CRS [required] +* sql [OPTIONS] + +.. 
code-block:: + + Apply SQL statement(s) to a dataset. + + Positional arguments: + --sql |@ SQL statement(s) [may be repeated] [required] + + Options: + -l, --output-layer Output layer name(s) [may be repeated] + --dialect SQL dialect (e.g. OGRSQL, SQLITE) + + +Details for options can be found in :ref:`gdal_vector_sql_subcommand`. + + * write [OPTIONS] .. code-block:: diff --git a/doc/source/programs/gdal_vector_sql.rst b/doc/source/programs/gdal_vector_sql.rst new file mode 100644 index 000000000000..dbd357422dde --- /dev/null +++ b/doc/source/programs/gdal_vector_sql.rst @@ -0,0 +1,134 @@ +.. _gdal_vector_sql_subcommand: + +================================================================================ +"gdal vector sql" sub-command +================================================================================ + +.. versionadded:: 3.11 + +.. only:: html + + Apply SQL statement(s) to a dataset. + +.. Index:: gdal vector sql + +Synopsis +-------- + +.. code-block:: + + Usage: gdal vector sql [OPTIONS] |@ + + Apply SQL statement(s) to a dataset. 
+ + Positional arguments: + -i, --input Input vector dataset [required] + -o, --output Output vector dataset [required] + --sql |@ SQL statement(s) [may be repeated] [required] + + Common Options: + -h, --help Display help message and exit + --version Display GDAL version and exit + --json-usage Display usage as JSON document and exit + --drivers Display driver list as JSON document and exit + --config = Configuration option [may be repeated] + --progress Display progress bar + + Options: + -f, --of, --format, --output-format Output format + --co, --creation-option = Creation option [may be repeated] + --lco, --layer-creation-option = Layer creation option [may be repeated] + --overwrite Whether overwriting existing output is allowed + --update Whether to open existing dataset in update mode + --overwrite-layer Whether overwriting existing layer is allowed + --append Whether appending to existing layer is allowed + --output-layer Output layer name(s) [may be repeated] + --dialect SQL dialect (e.g. OGRSQL, SQLITE) + + Advanced Options: + --if, --input-format Input formats [may be repeated] + --oo, --open-option Open options [may be repeated] + + +Description +----------- + +:program:`gdal vector sql` returns one or several layers evaluated from +SQL statements. + +Standard options +++++++++++++++++ + +.. include:: gdal_options/of_vector.rst + +.. include:: gdal_options/co_vector.rst + +.. include:: gdal_options/overwrite.rst + +.. option:: --sql |@ + + SQL statement to execute that returns a table/layer (typically a SELECT + statement). + + Can be repeated to generate multiple output layers (repeating --sql + for each output layer) + +.. option:: --dialect + + SQL dialect. + + By default the native SQL of an RDBMS is used when using + ``gdal vector sql``. If using ``sql`` as a step of ``gdal vector pipeline``, + this is only true if the step preceding ``sql`` is ``read``, otherwise the + :ref:`OGRSQL ` dialect is used. 
+ + If a datasource does not support SQL natively, the default is to use the + ``OGRSQL`` dialect, which can also be specified with any data source. + + The :ref:`sql_sqlite_dialect` dialect can be chosen with the ``SQLITE`` + and ``INDIRECT_SQLITE`` dialect values, and this can be used with any data source. + Overriding the default dialect may be beneficial because the capabilities of + the SQL dialects vary. What SQL dialects a driver supports can be checked + with "gdal vector info". + + .. code-block:: + + $ gdal vector info --format "PostgreSQL" + Supported SQL dialects: NATIVE OGRSQL SQLITE + + $ gdal vector info --format "ESRI Shapefile" + Supported SQL dialects: OGRSQL SQLITE + + +.. option:: --output-layer + + Output SQL layer name(s). If not specified, a generic layer name such as + "SELECT" may be generated. + + When specified, as many names must be given as there are SQL statements, either as + several --output-layer arguments, or as a single one with the layer names + separated by commas. + +Advanced options +++++++++++++++++ + +.. include:: gdal_options/oo.rst + +.. include:: gdal_options/if.rst + +Examples +-------- + +.. example:: + :title: Generate a GeoPackage file with a layer sorted by descending population + + .. code-block:: bash + + $ gdal vector sql in.gpkg out.gpkg --output-layer country_sorted_by_pop --sql="SELECT * FROM country ORDER BY pop DESC" + +.. example:: + :title: Generate a GeoPackage file with 2 SQL result layers + + .. code-block:: bash + + $ gdal vector sql in.gpkg out.gpkg --output-layer=beginning,end --sql="SELECT * FROM my_layer LIMIT 100" --sql="SELECT * FROM my_layer OFFSET 100000 LIMIT 100" diff --git a/doc/source/programs/index.rst b/doc/source/programs/index.rst index 95477fda0777..a9e8c930ff1d 100644 --- a/doc/source/programs/index.rst +++ b/doc/source/programs/index.rst @@ -46,6 +46,7 @@ single :program:`gdal` program that accepts commands and subcommands. 
gdal_vector_clip gdal_vector_convert gdal_vector_pipeline + gdal_vector_sql .. only:: html @@ -70,6 +71,7 @@ single :program:`gdal` program that accepts commands and subcommands. - :ref:`gdal_vector_clip_subcommand`: Clip a vector dataset - :ref:`gdal_vector_convert_subcommand`: Convert a vector dataset - :ref:`gdal_vector_pipeline_subcommand`: Process a vector dataset + - :ref:`gdal_vector_sql_subcommand`: Apply SQL statement(s) to a dataset "Traditional" applications diff --git a/gcore/gdalalgorithm.cpp b/gcore/gdalalgorithm.cpp index 26fd554e733c..4695cd7031f4 100644 --- a/gcore/gdalalgorithm.cpp +++ b/gcore/gdalalgorithm.cpp @@ -231,18 +231,8 @@ bool GDALAlgorithmArg::Set(bool value) return SetInternal(value); } -bool GDALAlgorithmArg::Set(const std::string &value) +bool GDALAlgorithmArg::ProcessString(std::string &value) const { - if (m_decl.GetType() != GAAT_STRING) - { - CPLError(CE_Failure, CPLE_AppDefined, - "Calling Set(std::string) on argument '%s' of type %s is not " - "supported", - GetName().c_str(), GDALAlgorithmArgTypeName(m_decl.GetType())); - return false; - } - - std::string newValue(value); if (m_decl.IsReadFromFileAtSyntaxAllowed() && !value.empty() && value.front() == '@') { @@ -257,7 +247,7 @@ bool GDALAlgorithmArg::Set(const std::string &value) { offset = 3; } - newValue = reinterpret_cast(pabyData + offset); + value = reinterpret_cast(pabyData + offset); VSIFree(pabyData); } else @@ -267,9 +257,24 @@ bool GDALAlgorithmArg::Set(const std::string &value) } if (m_decl.IsRemoveSQLCommentsEnabled()) - newValue = CPLRemoveSQLComments(newValue); + value = CPLRemoveSQLComments(value); + + return true; +} + +bool GDALAlgorithmArg::Set(const std::string &value) +{ + if (m_decl.GetType() != GAAT_STRING) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Calling Set(std::string) on argument '%s' of type %s is not " + "supported", + GetName().c_str(), GDALAlgorithmArgTypeName(m_decl.GetType())); + return false; + } - return SetInternal(newValue); + 
std::string newValue(value); + return ProcessString(newValue) && SetInternal(newValue); } bool GDALAlgorithmArg::Set(int value) @@ -392,7 +397,22 @@ bool GDALAlgorithmArg::Set(const std::vector &value) GetName().c_str(), GDALAlgorithmArgTypeName(m_decl.GetType())); return false; } - return SetInternal(value); + + if (m_decl.IsReadFromFileAtSyntaxAllowed() || + m_decl.IsRemoveSQLCommentsEnabled()) + { + std::vector newValue(value); + for (auto &s : newValue) + { + if (!ProcessString(s)) + return false; + } + return SetInternal(newValue); + } + else + { + return SetInternal(value); + } } bool GDALAlgorithmArg::Set(const std::vector &value) @@ -2972,9 +2992,17 @@ GDALAlgorithm::GetUsageForCLI(bool shortUsage, osRet += " [OPTIONS]"; for (const auto *arg : m_positionalArgs) { - osRet += " <"; - osRet += arg->GetMetaVar(); - osRet += '>'; + const std::string &metavar = arg->GetMetaVar(); + if (!metavar.empty() && metavar[0] == '<') + { + osRet += metavar; + } + else + { + osRet += " <"; + osRet += metavar; + osRet += '>'; + } } } diff --git a/gcore/gdalalgorithm.h b/gcore/gdalalgorithm.h index 3823c8cced4c..3c4dbf8255c7 100644 --- a/gcore/gdalalgorithm.h +++ b/gcore/gdalalgorithm.h @@ -1447,6 +1447,8 @@ class CPL_DLL GDALAlgorithmArg /* non-final */ return RunAllActions(); } + bool ProcessString(std::string &value) const; + bool RunAllActions(); void RunActions(); bool RunValidationActions(); diff --git a/ogr/ogrsf_frmts/generic/ogrlayerpool.cpp b/ogr/ogrsf_frmts/generic/ogrlayerpool.cpp index d3166667e685..58a6ef90fa47 100644 --- a/ogr/ogrsf_frmts/generic/ogrlayerpool.cpp +++ b/ogr/ogrsf_frmts/generic/ogrlayerpool.cpp @@ -131,13 +131,36 @@ void OGRLayerPool::UnchainLayer(OGRAbstractProxiedLayer *poLayer) /* OGRProxiedLayer() */ /************************************************************************/ +static void ReleaseDelete(OGRLayer *poLayer, void *) +{ + delete poLayer; +} + +OGRProxiedLayer::OGRProxiedLayer(OGRLayerPool *poPoolIn, + OpenLayerFunc pfnOpenLayerIn, + 
FreeUserDataFunc pfnFreeUserDataIn, + void *pUserDataIn) + : OGRAbstractProxiedLayer(poPoolIn), pfnOpenLayer(pfnOpenLayerIn), + pfnReleaseLayer(ReleaseDelete), pfnFreeUserData(pfnFreeUserDataIn), + pUserData(pUserDataIn), poUnderlyingLayer(nullptr), + poFeatureDefn(nullptr), poSRS(nullptr) +{ + CPLAssert(pfnOpenLayerIn != nullptr); +} + +/************************************************************************/ +/* OGRProxiedLayer() */ +/************************************************************************/ + OGRProxiedLayer::OGRProxiedLayer(OGRLayerPool *poPoolIn, OpenLayerFunc pfnOpenLayerIn, + ReleaseLayerFunc pfnReleaseLayerIn, FreeUserDataFunc pfnFreeUserDataIn, void *pUserDataIn) : OGRAbstractProxiedLayer(poPoolIn), pfnOpenLayer(pfnOpenLayerIn), - pfnFreeUserData(pfnFreeUserDataIn), pUserData(pUserDataIn), - poUnderlyingLayer(nullptr), poFeatureDefn(nullptr), poSRS(nullptr) + pfnReleaseLayer(pfnReleaseLayerIn), pfnFreeUserData(pfnFreeUserDataIn), + pUserData(pUserDataIn), poUnderlyingLayer(nullptr), + poFeatureDefn(nullptr), poSRS(nullptr) { CPLAssert(pfnOpenLayerIn != nullptr); } @@ -148,7 +171,7 @@ OGRProxiedLayer::OGRProxiedLayer(OGRLayerPool *poPoolIn, OGRProxiedLayer::~OGRProxiedLayer() { - delete poUnderlyingLayer; + OGRProxiedLayer::CloseUnderlyingLayer(); if (poSRS) poSRS->Release(); @@ -184,7 +207,10 @@ int OGRProxiedLayer::OpenUnderlyingLayer() void OGRProxiedLayer::CloseUnderlyingLayer() { CPLDebug("OGR", "CloseUnderlyingLayer(%p)", this); - delete poUnderlyingLayer; + if (poUnderlyingLayer) + { + pfnReleaseLayer(poUnderlyingLayer, pUserData); + } poUnderlyingLayer = nullptr; } diff --git a/ogr/ogrsf_frmts/generic/ogrlayerpool.h b/ogr/ogrsf_frmts/generic/ogrlayerpool.h index e37ce8ca1112..a5aef18d366d 100644 --- a/ogr/ogrsf_frmts/generic/ogrlayerpool.h +++ b/ogr/ogrsf_frmts/generic/ogrlayerpool.h @@ -18,6 +18,7 @@ #include "ogrsf_frmts.h" typedef OGRLayer *(*OpenLayerFunc)(void *user_data); +typedef void (*ReleaseLayerFunc)(OGRLayer *, void 
*user_data); typedef void (*FreeUserDataFunc)(void *user_data); class OGRLayerPool; @@ -89,6 +90,7 @@ class CPL_DLL OGRProxiedLayer : public OGRAbstractProxiedLayer CPL_DISALLOW_COPY_ASSIGN(OGRProxiedLayer) OpenLayerFunc pfnOpenLayer; + ReleaseLayerFunc pfnReleaseLayer; FreeUserDataFunc pfnFreeUserData; void *pUserData; OGRLayer *poUnderlyingLayer; @@ -103,6 +105,9 @@ class CPL_DLL OGRProxiedLayer : public OGRAbstractProxiedLayer public: OGRProxiedLayer(OGRLayerPool *poPool, OpenLayerFunc pfnOpenLayer, FreeUserDataFunc pfnFreeUserData, void *pUserData); + OGRProxiedLayer(OGRLayerPool *poPool, OpenLayerFunc pfnOpenLayer, + ReleaseLayerFunc pfnReleaseLayer, + FreeUserDataFunc pfnFreeUserData, void *pUserData); virtual ~OGRProxiedLayer(); OGRLayer *GetUnderlyingLayer();