From a995bcfa8aa75cbc728c966342aac204c1d278b5 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 16 Mar 2024 17:32:55 +0100 Subject: [PATCH 1/4] add convenience functions to determine object types --- src/osmium/osm/types.py | 41 +++++++++++++++++++++++++++++++++++++++++ test/test_osm.py | 10 ++++++++++ 2 files changed, 51 insertions(+) diff --git a/src/osmium/osm/types.py b/src/osmium/osm/types.py index fbe4e328..c9dbf41e 100644 --- a/src/osmium/osm/types.py +++ b/src/osmium/osm/types.py @@ -370,6 +370,32 @@ def user_is_anonymous(self) -> bool: """ return self._pyosmium_data.user_is_anonymous() + def type_str(self) -> str: + """ Return a single character identifying the type of the object. + The character is the same as used in OPL. + """ + return 'o' + + def is_node(self) -> bool: + """ Return true if the object is a Node object. + """ + return isinstance(self, Node) + + def is_way(self) -> bool: + """ Return true if the object is a Way object. + """ + return isinstance(self, Way) + + def is_relation(self) -> bool: + """ Return true if the object is a Relation object. + """ + return isinstance(self, Relation) + + def is_area(self) -> bool: + """ Return true if the object is an Area object. + """ + return isinstance(self, Area) + class Node(OSMObject['cosm.COSMNode']): """ Represents a single OSM node. 
It inherits all properties from OSMObjects @@ -403,6 +429,9 @@ def location(self) -> 'osmium.osm.Location': return self._location + def type_str(self) -> str: + return 'n' + def __str__(self) -> str: if self._pyosmium_data.is_valid(): return f'n{self.id:d}: location={self.location!s} tags={self.tags!s}' @@ -467,6 +496,9 @@ def ends_have_same_location(self) -> bool: """ return self._pyosmium_data.ends_have_same_location() + def type_str(self) -> str: + return 'w' + def __str__(self) -> str: if self._pyosmium_data.is_valid(): return f'w{self.id:d}: nodes={self.nodes!s} tags={self.tags!s}' @@ -503,6 +535,9 @@ def replace(self, **kwargs: Any) -> 'osmium.osm.mutable.Relation': """ return osmium.osm.mutable.Relation(self, **kwargs) + def type_str(self) -> str: + return 'r' + def __str__(self) -> str: if self._pyosmium_data.is_valid(): return f"r{self.id:d}: members={self.members!s}, tags={self.tags!s}" @@ -593,6 +628,9 @@ def inner_rings(self, oring: OuterRing) -> InnerRingIterator: """ return InnerRingIterator(self._pyosmium_data, oring) + def type_str(self) -> str: + return 'a' + def __str__(self) -> str: if self._pyosmium_data.is_valid(): return f"a{self.id:d}: num_rings={self.num_rings()}, tags={self.tags!s}" @@ -677,6 +715,9 @@ def user_is_anonymous(self) -> bool: """ return self._pyosmium_data.user_is_anonymous() + def type_str(self) -> str: + return 'c' + def __str__(self) -> str: if self._pyosmium_data.is_valid(): return f'c{self.id:d}: closed_at={self.closed_at!s}, bounds={self.bounds!s}, tags={self.tags!s}' diff --git a/test/test_osm.py b/test/test_osm.py index e44174eb..9124eb23 100644 --- a/test/test_osm.py +++ b/test/test_osm.py @@ -96,6 +96,9 @@ def node(n): assert n.timestamp == mkdate(2014, 1, 31, 6, 23, 35) assert n.user == u'änonymous' assert n.positive_id() == 1 + assert n.is_node() + assert not n.is_way() + assert n.type_str() == 'n' assert str(n) == 'n1: location=invalid tags={}' assert repr(n) == "osmium.osm.Node(id=1, deleted=False, visible=True, 
version=5, changeset=58674, uid=42, timestamp=datetime.datetime(2014, 1, 31, 6, 23, 35, tzinfo=datetime.timezone.utc), user='änonymous', tags=osmium.osm.TagList({}), location=osmium.osm.Location())" @@ -127,6 +130,8 @@ def way(o): assert o.timestamp == mkdate(2014, 1, 31, 6, 23, 35) assert o.user == 'anonymous' assert o.positive_id() == 1 + assert o.is_way() + assert o.type_str() == 'w' assert not o.is_closed() assert not o.ends_have_same_id() assert not o.ends_have_same_location() @@ -154,6 +159,8 @@ def relation(o): assert o.timestamp == mkdate(2014, 1, 31, 6, 23, 35) assert o.user == ' anonymous' assert o.positive_id() == 1 + assert o.is_relation() + assert o.type_str() == 'r' assert str(o) == 'r1: members=[w1], tags={}' assert repr(o) == "osmium.osm.Relation(id=1, deleted=False, visible=True, version=5, changeset=58674, uid=42, timestamp=datetime.datetime(2014, 1, 31, 6, 23, 35, tzinfo=datetime.timezone.utc), user=' anonymous', tags=osmium.osm.TagList({}), members=osmium.osm.RelationMemberList([osmium.osm.RelationMember(ref=1, type='w', role='')]))" @@ -179,6 +186,8 @@ def area(o): assert o.positive_id() == 46 assert o.orig_id() == 23 assert o.from_way() == True + assert o.is_area() + assert o.type_str() == 'a' assert o.is_multipolygon() == False assert o.num_rings() == (1, 0) assert len(list(o.outer_rings())) == 1 @@ -243,6 +252,7 @@ def changeset(c): assert 515288620 == c.bounds.top_right.y assert -1465242 == c.bounds.bottom_left.x assert 515288506 == c.bounds.bottom_left.y + assert c.type_str() == 'c' assert str(c) == 'c34: closed_at=2005-04-09 20:54:39+00:00, bounds=(-0.1465242/51.5288506 -0.1464925/51.5288620), tags={}' assert repr(c) == "osmium.osm.Changeset(id=34, uid=1, created_at=datetime.datetime(2005, 4, 9, 19, 54, 13, tzinfo=datetime.timezone.utc), closed_at=datetime.datetime(2005, 4, 9, 20, 54, 39, tzinfo=datetime.timezone.utc), open=False, num_changes=2, bounds=osmium.osm.Box(bottom_left=osmium.osm.Location(x=-1465242, y=515288506), 
top_right=osmium.osm.Location(x=-1464925, y=515288620)), user='Steve', tags=osmium.osm.TagList({}))" From 70bb56b3ceb15e76d4d1fbc60ce88e343a4b1b24 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 16 Mar 2024 17:59:45 +0100 Subject: [PATCH 2/4] implement iterative handling of AreaHandler output --- lib/area.cc | 54 +++++++++++--- lib/buffer_iterator.h | 159 ++++++++++++++++++++++++++++++++++++++++++ lib/handler_chain.h | 35 +++++++--- lib/osmium.cc | 10 +++ test/test_area.py | 20 ++++++ 5 files changed, 260 insertions(+), 18 deletions(-) create mode 100644 lib/buffer_iterator.h diff --git a/lib/area.cc b/lib/area.cc index 1280b494..6b4fb75c 100644 --- a/lib/area.cc +++ b/lib/area.cc @@ -13,6 +13,7 @@ #include "base_handler.h" #include "handler_chain.h" +#include "buffer_iterator.h" namespace py = pybind11; @@ -20,15 +21,13 @@ namespace { using MpManager = osmium::area::MultipolygonManager; -class AreaManagerSecondPassHandler : public BaseHandler +class AreaManagerSecondPassHandlerBase : public BaseHandler { public: - AreaManagerSecondPassHandler(MpManager *mp_manager, py::args args) - : m_mp_manager(mp_manager), m_args(args), m_handlers(m_args) - { - m_mp_manager->set_callback([this](osmium::memory::Buffer &&ab) - { osmium::apply(ab, this->m_handlers); }); - } + AreaManagerSecondPassHandlerBase(MpManager *mp_manager) + : m_mp_manager(mp_manager) + {} + bool node(osmium::Node const *n) override { @@ -53,10 +52,37 @@ class AreaManagerSecondPassHandler : public BaseHandler m_mp_manager->flush_output(); } -private: +protected: MpManager *m_mp_manager; +}; + + +class AreaManagerSecondPassHandler : public AreaManagerSecondPassHandlerBase +{ +public: + AreaManagerSecondPassHandler(MpManager *mp_manager, py::args args) + : AreaManagerSecondPassHandlerBase(mp_manager), m_args(args), m_handlers(m_args) + { + m_mp_manager->set_callback([this](osmium::memory::Buffer &&ab) + { osmium::apply(ab, this->m_handlers); }); + } + +private: py::args m_args; HandlerChain 
m_handlers; + +}; + + +class AreaManagerBufferHandler : public AreaManagerSecondPassHandlerBase +{ +public: + AreaManagerBufferHandler(MpManager *mp_manager, pyosmium::BufferIterator *cb) + : AreaManagerSecondPassHandlerBase(mp_manager) + { + m_mp_manager->set_callback([cb](osmium::memory::Buffer &&ab) + { cb->add_buffer(std::move(ab)); }); + } }; @@ -82,6 +108,12 @@ class AreaManager : public BaseHandler return new AreaManagerSecondPassHandler(&m_mp_manager, args); } + AreaManagerBufferHandler *second_pass_to_buffer(pyosmium::BufferIterator *cb) + { + m_mp_manager.prepare_for_lookup(); + return new AreaManagerBufferHandler(&m_mp_manager, cb); + } + private: osmium::area::Assembler::config_type m_assembler_config; osmium::area::MultipolygonManager m_mp_manager; @@ -93,6 +125,8 @@ PYBIND11_MODULE(_area, m) { py::class_(m, "AreaManagerSecondPassHandler"); + py::class_(m, + "AreaManagerBufferHandler"); py::class_(m, "AreaManager", "Object manager class that manages building area objects from " @@ -107,5 +141,9 @@ PYBIND11_MODULE(_area, m) "file, where areas are assembled. Pass the handlers that " "should handle the areas.", py::return_value_policy::take_ownership, py::keep_alive<1, 2>()) + .def("second_pass_to_buffer", &AreaManager::second_pass_to_buffer, + py::keep_alive<1, 2>(), + "Return a handler object for the second pass of the file. " + "The handler holds a buffer, which can be iterated over.") ; } diff --git a/lib/buffer_iterator.h b/lib/buffer_iterator.h new file mode 100644 index 00000000..29ebde59 --- /dev/null +++ b/lib/buffer_iterator.h @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: BSD-2-Clause + * + * This file is part of pyosmium. (https://osmcode.org/pyosmium/) + * + * Copyright (C) 2024 Sarah Hoffmann and others. + * For a full list of authors see the git log. 
+ */ +#ifndef PYOSMIUM_BUFFER_ITERATOR_H +#define PYOSMIUM_BUFFER_ITERATOR_H +#include + +#include + +#include "osmium_module.h" +#include "osm_base_objects.h" +#include "handler_chain.h" + +namespace pyosmium { + +class BufferIterator +{ +public: + BufferIterator(pybind11::args args) + : m_handler(args) + {} + + ~BufferIterator() + { + invalidate_current(); + } + + void add_buffer(osmium::memory::Buffer &&buf) + { + if (m_buffers.empty()) { + m_current_it = buf.begin(); + } + + m_buffers.push(std::move(buf)); + } + + bool empty() const + { return m_buffers.empty(); } + + pybind11::object next() + { + invalidate_current(); + + if (m_buffers.empty()) { + throw pybind11::stop_iteration(); + } + + while (true) { + while (m_current_it == m_buffers.front().end()) { + m_buffers.pop(); + if (m_buffers.empty()) { + throw pybind11::stop_iteration(); + } + m_current_it = m_buffers.front().begin(); + } + + osmium::OSMEntity *entity = &*m_current_it; + ++m_current_it; + + switch (entity->type()) { + case osmium::item_type::node: + { + auto *node = static_cast(entity); + if (!m_handler.handle_node(*node)) { + m_current = m_type_module.attr("Node")(COSMNode{node}); + m_current_type = osmium::item_type::node; + return m_current; + } + break; + } + case osmium::item_type::way: + { + auto *way = static_cast(entity); + if (!m_handler.handle_way(*way)) { + m_current = m_type_module.attr("Way")(COSMWay{way}); + m_current_type = osmium::item_type::way; + return m_current; + } + break; + } + case osmium::item_type::relation: + { + auto *rel = static_cast(entity); + if (!m_handler.handle_relation(*rel)) { + m_current = m_type_module.attr("Relation")(COSMRelation{rel}); + m_current_type = osmium::item_type::relation; + return m_current; + } + break; + } + case osmium::item_type::area: + { + auto *area = static_cast(entity); + if (!m_handler.handle_area(*area)) { + m_current = m_type_module.attr("Area")(COSMArea{area}); + m_current_type = osmium::item_type::area; + return m_current; + } + 
break; + } + case osmium::item_type::changeset: + { + auto *cs = static_cast(entity); + if (!m_handler.handle_changeset(*cs)) { + m_current = m_type_module.attr("Changeset")(COSMChangeset{cs}); + m_current_type = osmium::item_type::changeset; + return m_current; + } + break; + } + default: + break; + } + } + + return pybind11::object(); + } + + +private: + void invalidate_current() + { + switch (m_current_type) { + case osmium::item_type::node: + m_current.attr("_pyosmium_data").template cast()->invalidate(); + break; + case osmium::item_type::way: + m_current.attr("_pyosmium_data").template cast()->invalidate(); + break; + case osmium::item_type::relation: + m_current.attr("_pyosmium_data").template cast()->invalidate(); + break; + case osmium::item_type::area: + m_current.attr("_pyosmium_data").template cast()->invalidate(); + break; + case osmium::item_type::changeset: + m_current.attr("_pyosmium_data").template cast()->invalidate(); + break; + } + m_current_type = osmium::item_type::undefined; + } + + HandlerChain m_handler; + + std::queue m_buffers; + osmium::memory::Buffer::iterator m_current_it; + osmium::item_type m_current_type = osmium::item_type::undefined; + pybind11::object m_current; + + pybind11::object m_type_module = pybind11::module_::import("osmium.osm.types"); +}; + +} // namespace + +#endif // PYOSMIUM_BUFFER_ITERATOR_H diff --git a/lib/handler_chain.h b/lib/handler_chain.h index 2af2599d..dad49ca2 100644 --- a/lib/handler_chain.h +++ b/lib/handler_chain.h @@ -43,44 +43,59 @@ class HandlerChain : public osmium::handler::Handler } } - void node(osmium::Node const &o) { + void node(osmium::Node const &o) { handle_node(o); } + + bool handle_node(osmium::Node const &o) { for (auto const &handler : m_handlers) { if (handler->node(&o)) { - return; + return true; } } + return false; } - void way(osmium::Way &w) { + void way(osmium::Way &w) { handle_way(w); } + + bool handle_way(osmium::Way &w) { for (auto const &handler : m_handlers) { if 
(handler->way(&w)) { - return; + return true; } } + return false; } - void relation(osmium::Relation const &o) { + void relation(osmium::Relation const &o) { handle_relation(o); } + + bool handle_relation(osmium::Relation const &o) { for (auto const &handler : m_handlers) { if (handler->relation(&o)) { - return; + return true; } } + return false; } - void changeset(osmium::Changeset const &o) { + void changeset(osmium::Changeset const &o) { handle_changeset(o); } + + bool handle_changeset(osmium::Changeset const &o) { for (auto const &handler : m_handlers) { if (handler->changeset(&o)) { - return; + return true; } } + return false; } - void area(osmium::Area const &o) { + void area(osmium::Area const &o) { handle_area(o); } + + bool handle_area(osmium::Area const &o) { for (auto const &handler : m_handlers) { if (handler->area(&o)) { - return; + return true; } } + return false; } void flush() { diff --git a/lib/osmium.cc b/lib/osmium.cc index 489e3c5b..07c04b30 100644 --- a/lib/osmium.cc +++ b/lib/osmium.cc @@ -18,6 +18,7 @@ #include "osmium_module.h" #include "python_handler.h" #include "handler_chain.h" +#include "buffer_iterator.h" namespace py = pybind11; @@ -67,6 +68,15 @@ PYBIND11_MODULE(_osmium, m) { py::class_(m, "BaseHandler"); + py::class_(m, "BufferIterator", + "Iterator interface for reading from a queue of buffers.") + .def(py::init()) + .def("__bool__", [](pyosmium::BufferIterator const &it) { return !it.empty(); }) + .def("__iter__", [](py::object const &self) { return self; }) + .def("__next__", &pyosmium::BufferIterator::next, + "Get the next OSM object from the buffer or raise an StopIteration.") + ; + init_merge_input_reader(m); init_write_handler(m); init_simple_writer(m); diff --git a/test/test_area.py b/test/test_area.py index 9cde0346..e0c509d7 100644 --- a/test/test_area.py +++ b/test/test_area.py @@ -27,3 +27,23 @@ def test_area_handler(): assert ch_area.counts == [0, 0, 0, 5239] assert ch_others.counts == [211100, 10315, 244, 0] + + +def 
test_area_buffer_handler(): + area = o.area.AreaManager() + + o.apply(o.io.Reader(TEST_FILE), area.first_pass_handler()) + + lh = o.NodeLocationsForWays(o.index.create_map("flex_mem")) + lh.ignore_errors() + + buf = o.BufferIterator() + + o.apply(o.io.Reader(TEST_FILE), lh, area.second_pass_to_buffer(buf)) + + counts = 0 + for obj in buf: + assert obj.is_area() + counts += 1 + + assert counts == 5239 From 081084566a4a2a6443404e2694483572777aa0c7 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 16 Mar 2024 21:30:31 +0100 Subject: [PATCH 3/4] add iterative processing of OSM files --- CMakeLists.txt | 3 +- lib/file_iterator.cc | 153 +++++++++++++++++++++++++++++++ lib/osmium.cc | 1 + lib/osmium_module.h | 1 + src/osmium/__init__.py | 1 + src/osmium/file_processor.py | 109 ++++++++++++++++++++++ test/conftest.py | 11 ++- test/test_dangling_references.py | 37 ++++++++ test/test_osmium.py | 92 +++++++++++++++++++ 9 files changed, 405 insertions(+), 3 deletions(-) create mode 100644 lib/file_iterator.cc create mode 100644 src/osmium/file_processor.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 399485cf..1f9dd6de 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -78,7 +78,8 @@ pybind11_add_module(_osmium lib/merge_input_reader.cc lib/node_location_handler.cc lib/simple_writer.cc - lib/write_handler.cc) + lib/write_handler.cc + lib/file_iterator.cc) set_module_output(_osmium osmium) pybind11_add_module(_replication lib/replication.cc) set_module_output(_replication osmium/replication) diff --git a/lib/file_iterator.cc b/lib/file_iterator.cc new file mode 100644 index 00000000..7f0fc528 --- /dev/null +++ b/lib/file_iterator.cc @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: BSD-2-Clause + * + * This file is part of pyosmium. (https://osmcode.org/pyosmium/) + * + * Copyright (C) 2024 Sarah Hoffmann and others. + * For a full list of authors see the git log. 
+ */ +#include + +#include +#include + +#include "osmium_module.h" +#include "osm_base_objects.h" +#include "handler_chain.h" + +namespace py = pybind11; + +namespace { + +class OsmFileIterator +{ +public: + OsmFileIterator(osmium::io::Reader *reader, py::args args) + : m_reader(reader), m_pre_handler(args) + { + m_buffer = m_reader->read(); + + if (m_buffer) { + m_buffer_it = m_buffer.begin(); + } + } + + + ~OsmFileIterator() + { + invalidate_current(); + } + + pybind11::object next() + { + while (true) { + invalidate_current(); + + if (!m_buffer) { + throw pybind11::stop_iteration(); + } + + while (m_buffer_it == m_buffer.end()) { + m_buffer = m_reader->read(); + if (!m_buffer) { + m_pre_handler.flush(); + throw pybind11::stop_iteration(); + } + m_buffer_it = m_buffer.begin(); + } + + osmium::OSMEntity *entity = &*m_buffer_it; + ++m_buffer_it; + + switch (entity->type()) { + case osmium::item_type::node: + { + auto *node = static_cast(entity); + if (!m_pre_handler.handle_node(*node)) { + m_current = m_type_module.attr("Node")(COSMNode{node}); + m_current_type = osmium::item_type::node; + return m_current; + } + break; + } + case osmium::item_type::way: + { + auto *way = static_cast(entity); + if (!m_pre_handler.handle_way(*way)) { + m_current = m_type_module.attr("Way")(COSMWay{way}); + m_current_type = osmium::item_type::way; + return m_current; + } + break; + } + case osmium::item_type::relation: + { + auto *rel = static_cast(entity); + if (!m_pre_handler.handle_relation(*rel)) { + m_current = m_type_module.attr("Relation")(COSMRelation{rel}); + m_current_type = osmium::item_type::relation; + return m_current; + } + break; + } + case osmium::item_type::changeset: + { + auto *cs = static_cast(entity); + if (!m_pre_handler.handle_changeset(*cs)) { + m_current = m_type_module.attr("Changeset")(COSMChangeset{cs}); + m_current_type = osmium::item_type::changeset; + return m_current; + } + break; + } + default: + break; + } + } + + return pybind11::object(); + } + 
+private: + void invalidate_current() + { + switch (m_current_type) { + case osmium::item_type::node: + m_current.attr("_pyosmium_data").template cast()->invalidate(); + break; + case osmium::item_type::way: + m_current.attr("_pyosmium_data").template cast()->invalidate(); + break; + case osmium::item_type::relation: + m_current.attr("_pyosmium_data").template cast()->invalidate(); + break; + case osmium::item_type::changeset: + m_current.attr("_pyosmium_data").template cast()->invalidate(); + break; + } + m_current_type = osmium::item_type::undefined; + } + + osmium::io::Reader *m_reader; + osmium::memory::Buffer m_buffer; + osmium::memory::Buffer::iterator m_buffer_it; + osmium::item_type m_current_type = osmium::item_type::undefined; + pybind11::object m_current; + + HandlerChain m_pre_handler; + + pybind11::object m_type_module = pybind11::module_::import("osmium.osm.types"); +}; + +} + +void init_osm_file_iterator(py::module &m) +{ + py::class_(m, "OsmFileIterator", + "Iterator interface for reading an OSM file.") + .def(py::init()) + .def("__iter__", [](py::object const &self) { return self; }) + .def("__next__", &OsmFileIterator::next, + "Get the next OSM object from the file or raise a StopIteration.") + ; +} + diff --git a/lib/osmium.cc b/lib/osmium.cc index 07c04b30..89e77148 100644 --- a/lib/osmium.cc +++ b/lib/osmium.cc @@ -81,4 +81,5 @@ PYBIND11_MODULE(_osmium, m) { init_write_handler(m); init_simple_writer(m); init_node_location_handler(m); + init_osm_file_iterator(m); }; diff --git a/lib/osmium_module.h b/lib/osmium_module.h index 04b01397..ac425591 100644 --- a/lib/osmium_module.h +++ b/lib/osmium_module.h @@ -14,5 +14,6 @@ void init_merge_input_reader(pybind11::module &m); void init_write_handler(pybind11::module &m); void init_simple_writer(pybind11::module &m); void init_node_location_handler(pybind11::module &m); +void init_osm_file_iterator(pybind11::module &m); #endif // PYOSMIUM_OSMIUM_MODULE_H diff --git a/src/osmium/__init__.py 
b/src/osmium/__init__.py index 2b39a768..1a93e537 100644 --- a/src/osmium/__init__.py +++ b/src/osmium/__init__.py @@ -8,6 +8,7 @@ from osmium._osmium import * from osmium.helper import * from osmium.simple_handler import SimpleHandler +from osmium.file_processor import FileProcessor import osmium.io import osmium.osm import osmium.index diff --git a/src/osmium/file_processor.py b/src/osmium/file_processor.py new file mode 100644 index 00000000..08d9b7f8 --- /dev/null +++ b/src/osmium/file_processor.py @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: BSD-2-Clause +# +# This file is part of pyosmium. (https://osmcode.org/pyosmium/) +# +# Copyright (C) 2024 Sarah Hoffmann and others. +# For a full list of authors see the git log. +from pathlib import Path + +import osmium + +class FileProcessor: + """ A generator that emits OSM objects read from a file. + """ + + def __init__(self, filename): + if isinstance(filename, (osmium.io.File, osmium.io.FileBuffer)): + self._file = filename + elif isinstance(filename, (str, Path)): + self._file = osmium.io.File(str(filename)) + else: + raise TypeError("File must be an osmium.io.File, osmium.io.FileBuffer, str or Path") + self._reader = osmium.io.Reader(self._file) + self._node_store = None + self._area_handler = None + self._filters = [] + + @property + def header(self): + """ Return the header information for the file to be read. + """ + return self._reader.header() + + @property + def node_location_storage(self): + """ Return the node location cache, if enabled. + This can be used to manually look up locations of nodes. + """ + return self._node_store + + def with_locations(self, storage='flex_mem'): + """ Enable caching of node locations. This is necessary in order + to get geometries for ways and relations. 
+ """ + if isinstance(storage, str): + self._node_store = osmium.index.create_map(storage) + elif storage is None or isinstance(storage, osmium.index.LocationTable): + self._node_store = storage + else: + raise TypeError("'storage' argument must be a LocationTable or a string describing the index") + + return self + + def with_areas(self): + """ Enable area processing. When enabled, then closed ways and + relations of type multipolygon will also be returned as an + Area type. + + Automatically enables location caching, if it was not yet set. + It uses the default location cache type. To use a different + cache type, you need to call with_locations() explicitly. + + Area processing requires that the file is read twice. This + happens transparently. + """ + if self._area_handler is None: + self._area_handler = osmium.area.AreaManager() + if self._node_store is None: + self.with_locations() + return self + + def with_filter(self, filt): + """ Add a filter function that is called before an object is + returned in the iterator. + """ + self._filters.append(filt) + return self + + def __iter__(self): + """ Return the iterator over the file. 
+ """ + handlers = [] + + if self._node_store is not None: + lh = osmium.NodeLocationsForWays(self._node_store) + lh.ignore_errors() + handlers.append(lh) + + if self._area_handler is None: + yield from osmium.OsmFileIterator(self._reader, *handlers, *self._filters) + return + + # need areas, do two pass handling + rd = osmium.io.Reader(self._file, osmium.osm.RELATION) + try: + osmium.apply(rd, *self._filters, self._area_handler.first_pass_handler()) + finally: + rd.close() + + buffer_it = osmium.BufferIterator(*self._filters) + handlers.append(self._area_handler.second_pass_to_buffer(buffer_it)) + + for obj in osmium.OsmFileIterator(self._reader, *handlers, *self._filters): + yield obj + if buffer_it: + yield from buffer_it + + # catch anything after the final flush + if buffer_it: + yield from buffer_it diff --git a/test/conftest.py b/test/conftest.py index 669dbc9d..9d97e4b9 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -53,13 +53,20 @@ def _mkfile(data): return _mkfile @pytest.fixture -def opl_reader(to_opl): +def opl_buffer(to_opl): def _mkbuffer(data): - return o.io.Reader(o.io.FileBuffer(to_opl(data).encode('utf-8'), 'opl')) + return o.io.FileBuffer(to_opl(data).encode('utf-8'), 'opl') return _mkbuffer +@pytest.fixture +def opl_reader(opl_buffer): + + def _mkbuffer(data): + return o.io.Reader(opl_buffer(data)) + + return _mkbuffer @pytest.fixture def simple_handler(to_opl): diff --git a/test/test_dangling_references.py b/test/test_dangling_references.py index 7fb247b0..e524d2c9 100644 --- a/test/test_dangling_references.py +++ b/test/test_dangling_references.py @@ -39,6 +39,26 @@ def test_keep_reference(self): str(obj) repr(obj) + def test_keep_reference_generator(self): + for obj in o.FileProcessor(TEST_DIR / 'example-test.osc').with_areas(): + if obj.type_str() == 'n' and self.node is not None: + self.node(obj) + elif obj.type_str() == 'w' and self.way is not None: + self.way(obj) + elif obj.type_str() == 'r' and self.relation is not None: + 
self.relation(obj) + elif obj.type_str() == 'a' and self.area is not None: + self.area(obj) + + assert self.refkeeper + + for obj, func in self.refkeeper: + with pytest.raises(RuntimeError, match="removed OSM object"): + func(obj) + # str() and repr() must not throw errors + str(obj) + repr(obj) + class TestKeepNodeRef(DanglingReferenceBase): @@ -140,6 +160,23 @@ def test_keep_reference(self): for obj, func in self.refkeeper: func(obj) + def test_keep_reference_generator(self): + for obj in o.FileProcessor(TEST_DIR / 'example-test.pbf').with_areas(): + if obj.is_node() and self.node is not None: + self.node(obj) + elif obj.is_way() and self.way is not None: + self.way(obj) + elif obj.is_relation() and self.relation is not None: + self.relation(obj) + elif obj.is_area() and self.area is not None: + self.area(obj) + + assert self.refkeeper + + for obj, func in self.refkeeper: + func(obj) + + class TestKeepLocation(NotADanglingReferenceBase): def node(self, n): diff --git a/test/test_osmium.py b/test/test_osmium.py index be38575b..c53a83c9 100644 --- a/test/test_osmium.py +++ b/test/test_osmium.py @@ -91,3 +91,95 @@ def node(self, n): o.apply(opl_reader("n1 x0 y0"), NewStyle(), OldStyle(), NewStyle(), OldStyle()) assert logged == ['new', 'old', 'new', 'old'] + +@pytest.mark.parametrize('init', [None, 1]) +def test_file_processor_bad_init(init): + with pytest.raises(TypeError): + o.FileProcessor(init) + +def test_simple_generator(opl_buffer): + count = 0 + for obj in o.FileProcessor(opl_buffer('n1 x5 y5')): + assert obj.type_str() == 'n' + assert obj.id == 1 + count += 1 + + assert count == 1 + +def test_generator_with_location(opl_buffer): + data = opl_buffer("""\ + n1 x10 y20 + n2 x11 y21 + w45 Nn1,n2 + """) + + count = 0 + for obj in o.FileProcessor(data).with_locations(): + count += 1 + if obj.type_str() == 'w': + assert len(obj.nodes) == 2 + assert [n.ref for n in obj.nodes] == [1, 2] + assert [n.location.lon for n in obj.nodes] == [10, 11] + assert 
[n.location.lat for n in obj.nodes] == [20, 21] + + assert count == 3 + +def test_generator_with_areas(opl_buffer): + data = opl_buffer("""\ + n10 x3 y3 + n11 x3 y3.01 + n12 x3.01 y3.01 + n13 x3.01 y3 + w12 Nn10,n11,n12,n13,n10 Tbuilding=yes + """) + + count = 0 + for obj in o.FileProcessor(data).with_areas(): + if obj.type_str() == 'a': + count += 1 + assert obj.from_way() + assert obj.orig_id() == 12 + + assert count == 1 + +def test_generator_with_filter(opl_buffer): + data = opl_buffer("""\ + n10 x3 y3 + n11 x3 y3.01 Tfoo=bar + """) + + count = 0 + for obj in o.FileProcessor(data).with_filter(o.filter.EmptyTagFilter()): + count += 1 + assert obj.type_str() == 'n' + assert obj.id == 11 + + assert count == 1 + +def test_file_processor_header(tmp_path): + fn = tmp_path / 'empty.xml' + fn.write_text(""" + + + + """) + + h = o.FileProcessor(fn).header + + assert not h.has_multiple_object_versions + assert h.box().valid() + assert h.box().size() == 64800.0 + +def test_file_processor_access_nodestore(opl_buffer): + fp = o.FileProcessor(opl_buffer('n56 x3 y-3'))\ + .with_locations(o.index.create_map('sparse_mem_map')) + + for _ in fp: + pass + + assert fp.node_location_storage.get(56).lat == -3 + assert fp.node_location_storage.get(56).lon == 3 + +def test_file_processor_bad_location_type(opl_buffer): + with pytest.raises(TypeError, match='LocationTable'): + o.FileProcessor(opl_buffer('n56 x3 y-3')).with_locations(67) From e3a5a6d1b0ea1e95bcfabd2fef5a221901782dd0 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 16 Mar 2024 22:28:09 +0100 Subject: [PATCH 4/4] switch some examples to iterative processing --- examples/convert.py | 24 +++++----------- examples/convert_to_geojson.py | 2 +- examples/filter_coastlines.py | 50 +++++++++++++--------------------- examples/osm_diff_stats.py | 25 ++++++++--------- examples/osm_url_stats.py | 5 +--- examples/pub_names.py | 18 ++++-------- examples/road_length.py | 27 ++++++++---------- examples/use_nodecache.py | 5 ++-- 
src/osmium/file_processor.py | 7 +++-- 9 files changed, 62 insertions(+), 101 deletions(-) diff --git a/examples/convert.py b/examples/convert.py index d2563d62..0bc8271e 100644 --- a/examples/convert.py +++ b/examples/convert.py @@ -8,30 +8,20 @@ import sys -class Convert(o.SimpleHandler): - - def __init__(self, writer): - super(Convert, self).__init__() - self.writer = writer - - def node(self, n): - self.writer.add_node(n) - - def way(self, w): - self.writer.add_way(w) - - def relation(self, r): - self.writer.add_relation(r) - if __name__ == '__main__': if len(sys.argv) != 3: print("Usage: python convert.py ") sys.exit(-1) writer = o.SimpleWriter(sys.argv[2]) - handler = Convert(writer) - handler.apply_file(sys.argv[1]) + for obj in o.FileProcessor(sys.argv[1]): + if obj.is_node(): + writer.add_node(obj) + elif obj.is_way(): + writer.add_way(obj) + elif obj.is_relation(): + writer.add_relation(obj) writer.close() diff --git a/examples/convert_to_geojson.py b/examples/convert_to_geojson.py index 52608fb1..09fb8d7e 100644 --- a/examples/convert_to_geojson.py +++ b/examples/convert_to_geojson.py @@ -48,7 +48,7 @@ def print_object(self, geojson, tags): def main(osmfile): handler = GeoJsonWriter() - handler.apply_file(osmfile) + handler.apply_file(osmfile,filters=[o.filter.EmptyTagFilter().apply_to(o.osm.NODE)]) handler.finish() return 0 diff --git a/examples/filter_coastlines.py b/examples/filter_coastlines.py index 8a0cb159..6c89d2a4 100644 --- a/examples/filter_coastlines.py +++ b/examples/filter_coastlines.py @@ -7,37 +7,9 @@ we are interested in and remember the nodes required. Then, in a second run all the relevant nodes and ways are written out. 
""" - import osmium as o import sys -class WayFilter(o.SimpleHandler): - - def __init__(self): - super(WayFilter, self).__init__() - self.nodes = set() - - def way(self, w): - if 'natural' in w.tags and w.tags['natural'] == 'coastline': - for n in w.nodes: - self.nodes.add(n.ref) - - -class CoastlineWriter(o.SimpleHandler): - - def __init__(self, writer, nodes): - super(CoastlineWriter, self).__init__() - self.writer = writer - self.nodes = nodes - - def node(self, n): - if n.id in self.nodes: - self.writer.add_node(n) - - def way(self, w): - if 'natural' in w.tags and w.tags['natural'] == 'coastline': - self.writer.add_way(w) - if __name__ == '__main__': if len(sys.argv) != 3: @@ -46,11 +18,27 @@ def way(self, w): # go through the ways to find all relevant nodes - ways = WayFilter() - ways.apply_file(sys.argv[1]) + nodes = set() + # Pre-filter the ways by tags. The less object we need to look at, the better. + way_filter = o.filter.KeyFilter('natural') + # only scan the ways of the file + for obj in o.FileProcessor(sys.argv[1], o.osm.WAY).with_filter(way_filter): + if obj.tags['natural'] == 'coastline': + nodes.update(n.ref for n in obj.nodes) + # go through the file again and write out the data writer = o.SimpleWriter(sys.argv[2]) - CoastlineWriter(writer, ways.nodes).apply_file(sys.argv[1]) + + # This time the pre-filtering should only apply to ways. + way_filter = o.filter.KeyFilter('natural').enable_for(o.osm.WAY) + + # We need nodes and ways in the second pass. 
+ for obj in o.FileProcessor(sys.argv[1], o.osm.WAY | o.osm.NODE).with_filter(way_filter): + if obj.is_node() and obj.id in nodes: + # Strip the object of tags along the way + writer.add_node(obj.replace(tags={})) + elif obj.is_way() and obj.tags['natural'] == 'coastline': + writer.add_way(obj) writer.close() diff --git a/examples/osm_diff_stats.py b/examples/osm_diff_stats.py index 6f879c57..2777ef33 100644 --- a/examples/osm_diff_stats.py +++ b/examples/osm_diff_stats.py @@ -7,14 +7,14 @@ import osmium as o import sys -class Stats(object): +class Stats: def __init__(self): self.added = 0 self.modified = 0 self.deleted = 0 - def __call__(self, o): + def add(self, o): if o.deleted: self.deleted += 1 elif o.version == 1: @@ -23,23 +23,20 @@ def __call__(self, o): self.modified += 1 def outstats(self, prefix): - print("%s added: %d" % (prefix, self.added)) - print("%s modified: %d" % (prefix, self.modified)) - print("%s deleted: %d" % (prefix, self.deleted)) + print(f"{prefix} added: {self.added}") + print(f"{prefix} modified: {self.modified}") + print(f"{prefix} deleted: {self.deleted}") def main(osmfile): - nodes = Stats() - ways = Stats() - rels = Stats() + stats = {t: Stats() for t in 'nwr'} - h = o.make_simple_handler(node=nodes, way=ways, relation=rels) + for obj in o.FileProcessor(osmfile): + stats[obj.type_str()].add(obj) - h.apply_file(osmfile) - - nodes.outstats("Nodes") - ways.outstats("Ways") - rels.outstats("Relations") + stats['n'].outstats("Nodes") + stats['w'].outstats("Ways") + stats['r'].outstats("Relations") return 0 diff --git a/examples/osm_url_stats.py b/examples/osm_url_stats.py index 9c69b3af..7fef01fe 100644 --- a/examples/osm_url_stats.py +++ b/examples/osm_url_stats.py @@ -6,10 +6,7 @@ """ import osmium as o import sys -try: - import urllib.request as urlrequest -except ImportError: - import urllib2 as urlrequest +import urllib.request as urlrequest class FileStatsHandler(o.SimpleHandler): def __init__(self): diff --git 
a/examples/pub_names.py b/examples/pub_names.py index 133844ad..f9046e07 100644 --- a/examples/pub_names.py +++ b/examples/pub_names.py @@ -4,20 +4,12 @@ import osmium import sys -class NamesHandler(osmium.SimpleHandler): - - def output_pubs(self, tags): - if tags.get('amenity') == 'pub' and 'name' in tags: - print(tags['name']) - - def node(self, n): - self.output_pubs(n.tags) - - def way(self, w): - self.output_pubs(w.tags) - def main(osmfile): - NamesHandler().apply_file(osmfile) + for obj in osmium.FileProcessor(osmfile)\ + .with_filter(osmium.filter.KeyFilter('amenity'))\ + .with_filter(osmium.filter.KeyFilter('name')): + if obj.tags['amenity'] == 'pub': + print(obj.tags['name']) return 0 diff --git a/examples/road_length.py b/examples/road_length.py index b597b0fd..3ccceca3 100644 --- a/examples/road_length.py +++ b/examples/road_length.py @@ -6,27 +6,22 @@ import osmium as o import sys -class RoadLengthHandler(o.SimpleHandler): - def __init__(self): - super(RoadLengthHandler, self).__init__() - self.length = 0.0 - - def way(self, w): - if 'highway' in w.tags: +def main(osmfile): + total = 0.0 + # As we need the way geometry, the node locations need to be cached. + # This is enabled with the with_locations() function. + for obj in o.FileProcessor(osmfile, o.osm.NODE | o.osm.WAY)\ + .with_locations()\ + .with_filter(o.filter.KeyFilter('highway')): + if obj.is_way(): try: - self.length += o.geom.haversine_distance(w.nodes) + total += o.geom.haversine_distance(obj.nodes) except o.InvalidLocationError: # A location error might occur if the osm file is an extract # where nodes of ways near the boundary are missing. - print("WARNING: way %d incomplete. Ignoring." % w.id) - -def main(osmfile): - h = RoadLengthHandler() - # As we need the geometry, the node locations need to be cached. Therefore - # set 'locations' to true. - h.apply_file(osmfile, locations=True) + print("WARNING: way %d incomplete. Ignoring." 
% obj.id) - print('Total way length: %.2f km' % (h.length/1000)) + print('Total way length: %.2f km' % (total/1000)) return 0 diff --git a/examples/use_nodecache.py b/examples/use_nodecache.py index 3e5cc76a..29984fe6 100644 --- a/examples/use_nodecache.py +++ b/examples/use_nodecache.py @@ -1,10 +1,9 @@ import osmium as o import sys -class WayHandler(o.SimpleHandler): +class WayHandler: def __init__(self, idx): - super(WayHandler, self).__init__() self.idx = idx def way(self, w): @@ -13,7 +12,7 @@ def way(self, w): print("%d %s" % (w.id, len(w.nodes))) if len(sys.argv) != 3: - print("Usage: python create_nodecache.py ") + print("Usage: python use_nodecache.py ") exit() reader = o.io.Reader(sys.argv[1], o.osm.osm_entity_bits.WAY) diff --git a/src/osmium/file_processor.py b/src/osmium/file_processor.py index 08d9b7f8..51b6f134 100644 --- a/src/osmium/file_processor.py +++ b/src/osmium/file_processor.py @@ -12,14 +12,15 @@ class FileProcessor: """ A generator that emits OSM objects read from a file. """ - def __init__(self, filename): + def __init__(self, filename, entities=osmium.osm.ALL): if isinstance(filename, (osmium.io.File, osmium.io.FileBuffer)): self._file = filename elif isinstance(filename, (str, Path)): self._file = osmium.io.File(str(filename)) else: raise TypeError("File must be an osmium.io.File, osmium.io.FileBuffer, str or Path") - self._reader = osmium.io.Reader(self._file) + self._reader = osmium.io.Reader(self._file, entities) + self._entities = entities self._node_store = None self._area_handler = None self._filters = [] @@ -41,6 +42,8 @@ def with_locations(self, storage='flex_mem'): """ Enable caching of node locations. This is necessary in order to get geometries for ways and relations. """ + if not (self._entities & osmium.osm.NODE): + raise RuntimeError('Nodes not read from file. 
Cannot enable location cache.') if isinstance(storage, str): self._node_store = osmium.index.create_map(storage) elif storage is None or isinstance(storage, osmium.index.LocationTable):