Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce iterative processing #245

Merged
merged 4 commits into from
Mar 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ pybind11_add_module(_osmium
lib/merge_input_reader.cc
lib/node_location_handler.cc
lib/simple_writer.cc
lib/write_handler.cc)
lib/write_handler.cc
lib/file_iterator.cc)
set_module_output(_osmium osmium)
pybind11_add_module(_replication lib/replication.cc)
set_module_output(_replication osmium/replication)
Expand Down
24 changes: 7 additions & 17 deletions examples/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,20 @@

import sys

class Convert(o.SimpleHandler):

def __init__(self, writer):
super(Convert, self).__init__()
self.writer = writer

def node(self, n):
self.writer.add_node(n)

def way(self, w):
self.writer.add_way(w)

def relation(self, r):
self.writer.add_relation(r)

if __name__ == '__main__':
if len(sys.argv) != 3:
print("Usage: python convert.py <infile> <outfile>")
sys.exit(-1)

writer = o.SimpleWriter(sys.argv[2])
handler = Convert(writer)

handler.apply_file(sys.argv[1])
for obj in o.FileProcessor(sys.argv[1]):
if obj.is_node():
writer.add_node(obj)
elif obj.is_way():
writer.add_way(obj)
elif obj.is_relation():
writer.add_relation(obj)

writer.close()

2 changes: 1 addition & 1 deletion examples/convert_to_geojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def print_object(self, geojson, tags):
def main(osmfile):
handler = GeoJsonWriter()

handler.apply_file(osmfile)
handler.apply_file(osmfile,filters=[o.filter.EmptyTagFilter().apply_to(o.osm.NODE)])
handler.finish()

return 0
Expand Down
50 changes: 19 additions & 31 deletions examples/filter_coastlines.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,9 @@
we are interested in and remember the nodes required. Then, in a second
run all the relevant nodes and ways are written out.
"""

import osmium as o
import sys

class WayFilter(o.SimpleHandler):

def __init__(self):
super(WayFilter, self).__init__()
self.nodes = set()

def way(self, w):
if 'natural' in w.tags and w.tags['natural'] == 'coastline':
for n in w.nodes:
self.nodes.add(n.ref)


class CoastlineWriter(o.SimpleHandler):

def __init__(self, writer, nodes):
super(CoastlineWriter, self).__init__()
self.writer = writer
self.nodes = nodes

def node(self, n):
if n.id in self.nodes:
self.writer.add_node(n)

def way(self, w):
if 'natural' in w.tags and w.tags['natural'] == 'coastline':
self.writer.add_way(w)


if __name__ == '__main__':
if len(sys.argv) != 3:
Expand All @@ -46,11 +18,27 @@ def way(self, w):


# go through the ways to find all relevant nodes
ways = WayFilter()
ways.apply_file(sys.argv[1])
nodes = set()
# Pre-filter the ways by tags. The fewer objects we need to look at, the better.
way_filter = o.filter.KeyFilter('natural')
# only scan the ways of the file
for obj in o.FileProcessor(sys.argv[1], o.osm.WAY).with_filter(way_filter):
if obj.tags['natural'] == 'coastline':
nodes.update(n.ref for n in obj.nodes)


# go through the file again and write out the data
writer = o.SimpleWriter(sys.argv[2])
CoastlineWriter(writer, ways.nodes).apply_file(sys.argv[1])

# This time the pre-filtering should only apply to ways.
way_filter = o.filter.KeyFilter('natural').enable_for(o.osm.WAY)

# We need nodes and ways in the second pass.
for obj in o.FileProcessor(sys.argv[1], o.osm.WAY | o.osm.NODE).with_filter(way_filter):
if obj.is_node() and obj.id in nodes:
# Strip the object of tags along the way
writer.add_node(obj.replace(tags={}))
elif obj.is_way() and obj.tags['natural'] == 'coastline':
writer.add_way(obj)

writer.close()
25 changes: 11 additions & 14 deletions examples/osm_diff_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
import osmium as o
import sys

class Stats(object):
class Stats:

def __init__(self):
self.added = 0
self.modified = 0
self.deleted = 0

def __call__(self, o):
def add(self, o):
if o.deleted:
self.deleted += 1
elif o.version == 1:
Expand All @@ -23,23 +23,20 @@ def __call__(self, o):
self.modified += 1

def outstats(self, prefix):
print("%s added: %d" % (prefix, self.added))
print("%s modified: %d" % (prefix, self.modified))
print("%s deleted: %d" % (prefix, self.deleted))
print(f"{prefix} added: {self.added}")
print(f"{prefix} modified: {self.modified}")
print(f"{prefix} deleted: {self.deleted}")


def main(osmfile):
nodes = Stats()
ways = Stats()
rels = Stats()
stats = {t: Stats() for t in 'nwr'}

h = o.make_simple_handler(node=nodes, way=ways, relation=rels)
for obj in o.FileProcessor(osmfile):
stats[obj.type_str()].add(obj)

h.apply_file(osmfile)

nodes.outstats("Nodes")
ways.outstats("Ways")
rels.outstats("Relations")
stats['n'].outstats("Nodes")
stats['w'].outstats("Ways")
stats['r'].outstats("Relations")

return 0

Expand Down
5 changes: 1 addition & 4 deletions examples/osm_url_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
"""
import osmium as o
import sys
try:
import urllib.request as urlrequest
except ImportError:
import urllib2 as urlrequest
import urllib.request as urlrequest

class FileStatsHandler(o.SimpleHandler):
def __init__(self):
Expand Down
18 changes: 5 additions & 13 deletions examples/pub_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,12 @@
import osmium
import sys

class NamesHandler(osmium.SimpleHandler):

def output_pubs(self, tags):
if tags.get('amenity') == 'pub' and 'name' in tags:
print(tags['name'])

def node(self, n):
self.output_pubs(n.tags)

def way(self, w):
self.output_pubs(w.tags)

def main(osmfile):
NamesHandler().apply_file(osmfile)
for obj in osmium.FileProcessor(osmfile)\
.with_filter(osmium.filter.KeyFilter('amenity'))\
.with_filter(osmium.filter.KeyFilter('name')):
if obj.tags['amenity'] == 'pub':
print(obj.tags['name'])

return 0

Expand Down
27 changes: 11 additions & 16 deletions examples/road_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,22 @@
import osmium as o
import sys

class RoadLengthHandler(o.SimpleHandler):
def __init__(self):
super(RoadLengthHandler, self).__init__()
self.length = 0.0

def way(self, w):
if 'highway' in w.tags:
def main(osmfile):
total = 0.0
# As we need the way geometry, the node locations need to be cached.
# This is enabled with the with_locations() function.
for obj in o.FileProcessor(osmfile, o.osm.NODE | o.osm.WAY)\
.with_locations()\
.with_filter(o.filter.KeyFilter('highway')):
if obj.is_way():
try:
self.length += o.geom.haversine_distance(w.nodes)
total += o.geom.haversine_distance(obj.nodes)
except o.InvalidLocationError:
# A location error might occur if the osm file is an extract
# where nodes of ways near the boundary are missing.
print("WARNING: way %d incomplete. Ignoring." % w.id)

def main(osmfile):
h = RoadLengthHandler()
# As we need the geometry, the node locations need to be cached. Therefore
# set 'locations' to true.
h.apply_file(osmfile, locations=True)
print("WARNING: way %d incomplete. Ignoring." % obj.id)

print('Total way length: %.2f km' % (h.length/1000))
print('Total way length: %.2f km' % (total/1000))

return 0

Expand Down
5 changes: 2 additions & 3 deletions examples/use_nodecache.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import osmium as o
import sys

class WayHandler(o.SimpleHandler):
class WayHandler:

def __init__(self, idx):
super(WayHandler, self).__init__()
self.idx = idx

def way(self, w):
Expand All @@ -13,7 +12,7 @@ def way(self, w):
print("%d %s" % (w.id, len(w.nodes)))

if len(sys.argv) != 3:
print("Usage: python create_nodecache.py <osm file> <node cache>")
print("Usage: python use_nodecache.py <osm file> <node cache>")
exit()

reader = o.io.Reader(sys.argv[1], o.osm.osm_entity_bits.WAY)
Expand Down
54 changes: 46 additions & 8 deletions lib/area.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,21 @@

#include "base_handler.h"
#include "handler_chain.h"
#include "buffer_iterator.h"

namespace py = pybind11;

namespace {

using MpManager = osmium::area::MultipolygonManager<osmium::area::Assembler>;

class AreaManagerSecondPassHandler : public BaseHandler
class AreaManagerSecondPassHandlerBase : public BaseHandler
{
public:
AreaManagerSecondPassHandler(MpManager *mp_manager, py::args args)
: m_mp_manager(mp_manager), m_args(args), m_handlers(m_args)
{
m_mp_manager->set_callback([this](osmium::memory::Buffer &&ab)
{ osmium::apply(ab, this->m_handlers); });
}
AreaManagerSecondPassHandlerBase(MpManager *mp_manager)
: m_mp_manager(mp_manager)
{}


bool node(osmium::Node const *n) override
{
Expand All @@ -53,10 +52,37 @@ class AreaManagerSecondPassHandler : public BaseHandler
m_mp_manager->flush_output();
}

private:
protected:
MpManager *m_mp_manager;
};


// Second-pass handler that hands every buffer emitted by the multipolygon
// manager to a chain of handlers built from the given Python arguments.
class AreaManagerSecondPassHandler : public AreaManagerSecondPassHandlerBase
{
public:
    // mp_manager: manager whose output callback is installed here (not owned).
    // args:       handler objects the emitted buffers are applied to.
    AreaManagerSecondPassHandler(MpManager *mp_manager, py::args args)
    : AreaManagerSecondPassHandlerBase(mp_manager),
      m_args(args),
      m_handlers(m_args)
    {
        // NOTE(review): the callback captures `this`; it must not be invoked
        // by the manager after this handler has been destroyed - confirm the
        // bindings guarantee that lifetime.
        m_mp_manager->set_callback(
            [this](osmium::memory::Buffer &&area_buffer)
            {
                osmium::apply(area_buffer, m_handlers);
            });
    }

private:
    // Declared before m_handlers, which is constructed from it.
    py::args m_args;
    HandlerChain m_handlers;

};


// Second-pass handler that moves every buffer emitted by the multipolygon
// manager into a BufferIterator instead of applying handlers to it.
class AreaManagerBufferHandler : public AreaManagerSecondPassHandlerBase
{
public:
    // mp_manager: manager whose output callback is installed here (not owned).
    // cb:         receiver of the emitted buffers; captured as a raw pointer.
    //             NOTE(review): presumably kept alive by the caller/bindings
    //             for as long as the callback may fire - confirm.
    AreaManagerBufferHandler(MpManager *mp_manager, pyosmium::BufferIterator *cb)
    : AreaManagerSecondPassHandlerBase(mp_manager)
    {
        m_mp_manager->set_callback(
            [cb](osmium::memory::Buffer &&area_buffer)
            {
                cb->add_buffer(std::move(area_buffer));
            });
    }
};


Expand All @@ -82,6 +108,12 @@ class AreaManager : public BaseHandler
return new AreaManagerSecondPassHandler(&m_mp_manager, args);
}

// Prepare the manager for lookups and create the second-pass handler that
// forwards emitted buffers to `cb`. Returns a newly allocated handler;
// NOTE(review): ownership is presumably taken over by the Python binding -
// confirm the return value policy used when this method is exposed.
AreaManagerBufferHandler *second_pass_to_buffer(pyosmium::BufferIterator *cb)
{
    m_mp_manager.prepare_for_lookup();

    auto *buffer_handler = new AreaManagerBufferHandler(&m_mp_manager, cb);
    return buffer_handler;
}

private:
osmium::area::Assembler::config_type m_assembler_config;
osmium::area::MultipolygonManager<osmium::area::Assembler> m_mp_manager;
Expand All @@ -93,6 +125,8 @@ PYBIND11_MODULE(_area, m)
{
py::class_<AreaManagerSecondPassHandler, BaseHandler>(m,
"AreaManagerSecondPassHandler");
py::class_<AreaManagerBufferHandler, BaseHandler>(m,
"AreaManagerBufferHandler");

py::class_<AreaManager, BaseHandler>(m, "AreaManager",
"Object manager class that manages building area objects from "
Expand All @@ -107,5 +141,9 @@ PYBIND11_MODULE(_area, m)
"file, where areas are assembled. Pass the handlers that "
"should handle the areas.",
py::return_value_policy::take_ownership, py::keep_alive<1, 2>())
.def("second_pass_to_buffer", &AreaManager::second_pass_to_buffer,
py::keep_alive<1, 2>(),
"Return a handler object for the second pass of the file. "
"The handler holds a buffer, which can be iterated over.")
;
}
Loading
Loading