Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: use new endpoints for mexico_nuevo_leon_* #1122

Merged
merged 2 commits into the base branch on
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions kingfisher_scrapy/spiders/mexico_nuevo_leon_records.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,28 @@
import scrapy
from kingfisher_scrapy.base_spiders import PeriodicSpider
from kingfisher_scrapy.util import browser_user_agent, parameters

from kingfisher_scrapy.base_spiders import SimpleSpider


class MexicoNuevoLeonRecords(PeriodicSpider):
    """
    Domain
      Secretaría de Movilidad y Planeación Urbana de Nuevo León
    Spider arguments
      from_date
        Download only data from this year onward (YYYY format). Defaults to '2013'.
      until_date
        Download only data until this year (YYYY format). Defaults to the current year.
    Bulk download documentation
      https://smpu.nl.gob.mx/transparencia/publicaciones
    """

    name = 'mexico_nuevo_leon_records'
    user_agent = browser_user_agent  # to avoid HTTP 403

    # SimpleSpider
    data_type = 'record_package'

    # PeriodicSpider
    date_format = 'year'
    # The anio query parameter is a URL-encoded JSON array: [{"value":"<year>"}].
    # PeriodicSpider substitutes each year in the date range into {0}.
    pattern = 'https://smpu.nl.gob.mx/siasi_ws/api/ocds/ListarProduccionXAnio?anio=%5B%7B"value":"{0}"%7D%5D'
    formatter = staticmethod(parameters('anio'))
    default_from_date = '2013'
24 changes: 15 additions & 9 deletions kingfisher_scrapy/spiders/mexico_nuevo_leon_releases.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import scrapy
from kingfisher_scrapy.spiders.mexico_nuevo_leon_records import MexicoNuevoLeonRecords
from kingfisher_scrapy.util import components

from kingfisher_scrapy.base_spiders import CompressedFileSpider


class MexicoNuevoLeonReleases(MexicoNuevoLeonRecords):
    """
    Domain
      Secretaría de Movilidad y Planeación Urbana de Nuevo León
    Spider arguments
      from_date
        Download only data from this year onward (YYYY format). Defaults to '2013'.
      until_date
        Download only data until this year (YYYY format). Defaults to the current year.
    Bulk download documentation
      https://smpu.nl.gob.mx/transparencia/acerca-del-proyecto
    """

    # NOTE(review): the diff collapses lines 13-18 of this file, so this
    # attribute is reconstructed from the module name — confirm against the
    # merged file.
    name = 'mexico_nuevo_leon_releases'

    # SimpleSpider
    data_type = 'release_package'

    # PeriodicSpider
    # Route each yearly list request to parse_list instead of the default
    # parse callback, so we can fan out to the individual release URLs.
    start_requests_callback = 'parse_list'

    def parse_list(self, response):
        # Each yearly endpoint returns a JSON array of record packages; every
        # record lists the URLs of its original releases, which we request
        # individually (the record itself contains only compiled/linked
        # releases).
        for record_package in response.json():
            for record in record_package['records']:
                for release in record['releases']:
                    # components(-1) names the file after the last URL path
                    # segment.
                    yield self.build_request(release['url'], formatter=components(-1))
Loading