[SH] Use direct entry URL
k-nut committed Jun 25, 2024
1 parent 621d075 commit b9e41d2
Showing 1 changed file with 1 addition and 14 deletions.
15 changes: 1 addition & 14 deletions jedeschule/spiders/schleswig_holstein.py
@@ -11,23 +11,10 @@
 
 class SchleswigHolsteinSpider(SchoolSpider):
     name = "schleswig-holstein"
-    base_url = 'https://opendata.schleswig-holstein.de/collection/schulen/aktuell'
+    base_url = 'https://opendata.schleswig-holstein.de/collection/schulen/aktuell.csv'
     start_urls = [base_url]
 
-    def parse(self, response):
-        url = response.css('link[rel="alternate"][type="application/ld+json"]::attr(href)').get()
-        yield scrapy.Request(url, callback=self.parse_dataset_metadata)
-
-    def parse_dataset_metadata(self, response):
-        parsed = json.loads(response.text)
-        csv_url = next(node['dcat:accessURL']['@id'] for node in parsed['@graph'] if
-                       node['dcat:mediaType']['@id'] == 'https://www.iana.org/assignments/media-types/text/csv')
-        # TODO: Remove this temporary replacement
-        # It is only here because the API seems to return wrong data currently
-        csv_url = csv_url.replace("zitsh.de", "schleswig-holstein.de")
-        yield scrapy.Request(csv_url, callback=self.parse_csv)
-
     def parse_csv(self, response: scrapy.http.Response):
         reader = csv.DictReader(response.text.splitlines(), delimiter='\t')
         for row in reader:
             yield row
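For orientation, here is a minimal stand-alone sketch of what the spider does after this commit, assembled only from the lines visible in the hunk above. The class and spider names below are hypothetical; the actual spider subclasses SchoolSpider from this repository and keeps the CSV handling in a separate parse_csv callback.

import csv

import scrapy


class SchleswigHolsteinCsvSketch(scrapy.Spider):
    # Hypothetical stand-alone name; the real spider is SchleswigHolsteinSpider(SchoolSpider)
    name = "schleswig-holstein-csv-sketch"
    # The commit points start_urls directly at the CSV distribution instead of the
    # collection page, so no metadata lookup is needed before downloading the data
    start_urls = [
        "https://opendata.schleswig-holstein.de/collection/schulen/aktuell.csv"
    ]

    def parse(self, response: scrapy.http.Response):
        # The portal serves a tab-separated file; DictReader yields one dict per school
        reader = csv.DictReader(response.text.splitlines(), delimiter="\t")
        for row in reader:
            yield row

Pointing start_urls at the .csv distribution collapses the previous flow (collection page, then JSON-LD metadata, then CSV download) into a single request and makes the temporary zitsh.de URL workaround unnecessary.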
