diff --git a/scripts/document_link.py b/scripts/document_link.py index ca998db..b23130a 100644 --- a/scripts/document_link.py +++ b/scripts/document_link.py @@ -6,17 +6,15 @@ def Link(url): + if url.startswith('https://metro.legistar.com/ViewReport.ashx'): return BoardReportLink(url) return DocumentLink(url) -class DocumentLink(object): - ''' - https://metro.legistar1.com/metro/meetings/2021/10/2100_A_Operations%2C_Safety%2C_and_Customer_Experience_Committee_21-10-21_Agenda.pdf - http://metro.legistar1.com/metro/attachments/6484cdc7-3c2a-4598-abc5-9d846771158e.pdf - ''' +class _Link(object): + def __init__(self, url): self.url = url @@ -26,6 +24,20 @@ def content(self): response = s.get(self.url, retry_on_404=True) return response.content + @property + def filename(self): + raise NotImplementedError() + + @property + def filetype(self): + return os.path.splitext(self.filename)[-1] + + +class DocumentLink(_Link): + ''' + https://metro.legistar1.com/metro/meetings/2021/10/2100_A_Operations%2C_Safety%2C_and_Customer_Experience_Committee_21-10-21_Agenda.pdf + http://metro.legistar1.com/metro/attachments/6484cdc7-3c2a-4598-abc5-9d846771158e.pdf + ''' @property def filename(self): ''' @@ -35,7 +47,7 @@ def filename(self): return re.sub(r'%', '', os.path.basename(self.url)) -class BoardReportLink(DocumentLink): +class BoardReportLink(_Link): ''' https://metro.legistar.com/ViewReport.ashx?M=R&N=TextL5&GID=557&ID=7936&GUID=LATEST&Title=Board+Report ''' diff --git a/scripts/download_attachments.py b/scripts/download_attachments.py index 96ef9b8..69f874f 100644 --- a/scripts/download_attachments.py +++ b/scripts/download_attachments.py @@ -10,6 +10,16 @@ if __name__ == '__main__': + ACCEPTED_FILETYPES = ( + '.pdf', + '.xlsx', + '.doc', + '.docx', + '.ppt', + '.pptx', + '.rtf', + ) + filenames = [] for attachment_link in json.loads( @@ -17,6 +27,9 @@ ): link = Link(attachment_link) + if link.filetype not in ACCEPTED_FILETYPES: + continue + with open(os.path.join('attachments', link.filename), 'wb') as file: file.write(link.content)