This repository has been archived by the owner on Mar 12, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathapi_to_csv.py
99 lines (81 loc) · 3.44 KB
/
api_to_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import csv
import json
import sys
import argparse
import urllib.request
import logging
"""
This demonstration script fetches search results from the CAP cases endpoint and writes a subset of their fields to
a CSV file. It uses only the Python 3 standard library, so no additional installation is required.
There is currently no command line option for selecting which fields are included; to change the fields, edit the
header row and the per-result row written in api_query_to_csv.
Usage:
$ python api_to_csv.py -h
usage: api_to_csv.py [-h] [--api-key API_KEY] [--out-path OUT_PATH] url
Print CAPAPI query to CSV.
positional arguments:
url target url, e.g.
https://api.case.law/v1/cases/?search=first+amendment
optional arguments:
-h, --help show this help message and exit
--api-key API_KEY api key (optional; only needed if requesting full text)
--out-path OUT_PATH output path (default stdout)
example:
python api_to_csv.py --out-path first_amendment_cases.csv https://api.case.law/v1/cases/?search=first+amendment
"""
# Module-level logger, looked up by the fixed name 'api_to_csv'; get_results() uses it to report paging progress.
logger = logging.getLogger('api_to_csv')
def get_results(url, api_key=None):
    """
    Yield each individual case result from the target URL, following pagination.

    Each fetched page is parsed as UTF-8 JSON and must be an object with a
    'results' list and a 'next' key. 'next' holds the URL of the following
    page; a falsy value (e.g. null) ends the iteration.

    Args:
        url: URL of the first page of results.
        api_key: optional API key. When truthy, it is sent on every request
            as an 'Authorization: Token <api_key>' header.

    Yields:
        Every item of each page's 'results' list, in page order.

    Raises:
        KeyError: if a page lacks the 'results' or 'next' key.
        Any exception raised by urllib.request.urlopen or json.loads is
        propagated unchanged.
    """
    headers = {}
    if api_key:
        headers['Authorization'] = 'Token {}'.format(api_key)

    page_count = 1
    while True:
        # Pass the argument separately so the message is only formatted if the record is emitted.
        logger.info("Fetching page %s", page_count)
        request = urllib.request.Request(url, headers=headers)
        # Close the response as soon as its body is read; otherwise one
        # connection per page stays open until garbage collection.
        with urllib.request.urlopen(request) as response:
            page = json.loads(response.read().decode('utf-8'))
        yield from page['results']
        url = page['next']
        if not url:
            break
        page_count += 1
def api_query_to_csv(url, api_key=None, out_path=None):
    """
    Write all case results from URL to out_path, defaulting to stdout.

    The output starts with a header row, followed by one row per result with
    the columns: id, frontend_url, name, name_abbreviation, citation,
    decision_date, jurisdiction. The citation column holds the 'cite' of the
    first entry in the result's 'citations' whose 'type' is 'official', or an
    empty string when there is none. The jurisdiction column holds the
    jurisdiction's 'name'.

    Args:
        url: URL of the first page of results (passed to get_results).
        api_key: optional API key (passed to get_results).
        out_path: optional path of the CSV file to create or overwrite,
            written as UTF-8. When falsy, rows are written to sys.stdout.

    Raises:
        KeyError: if a result lacks one of the fields listed above.
        Any exception from opening out_path or from get_results is propagated.
    """
    if out_path:
        # newline='' is what the csv module requires of files it writes to;
        # without it, rows end in '\r\r\n' on platforms that translate '\n'.
        out_file = open(out_path, 'w', encoding='utf-8', newline='')
    else:
        out_file = sys.stdout

    try:
        out = csv.writer(out_file)
        out.writerow(['id', 'frontend_url', 'name', 'name_abbreviation', 'citation', 'decision_date', 'jurisdiction'])
        for result in get_results(url, api_key):
            out.writerow([
                result['id'],
                result['frontend_url'],
                result['name'],
                result['name_abbreviation'],
                next((cite['cite'] for cite in result['citations'] if cite['type'] == 'official'), ''),
                result['decision_date'],
                result['jurisdiction']['name'],
            ])
    finally:
        # Only close what this function opened; stdout belongs to the caller.
        if out_path:
            out_file.close()
def main():
    """
    Command line entry point.

    Reads the target url and the optional --api-key / --out-path options from
    the command line, enables INFO-level logging, and hands everything to
    api_query_to_csv.
    """
    cli = argparse.ArgumentParser(
        description='Print CAPAPI query to CSV.',
        epilog="""example:\n python api_to_csv.py --out-path first_amendment_cases.csv https://api.case.law/v1/cases/?search=first+amendment""",
        # Keep the epilog's line break instead of letting argparse re-wrap it.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # (argument name, help text) pairs, registered in this order.
    argument_specs = (
        ('url', 'target url, e.g. https://api.case.law/v1/cases/?search=first+amendment'),
        ('--api-key', 'api key (optional; only needed if requesting full text)'),
        ('--out-path', 'output path (default stdout)'),
    )
    for flag, help_text in argument_specs:
        cli.add_argument(flag, help=help_text)

    options = cli.parse_args()
    logging.basicConfig(level=logging.INFO)
    api_query_to_csv(options.url, api_key=options.api_key, out_path=options.out_path)
# Run the command line interface only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()