-
-
Notifications
You must be signed in to change notification settings - Fork 107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
activity_report #282
base: main
Are you sure you want to change the base?
activity_report #282
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,254 @@ | ||
"""Parse the DktActivityRpt.pl ("Docket Activity Report") results. | ||
A parser for PACER's Docket Activity Report that returns its results as JSON. | ||
""" | ||
import pprint | ||
import sys | ||
|
||
from six.moves import range | ||
|
||
from .docket_report import BaseDocketReport | ||
from .reports import BaseReport | ||
from ..lib.log_tools import make_default_logger | ||
from ..lib.string_utils import clean_string, force_unicode, harmonize | ||
from ..lib.utils import clean_court_object | ||
|
||
logger = make_default_logger() | ||
|
||
|
||
class ActivityReport(BaseDocketReport, BaseReport):
    """Parse the DktActivityRpt.pl ("Docket Activity Report") results.

    Typical use is to call :meth:`query` with the form values and then read
    :attr:`data`, which is a list with one dict per row of the report.
    """

    PATH = 'cgi-bin/DktActivityRpt.pl'

    CACHE_ATTRS = ['metadata']

    # Text PACER places in the notes cell ahead of the (optional) case flags.
    _CASE_FLAGS_LABEL = 'Case Flags:'

    def __init__(self, court_id, pacer_session=None):
        """Create a report for one court.

        :param court_id: The court identifier handed to the parent classes.
        :param pacer_session: Optional session object; required by
            :meth:`query`, not needed when parsing saved HTML.
        """
        super(ActivityReport, self).__init__(court_id, pacer_session)
        # Initialize the empty cache properties
        self._clear_caches()
        self._metadata = None

    def parse(self):
        """Drop any cached results, then run the parent class's parse()."""
        self._clear_caches()
        super(ActivityReport, self).parse()

    @staticmethod
    def _value_after(nodes, label, offset=1):
        """Return the node found `offset` positions after `label` in `nodes`.

        :raises ValueError: If `label` is not one of the nodes.
        """
        return nodes[nodes.index(label) + offset]

    @classmethod
    def _case_flags(cls, nodes):
        """Return the row's case flags, or "none" when the row has none.

        The "Case Flags:" label is not present on every row.
        """
        try:
            label_at = nodes.index(cls._CASE_FLAGS_LABEL)
        except ValueError:
            return "none"
        # In the sample below the flag text sits inside a <span> that
        # follows the label: label, text node, <span>, flag text.
        return nodes[label_at + 3]

    @classmethod
    def _parse_row(cls, row):
        """Turn one data <tr> of the report into a dict.

        Every descendant node of the row (elements and text alike) is
        flattened into one list. Values are then picked out either by fixed
        position or by their position relative to a label such as
        'Entered: '.

        :param row: The <tr> element; must support .xpath().
        :raises ValueError: If one of the expected labels is missing.
        :raises IndexError: If the row has fewer nodes than expected.
        """
        nodes = row.xpath('.//node()')

        entered_at = nodes.index('Entered: ')
        document_at = nodes.index('Document: ')

        return {
            # Fixed positions within the first cell: nodes[1] is the link to
            # the docket report, nodes[2] and nodes[4] are its two lines of
            # text, nodes[8] is the trailing "CASE CLOSED on ..." text.
            # NOTE(review): in the sample below nodes[2] is the case number
            # and nodes[4] the case title, so the "case" and "case_number"
            # keys look swapped. Left unchanged pending confirmation.
            u"case": nodes[2],
            u"case_number": nodes[4],
            u"docket_report_url": nodes[1].xpath('@href')[0],
            u"entered": nodes[entered_at + 1],
            # The time follows the entered date after a <br>.
            u"time": nodes[entered_at + 3],
            u"filed": cls._value_after(nodes, 'Filed: '),
            u"closed": nodes[8],
            u"event": cls._value_after(nodes, 'Event:'),
            u"case_flags": cls._case_flags(nodes),
            u"category": cls._value_after(nodes, 'Category:'),
            u"presider": cls._value_after(nodes, 'Presider: '),
            u"office": cls._value_after(nodes, 'Office:'),
            # Relative to the 'Document: ' label: +1 is the <a> element,
            # +2 its text, +5 the first text of the "Docketed by" cell.
            u"docketed_by": nodes[document_at + 5],
            u"doc1": nodes[document_at + 1].xpath('@href')[0],
            u"doc_count": nodes[document_at + 2],
        }

    @property
    def metadata(self):
        """Return the report rows as a list of dicts, caching the result.

        An empty list is returned when the page contains no table.
        """
        if self._metadata is not None:
            return self._metadata

        # The data we're after look like this (re-spacing; only the first
        # data row is shown, later rows may omit the "Case Flags" line):
        #
        # <table width="98%" border="1">
        #   <tbody>
        #     <tr>
        #       <td align="center"><b>Case Number/Title</b></td>
        #       <td align="center"><b>Dates</b></td>
        #       <td align="center"><b>Category/<br>Event</b></td>
        #       <td align="center"><b>Docketed by</b></td>
        #       <td align="center"><b>Notes</b></td>
        #     </tr>
        #     <tr>
        #       <td valign="top"><a href="https://ecf.azd.uscourts.gov/cgi-bin/DktRpt.pl?1224854">4:20-po-20403-EJM <br>USA v. Lopez-Domingo<br></a><b><font size="-1"> CASE CLOSED on 01/10/2020</font></b></td>
        #       <td valign="top"><i>Entered: </i>01/13/2020 <br>15:25:21<br><i>Filed: </i>01/13/2020</td>
        #       <td valign="top"><i>Category:</i> order-cr<br><i>Event:</i> OSL Order - Petty Cases Only<br><i>Document: </i><a href="https://ecf.azd.uscourts.gov/doc1/025121588988" onclick="goDLS('/doc1/025121588988','1224855','5','','','1','','');return(false);">1</a></td>
        #       <td valign="top">BAC<br>
        #         <i>Type: </i>crt</td>
        #       <td valign="top"><i>Office:</i> Tucson Division
        #         <br><i>Presider: </i>Eric J Markovich
        #         <br><i>Case Flags:</i> <span>OSFLP</span>
        #       </td>
        #     </tr>
        #   </tbody>
        # </table>

        tables = self.tree.xpath('//table')
        if not tables:
            self._metadata = []
            return self._metadata

        rows = tables[0].xpath('.//tr')
        # The first row holds the column headings, not data.
        data = [self._parse_row(row) for row in rows[1:]]

        self._metadata = data
        return data

    @staticmethod
    def _format_date(value):
        """Return `value` as mm/dd/yyyy text.

        Objects with a strftime() method (date, datetime) are formatted;
        anything else, such as an already formatted string, is returned
        unchanged.
        """
        if hasattr(value, 'strftime'):
            return value.strftime('%m/%d/%Y')
        return value

    def query(self, open, closed, office, type, start, end, text,
              docket_number='', event_category='', case_flags='',
              sort='case number'):
        """Submit the Docket Activity Report form and parse the response.

        The `open` and `type` parameter names shadow Python builtins; they
        are kept so existing callers continue to work.

        :param open: 'on' to include open cases, '' otherwise.
        :param closed: 'on' to include closed cases, '' otherwise.
        :param office: Value of the form's office <option>. The codes are
            COURT-SPECIFIC; at azd, for example, 2 is Phoenix Division,
            3 is Prescott Division and 4 is Tucson Division.
        :param type: Value of the form's case_type <option>, for example
            'cv' (Civil), 'cr' (Criminal) or 'po' (Petty Offense) at azd.
            The available codes are COURT-SPECIFIC; check the court's form.
        :param start: First filing date to include, as a date/datetime
            object or a string already in m/d/yyyy form.
        :param end: Last filing date to include; same types as `start`.
        :param text: 'summary' or 'full'; sent as the docket_text field.
        :param docket_number: Sent verbatim as the case_num field.
        :param event_category: Sent verbatim as the event_category field.
        :param case_flags: Sent verbatim as the case_flags field.
        :param sort: Sent verbatim as the sort1 field.
        :raises AssertionError: If the report has no session.
        """
        assert self.session is not None, \
            "session attribute of ActivityReport cannot be None."

        filed_from = self._format_date(start)
        filed_to = self._format_date(end)

        # Example of what a browser submits. One note: when open_cases is
        # not checked in the form, it does not appear as form data at all.
        #   u'all_case_ids': '0'
        #   u'case_num': ''
        #   u'closed_cases': 'on'
        #   u'office': '4'
        #   u'case_type': 'po'
        #   u'event_category': ''
        #   u'case_flags': ''
        #   u'filed_from': '1/13/2020'
        #   u'filed_to': '1/13/2020'
        #   u'date_range_limit': ''
        #   u'docket_text': 'summary'
        #   u'sort1': 'case number'
        params = {
            u'all_case_ids': '0',
            u'case_num': docket_number,
            u'open_cases': open,
            u'closed_cases': closed,
            u'office': office,
            u'case_type': type,
            u'event_category': event_category,
            u'case_flags': case_flags,
            u'filed_from': filed_from,
            u'filed_to': filed_to,
            u'date_range_limit': '',
            u'docket_text': text,
            u'sort1': sort,
        }

        # Each value is its own argument: passing one tuple would leave the
        # second %s placeholder without a value.
        logger.info(u"Running activity report from '%s' to '%s'",
                    filed_from, filed_to)
        logger.info(self.url)
        self.response = self.session.post(self.url + '?1-L_1_0-1',
                                          data=params)
        logger.info(u"Response: %s", self.response)

        self.parse()

    @property
    def data(self):
        """Get all the data back from this endpoint.

        Don't attempt to return parties or docket_entries like the
        superclass does.

        :return: A new list of the row dicts from :attr:`metadata`, or an
            empty dict when `is_valid` is False.
        """
        if self.is_valid is False:
            return {}

        return list(self.metadata)
|
||
|
||
def _main():
    """Parse a saved Docket Activity Report HTML file and pretty-print it.

    Expects exactly one command-line argument, the path to the HTML file.
    Prints usage help and exits with status 1 otherwise.
    """
    # Imported here so the block is self-contained. io.open decodes while
    # reading on both Python 2 and Python 3, whereas calling .decode() on
    # the result of a text-mode read() fails on Python 3.
    import io

    if len(sys.argv) != 2:
        print("Usage: python -m juriscraper.pacer.activity_report filepath")
        print("Please provide a path to an HTML file to parse.")
        sys.exit(1)

    # Court ID is only needed for querying. Actual
    # parsed value appears in output
    report = ActivityReport('mad')
    filepath = sys.argv[1]
    print("Parsing HTML file at %s" % filepath)
    with io.open(filepath, 'r', encoding='utf-8') as f:
        text = f.read()
    report._parse_text(text)
    pprint.pprint(report.data, indent=2)


if __name__ == "__main__":
    _main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I assume printing this every time isn't desirable?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And if that's true, do we need this function at all?