Skip to content

Commit

Permalink
make de_identify_results List[List[Replacement]]
Browse files Browse the repository at this point in the history
  • Loading branch information
joeferraratonic committed Nov 21, 2024
1 parent 8207e0b commit 0d0318f
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 76 deletions.
96 changes: 46 additions & 50 deletions docs/source/redact/redacting_text.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,56 +67,52 @@ This produces the following output. Note that the 'idx' property denotes the po

.. code-block:: console
[ORGANIZATION_P5XLAH] was founded in [DATE_TIME_0FW53]
[NAME_GIVEN_HI1h7] [NAME_FAMILY_5oMl28] is a person
{
"start": 0,
"end": 5,
"new_start": 0,
"new_end": 21,
"label": "ORGANIZATION",
"text": "Tonic",
"score": 0.9,
"language": "en",
"new_text": "[ORGANIZATION_P5XLAH]",
"idx": 0
}
{
"start": 21,
"end": 25,
"new_start": 37,
"new_end": 54,
"label": "DATE_TIME",
"text": "2018",
"score": 0.9,
"language": "en",
"new_text": "[DATE_TIME_0FW53]",
"idx": 0
}
{
"start": 0,
"end": 4,
"new_start": 0,
"new_end": 18,
"label": "NAME_GIVEN",
"text": "John",
"score": 0.9,
"language": "en",
"new_text": "[NAME_GIVEN_HI1h7]",
"idx": 1
}
{
"start": 5,
"end": 10,
"new_start": 19,
"new_end": 39,
"label": "NAME_FAMILY",
"text": "Smith",
"score": 0.9,
"language": "en",
"new_text": "[NAME_FAMILY_5oMl28]",
"idx": 1
}
[ORGANIZATION_5Ve7OH] was founded in [DATE_TIME_DnuC1]
{
"start": 0,
"end": 5,
"new_start": 0,
"new_end": 21,
"label": "ORGANIZATION",
"text": "Tonic",
"score": 0.9,
"language": "en",
"new_text": "[ORGANIZATION_5Ve7OH]"
}
{
"start": 21,
"end": 25,
"new_start": 37,
"new_end": 54,
"label": "DATE_TIME",
"text": "2018",
"score": 0.9,
"language": "en",
"new_text": "[DATE_TIME_DnuC1]"
}
[NAME_GIVEN_dySb5] [NAME_FAMILY_7w4Db3] is a person
{
"start": 0,
"end": 4,
"new_start": 0,
"new_end": 18,
"label": "NAME_GIVEN",
"text": "John",
"score": 0.9,
"language": "en",
"new_text": "[NAME_GIVEN_dySb5]"
}
{
"start": 5,
"end": 10,
"new_start": 19,
"new_end": 39,
"label": "NAME_FAMILY",
"text": "Smith",
"score": 0.9,
"language": "en",
"new_text": "[NAME_FAMILY_7w4Db3]"
}

Redact JSON data
----------------
Expand Down
7 changes: 0 additions & 7 deletions tonic_textual/classes/common_api_responses/replacement.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ class Replacement(dict):
xml_path : Optional[str]
The XPath of the entity in the original XML document. This is only present
if the input text was an XML document. NOTE: Array indices in XPath are 1-based.
idx : Optional[int]
The index in the original bulk text array to which the NER result corresponds. NOTE: This is only used when calling our bulk redaction methods
"""

def __init__(
Expand All @@ -55,7 +53,6 @@ def __init__(
example_redaction: Optional[str] = None,
json_path: Optional[str] = None,
xml_path: Optional[str] = None,
idx: Optional[int] = None,
):
self.start = start
self.end = end
Expand All @@ -69,7 +66,6 @@ def __init__(
self.example_redaction = example_redaction
self.json_path = json_path
self.xml_path = xml_path
self.idx = idx

dict.__init__(
self,
Expand All @@ -89,7 +85,6 @@ def __init__(
),
**({} if json_path is None else {"json_path": json_path}),
**({} if xml_path is None else {"xml_path": xml_path}),
**({} if idx is None else {"idx": idx}),
)

def describe(self) -> str:
Expand All @@ -114,6 +109,4 @@ def to_dict(self) -> Dict:
out["json_path"] = self.json_path
if self.xml_path is not None:
out["xml_path"] = self.xml_path
if self.idx is not None:
out["idx"] = self.idx
return out
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,13 @@ def __init__(

def describe(self) -> str:

result = '\n'.join(self.bulk_redacted_text) + '\n'
for x in self.de_identify_results:
result += f"{x.describe()}\n"
result = ""
for redacted_text, de_id_res in zip(
self.bulk_redacted_text, self.de_identify_results
):
result += f"{redacted_text}\n"
for replacement in de_id_res:
result += f"{replacement.describe()}\n"
return result

def get_usage(self):
Expand Down
32 changes: 16 additions & 16 deletions tonic_textual/redact_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,23 +703,23 @@ def send_redact_bulk_request(
if e.response.status_code == 400:
raise InvalidJsonForRedactionRequest(e.response.text)
raise e

de_id_results = [
Replacement(
start=x["start"],
end=x["end"],
new_start=x.get("newStart"),
new_end=x.get("newEnd"),
label=x["label"],
text=x["text"],
new_text=x.get("newText"),
score=x["score"],
language=x.get("language"),
example_redaction=x.get("exampleRedaction"),
idx=x.get("idx")

de_id_results = [[] for i in range(len(response["bulkText"]))]
for x in response["deIdentifyResults"]:
de_id_results[x["idx"]].append(
Replacement(
start=x["start"],
end=x["end"],
new_start=x.get("newStart"),
new_end=x.get("newEnd"),
label=x["label"],
text=x["text"],
new_text=x.get("newText"),
score=x["score"],
language=x.get("language"),
example_redaction=x.get("exampleRedaction"),
)
)
for x in response["deIdentifyResults"]
]

return BulkRedactionResponse(
response["bulkText"],
Expand Down

0 comments on commit 0d0318f

Please sign in to comment.