Skip to content

Commit

Permalink
Merge pull request #106 from simulacrum6/master
Browse files Browse the repository at this point in the history
Add new job type "text_label_multiple_span_select".
  • Loading branch information
alidzm authored Oct 31, 2023
2 parents c43f56b + c807442 commit f215438
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 4 deletions.
27 changes: 25 additions & 2 deletions basemodels/manifest/data/groundtruth.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import List, Optional, Union

import requests
from pydantic import BaseModel, HttpUrl, ValidationError
from pydantic import BaseModel, HttpUrl, ValidationError, conlist, validator, root_validator, Field
from requests import RequestException
from typing_extensions import Literal

Expand Down Expand Up @@ -78,13 +78,36 @@ class ILASGroundtruthEntry(BaseModel):
ilas_groundtruth_entry_type = List[List[ILASGroundtruthEntry]]
ILASGroundtruthEntryModel = create_wrapper_model(ilas_groundtruth_entry_type)

class TLMSSGroundTruthEntry(BaseModel):
    # One labelled span, used as a groundtruth entry for the
    # `text_label_multiple_span_select` job type.
    #
    # Only the field types are declared here; nothing in this model enforces
    # an ordering between `start` and `end` or a lower bound on either.

    # Offset at which the span begins.
    # NOTE(review): unit (characters vs. bytes) is not stated here — confirm.
    start: int

    # Offset at which the span ends.
    # NOTE(review): inclusive vs. exclusive is not stated here — confirm.
    end: int

    # Label assigned to the span, carried as a string (e.g. "0").
    label: str


"""
Groundtruth file format for the `text_label_multiple_span_select` job type.
Each key is a file URI; its value is a list of labelled spans:
{
    "https://domain.com/file1.txt": [
        {
            "start": 0,
            "end": 4,
            "label": "0"
        }
    ]
}
"""
# Groundtruth value type for `text_label_multiple_span_select`: a flat list of
# span entries.
tlmss_groundtruth_entry_type = List[TLMSSGroundTruthEntry]
# Wrapper model built from that list type by create_wrapper_model (defined
# outside this excerpt — presumably so the bare list can be validated like a
# model; confirm against that helper).
TLMSSGroundTruthEntryModel = create_wrapper_model(tlmss_groundtruth_entry_type)


# Lookup table from job type name to the wrapper model that describes the
# groundtruth entries for that job type.
groundtruth_entry_models_map = {
"image_label_binary": ILBGroundtruthEntryModel,
"image_label_multiple_choice": ILMCGroundtruthEntryModel,
"image_label_area_select": ILASGroundtruthEntryModel,
"text_label_multiple_span_select": TLMSSGroundTruthEntryModel,
}


def validate_content_type(uri: str) -> None:
"""Validate uri content type"""
try:
Expand Down
1 change: 1 addition & 0 deletions basemodels/manifest/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class BaseJobTypesEnum(str, Enum):
image_label_binary = "image_label_binary"
image_label_multiple_choice = "image_label_multiple_choice"
text_free_entry = "text_free_entry"
text_label_multiple_span_select = "text_label_multiple_span_select"
text_multiple_choice_one_option = "text_multiple_choice_one_option"
text_multiple_choice_multiple_options = "text_multiple_choice_multiple_options"
image_label_area_adjust = "image_label_area_adjust"
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "hmt-basemodels"
version = "0.2.5"
version = "0.2.6"
description = ""
authors = ["Intuition Machines, Inc <[email protected]>"]
packages = [
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setuptools.setup(
name="hmt-basemodels",
version="0.2.5",
version="0.2.6",
author="HUMAN Protocol",
description="Common data models shared by various components of the Human Protocol stack",
url="https://github.com/hCaptcha/hmt-basemodels",
Expand Down
37 changes: 37 additions & 0 deletions tests/test_manifest_validation.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,43 @@ def test_groundtruth_uri_ilas_invalid_key(self):
with self.assertRaises(ValidationError):
self.validate_groundtruth_response("image_label_area_select", body)

def test_groundtruth_uri_tlmss_valid(self):
    # A URI key mapped to span entries carrying start/end/label is expected
    # to validate without raising.
    uri = "https://domain.com/file1.txt"
    spans = [
        {"start": 0, "end": 4, "label": "0"},
        {"start": 17, "end": 89, "label": "1"},
    ]

    # Stub HEAD requests to the URI so they report a plain-text content type.
    self.register_http_response(
        uri,
        method=httpretty.HEAD,
        headers={"Content-Type": "text/plain"},
    )

    self.validate_groundtruth_response("text_label_multiple_span_select", {uri: spans})


def test_groundtruth_uri_tlmss_invalid_key(self):
    # The span entries carry start/end/label, but the top-level key is not
    # a URI, so validation is expected to raise.
    spans = [
        {"start": 0, "end": 4, "label": "0"},
        {"start": 17, "end": 89, "label": "1"},
    ]
    payload = {"not_uri": spans}

    with self.assertRaises(ValidationError):
        self.validate_groundtruth_response("text_label_multiple_span_select", payload)


def test_groundtruth_uri_tlmss_invalid_value(self):
    # The key is a URI, but the entries use a "span" pair instead of the
    # start/end fields (and the first one has no label), so validation is
    # expected to raise.
    malformed_entries = [
        {"span": [0, 4]},
        {"span": [17, 89], "label": "1"},
    ]
    payload = {"https://www.domain.com/file1.txt": malformed_entries}

    with self.assertRaises(ValidationError):
        self.validate_groundtruth_response("text_label_multiple_span_select", payload)



def test_groundtruth_uri_ilas_invalid_value(self):
body = {"https://domain.com/file1.jpeg": [[True]]}

Expand Down

0 comments on commit f215438

Please sign in to comment.