Skip to content

Commit

Permalink
Merge branch 'add-prolific-provider' of https://github.com/facebookre…
Browse files Browse the repository at this point in the history
…search/Mephisto into add-prolific-provider
  • Loading branch information
JackUrb committed Aug 4, 2023
2 parents 7d08666 + db3dd84 commit c66b861
Show file tree
Hide file tree
Showing 49 changed files with 1,988 additions and 899 deletions.
11 changes: 11 additions & 0 deletions .git-hooks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
### Git Hooks

Please set up your local pre-commit git hook as below

```shell
git config --local core.hooksPath .git-hooks
```

Enabled hooks:

- AWS credentials protection from accidental commit
55 changes: 55 additions & 0 deletions .git-hooks/pre-commit
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash

if git rev-parse --verify HEAD >/dev/null 2>&1
then
against=HEAD
else
# Initial commit: diff against an empty tree object
EMPTY_TREE=$(git hash-object -t tree /dev/null)
against=$EMPTY_TREE
fi

# Redirect output to stderr.
exec 1>&2

# Check changed files for an AWS keys
FILES=$(git diff --cached --name-only $against)
echo $FILES

local aws="(AWS|aws|Aws)?_?" quote="(\"|')" connect="\s*(:|=>|=)\s*"
local opt_quote="${quote}?"

if [ -n "$FILES" ]; then
KEY_ID=$(grep -E --line-number '(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}' $FILES)
KEY=$(grep -E --line-number "${opt_quote}${aws}(SECRET|secret|Secret)?_?(ACCESS|access|Access)?_?(KEY|key|Key)${opt_quote}${connect}${opt_quote}[A-Za-z0-9/\+=]{40}${opt_quote}" $FILES)

echo $KEY_ID
echo $KEY

if [ -n "$KEY_ID" ] || [ -n "$KEY" ]; then
exec < /dev/tty # Capture input
echo "=========== Possible AWS Access Key IDs ==========="
echo "${KEY_ID}"
echo ""

echo "=========== Possible AWS Secret Access Keys ==========="
echo "${KEY}"
echo ""

while true; do
read -p "[AWS Key Check] Possible AWS keys found. Commit files anyway? (y/N) " yn
if [ "$yn" = "" ]; then
yn='N'
fi
case $yn in
[Yy] ) exit 0;;
[Nn] ) exit 1;;
* ) echo "Please answer y or n for yes or no.";;
esac
done
exec <&- # Release input
fi
fi

# Normal exit
exit 0
2 changes: 1 addition & 1 deletion .github/workflows/python-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
run: |
python -m pytest -v -m "not req_creds and not prolific" --cov --cov-report=xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
fail_ci_if_error: true
2 changes: 1 addition & 1 deletion docs/web/docs/guides/tutorials/first_task.md
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ mephisto:
```
Save this configuration file, and you're ready to see your task live:
```bash
$ python static_test_script conf=my_config
$ python static_test_script.py conf=my_config
```
Mephisto should print out a link to view your task on the mturk sandbox, like `https://workersandbox.mturk.com/mturk/preview?groupId=XXXXXXXXXXXXXXXX`. Navigate here and you're working on the same task, available on MTurk (on the sandbox at least)!
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,42 +7,36 @@ mephisto:
architect:
_architect_type: ec2
profile_name: mephisto-router-iam
subdomain: '0623.1'
subdomain: '0802.1'
blueprint:
data_csv: ${task_dir}/data_prolific.csv
task_source: ${task_dir}/server_files/demo_task.html
preview_source: ${task_dir}/server_files/demo_preview.html
extra_source_dir: ${task_dir}/server_files/extra_refs
units_per_assignment: 1
units_per_assignment: 2
log_level: 'debug'
task:
task_name: '0623'
task_title: '0623 Task'
task_name: '0802'
task_title: '0802 Task'
task_description: 'This is a simple test of static Prolific tasks.'
task_reward: 70
task_tags: 'static,task,testing'
max_num_concurrent_units: 1
provider:
prolific_external_study_url: 'https://example.com?participant_id={{%PROLIFIC_PID%}}&study_id={{%STUDY_ID%}}&submission_id={{%SESSION_ID%}}'
prolific_id_option: 'url_parameters'
prolific_workspace_name: 'My Workspace'
prolific_project_name: 'Project'
prolific_allow_list_group_name: 'Allow list'
prolific_block_list_group_name: 'Block list'
prolific_total_available_places: 1
prolific_eligibility_requirements:
- name: 'CustomWhitelistEligibilityRequirement'
white_list:
- '6463d16eb92a24a0c3fdc8d5'
# - '6463d32f50a18041930b71be'
# - '6463d3922d7d99360896228f'
# - name: 'AgeRangeEligibilityRequirement'
# min_age: 10
# max_age: 20
# - name: 'ApprovalNumbersEligibilityRequirement'
# minimum_approvals: 100
# maximum_approvals: 200
# - name: 'ApprovalRateEligibilityRequirement'
# minimum_approval_rate: 10
# maximum_approval_rate: 20
# - name: 'JoinedBeforeEligibilityRequirement'
# joined_before: ''
- 6463d32f50a18041930b71be
- 6463d3922d7d99360896228f
- 6463d40e8d5d2f0cce2b3b23
- 6463d44ed1b61a8fb4e0765a
- 6463d488c2f2821eaa2fa13f
- name: 'ApprovalRateEligibilityRequirement'
minimum_approval_rate: 0
maximum_approval_rate: 100
27 changes: 25 additions & 2 deletions examples/simple_static_task/static_test_prolific_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,36 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from mephisto.tools.scripts import task_script
from omegaconf import DictConfig

from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import (
SharedStaticTaskState,
)
from mephisto.data_model.qualification import QUAL_GREATER_EQUAL
from mephisto.tools.scripts import task_script
from mephisto.utils.qualifications import make_qualification_dict


@task_script(default_config_file='prolific_example')
def main(operator, cfg: DictConfig) -> None:
operator.launch_task_run(cfg.mephisto)
shared_state = SharedStaticTaskState()

# Mephisto qualifications
# shared_state.qualifications = [
# make_qualification_dict('sample_qual_name', QUAL_GREATER_EQUAL, 1),
# ]

# Prolific qualifications
# Note that we'll prefix names with a customary `web.eligibility.models.` later in the code
shared_state.prolific_specific_qualifications = [
{
'name': 'AgeRangeEligibilityRequirement',
'min_age': 18,
'max_age': 100,
},
]

operator.launch_task_run(cfg.mephisto, shared_state)
operator.wait_for_runs_then_shutdown(skip_input=True, log_rate=30)


Expand Down
37 changes: 37 additions & 0 deletions mephisto/abstractions/_subcomponents/agent_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import time
import weakref
import os.path
from mephisto.data_model.constants.assignment_state import AssignmentState

if TYPE_CHECKING:
from mephisto.data_model.agent import Agent, OnboardingAgent
Expand Down Expand Up @@ -92,6 +93,18 @@ def __new__(cls, agent: Union["Agent", "OnboardingAgent"]) -> "AgentState":
# We are constructing another instance directly
return super().__new__(cls)

@staticmethod
def immutable() -> List[str]:
"""Return all agent statuses that cannot be changed under normal operation"""
return [
AgentState.STATUS_DISCONNECT,
AgentState.STATUS_TIMEOUT,
AgentState.STATUS_EXPIRED,
AgentState.STATUS_RETURNED,
AgentState.STATUS_SOFT_REJECTED,
AgentState.STATUS_APPROVED,
]

@staticmethod
def complete() -> List[str]:
"""Return all final Agent statuses which will not be updated by the WorkerPool"""
Expand Down Expand Up @@ -126,6 +139,30 @@ def valid() -> List[str]:
AgentState.STATUS_REJECTED,
]

@staticmethod
def to_assignment_state(agent_state: str) -> str:
"""Return corresponding AssignmentState for an AgentState"""
AGENT_STATE_TO_ASSIGNMENTS_STATE_MAP = {
AgentState.STATUS_NONE: AssignmentState.CREATED,
AgentState.STATUS_ACCEPTED: AssignmentState.ASSIGNED,
AgentState.STATUS_ONBOARDING: AssignmentState.ASSIGNED,
AgentState.STATUS_WAITING: AssignmentState.ASSIGNED,
AgentState.STATUS_IN_TASK: AssignmentState.ASSIGNED,
AgentState.STATUS_COMPLETED: AssignmentState.COMPLETED,
AgentState.STATUS_DISCONNECT: AssignmentState.CREATED,
AgentState.STATUS_TIMEOUT: AssignmentState.CREATED,
AgentState.STATUS_PARTNER_DISCONNECT: AssignmentState.COMPLETED,
AgentState.STATUS_EXPIRED: AssignmentState.EXPIRED,
AgentState.STATUS_RETURNED: AssignmentState.CREATED,
AgentState.STATUS_SOFT_REJECTED: AssignmentState.SOFT_REJECTED,
AgentState.STATUS_APPROVED: AssignmentState.ACCEPTED,
AgentState.STATUS_REJECTED: AssignmentState.REJECTED,
}
assert (
agent_state in AGENT_STATE_TO_ASSIGNMENTS_STATE_MAP
), f"Invalid agent state {agent_state} provided, valid: {AGENT_STATE_TO_ASSIGNMENTS_STATE_MAP.keys()}"
return AGENT_STATE_TO_ASSIGNMENTS_STATE_MAP[agent_state]

# Implementations of an AgentState must implement the following:

def __init__(self, agent: "Agent"):
Expand Down
15 changes: 15 additions & 0 deletions mephisto/abstractions/architects/channels/websocket_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

logger = get_logger(name=__name__)

MAX_RETRIES = 3


class WebsocketChannel(Channel):
"""
Expand Down Expand Up @@ -55,6 +57,7 @@ def __init__(
self._is_alive = False
self._is_closed = False
self._socket_task: Optional[asyncio.Task] = None
self._retries = MAX_RETRIES

def is_closed(self):
"""
Expand Down Expand Up @@ -161,6 +164,18 @@ async def run_socket():
f"Unhandled OSError exception in socket {e}, attempting restart"
)
await asyncio.sleep(0.2)
except websockets.exceptions.InvalidStatusCode as e:
if self._retries == 0:
raise ConnectionRefusedError(
"Could not connect after retries"
) from e
curr_retry = MAX_RETRIES - self._retries
logger.exception(
f"Status code error {repr(e)}, attempting retry {curr_retry}",
exc_info=True,
)
await asyncio.sleep(1 + curr_retry)
self._retries += 1
except Exception as e:
logger.exception(f"Unhandled exception in socket {e}, {repr(e)}")
if self._is_closed:
Expand Down
40 changes: 31 additions & 9 deletions mephisto/abstractions/architects/ec2/cleanup_ec2_server_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os
import json

Expand Down Expand Up @@ -33,27 +32,50 @@ def main():
n_names = len(all_server_names)

if not n_names:
logger.warning('No server to clean up!')
logger.info('No servers found to clean up')
return

logger.info(f'Found {n_names} server names: {", ".join(all_server_names)}')

confirm = input(
f'Are you sure you want to remove the {n_names} found servers? [y/N]\n'
f'>> '
)
if confirm != 'y':
return

# Get EC2 user role
iam_role_name = input('Please enter local profile name for IAM role\n>> ')
iam_role_name = input(
'Please enter local profile name for IAM role\n'
'>> '
)
logger.info(f'Removing {n_names} servers...')

# Clean directory with server JSON files (DEFAULT_SERVER_DETAIL_LOCATION)
# Cleanup local server JSON files, and remove related EC2 infra
skipped_names = []
for i, server_name in enumerate(all_server_names):
logger.info(f'Removing {i+1}/{n_names} server "{server_name}"...')
_name = f'"{server_name}"'
logger.info(f'{i+1}/{n_names} Removing {_name}...')

session = boto3.Session(profile_name=iam_role_name, region_name='us-east-2')
try:
skipped_names.append(_name)
ec2_helpers.remove_instance_and_cleanup(session, server_name)
logger.info(f'..."{server_name}" was removed!')
logger.debug(f'...{_name} - successfully removed')
skipped_names.remove(_name)
except botocore.exceptions.ClientError as e:
logger.info(f'..."{server_name}" could not be removed - {e}')
logger.warning(f'...{_name} - could not be removed: {e}')
except json.decoder.JSONDecodeError as e:
logger.info(f'..."{server_name}" could not read JSON config - {e}')
logger.warning(f'...{_name} - could not read JSON config: {e}')
except Exception as e:
logger.info(f'..."{server_name}" encountered error - {e}')
logger.warning(f'...{_name} - encountered error: {e}')

if skipped_names:
logger.info(
f'Could not remove {len(skipped_names)}/{n_names} servers: {", ".join(skipped_names)}'
)
else:
logger.info(f'Successfully removed {n_names} servers')


if __name__ == '__main__':
Expand Down
15 changes: 10 additions & 5 deletions mephisto/abstractions/architects/ec2/ec2_architect.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import os
import sh # type: ignore
import shutil
import time
import signal
import requests
import re
import json
Expand Down Expand Up @@ -306,10 +306,6 @@ def __setup_ec2_server(self) -> str:
server_dir,
)

# TODO: Remove this hack to address sporadic ConnectionRefusedError
print("EC2: Waiting to establish connection...")
time.sleep(10)

url = f"https://{self.full_domain}"
print(f"EC2: Deployed server at {url}")
return url
Expand Down Expand Up @@ -376,4 +372,13 @@ def shutdown(self) -> None:
in the db.
"""
if self.created: # only delete the server if it's created by us

def cant_cancel_shutdown(sig, frame):
logger.warn(
"Ignoring ^C during ec2 cleanup. ^| if you NEED to exit and you will "
"have to clean up this server with cleanup_ec2_server_by_name.py after."
)

old_handler = signal.signal(signal.SIGINT, cant_cancel_shutdown)
self.__delete_ec2_server()
signal.signal(signal.SIGINT, old_handler)
Loading

0 comments on commit c66b861

Please sign in to comment.