Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New testing option for ec2 (reuse existing server) #1036

Merged
merged 1 commit into from
Jul 27, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 100 additions & 66 deletions mephisto/abstractions/architects/ec2/ec2_architect.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ class EC2ArchitectArgs(ArchitectArgs):
profile_name: str = field(
default=MISSING, metadata={"help": "Profile name for deploying an ec2 instance"}
)
_deploy_type: str = field(
default="standard",
metadata={
"help": "Type of deploy, default is [standard] debug options are [retain|use_existing]",
},
)


@register_mephisto_abstraction()
Expand All @@ -91,6 +97,7 @@ def __init__(
"""
self.args = args
self.task_run = task_run
self._deploy_type = args.architect._deploy_type
with open(DEFAULT_FALLBACK_FILE, "r") as fallback_detail_file:
self.fallback_details = json.load(fallback_detail_file)

Expand Down Expand Up @@ -180,9 +187,23 @@ def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
# in ec2 resources
subdomain = url_safe_string(args.architect.subdomain)

assert cls.check_domain_unused_locally(
subdomain=subdomain
), "Given subdomain does exist. Run `python3 -m mephisto.abstractions.architects.ec2.cleanup_ec2_server_by_name`"
if args.architect._deploy_type in ["retain", "use_existing"]:
assert (
args.architect.subdomain != args.task.task_name
), "Must use unique mephisto.architect.subdomain for non-standard launch"
unused_locally = cls.check_domain_unused_locally(subdomain=subdomain)
if args.architect._deploy_type in ["retain", "standard"]:
assert (
unused_locally
), "Given subdomain does exist. Run `python3 -m mephisto.abstractions.architects.ec2.cleanup_ec2_server_by_name`"
else:
assert (
not unused_locally
), "Must have existing subdomain setup to use it. Try deploying with retain first"
if args.architect._deploy_type == "retain":
logger.warn(
f"Launching architect with domain {subdomain} with retain deploy, will need MANUAL shutdown"
)

# VALID_INSTANCES = []
# assert args.architect.instance_type in VALID_INSTANCES
Expand All @@ -204,9 +225,17 @@ def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
assert key in fallback_details, f"Fallback file missing required key {key}"

session = boto3.Session(profile_name=profile_name, region_name="us-east-2")
assert ec2_helpers.rule_is_new(
is_new_rule = ec2_helpers.rule_is_new(
session, subdomain, fallback_details["listener_arn"]
), "Rule was not new, existing subdomain found registered to the listener. Check on AWS."
)
if args.architect._deploy_type in ["retain", "standard"]:
assert (
is_new_rule
), "Rule was not new, existing subdomain found registered to the listener. Check on AWS."
else:
assert (
not is_new_rule
), "Rule did not exist, Clean up and redeploy a new retain server."

def __get_build_directory(self) -> str:
"""
Expand Down Expand Up @@ -246,77 +275,82 @@ def __setup_ec2_server(self) -> str:
"""
Deploy the server using the setup server directory, return the URL
"""
server_dir = os.path.abspath(self.__get_build_directory())

print("EC2: Starting instance...")

# Launch server
server_id = ec2_helpers.create_instance(
self.session,
self.fallback_details["key_pair_name"],
self.fallback_details["security_group_id"],
self.fallback_details["vpc_details"]["subnet_1_id"],
self.router_name,
instance_type=self.instance_type,
)
self.server_id = server_id

self.created = True

print("EC2: Configuring routing table...")
# Configure router
(
self.target_group_arn,
self.router_rule_arn,
) = ec2_helpers.register_instance_to_listener(
self.session,
server_id,
self.fallback_details["vpc_details"]["vpc_id"],
self.fallback_details["listener_arn"],
self.full_domain,
)
if self._deploy_type != "use_existing":
server_dir = os.path.abspath(self.__get_build_directory())

# Write out details
server_details = {
"balancer_rule_arn": self.router_rule_arn,
"instance_id": self.server_id,
"subdomain": self.subdomain,
"target_group_arn": self.target_group_arn,
}

with open(self.server_detail_path, "w+") as detail_file:
json.dump(server_details, detail_file)

print("EC2: Deploying server...")
# Push server files and execute launch
ec2_helpers.deploy_to_routing_server(
self.session,
server_id,
self.fallback_details["key_pair_name"],
server_dir,
)
print("EC2: Starting instance...")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could include here which deploy type this is


# Launch server
server_id = ec2_helpers.create_instance(
self.session,
self.fallback_details["key_pair_name"],
self.fallback_details["security_group_id"],
self.fallback_details["vpc_details"]["subnet_1_id"],
self.router_name,
instance_type=self.instance_type,
)
self.server_id = server_id

self.created = True

print("EC2: Configuring routing table...")
# Configure router
(
self.target_group_arn,
self.router_rule_arn,
) = ec2_helpers.register_instance_to_listener(
self.session,
server_id,
self.fallback_details["vpc_details"]["vpc_id"],
self.fallback_details["listener_arn"],
self.full_domain,
)

# Write out details
server_details = {
"balancer_rule_arn": self.router_rule_arn,
"instance_id": self.server_id,
"subdomain": self.subdomain,
"target_group_arn": self.target_group_arn,
}

with open(self.server_detail_path, "w+") as detail_file:
json.dump(server_details, detail_file)

print("EC2: Deploying server...")
# Push server files and execute launch
ec2_helpers.deploy_to_routing_server(
self.session,
server_id,
self.fallback_details["key_pair_name"],
server_dir,
)

return f"https://{self.full_domain}"

def __delete_ec2_server(self):
"""
Remove the heroku server associated with this task run
"""
server_id = self.server_id
assert server_id is not None, "Cannot shutdown a non-existent server"
print(f"Ec2: Deleting server: {self.server_id}")
if self.router_rule_arn is not None:
ec2_helpers.delete_rule(
if self._deploy_type == "standard":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the deploy type is different, I suggest outputting an INFO message here saying that for deploy type X the server continues to live, and needs to be shut down manually via the cleanup command.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The warning message appears on launch for the retain server currently. Feel free to add additional logging elsewhere as appropriate.

server_id = self.server_id
assert server_id is not None, "Cannot shutdown a non-existent server"
assert (
self.target_group_arn is not None
), "Target group always exists on standard server"
print(f"Ec2: Deleting server: {self.server_id}")
if self.router_rule_arn is not None:
ec2_helpers.delete_rule(
self.session,
self.router_rule_arn,
self.target_group_arn,
)

ec2_helpers.delete_instance(
self.session,
self.router_rule_arn,
self.target_group_arn,
server_id,
)

ec2_helpers.delete_instance(
self.session,
server_id,
)
os.unlink(self.server_detail_path)
os.unlink(self.server_detail_path)

def server_is_running(self) -> bool:
"""
Expand Down
Loading