Skip to content

Commit

Permalink
Merge pull request #329 from macrocosm-os/staging
Browse files Browse the repository at this point in the history
Staging
  • Loading branch information
mccrindlebrian authored Jan 21, 2025
2 parents 3bdcc27 + a1b663d commit 5c296cf
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 29 deletions.
2 changes: 1 addition & 1 deletion folding/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .protocol import JobSubmissionSynapse
from .validators.protein import Protein

__version__ = "1.4.3"
__version__ = "1.4.4"
version_split = __version__.split(".")
__spec_version__ = (10000 * int(version_split[0])) + (100 * int(version_split[1])) + (1 * int(version_split[2]))

Expand Down
21 changes: 10 additions & 11 deletions folding/base/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,36 +44,35 @@ def create_simulation(
This method takes in a seed, state, and checkpoint file path to recreate a simulation object.
Args:
seed (str): The seed for the random number generator.
state (str): The state of the simulation.
cpt_file (str): The path to the checkpoint file.
system_config (dict): A dictionary containing the system configuration settings.
pdb (app.PDBFile): The PDB file used to initialize the simulation
Returns:
app.Simulation: The recreated simulation object.
system_config: The potentially altered system configuration in SimulationConfig format.
Tuple[app.Simulation, SimulationConfig]: A tuple containing the recreated simulation object and the potentially altered system configuration in SystemConfig format.
"""
start_time = time.time()
forcefield = app.ForceField(system_config["ff"], system_config["water"])
logger.warning(f"Creating ff took {time.time() - start_time:.4f} seconds")
logger.debug(f"Creating ff took {time.time() - start_time:.4f} seconds")

modeller = app.Modeller(pdb.topology, pdb.positions)

start_time = time.time()
modeller.deleteWater()
logger.warning(f"Deleting water took {time.time() - start_time:.4f} seconds")
logger.debug(f"Deleting water took {time.time() - start_time:.4f} seconds")

# modeller.addExtraParticles(forcefield)

start_time = time.time()
modeller.addHydrogens(forcefield)
logger.warning(f"Adding hydrogens took {time.time() - start_time:.4f} seconds")
logger.debug(f"Adding hydrogens took {time.time() - start_time:.4f} seconds")

start_time = time.time()
# modeller.addSolvent(
# forcefield,
# padding=system_config.box_padding * unit.nanometer,
# boxShape=system_config.box,
# )
logger.warning(f"Adding solvent took {time.time() - start_time:.4f} seconds")
logger.debug(f"Adding solvent took {time.time() - start_time:.4f} seconds")

# Create the system
start_time = time.time()
Expand All @@ -85,7 +84,7 @@ def create_simulation(
nonbondedCutoff = threshold * mm.unit.nanometers
# set the attribute in the config for the pipeline.
system_config["cutoff"] = threshold
logger.warning(
logger.debug(
f"Nonbonded cutoff is greater than half the minimum box dimension. Setting nonbonded cutoff to {threshold} nm"
)
else:
Expand Down Expand Up @@ -133,14 +132,14 @@ def create_simulation(
simulation = mm.app.Simulation(
modeller.topology, system, integrator, platform, properties
)
logger.warning(
logger.debug(
f"Creating simulation took {time.time() - start_time:.4f} seconds"
)
# Set initial positions

start_time = time.time()
simulation.context.setPositions(modeller.positions)
logger.warning(f"Setting positions took {time.time() - start_time:.4f} seconds")
logger.debug(f"Setting positions took {time.time() - start_time:.4f} seconds")

# Converting the system config into a Dict[str,str] and ensure all values in system_config are of the correct type
for k, v in system_config.items():
Expand Down
54 changes: 39 additions & 15 deletions folding/validators/protein.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,34 @@
import os
import time
import glob
import asyncio
import base64
import glob
import os
import random
import shutil
import asyncio
import datetime
from pathlib import Path
from dataclasses import dataclass
import time
from collections import defaultdict
from typing import Dict, List, Literal, Any
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Literal

import numpy as np
import pandas as pd
from openmm import app, unit
from pdbfixer import PDBFixer

from folding.utils.s3_utils import DigitalOceanS3Handler
from folding.base.simulation import OpenMMSimulation
from folding.store import Job
from folding.utils.logger import logger
from folding.utils.opemm_simulation_config import SimulationConfig
from folding.utils.ops import (
OpenMMException,
ValidationError,
write_pkl,
load_pkl,
check_and_download_pdbs,
check_if_directory_exists,
load_pkl,
plot_miner_validator_curves,
write_pkl,
)

from folding.utils.logger import logger
from folding.utils.s3_utils import DigitalOceanS3Handler

ROOT_DIR = Path(__file__).resolve().parents[2]

Expand Down Expand Up @@ -537,6 +535,28 @@ def check_masses(self) -> bool:
logger.error(f"Masses for atom {i} do not match. Validator: {v_mass}, Miner: {m_mass}")
return False
return True

def compare_state_to_cpt(self, state_energies: list, checkpoint_energies: list, window: int = 50) -> bool:
    """
    Check whether the state file is consistent with the checkpoint file.

    The median of the first ``window`` energies produced by the simulation
    loaded from the state file is compared to the median of the first
    ``window`` energies produced by the simulation loaded from the checkpoint.
    The run is considered consistent when the absolute percent difference
    between the two medians does not exceed ``self.epsilon``.

    Args:
        state_energies (list): Energies recorded after loading the state file.
        checkpoint_energies (list): Energies recorded after loading the checkpoint file.
        window (int): Number of leading samples of each series used for the
            median. Defaults to 50. Should be positive; a non-positive value
            that leaves no samples makes the check fail.

    Returns:
        bool: True if the two medians agree within ``self.epsilon`` percent,
        False otherwise. Also False when either series has no samples in the
        window or when a median is not finite, because such data cannot
        confirm that the files match.
    """
    state_head = np.asarray(state_energies, dtype=float)[:window]
    checkpoint_head = np.asarray(checkpoint_energies, dtype=float)[:window]

    # np.median of an empty slice is NaN, and every comparison against NaN is
    # False, so without this guard an empty series would silently pass.
    if state_head.size == 0 or checkpoint_head.size == 0:
        return False

    state_median = float(np.median(state_head))
    checkpoint_median = float(np.median(checkpoint_head))

    # NaN/inf energies would also turn the threshold test into a vacuous pass.
    if not (np.isfinite(state_median) and np.isfinite(checkpoint_median)):
        return False

    # A zero reference makes the relative difference undefined: only an exactly
    # matching zero median counts as equal (avoids the 0/0 and x/0 division).
    if checkpoint_median == 0.0:
        return state_median == 0.0

    percent_diff = abs((state_median - checkpoint_median) / checkpoint_median) * 100
    return bool(percent_diff <= self.epsilon)



def is_run_valid(self):
"""
Expand Down Expand Up @@ -575,8 +595,8 @@ def is_run_valid(self):
)
self.simulation.loadState(self.state_xml_path)
state_energies = []
for _ in range(100):
self.simulation.step(100)
for _ in range(steps_to_run // 10):
self.simulation.step(10)
energy = self.simulation.context.getState(getEnergy=True).getPotentialEnergy()._value
state_energies.append(energy)

Expand Down Expand Up @@ -622,6 +642,10 @@ def is_run_valid(self):
if not self.check_gradient(check_energies=check_energies):
logger.warning(f"hotkey {self.hotkey_alias} failed cpt-gradient check for {self.pdb_id}, ... Skipping!")
return False, [], [], "cpt-gradient"

if not self.compare_state_to_cpt(state_energies=state_energies, checkpoint_energies=check_energies):
logger.warning(f"hotkey {self.hotkey_alias} failed state-checkpoint comparison for {self.pdb_id}, ... Skipping!")
return False, [], [], "state-checkpoint"

# calculating absolute percent difference per step
percent_diff = abs(((check_energies - miner_energies) / miner_energies) * 100)
Expand Down
5 changes: 4 additions & 1 deletion folding/validators/reward.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ def check_if_identical(event):
logger.warning(f"Setting {len(flattened_list)} / {len(event['checked_energy'])} uids to 0 reward due to identical submissions.")
for idx in flattened_list:
event["is_valid"][idx] = False
event["reason"][idx] = "Identical submission to another hotkey in the group"
if event["reason"]== "":
event["reason"][idx] = "Identical submission to another hotkey in the group"

return event

Expand All @@ -58,6 +59,7 @@ def get_energies(protein: Protein, responses: List[JobSubmissionSynapse], uids:
event["ns_computed"] = [0] * len(uids)
event["reason"] = [""] * len(uids)
event["best_cpt"] = [""] * len(uids)
event["seed"] = []

energies = np.zeros(len(uids))

Expand All @@ -71,6 +73,7 @@ def get_energies(protein: Protein, responses: List[JobSubmissionSynapse], uids:
state=resp.miner_state,
seed=resp.miner_seed,
)
event['seed'].append(resp.miner_seed)
event["process_md_output_time"][i] = time.time() - start_time
event["best_cpt"][i] = protein.checkpoint_path if hasattr(protein, "checkpoint_path") else ""

Expand Down
7 changes: 7 additions & 0 deletions neurons/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,13 @@ async def update_job(self, job: Job):
energies = torch.Tensor(job.event["energies"])
rewards = torch.zeros(len(energies)) # one-hot per update step

# If there is an exploit on the cpt file detected via the state-checkpoint reason, we will reset the score to 0.
logger.info(f"event information: {job.event['reason']}, {job.event['uids']}")
for uid, reason in zip(job.event["uids"], job.event["reason"]):
if reason == "state-checkpoint":
logger.warning(f"Setting uid {uid} score to zero, State-checkpoint check failed.")
self.scores[uid] = 0

best_index = np.argmin(energies)
best_loss = energies[best_index].item() # item because it's a torch.tensor
best_hotkey = serving_hotkeys[best_index]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "folding"
version = "1.4.3"
version = "1.4.4"
description = "Macrocosmos Subnet 25: Folding"
authors = ["Brian McCrindle <[email protected]>", "Sergio Champoux <[email protected]>", "Szymon Fonau <[email protected]>"]

Expand Down

0 comments on commit 5c296cf

Please sign in to comment.