Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed May 29, 2024
1 parent 0ca75eb commit e577c82
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 29 deletions.
75 changes: 47 additions & 28 deletions graphein/ml/datasets/pdb_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,6 @@ def __init__(

self.pdb_chain_ec_number_url = "https://ftp.ebi.ac.uk/pub/databases/msd/sifts/flatfiles/tsv/pdb_chain_enzyme.tsv.gz"



self.pdb_dir = self.root_dir / "pdb"
if not os.path.exists(self.pdb_dir):
os.makedirs(self.pdb_dir)
Expand All @@ -111,9 +109,13 @@ def __init__(
self.pdb_deposition_date_url
).name
self.pdb_availability_filename = Path(self.pdb_availability_url).name
self.pdb_chain_cath_uniprot_filename = Path(self.pdb_chain_cath_uniprot_url).name
self.pdb_chain_cath_uniprot_filename = Path(
self.pdb_chain_cath_uniprot_url
).name
self.cath_id_cath_code_filename = Path(self.cath_id_cath_code_url).name
self.pdb_chain_ec_number_filename = Path(self.pdb_chain_ec_number_url).name
self.pdb_chain_ec_number_filename = Path(
self.pdb_chain_ec_number_url
).name

self.list_columns = ["ligands"]

Expand Down Expand Up @@ -428,16 +430,20 @@ def _download_pdb_availability(self):
log.info("Downloading PDB availability map...")
wget.download(self.pdb_availability_url, out=str(self.root_dir))
log.debug("Downloaded PDB availability map")

def _download_pdb_chain_cath_uniprot_map(self):
    """Download the PDB chain -> (UniProt accession, CATH ID) mapping.

    Source:
    https://ftp.ebi.ac.uk/pub/databases/msd/sifts/flatfiles/tsv/pdb_chain_cath_uniprot.tsv.gz

    The file is saved into ``self.root_dir``; the download is skipped if
    the file is already present there.
    """
    # NOTE: the scraped diff contained both the pre- and post-format lines;
    # this is the merged, post-commit version.
    if not os.path.exists(
        self.root_dir / self.pdb_chain_cath_uniprot_filename
    ):
        log.info("Downloading Uniprot CATH map...")
        wget.download(
            self.pdb_chain_cath_uniprot_url, out=str(self.root_dir)
        )
        log.debug("Downloaded Uniprot CATH map")

def _download_cath_id_cath_code_map(self):
"""Download mapping from CATH IDs to CATH code from
http://download.cathdb.info/cath/releases/daily-release/newest/cath-b-newest-all.gz
def _download_pdb_chain_ec_number_map(self):
    """Download the PDB chain -> EC number mapping.

    Source:
    https://ftp.ebi.ac.uk/pub/databases/msd/sifts/flatfiles/tsv/pdb_chain_enzyme.tsv.gz

    The file is saved into ``self.root_dir``; the download is skipped if
    the file is already present there.
    """
    if not os.path.exists(
        self.root_dir / self.pdb_chain_ec_number_filename
    ):
        log.info("Downloading EC number map...")
        wget.download(self.pdb_chain_ec_number_url, out=str(self.root_dir))
        log.debug("Downloaded EC number map")
Expand Down Expand Up @@ -553,7 +561,7 @@ def _parse_entries(self) -> Dict[str, datetime]:
df.dropna(subset=["id"], inplace=True)

df.id = df.id.str.lower()
df.date = pd.to_datetime(df.date, format = "%m/%d/%y")
df.date = pd.to_datetime(df.date, format="%m/%d/%y")
return pd.Series(df["date"].values, index=df["id"]).to_dict()

def _parse_experiment_type(self) -> Dict[str, str]:
Expand Down Expand Up @@ -589,16 +597,18 @@ def _parse_uniprot_id(self) -> Dict[str, str]:
:rtype: Dict[str, str]
"""
uniprot_mapping = {}
with gzip.open(self.root_dir / self.pdb_chain_cath_uniprot_filename, 'rt') as f:
with gzip.open(
self.root_dir / self.pdb_chain_cath_uniprot_filename, "rt"
) as f:
for line in f:
try:
pdb, chain, uniprot_id, cath_id = line.strip().split('\t')
pdb, chain, uniprot_id, cath_id = line.strip().split("\t")
key = f"{pdb}_{chain}"
uniprot_mapping[key] = uniprot_id
except ValueError:
continue
return uniprot_mapping

def _parse_cath_id(self) -> Dict[str, str]:
"""Parse the CATH ID for all PDB chains.
Expand All @@ -607,17 +617,19 @@ def _parse_cath_id(self) -> Dict[str, str]:
:rtype: Dict[str, str]
"""
cath_mapping = {}
with gzip.open(self.root_dir / self.pdb_chain_cath_uniprot_filename, 'rt') as f:
next(f) # Skip header line
with gzip.open(
self.root_dir / self.pdb_chain_cath_uniprot_filename, "rt"
) as f:
next(f) # Skip header line
for line in f:
try:
pdb, chain, uniprot_id, cath_id = line.strip().split('\t')
pdb, chain, uniprot_id, cath_id = line.strip().split("\t")
key = f"{pdb}_{chain}"
cath_mapping[key] = cath_id
except ValueError:
continue
return cath_mapping

def _parse_cath_code(self) -> Dict[str, str]:
"""Parse the CATH code for all CATH IDs.
Expand All @@ -626,18 +638,22 @@ def _parse_cath_code(self) -> Dict[str, str]:
:rtype: Dict[str, str]
"""
cath_mapping = {}
with gzip.open(self.root_dir / self.cath_id_cath_code_filename, 'rt') as f:
with gzip.open(
self.root_dir / self.cath_id_cath_code_filename, "rt"
) as f:
print(f)
for line in f:
print(line)
try:
cath_id, cath_version, cath_code, cath_segment = line.strip().split()
cath_id, cath_version, cath_code, cath_segment = (
line.strip().split()
)
cath_mapping[cath_id] = cath_code
print(cath_id, cath_code)
except ValueError:
continue
return cath_mapping

def _parse_ec_number(self) -> Dict[str, str]:
"""Parse the CATH ID for all PDB chains and adds None when no EC number is present.
Expand All @@ -646,13 +662,17 @@ def _parse_ec_number(self) -> Dict[str, str]:
:rtype: Dict[str, str]
"""
ec_mapping = {}
with gzip.open(self.root_dir / self.pdb_chain_ec_number_filename, 'rt') as f:
next(f) # Skip header line
with gzip.open(
self.root_dir / self.pdb_chain_ec_number_filename, "rt"
) as f:
next(f) # Skip header line
for line in f:
try:
pdb, chain, uniprot_id, ec_number = line.strip().split('\t')
pdb, chain, uniprot_id, ec_number = line.strip().split(
"\t"
)
key = f"{pdb}_{chain}"
ec_number = None if ec_number == '?' else ec_number
ec_number = None if ec_number == "?" else ec_number
ec_mapping[key] = ec_number
except ValueError:
continue
def has_uniprot_id(
    self,
    splits: Optional[List[str]] = None,
    update: bool = False,
) -> pd.DataFrame:
    """Select only entries that have a UniProt accession.

    Signature reconstructed from the sibling ``has_cath_code`` /
    ``has_ec_number`` methods — TODO confirm against the full file.

    :param splits: Names of splits to restrict selection to,
        defaults to ``None`` (all splits).
    :type splits: Optional[List[str]]
    :param update: Whether to overwrite ``self.df`` with the filtered
        frame, defaults to ``False``.
    :type update: bool
    :return: DataFrame of entries with a non-null ``uniprot_id``.
    :rtype: pd.DataFrame
    """
    splits_df = self.get_splits(splits)
    df = splits_df.dropna(subset=["uniprot_id"])

    if update:
        self.df = df
    return df


def has_cath_code(
    self,
    splits: Optional[List[str]] = None,
    update: bool = False,
) -> pd.DataFrame:
    """Select only entries that have a CATH code.

    The ``update`` parameter was folded out of the visible diff and is
    reconstructed from the visible ``if update:`` body — TODO confirm.

    :param splits: Names of splits to restrict selection to,
        defaults to ``None`` (all splits).
    :type splits: Optional[List[str]]
    :param update: Whether to overwrite ``self.df`` with the filtered
        frame, defaults to ``False``.
    :type update: bool
    :return: DataFrame of entries with a non-null ``cath_code``.
    :rtype: pd.DataFrame
    """
    splits_df = self.get_splits(splits)
    df = splits_df.dropna(subset=["cath_code"])

    if update:
        self.df = df
    return df
def has_ec_number(
    self,
    splits: Optional[List[str]] = None,
    update: bool = False,
) -> pd.DataFrame:
    """Select only entries that have an EC number.

    Signature reconstructed from the sibling ``has_cath_code`` method —
    TODO confirm against the full file.

    :param splits: Names of splits to restrict selection to,
        defaults to ``None`` (all splits).
    :type splits: Optional[List[str]]
    :param update: Whether to overwrite ``self.df`` with the filtered
        frame, defaults to ``False``.
    :type update: bool
    :return: DataFrame of entries with a non-null ``ec_number``.
    :rtype: pd.DataFrame
    """
    splits_df = self.get_splits(splits)
    df = splits_df.dropna(subset=["ec_number"])

    if update:
        self.df = df
    return df
Expand Down
4 changes: 3 additions & 1 deletion graphein/protein/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,9 @@ def download_pdb(
BASE_URL = "https://models.rcsb.org/"
extension = ".bcif.gz"
else:
raise ValueError(f"Invalid format: {format}. Must be 'pdb', 'mmtf', 'mmcif' or 'bcif'.")
raise ValueError(
f"Invalid format: {format}. Must be 'pdb', 'mmtf', 'mmcif' or 'bcif'."
)

# Make output directory if it doesn't exist or set it to tempdir if None
if out_dir is not None:
Expand Down

0 comments on commit e577c82

Please sign in to comment.