diff --git a/proteinshake/__init__.py b/proteinshake/__init__.py
index 6a49b246..d180631a 100644
--- a/proteinshake/__init__.py
+++ b/proteinshake/__init__.py
@@ -1 +1 @@
-__version__ = '0.3.9'
+__version__ = '0.3.11'
\ No newline at end of file
diff --git a/proteinshake/datasets/alphafold.py b/proteinshake/datasets/alphafold.py
index ecf77117..83d6bf35 100644
--- a/proteinshake/datasets/alphafold.py
+++ b/proteinshake/datasets/alphafold.py
@@ -1,40 +1,10 @@
-import os
-import re
-import tarfile
 import glob

 from proteinshake.datasets import Dataset
-from proteinshake.utils import download_url, extract_tar, load, save, unzip_file, progressbar
-
-# A map of organism names to their download file names. See https://alphafold.ebi.ac.uk/download
-AF_DATASET_NAMES = {
-    'arabidopsis_thaliana': 'UP000006548_3702_ARATH',
-    'caenorhabditis_elegans': 'UP000001940_6239_CAEEL',
-    'candida_albicans': 'UP000000559_237561_CANAL',
-    'danio_rerio': 'UP000000437_7955_DANRE',
-    'dictyostelium_discoideum': 'UP000002195_44689_DICDI',
-    'drosophila_melanogaster': 'UP000000803_7227_DROME',
-    'escherichia_coli': 'UP000000625_83333_ECOLI',
-    'glycine_max': 'UP000008827_3847_SOYBN',
-    'homo_sapiens': 'UP000005640_9606_HUMAN',
-    'methanocaldococcus_jannaschii': 'UP000000805_243232_METJA',
-    'mus_musculus': 'UP000000589_10090_MOUSE',
-    'oryza_sativa': 'UP000059680_39947_ORYSJ',
-    'rattus_norvegicus': 'UP000002494_10116_RAT',
-    'saccharomyces_cerevisiae': 'UP000002311_559292_YEAST',
-    'schizosaccharomyces_pombe': 'UP000002485_284812_SCHPO',
-    'zea_mays': 'UP000007305_4577_MAIZE',
-    'swissprot': 'swissprot_pdb',
-}
-
-description = 'Predicted structures'
+from proteinshake.utils import download_url, extract_tar, unzip_file, progressbar

 class AlphaFoldDataset(Dataset):
-    """ 3D structures predicted by AlphaFold.
-    Requires the `organism` name to be specified.
-    See https://alphafold.ebi.ac.uk/download for a full list of available organsims.
-    Pass the full latin organism name separated by a space or underscore.
-    `organism` can also be 'swissprot', in which case the full SwissProt structure predictions will be downloaded (ca. 500.000).
+    """ SwissProt 3D structures predicted by AlphaFold.

     .. admonition:: Please cite
@@ -46,74 +16,22 @@ class AlphaFoldDataset(Dataset):
     Raw data was obtained and modified from `AlphaFoldDB <https://alphafold.ebi.ac.uk/>`_, originally licensed under `CC-BY-4.0 <https://creativecommons.org/licenses/by/4.0/>`_.
-
-    .. list-table :: Data Properties
-       :widths: 50 50
-       :header-rows: 1
-
-       * - organism
-         - # proteins
-       * - ``'arabidopsis_thaliana'``
-         - 27,386
-       * - ``'caenorhabditis_elegans'``
-         - 19,613
-       * - ``'candida_albicans'``
-         - 5,951
-       * - ``'danio_rerio'``
-         - 24,430
-       * - ``'dictyostelium_discoideum'``
-         - 12,485
-       * - ``'drosophila_melanogaster'``
-         - 13,318
-       * - ``'escherichia_coli'``
-         - 4,362
-       * - ``'glycine_max'``
-         - 55,696
-       * - ``'homo_sapiens'``
-         - 23,172
-       * - ``'methanocaldococcus_jannaschii'``
-         - 1,773
-       * - ``'mus_musculus'``
-         - 21,398
-       * - ``'oryza_sativa'``
-         - 43,631
-       * - ``'rattus_norvegicus'``
-         - 21,069
-       * - ``'saccharomyces_cerevisiae'``
-         - 6,016
-       * - ``'schizosaccharomyces_pombe'``
-         - 5,104
-       * - ``'zea_mays'``
-         - 39,203
-       * - ``'swissprot'``
-         - 541,143
-
     Parameters
     ----------
-    organism: str
-        The organism name or 'swissprot'.
+    version: int, default 4
+        The AlphaFoldDB version.
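+
+    A minimal usage sketch with the new signature (default ``root`` assumed; ``proteins()`` is provided by the base class):
+
+    .. code-block:: python
+
+        >>> from proteinshake.datasets import AlphaFoldDataset
+        >>> dataset = AlphaFoldDataset(root='data', version=4)
+        >>> proteins = dataset.proteins()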
""" - exlude_args_from_signature = ['organism'] - - def __init__(self, organism='swissprot', version='v4', only_single_chain=True, **kwargs): - self.organism = organism.lower().replace(' ','_') - self.base_url = 'https://ftp.ebi.ac.uk/pub/databases/alphafold/latest/' - self.version = version + def __init__(self, version=4, only_single_chain=True, **kwargs): + self.base_url = f'https://ftp.ebi.ac.uk/pub/databases/alphafold/v{version}' + #self.file_name = f'swissprot_pdb_v{version}' + self.file_name = f'UP000000805_243232_METJA_v{version}' super().__init__(only_single_chain=only_single_chain, **kwargs) - @property - def name(self): - return f'{self.__class__.__name__}_{self.organism}' - - def get_raw_files(self): - return glob.glob(f'{self.root}/raw/*/*.pdb')[:self.limit] - def get_id_from_filename(self, filename): - return re.search('(?<=AF-)(.*)(?=-F.+-model)', filename).group() + return filename.split('-')[1] def download(self): - os.makedirs(f'{self.root}/raw/{self.organism}', exist_ok=True) - download_url(self.base_url+AF_DATASET_NAMES[self.organism]+f'_{self.version}.tar', f'{self.root}/raw/{self.organism}', verbosity=self.verbosity) - extract_tar(f'{self.root}/raw/{self.organism}/{AF_DATASET_NAMES[self.organism]}_{self.version}.tar', f'{self.root}/raw/{self.organism}', verbosity=self.verbosity) - [unzip_file(f) for f in progressbar(glob.glob(f'{self.root}/raw/*/*.pdb.gz')[:self.limit], desc='Unzipping', verbosity=self.verbosity)] + download_url(f'{self.base_url}/{self.file_name}.tar', f'{self.root}/raw', verbosity=self.verbosity) + extract_tar(f'{self.root}/raw/{self.file_name}.tar', f'{self.root}/raw/files', verbosity=self.verbosity) + for path in progressbar(glob.glob(f'{self.root}/raw/*/*.pdb.gz')[:self.limit], desc='Unzipping', verbosity=self.verbosity): unzip_file(path) diff --git a/proteinshake/datasets/dataset.py b/proteinshake/datasets/dataset.py index 9af77047..e3f541c0 100644 --- a/proteinshake/datasets/dataset.py +++ b/proteinshake/datasets/dataset.py @@ -2,22 +2,17 @@ """ Base dataset class for protein 3D structures. """ -import os, gzip, inspect, time, itertools, tarfile, io, requests -import copy -from collections import defaultdict, Counter +import os, inspect, requests, glob from functools import cached_property -import multiprocessing as mp -import pandas as pd import numpy as np import freesasa from biopandas.pdb import PandasPdb from joblib import Parallel, delayed -from sklearn.neighbors import kneighbors_graph, radius_neighbors_graph from fastavro import reader as avro_reader from proteinshake.transforms import IdentityTransform, RandomRotateTransform, CenterTransform -from proteinshake.utils import download_url, save, load, unzip_file, write_avro, Generator, progressbar, warning, error +from proteinshake.utils import download_url, unzip_file, write_avro, Generator, progressbar, warning, error AA_THREE_TO_ONE = {'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L', 'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R', 'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y'} AA_ONE_TO_THREE = {v:k for k, v in AA_THREE_TO_ONE.items()} @@ -241,7 +236,7 @@ def get_raw_files(self): list The list of raw PDB files used in this dataset. """ - raise NotImplementedError + return glob.glob(f'{self.root}/raw/files/*.pdb')[:self.limit] def get_id_from_filename(self, filename): """ Implement me in a subclass! @@ -258,7 +253,7 @@ def get_id_from_filename(self, filename): str A PDB identifier or other ID. 
""" - raise NotImplementedError + return filename.rstrip('.pdb') def download(self): """ Implement me in a subclass! @@ -306,6 +301,10 @@ def download_precomputed(self, resolution='residue'): download_url(f'{self.repository_url}/{self.name}.{resolution}.avro.gz', f'{self.root}', verbosity=self.verbosity) if self.verbosity > 0: print('Unzipping...') unzip_file(f'{self.root}/{self.name}.{resolution}.avro.gz') + for filename in self.additional_files: + if not os.path.exists(f'{self.root}/{filename}'): + download_url(f'{self.repository_url}/{filename}.gz', f'{self.root}', verbosity=0) + unzip_file(f'{self.root}/{filename}.gz') def parse(self): """ Parses all PDB files returned from :meth:`proteinshake.datasets.Dataset.get_raw_files()` and saves them to disk. Can run in parallel. @@ -320,9 +319,6 @@ def parse(self): if self.verbosity > 0: print(f'Filtered {before-len(proteins)} proteins.') - # if self.center: - # if True: - # if self.random_rotate: if self.name == 'ProteinProteinInteractionDataset': print("Centering") proteins = [CenterTransform()(p) for p in proteins] @@ -334,6 +330,7 @@ def parse(self): atom_proteins = [{'protein':p['protein'], 'atom':p['atom']} for p in proteins] write_avro(residue_proteins, f'{self.root}/{self.name}.residue.avro') write_avro(atom_proteins, f'{self.root}/{self.name}.atom.avro') + return proteins def parse_pdb(self, path): """ Parses a single PDB file first into a DataFrame, then into a protein object (a dictionary). Also validates the PDB file and provides the hook for `add_protein_attributes`. Returns `None` if the protein was found to be invalid. @@ -489,21 +486,6 @@ def validate(self, df): if not sum(df['residue_type'].map(lambda x: not x is None)) > 0: return False return True - - def describe(self): - """ Produces dataset statistics. - - Returns - ------- - dict - A dictionary of summary statistics of this dataset. - """ - n_resi = len(self.data.residue_index) / len(self.data.ID) - data = {'name': type(self).__name__, - 'num_proteins': len(self), - 'avg size (# residues)': n_resi - } - return data def to_graph(self, resolution='residue', transform=IdentityTransform(), **kwargs): """ Converts the raw dataset to a graph dataset. See :meth:`proteinshake.representations.GraphDataset` for arguments. diff --git a/proteinshake/datasets/enzyme_commission.py b/proteinshake/datasets/enzyme_commission.py index 26d6caba..b203bae6 100644 --- a/proteinshake/datasets/enzyme_commission.py +++ b/proteinshake/datasets/enzyme_commission.py @@ -13,15 +13,6 @@ class EnzymeCommissionDataset(RCSBDataset): Raw data was obtained and modified from `RCSB Protein Data Bank `_, originally licensed under `CC0 1.0 `_. - - .. list-table:: Dataset stats - :widths: 100 - :header-rows: 1 - - * - # proteins - * - 15603 - - .. list-table:: Annotations :widths: 25 35 45 :header-rows: 1 @@ -35,8 +26,6 @@ class EnzymeCommissionDataset(RCSBDataset): """ - description = 'Enzymes' - def __init__(self, query=[['rcsb_polymer_entity.rcsb_ec_lineage.name','exists']], **kwargs): """ diff --git a/proteinshake/datasets/gene_ontology.py b/proteinshake/datasets/gene_ontology.py index ad39e0dd..2576f8e5 100644 --- a/proteinshake/datasets/gene_ontology.py +++ b/proteinshake/datasets/gene_ontology.py @@ -22,16 +22,6 @@ class GeneOntologyDataset(RCSBDataset): Raw data was obtained and modified from `RCSB Protein Data Bank `_, originally licensed under `CC0 1.0 `_. - - - .. list-table:: Dataset stats - :widths: 100 - :header-rows: 1 - - * - # proteins - * - 32633 - - .. 
        :widths: 25 25 50
        :header-rows: 1
@@ -52,8 +42,6 @@

     """

-    description = 'Gene Ontology'
-
     additional_files = ['GeneOntologyDataset.godag.obo']

     def __init__(self, query=[['rcsb_polymer_entity_annotation.type','exact_match','GO']], **kwargs):
@@ -85,10 +73,3 @@ def add_protein_attributes(self, protein):
         protein['protein']['cellular_component'] = [term for term in go_terms if godag[term].namespace == 'cellular_component']
         protein['protein']['biological_process'] = [term for term in go_terms if godag[term].namespace == 'biological_process']
         return protein
-
-    def describe(self):
-        desc = super().describe()
-        desc['property'] = "Gene Ontology (GO)"
-        desc['values'] = f"{len(set((p['GO'][0] for p in self.proteins)))} (root)"
-        desc['type'] = 'Categorical, Hierarchical'
-        return desc
diff --git a/proteinshake/datasets/protein_family.py b/proteinshake/datasets/protein_family.py
index 1e084e49..5f65c790 100644
--- a/proteinshake/datasets/protein_family.py
+++ b/proteinshake/datasets/protein_family.py
@@ -14,16 +14,6 @@ class ProteinFamilyDataset(RCSBDataset):

     Raw data was obtained and modified from `RCSB Protein Data Bank <https://www.rcsb.org/>`_, originally licensed under `CC0 1.0 <https://creativecommons.org/publicdomain/zero/1.0/>`_.
-
-
-    .. list-table:: Dataset stats
-       :widths: 100
-       :header-rows: 1
-
-       * - # proteins
-       * - 31109
-
-
     .. list-table:: Annotations
        :widths: 25 35 45
        :header-rows: 1
@@ -37,8 +27,6 @@

     """

-    description = 'Protein Families'
-
     def __init__(self, pfam_version='34.0', query=[['rcsb_polymer_entity_annotation.type','exact_match','Pfam']], **kwargs):
         self.pfam_version = pfam_version
         super().__init__(query=query, **kwargs)
@@ -53,10 +41,3 @@ def add_protein_attributes(self, protein):
                 pfams.append(a['annotation_id'])
         protein['protein']['Pfam'] = pfams
         return protein
-
-    def describe(self):
-        desc = super().describe()
-        desc['property'] = "Protein Family (Pfam)"
-        desc['values'] = f"{len(set((p['Pfam'][0] for p in self.proteins)))} (root)"
-        desc['type'] = 'Categorical, Hierarchical'
-        return desc
diff --git a/proteinshake/datasets/protein_ligand_decoys.py b/proteinshake/datasets/protein_ligand_decoys.py
index 56fa1222..40d0265b 100644
--- a/proteinshake/datasets/protein_ligand_decoys.py
+++ b/proteinshake/datasets/protein_ligand_decoys.py
@@ -33,17 +33,6 @@ class ProteinLigandDecoysDataset(Dataset):

     Raw data was obtained and modified from `DUDE-Z <https://dudez.docking.org/>`_.
-
-
-
-    .. list-table:: Dataset stats
-       :widths: 100
-       :header-rows: 1
-
-       * - # proteins
-       * - 38
-
-
     .. list-table:: Annotations
        :widths: 25 35 45
        :header-rows: 1
@@ -70,18 +59,10 @@

     """

-    description = 'Proteins with ligands and decoys'
-
     @patch('proteinshake.datasets.dataset.AA_THREE_TO_ONE', EXTENDED_AA_THREE_TO_ONE)
     def pdb2df(self, path):
         return super().pdb2df(path)

-    def get_raw_files(self):
-        return glob.glob(f'{self.root}/raw/files/*.pdb')[:self.limit]
-
-    def get_id_from_filename(self, filename):
-        return filename.split(".")[0]
-
     def download(self):
         targets = ['AA2AR', 'ABL1', 'ACES', 'ADA', 'ADRB2', 'AMPC', 'ANDR', 'CSF1R', 'CXCR4', 'DEF', 'DRD4', 'EGFR', 'FA7', 'FA10', 'FABP4', 'FGFR1', 'FKB1A', 'GLCM', 'HDAC8', 'HIVPR', 'HMDH', 'HS90A', 'ITAL', 'KITH', 'KIT', 'LCK', 'MAPK2', 'MK01', 'MT1', 'NRAM', 'PARP1', 'PLK1', 'PPARA', 'PTN1', 'PUR2', 'RENI', 'ROCK1', 'SRC', 'THRB', 'TRY1', 'TRYB1', 'UROK', 'XIAP']
diff --git a/proteinshake/datasets/protein_ligand_interface.py b/proteinshake/datasets/protein_ligand_interface.py
index 40a4b586..0ef49e1d 100644
--- a/proteinshake/datasets/protein_ligand_interface.py
+++ b/proteinshake/datasets/protein_ligand_interface.py
@@ -37,13 +37,6 @@ class ProteinLigandInterfaceDataset(Dataset):
     version: str
         PDBBind version to use.

-    .. list-table:: Dataset stats
-       :widths: 100
-       :header-rows: 1
-
-       * - # proteins
-       * - 4642
-
     .. list-table:: Annotations
        :widths: 20 55 25
        :header-rows: 1
@@ -80,8 +73,6 @@
       - :code:`'[..,0, 0, 1, 0, 1, 0, 0, 0,..]`
     """

-    description = ''
-
     def __init__(self, version='2020', **kwargs):
         self.version = version
         super().__init__(**kwargs)
@@ -203,10 +194,3 @@ def add_protein_attributes(self, protein):
         protein['protein']['fp_morgan_r2'] = fp_morgan

         return protein
-
-    def describe(self):
-        desc = super().describe()
-        desc['property'] = "Small Mol. Binding Site (residue-level)"
-        desc['values'] = 2
-        desc['type'] = 'Binary'
-        return desc
diff --git a/proteinshake/datasets/protein_protein_interface.py b/proteinshake/datasets/protein_protein_interface.py
index 5abcffca..edd1c953 100644
--- a/proteinshake/datasets/protein_protein_interface.py
+++ b/proteinshake/datasets/protein_protein_interface.py
@@ -1,14 +1,10 @@
 # -*- coding: utf-8 -*-
-import os
-import glob
-from joblib import Parallel, delayed
-from tqdm import tqdm
-from collections import defaultdict, Counter
-
+import os, glob
 import pandas as pd
+import numpy as np
+from joblib import Parallel, delayed
 from biopandas.pdb import PandasPdb
 from sklearn.neighbors import KDTree
-import numpy as np

 from proteinshake.datasets import Dataset
 from proteinshake.utils import extract_tar, download_url, progressbar, load, save, unzip_file
@@ -40,14 +36,6 @@ class ProteinProteinInterfaceDataset(Dataset):
        Distance in angstroms within which a pair of residues is considered to belong to the interface.

-    .. list-table:: Dataset stats
-       :widths: 100
-       :header-rows: 1
-
-       * - # proteins
-       * - 2839
-
     .. list-table:: Annotations
        :widths: 25 55 20
        :header-rows: 1
@@ -72,120 +60,51 @@ def __init__(self, cutoff=6, version='2020', **kwargs):
         self.version = version
         self.cutoff = cutoff
         super().__init__(**kwargs)
-
-        def download_file(filename):
-            if not os.path.exists(f'{self.root}/{filename}'):
-                if not self.use_precomputed:
-                    self.parse_interfaces()
-                else:
-                    download_url(f'{self.repository_url}/{filename}.gz', f'{self.root}', verbosity=0)
-                    unzip_file(f'{self.root}/{filename}.gz')
-            return load(f'{self.root}/{filename}')
-
-        self._interfaces = download_file(f'{self.name}.interfaces.json')
+        self.interfaces = load(f'{self.root}/{self.name}.interfaces.json')

     def get_raw_files(self):
-        return glob.glob(f'{self.root}/raw/files/chains/*.pdb')
-
+        return sorted(glob.glob(f'{self.root}/raw/files/chains/*.pdb'))
+
     def get_id_from_filename(self, filename):
-        return filename[:4]
-
-    def get_contacts(self, protein, cutoff=6):
-        """Obtain interfacing residues within a single structure of polymers. Uses
-        KDTree data structure for vector search.
-
-        Parameters
-        ----------
-        protein: dict
-            Parsed protein dictionary.
+        return os.path.splitext(filename)[0]

-        Returns
-        --------
-        `dict`: 2-level dictionary mapping a pair of chains to the list of interfacing residue positions (e.g `interfaces['A']['B'] = {(1, 3), (2, 4)}`
-        says that residues 1 and 2 of chain A are in contact with 3 and 4 in chain B. The positions are _indices_ in the residue/atom list.
+    def get_contacts(self, path):
+        """ Obtain interfacing residues within a single structure of polymers.
+        Uses KDTree data structure for vector search.
         """
-
-        def get_coords(p):
-            return np.array([p['residue']['x'],
-                             p['residue']['y'],
-                             p['residue']['z']]).T
-
-
-        def defaultdict_to_dict(default_dict):
-            if isinstance(default_dict, defaultdict):
-                default_dict = {k: defaultdict_to_dict(v) for k, v in default_dict.items()}
-            elif isinstance(default_dict, dict):
-                default_dict = {k: defaultdict_to_dict(v) for k, v in default_dict.items()}
-            return default_dict
-
-        interfaces = defaultdict(lambda: defaultdict(list))
-        coords = get_coords(protein)
-        kdt = KDTree(coords, leaf_size=1)
-
-        seq_inds = [0]
-        current_chain = protein['residue']['chain_id'][0]
-
-        # get a vector of sequence positions
-        ind = 0
-        for chain_id in protein['residue']['chain_id'][1:]:
-            if chain_id != current_chain:
-                ind = -1
-                current_chain = chain_id
-            ind += 1
-            seq_inds.append(ind)
-
-        query = kdt.query_radius(coords, cutoff)
-        interface = []
-        for i,result in enumerate(query):
-            this_chain = protein['residue']['chain_id'][i]
-            this_pos = seq_inds[i]
-            for r in result:
-                that_chain = protein['residue']['chain_id'][r]
-                that_pos = seq_inds[r]
-                if this_chain != that_chain:
-                    # ugly , I know
-                    interfaces[this_chain][that_chain].append((this_pos, that_pos))
-                    interfaces[that_chain][this_chain].append((that_pos, this_pos))
-
-        return defaultdict_to_dict(interfaces)
-
-    def get_complexes_files(self):
-        return glob.glob(f"{self.root}/raw/files/PP/*.pdb")
-
-    def parse_interfaces(self):
-        """ Get all interfaces and store in dict"""
-        protein_dfs = Parallel(n_jobs=self.n_jobs)(delayed(self.parse_pdb)(path) for path in progressbar(self.get_complexes_files(), desc='Loading complexes'))
-        print("Computing interfaces")
-        interfaces = {p['protein']['ID']: self.get_contacts(p, cutoff=self.cutoff) for p in tqdm(protein_dfs, total=len(protein_dfs)) if not p is None}
-        save(interfaces, f'{self.root}/{self.name}.interfaces.json')
+        df = PandasPdb().read_pdb(path).df['ATOM']
+
+        # split chains
+        pdbid = os.path.splitext(os.path.basename(path))[0]
+        for chain, chain_df in df.groupby('chain_id'):
+            new_df = PandasPdb()
+            new_df._df = {'ATOM': chain_df}
+            new_df.to_pdb(f'{self.root}/raw/files/chains/{pdbid}_{chain}.pdb')
+
+        # compute contacts
+        df = df[df['atom_name'] == 'CA']
+        coords = df[['x_coord','y_coord','z_coord']].to_numpy()
+        chain_ids = np.array(df['chain_id'])
+        chain_index = np.hstack([np.arange(np.sum(chain_ids==chain)) for chain in list(dict.fromkeys(chain_ids))]) # get residue position relative to chain
+        contacts = KDTree(coords, leaf_size=1).query_radius(coords, self.cutoff) # for each residue, get all neighbouring residues within the cutoff radius
+        query_chains = np.repeat(chain_ids, [len(c) for c in contacts]) # construct long index of query chain ids
+        query_chain_index = np.repeat(chain_index, [len(c) for c in contacts]) # construct long index of query chain index
+        result_chains = np.hstack([chain_ids[c] for c in contacts]) # construct long index of result chain ids
+        result_chain_index = np.hstack([chain_index[c] for c in contacts]) # construct long index of result chain index
+        interfaces = pd.DataFrame({'query': query_chains, 'result': result_chains, 'index': tuple(zip(query_chain_index.tolist(),result_chain_index.tolist()))})
+        interfaces = interfaces[query_chains != result_chains] # remove contacts within the same chain
+        if len(interfaces) == 0: return pdbid, None
+        interfaces = dict(interfaces.groupby('query').apply(lambda x: dict(x.groupby('result')['index'].apply(list)))) # format to dict of dicts: {chain_A: {chain_B: [(1, 3), (2, 4), ...]}}
+        return pdbid, interfaces

     def download(self):
         download_url(f'https://pdbbind.oss-cn-hangzhou.aliyuncs.com/download/PDBbind_v{self.version}_PP.tar.gz', f'{self.root}/raw')
         extract_tar(f'{self.root}/raw/PDBbind_v{self.version}_PP.tar.gz', f'{self.root}/raw/files', extract_members=True)
         os.makedirs(f'{self.root}/raw/files/chains', exist_ok=True)
-        print("Chain splitting")
-        self.chain_split(f'{self.root}/raw/files/chains')
-
-    def chain_split(self, dest):
-        """ Split all the raw PDBs in path to individual ones by chain.
-        Replaces original PDB file in place.
- """ - for p in tqdm(self.get_complexes_files()): - ppdb = PandasPdb().read_pdb(p).df['ATOM'] - pdbid = os.path.basename(p).split(".")[0] - for chain, chain_df in ppdb.groupby('chain_id'): - new_df = PandasPdb() - new_df._df = {'ATOM': chain_df} - new_df.to_pdb(os.path.join(dest, f"{pdbid}_{chain}.pdb")) - # assert not chain_df.isnull().values.any(), f"NULL in {p}" - pass - - def get_id_from_filename(self, filename): - return filename.split(".")[0] - - def describe(self): - desc = super().describe() - desc['property'] = "Protein-protein interface (residue-level)" - desc['values'] = 2 - desc['type'] = "Binary" - return desc + complexed_files = sorted(glob.glob(f'{self.root}/raw/files/PP/*.pdb')) + contacts = Parallel(n_jobs=self.n_jobs)(delayed(self.get_contacts)(path) for path in progressbar(complexed_files, desc='Computing interfaces', verbosity=self.verbosity)) + interfaces = {pdbid: interfaces for pdbid, interfaces in contacts if not interfaces is None} + save(interfaces, f'{self.root}/{self.name}.interfaces.json') + + + diff --git a/proteinshake/datasets/rcsb.py b/proteinshake/datasets/rcsb.py index a7abb91f..a1073dfa 100644 --- a/proteinshake/datasets/rcsb.py +++ b/proteinshake/datasets/rcsb.py @@ -39,12 +39,6 @@ def __init__(self, query=[], from_list=None, only_single_chain=True, max_request self.max_requests = max_requests super().__init__(only_single_chain=only_single_chain, **kwargs) - def get_raw_files(self): - return glob.glob(f'{self.root}/raw/files/*.pdb') - - def get_id_from_filename(self, filename): - return filename[:4] - def download(self): """ Fetches PDBs from RCSB with an API call. The default query selects protein-only structures with a single chain. diff --git a/proteinshake/datasets/scop.py b/proteinshake/datasets/scop.py index d528ed89..5e818807 100644 --- a/proteinshake/datasets/scop.py +++ b/proteinshake/datasets/scop.py @@ -19,15 +19,6 @@ class SCOPDataset(RCSBDataset): Raw data was obtained and modified from `RCSB Protein Data Bank `_, originally licensed under `CC0 1.0 `_. - - .. list-table:: Dataset stats - :widths: 100 - :header-rows: 1 - - * - # proteins - * - 10066 - - .. list-table:: Annotations :widths: 25 45 35 :header-rows: 1 @@ -51,7 +42,7 @@ class SCOPDataset(RCSBDataset): """ - def _parse_scop(self, path): + def parse_scop(self, path): names = ['FA-DOMID', 'FA-PDBID', 'FA-PDBREG', 'FA-UNIID', 'FA-UNIREG', 'SF-DOMID', 'SF-PDBID', 'SF-PDBREG', 'SF-UNIID', 'SF-UNIREG', 'SCOPCLA'] df = pd.read_csv(path, sep=' ', comment='#', names=names, dtype=str) return {k: dict([cla.split("=") for cla in v.split(",")]) for k,v in zip(df['FA-PDBID'], df['SCOPCLA'])} @@ -59,7 +50,7 @@ def _parse_scop(self, path): def download(self): # get the annots download_url(f'http://scop.mrc-lmb.cam.ac.uk/files/scop-cla-latest.txt', f'{self.root}/raw/scop.txt') - self.scop = self._parse_scop(f'{self.root}/raw/scop.txt') + self.scop = self.parse_scop(f'{self.root}/raw/scop.txt') ids = list(self.scop['FA-PDBID'].unique()) # get the proteins diff --git a/proteinshake/datasets/tm_align.py b/proteinshake/datasets/tm_align.py index 139ed772..f3a038fa 100644 --- a/proteinshake/datasets/tm_align.py +++ b/proteinshake/datasets/tm_align.py @@ -2,8 +2,6 @@ ''' TMalign needs to be in your $PATH. 
 Follow the instructions at https://zhanggroup.org/TM-align/readme.c++.txt
 '''
-import glob
-import requests
 import os
 import itertools
 import re
@@ -12,20 +10,10 @@
 import shutil
 import numpy as np
 from biopandas.pdb import PandasPdb
-from collections import defaultdict
 from joblib import Parallel, delayed
-from functools import cached_property

 from proteinshake.datasets import RCSBDataset
-from proteinshake.utils import (extract_tar,
-                                download_url,
-                                save,
-                                load,
-                                unzip_file,
-                                global_distance_test,
-                                local_distance_difference_test,
-                                progressbar
-                                )
+from proteinshake.utils import download_url, load, unzip_file, global_distance_test, local_distance_difference_test, progressbar


 class TMAlignDataset(RCSBDataset):
@@ -42,15 +30,6 @@ class TMAlignDataset(RCSBDataset):

     Raw data was obtained and modified from `RCSB Protein Data Bank <https://www.rcsb.org/>`_, originally licensed under `CC0 1.0 <https://creativecommons.org/publicdomain/zero/1.0/>`_.
-
-    .. list-table:: Dataset stats
-       :widths: 100
-       :header-rows: 1
-
-       * - # proteins
-       * - 994
-
     .. code-block:: python

         from proteinshake.datasets import TMAlignDataset
diff --git a/proteinshake/tasks/__init__.py b/proteinshake/tasks/__init__.py
index 1cd088b7..9a983702 100644
--- a/proteinshake/tasks/__init__.py
+++ b/proteinshake/tasks/__init__.py
@@ -1,6 +1,6 @@
 from .task import Task
 from .enzyme_class import EnzymeClassTask
-from .pfam_task import ProteinFamilyTask
+from .protein_family import ProteinFamilyTask
 from .ligand_affinity import LigandAffinityTask
 from .binding_site_detection import BindingSiteDetectionTask
 from .structure_similarity import StructureSimilarityTask
diff --git a/proteinshake/tasks/binding_site_detection.py b/proteinshake/tasks/binding_site_detection.py
index b4cb14c9..84bd1133 100644
--- a/proteinshake/tasks/binding_site_detection.py
+++ b/proteinshake/tasks/binding_site_detection.py
@@ -1,4 +1,5 @@
 from sklearn import metrics
+import numpy as np

 from proteinshake.datasets import ProteinLigandInterfaceDataset
 from proteinshake.tasks import Task
@@ -11,44 +12,29 @@
     DatasetClass = ProteinLigandInterfaceDataset

     type = 'Binary Classification'
-    input = 'Residue'
+    input = 'Protein'
     output = 'Small Molecule Binding Residues'
+    default_metric = 'MCC'
+    level = 'Residue'

-    @property
-    def task_in(self):
-        return ('residue')
-
-    @property
-    def task_type(self):
-        return ('residue', 'binary')
-
-    @property
-    def task_out(self):
-        return ('binary')
+    def target(self, protein_dict):
+        binding_sites = protein_dict['residue']['binding_site']
+        return np.arange(len(binding_sites))[binding_sites].tolist(), len(binding_sites)

-    @property
-    def target_dim(self):
-        return (1)
-
-    def dummy_output(self):
-        import random
-        return [random.randint(0, 1) for p in self.test_targets]
-
-    def target(self, protein):
-        return protein['residue']['binding_site']
-
-    def compute_targets(self):
-        # compute targets (e.g. for scaling)
-        self.train_targets = [p for i in self.train_index for p in self.target(self.proteins[i])]
-        self.val_targets = [p for i in self.val_index for p in self.target(self.proteins[i])]
-        self.test_targets = [p for i in self.test_index for p in self.target(self.proteins[i])]
-
-    @property
-    def default_metric(self):
-        return 'mcc'
+    def target_transform(self, target):
+        residue_indices, size = target
+        transformed_target = np.zeros(size, dtype=bool)
+        transformed_target[residue_indices] = True
+        return transformed_target

     def evaluate(self, y_true, y_pred):
         return {
-            'accuracy': metrics.accuracy_score(y_true, y_pred),
-            'mcc': metrics.matthews_corrcoef(y_true, y_pred),
+            'Accuracy': metrics.accuracy_score(np.hstack(y_true), np.hstack(y_pred)),
+            'MCC': metrics.matthews_corrcoef(np.hstack(y_true), np.hstack(y_pred)),
         }
+
+    @property
+    def y_dummy(self):
+        return np.array([np.random.choice([0,1], size=len(x)).astype(bool) for x in self.y_test], dtype=object)
diff --git a/proteinshake/tasks/enzyme_class.py b/proteinshake/tasks/enzyme_class.py
index 4294613f..b90a5a44 100644
--- a/proteinshake/tasks/enzyme_class.py
+++ b/proteinshake/tasks/enzyme_class.py
@@ -1,13 +1,11 @@
 from sklearn import metrics
-from functools import cached_property
 import numpy as np

 from proteinshake.datasets import EnzymeCommissionDataset
 from proteinshake.tasks import Task

 class EnzymeClassTask(Task):
-    """ Predict the enzyme commission classification of a protein structure. This is a protein-level multi-class prediction.
-
+    """ Predict the enzyme commission classification of a protein structure.
     """

     DatasetClass = EnzymeCommissionDataset
@@ -15,59 +13,23 @@
     type = 'Multiclass Classification'
     input = 'Protein'
     output = 'Enzyme Commission Level 1'
-
-    def __init__(self, ec_level=0, *args, **kwargs):
-        self.ec_level = ec_level
-        super().__init__(*args, **kwargs)
-
-    @cached_property
-    def token_map(self):
-        labels = {p['protein']['EC'].split(".")[self.ec_level] for p in self.proteins}
-        return {label: i for i, label in enumerate(sorted(list(labels)))}
-
-    def dummy_output(self):
-        import random
-        tokens = list(self.token_map.values())
-        return [random.choice(tokens) for _ in range(len(self.test_targets))]
-
-    @property
-    def num_classes(self):
-        return len(self.token_map)
-
-    @property
-    def task_type(self):
-        return ('protein', 'multi_class')
+    default_metric = 'Precision'

-    @property
-    def task_in(self):
-        return ('protein')
+    def compute_token_map(self):
+        unique_labels = {p['protein']['EC'].split(".")[0] for p in self.dataset.proteins()}
+        return {k:v for v,k in enumerate(sorted(unique_labels))}

-    @property
-    def task_out(self):
-        return ('multi_class')
-
-    @property
-    def target_dim(self):
-        return (len(self.token_map.values()))
-
-    @property
-    def num_features(self):
-        return 20
-
-    def target(self, protein):
-        return self.token_map[protein['protein']['EC'].split(".")[0]]
-
-    @property
-    def default_metric(self):
-        return 'precision'
+    def target(self, protein_dict):
+        return self.token_map[protein_dict['protein']['EC'].split(".")[0]]

     def evaluate(self, y_true, y_pred):
-        """ Using metrics from https://doi.org/10.1073/pnas.1821905116 """
         y_true = np.array(y_true, dtype=int)
         y_pred = np.array(y_pred, dtype=int)
         return {
-            'precision': metrics.precision_score(y_true, y_pred, average='macro', zero_division=0),
-            'recall': metrics.recall_score(y_true, y_pred, average='macro', zero_division=0),
-            'accuracy': metrics.accuracy_score(y_true, y_pred),
-            #'AUROC': metrics.roc_auc_score(y_true, y_pred, average='macro', multi_class='ovo'),
+            'Precision': metrics.precision_score(y_true, y_pred, average='macro', zero_division=0),
+            'Recall': metrics.recall_score(y_true, y_pred, average='macro', zero_division=0),
+            'Accuracy': metrics.accuracy_score(y_true, y_pred),
         }
+
+    @property
+    def y_dummy(self):
+        return np.random.choice(list(self.token_map.values()), len(self.y_test))
diff --git a/proteinshake/tasks/gene_ontology.py b/proteinshake/tasks/gene_ontology.py
index a9531c58..997c1f51 100644
--- a/proteinshake/tasks/gene_ontology.py
+++ b/proteinshake/tasks/gene_ontology.py
@@ -1,7 +1,6 @@
 import itertools
 import numpy as np
-from sklearn import metrics
-from functools import cached_property

 from proteinshake.datasets import GeneOntologyDataset
 from proteinshake.tasks import Task
@@ -18,49 +17,19 @@
     type = 'Multilabel Classification'
     input = 'Protein'
     output = 'Gene Ontology Terms'
-
-    def __init__(self, branch='molecular_function', *args, **kwargs):
-        self.branch = branch
-        super().__init__(*args, **kwargs)
-
-    @cached_property
-    def token_map(self):
-        labels = set(itertools.chain(*[p['protein'][self.branch] for p in self.proteins]))
-        return {label: i for i, label in enumerate(sorted(list(labels)))}
-
-    @property
-    def num_classes(self):
-        return len(self.token_map)
-
-    @property
-    def classes(self):
-        return list(self.token_map.keys())
-
-    @property
-    def task_in(self):
-        return ('protein')
-
-    @property
-    def task_type(self):
-        return ('protein', 'multi_label')
-
-    @property
-    def task_out(self):
-        return ('multi_label')
+    default_metric = 'Fmax'

-    @property
-    def target_dim(self):
-        return (len(self.token_map.values()))
-
-    @property
-    def num_features(self):
-        return 20
+    def compute_token_map(self):
+        labels = set(itertools.chain(*[p['protein']['molecular_function'] for p in self.dataset.proteins()]))
+        return {k:v for v,k in enumerate(sorted(labels))}

     def target(self, protein):
-        tokens = [self.token_map[i] for i in protein['protein'][self.branch]]
-        target = np.zeros_like(self.classes, dtype=bool)
-        target[tokens] = True
-        return target
+        return [self.token_map[i] for i in protein['protein']['molecular_function']]
+
+    def target_transform(self, target):
+        transformed = np.zeros(len(self.token_map), dtype=bool)
+        transformed[target] = True
+        return transformed

     def precision(self, y_true, y_pred, threshold):
         mt = (y_pred.max(axis=1) >= threshold).sum()
@@ -77,41 +46,22 @@ def recall(self, y_true, y_pred, threshold):
         nom = np.logical_and(y_true, y_pred).sum(axis=1).astype(np.float32)
         denom = y_true.sum(axis=1).astype(np.float32)
         return 1/ne * np.divide(nom, denom, out=np.zeros_like(nom), where=denom!=0).sum()
-
-    def remaining_uncertainty(self, y_true, y_pred, threshold):
-        pass
-
-    def missing_information(self, y_true, y_pred, threshold):
-        pass

     def fmax(self, y_true, y_pred):
         fmax = 0
-        for t in np.linspace(0,1,21):
+        for t in np.linspace(0,1,11):
             prec, rec = self.precision(y_true, y_pred, t), self.recall(y_true, y_pred, t)
             if prec+rec == 0: continue
             f1 = (2 * prec * rec) / (prec + rec)
             fmax = max(fmax, f1)
         return fmax
-
-    def smin(self, y_pred):
-        return min([
-            np.sqrt(
-                self.remaining_uncertainty(y_pred, t) ** 2
-                + self.missing_information(y_pred, t) ** 2
-            )
-            for t in np.linspace(0,1,21)
-        ])
-
-    def dummy_output(self):
-        return np.random.rand(len(self.test_index), len(self.token_map.keys()))
-
-    @property
-    def default_metric(self):
-        return 'Fmax'

     def evaluate(self, y_true, y_pred):
-        y_true, y_pred = np.array(y_true), np.array(y_pred)
+        y_true, y_pred = np.stack(y_true), np.array(y_pred)
         return {
             'Fmax': self.fmax(y_true, y_pred),
-            #'Smin': self.smin(y_true, y_pred),
         }
+
+    @property
+    def y_dummy(self):
+        return np.random.rand(len(self.y_test), len(self.token_map))
diff --git a/proteinshake/tasks/ligand_affinity.py b/proteinshake/tasks/ligand_affinity.py
index 2ec127a7..321e6a07 100644
--- a/proteinshake/tasks/ligand_affinity.py
+++ b/proteinshake/tasks/ligand_affinity.py
@@ -1,4 +1,5 @@
 from sklearn import metrics
+import numpy as np

 from proteinshake.datasets import ProteinLigandInterfaceDataset
 from proteinshake.tasks import Task
@@ -14,36 +15,16 @@
     type = 'Regression'
     input = 'Protein and Molecule'
     output = 'Dissociation Constant Kd'
+    default_metric = 'R2'

-    @property
-    def task_in(self):
-        return ('protein', 'molecule')
-
-    @property
-    def task_type(self):
-        return ('protein', 'regression')
-
-    @property
-    def task_out(self):
-        return ('regression')
-
-    @property
-    def out_dim(self):
-        return (1)
-
-    def dummy_output(self):
-        import random
-        return [random.random() for _ in range(len(self.test_index))]
-
-    def target(self, protein):
-        return protein['protein']['neglog_aff']
-
-    @property
-    def default_metric(self):
-        return 'r2'
+    def target(self, protein_dict):
+        return protein_dict['protein']['neglog_aff']

     def evaluate(self, y_true, y_pred):
         return {
-            'mse': metrics.mean_squared_error(y_true, y_pred),
-            'r2': metrics.r2_score(y_true, y_pred)
+            'Mean Squared Error': metrics.mean_squared_error(y_true, y_pred),
+            'R2': metrics.r2_score(y_true, y_pred)
         }
+
+    @property
+    def y_dummy(self):
+        return np.random.uniform(size=len(self.y_test))
diff --git a/proteinshake/tasks/pfam_task.py b/proteinshake/tasks/pfam_task.py
deleted file mode 100644
index 634c2005..00000000
--- a/proteinshake/tasks/pfam_task.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from sklearn import metrics
-from functools import cached_property
-import numpy as np
-
-from proteinshake.datasets import ProteinFamilyDataset
-from proteinshake.tasks import Task
-
-class ProteinFamilyTask(Task):
-    """ Predict the protein family classification of a protein structure. This is a protein-level multi-class prediction.
- - """ - - DatasetClass = ProteinFamilyDataset - - type = 'Multiclass Classification' - input = 'Protein' - output = 'Protein Family (Pfam)' - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - @cached_property - def token_map(self): - # Pfam': ['Fis1 N-terminal tetratricopeptide repeat (Fis1_TPR_N)', 'Fis1 C-terminal tetratricopeptide repeat (Fis1_TPR_C)'], - labels = {p['protein']['Pfam'][0] for p in self.proteins} - return {label: i for i, label in enumerate(sorted(list(labels)))} - - def dummy_output(self): - import random - tokens = list(self.token_map.values()) - return [random.choice(tokens) for _ in range(len(self.test_targets))] - - @property - def task_in(self): - return ('protein') - - @property - def task_type(self): - return ('protein', 'multi_class') - - @property - def task_out(self): - return ('multi_class') - - @property - def out_dim(self): - return len(self.token_map) - - @property - def num_features(self): - return 20 - - def target(self, protein): - return self.token_map[protein['protein']['Pfam'][0]] - - @property - def default_metric(self): - return 'precision' - - def evaluate(self, y_true, y_pred): - """ Using metrics from https://doi.org/10.1073/pnas.1821905116 """ - y_true = np.array(y_true, dtype=int) - y_pred = np.array(y_pred, dtype=int) - return { - 'precision': metrics.precision_score(y_true, y_pred, average='macro', zero_division=0), - 'recall': metrics.recall_score(y_true, y_pred, average='macro', zero_division=0), - 'accuracy': metrics.accuracy_score(y_true, y_pred), - #'AUROC': metrics.roc_auc_score(y_true, y_pred, average='macro', multi_class='ovo'), - } diff --git a/proteinshake/tasks/protein_family.py b/proteinshake/tasks/protein_family.py new file mode 100644 index 00000000..c60be744 --- /dev/null +++ b/proteinshake/tasks/protein_family.py @@ -0,0 +1,36 @@ +from sklearn import metrics +import numpy as np + +from proteinshake.datasets import ProteinFamilyDataset +from proteinshake.tasks import Task + +class ProteinFamilyTask(Task): + """ Predict the protein family classification of a protein structure. This is a protein-level multi-class prediction. + """ + + DatasetClass = ProteinFamilyDataset + + type = 'Multiclass Classification' + input = 'Protein' + output = 'Protein Family (Pfam)' + default_metric = 'Precision' + + def compute_token_map(self): + labels = {p['protein']['Pfam'][0] for p in self.dataset.proteins()} + return {k: v for v,k in enumerate(sorted(labels))} + + def target(self, protein): + return self.token_map[protein['protein']['Pfam'][0]] + + def evaluate(self, y_true, y_pred): + """ Using metrics from https://doi.org/10.1073/pnas.1821905116 """ + y_true = np.array(y_true, dtype=int) + y_pred = np.array(y_pred, dtype=int) + return { + 'Precision': metrics.precision_score(y_true, y_pred, average='macro', zero_division=0), + 'Recall': metrics.recall_score(y_true, y_pred, average='macro', zero_division=0), + 'Accuracy': metrics.accuracy_score(y_true, y_pred), + } + + def dummy(self): + return np.random.choice(list(self.token_map.values()), len(self.test_targets)) diff --git a/proteinshake/tasks/protein_protein_interface.py b/proteinshake/tasks/protein_protein_interface.py index 6b6dd2b5..6e660dbd 100644 --- a/proteinshake/tasks/protein_protein_interface.py +++ b/proteinshake/tasks/protein_protein_interface.py @@ -5,12 +5,8 @@ from proteinshake.tasks import Task class ProteinProteinInterfaceTask(Task): - """ Identify the binding residues of a protein-protein complex. 
-
-    .. code-block:: python
-
-        >>> from proteinshake.tasks import ProteinProteinInterfaceTask
-        >>> ta = ProteinProteinInterfaceTask()
+    """ Identify the binding residues of a protein-protein complex.
+    This is a residue-level binary classification.
     """

     DatasetClass = ProteinProteinInterfaceDataset
@@ -18,86 +14,37 @@
     type = 'Binary Classification'
     input = 'Protein and Protein'
     output = 'Protein Binding Interface Residues'
+    default_metric = 'AUROC (median)'
+    pairwise = True
+    level = 'Residues'

-    @property
-    def task_in(self):
-        return ('residue', 'residue')
-
-    @property
-    def task_type(self):
-        return ('residue_pair', 'binary')
-
-    @property
-    def task_out(self):
-        return ('binary')
-
-    @property
-    def out_dim(self):
-        return (1)
-
-    def dummy_output(self):
-        import random
-        return [np.where(np.random.randint(0, 2, p.shape) == 0, 0, 1) for p in self.test_targets]
-
-    def update_index(self):
-        """ Transform to pairwise indexing """
-        self.train_index = self.compute_pairs(self.train_index)
-        self.val_index = self.compute_pairs(self.val_index)
-        self.test_index = self.compute_pairs(self.test_index)
-
-    def compute_targets(self):
-        self.train_targets = [self.target(self.proteins[i], self.proteins[j]) for i,j in self.train_index]
-        self.val_targets = [self.target(self.proteins[i], self.proteins[j]) for i,j in self.val_index]
-        self.test_targets = [self.target(self.proteins[i], self.proteins[j]) for i,j in self.test_index]
-
-    def compute_pairs(self, index):
-        """ Grab all pairs of chains that share an interface"""
-        protein_to_index = {p['protein']['ID']: i for i, p in enumerate(self.dataset.proteins())}
-        def find_index(pdbid, chain):
-            return protein_to_index[f'{pdbid}_{chain}']
-
-        proteins = self.dataset.proteins()
-        chain_pairs = []
-        for i, protein in enumerate(proteins):
-            if i not in index:
-                continue
-            #chain = protein['residue']['chain_id'][0]
-            pdbid, chain = protein['protein']['ID'].split('_')
-            try:
-                chain_pairs.extend([(i, find_index(pdbid, partner)) for partner in self.dataset._interfaces[pdbid][chain]])
-            # if chain is not in any interface, we skip
-            except (KeyError, IndexError):
-                continue
-        chain_pairs = [(i,j) for i,j in chain_pairs if i in index and j in index] # @carlos please check
-        return np.array(chain_pairs, dtype=int)
-
+    def compute_paired_index(self, index):
+        """ Return all pairs of interactions that are in self.interfaces.
+ """ + pdb_to_idx = {p['protein']['ID']:i for i,p in enumerate(self.dataset.proteins())} + i,j = [],[] + for k def target(self, protein_1, protein_2): - chain_1 = protein_1['residue']['chain_id'][0] - chain_2 = protein_2['residue']['chain_id'][0] - chain_1_length = len(protein_1['residue']['chain_id']) - chain_2_length = len(protein_2['residue']['chain_id']) - pdbid = protein_1['protein']['ID'].split('_')[0] - - contacts = np.zeros((chain_1_length, chain_2_length)) - try: - inds = np.array(self.dataset._interfaces[pdbid][chain_1][chain_2]) - contacts[inds[:,0], inds[:,1]] = 1.0 - except KeyError: # raised if there are no interactions between query chains - pass - return np.array(contacts) - - @property - def default_metric(self): - return 'average_precision' + pdbid_1, chain_1 = protein_1['protein']['ID'].split('_') + pdbid_2, chain_2 = protein_2['protein']['ID'].split('_') + shape = (len(protein_1['residue']['chain_id']), len(protein_2['residue']['chain_id'])) + return self.dataset.interfaces[pdbid_1][chain_1][chain_2], shape + + def target_transform(self, target): + residue_indices, shape = target + transformed_target = np.zeros(shape, dtype=bool) + transformed_target[tuple(zip(*residue_indices))] = True + return transformed_target def evaluate(self, y_true, y_pred): """ Evaluate performance of an interface classifier. """ - raw_values = {'auroc': np.zeros(len(y_true)), - 'auprc': np.zeros(len(y_true)), - 'sizes':np.zeros(len(y_true)) - } + raw_values = { + 'auroc': np.zeros(len(y_true)), + 'auprc': np.zeros(len(y_true)), + 'sizes': np.zeros(len(y_true)) + } for i, (y, y_pred) in enumerate(zip(y_true, y_pred)): y = y.flatten() @@ -106,26 +53,17 @@ def evaluate(self, y_true, y_pred): raw_values['auprc'][i] = metrics.average_precision_score(y, y_pred) raw_values['sizes'][i] = len(y) - result = {} tot = np.sum(raw_values['sizes']) norm = raw_values['sizes'] / tot - result['auroc_weighted'] = np.mean(raw_values['auroc'] * norm) - result['auprc_weighted'] = np.mean(raw_values['auprc'] * norm) - result['auroc_median'] = np.median(raw_values['auroc']) - result['auprc_median'] = np.median(raw_values['auprc']) - result['auroc_mean'] = np.mean(raw_values['auroc']) - result['auprc_mean'] = np.mean(raw_values['auprc']) - return result - - def to_graph(self, *args, **kwargs): - self.dataset = self.dataset.to_graph(*args, **kwargs, transform=Compose([CenterTransform(), RandomRotateTransform()])) - return self - - def to_point(self, *args, **kwargs): - self.dataset = self.dataset.to_point(*args, **kwargs, transform=Compose([CenterTransform(), RandomRotateTransform()])) - return self - - def to_voxel(self, *args, **kwargs): - self.dataset = self.dataset.to_voxel(*args, **kwargs, transform=Compose([CenterTransform(), RandomRotateTransform()])) - return self + return { + 'AUROC (weighted)': np.mean(raw_values['auroc'] * norm), + 'AUPRC (weighted)': np.mean(raw_values['auprc'] * norm), + 'AUROC (median)': np.median(raw_values['auroc']), + 'AUPRC (median)': np.median(raw_values['auprc']), + 'AUROC (mean)': np.mean(raw_values['auroc']), + 'AUPRC (mean)': np.mean(raw_values['auprc']), + } + + def y_dummy(self): + return [np.where(np.random.randint(0, 2, p.shape) == 0, 0, 1) for p in self.test_targets] \ No newline at end of file diff --git a/proteinshake/tasks/structural_class.py b/proteinshake/tasks/structural_class.py index f13ab72f..ff0866d3 100644 --- a/proteinshake/tasks/structural_class.py +++ b/proteinshake/tasks/structural_class.py @@ -1,5 +1,4 @@ from sklearn import metrics -from functools import 
 import numpy as np

 from proteinshake.datasets import SCOPDataset
@@ -7,7 +6,6 @@
 class StructuralClassTask(Task):
     """ Predict the SCOP class of a protein structure. This is a protein-level multi-class prediction.
-
     """

     DatasetClass = SCOPDataset
@@ -15,55 +13,27 @@
     type = 'Multiclass Classification'
     input = 'Protein'
     output = 'SCOP Class'
+    default_metric = 'Accuracy'

     def __init__(self, scop_level='SCOP-FA', *args, **kwargs):
         self.scop_level = scop_level
         super().__init__(*args, **kwargs)

-    @cached_property
-    def token_map(self):
-        labels = {p['protein'][self.scop_level] for p in self.proteins}
-        return {label: i for i, label in enumerate(sorted(list(labels)))}
-
-    @property
-    def target_dim(self):
-        return len(self.token_map)
-
-    def dummy_output(self):
-        import random
-        tokens = list(self.token_map.values())
-        return [random.choice(tokens) for _ in range(len(self.test_targets))]
-
-    @property
-    def task_in(self):
-        return ('protein')
-
-    @property
-    def task_type(self):
-        return ('protein', 'multi-class')
-
-    @property
-    def task_out(self):
-        return ('multi_class')
-
-    @property
-    def num_features(self):
-        return 20
+    def compute_token_map(self):
+        labels = {p['protein'][self.scop_level] for p in self.dataset.proteins()}
+        return {k:v for v,k in enumerate(sorted(list(labels)))}

     def target(self, protein):
         return self.token_map[protein['protein'][self.scop_level]]

-    @property
-    def default_metric(self):
-        return 'precision'
-
     def evaluate(self, y_true, y_pred):
-        """ Using metrics from https://doi.org/10.1073/pnas.1821905116 """
         y_true = np.array(y_true, dtype=int)
         y_pred = np.array(y_pred, dtype=int)
         return {
-            'precision': metrics.precision_score(y_true, y_pred, average='macro', zero_division=0),
-            'recall': metrics.recall_score(y_true, y_pred, average='macro', zero_division=0),
-            'accuracy': metrics.accuracy_score(y_true, y_pred),
-            #'AUROC': metrics.roc_auc_score(self.test_targets, y_pred, average='macro', multi_class='ovo'),
+            'Precision': metrics.precision_score(y_true, y_pred, average='macro', zero_division=0),
+            'Recall': metrics.recall_score(y_true, y_pred, average='macro', zero_division=0),
+            'Accuracy': metrics.accuracy_score(y_true, y_pred),
         }
+
+    @property
+    def y_dummy(self):
+        return np.random.choice(list(self.token_map.values()), size=len(self.y_test))
diff --git a/proteinshake/tasks/structure_search.py b/proteinshake/tasks/structure_search.py
index 84ca0212..0505f900 100644
--- a/proteinshake/tasks/structure_search.py
+++ b/proteinshake/tasks/structure_search.py
@@ -24,24 +24,12 @@
     type = 'Retrieval'
     input = 'Protein'
     output = 'Similar Proteins'
+    default_metric = 'Precision@k'

     def __init__(self, min_sim=0.8, *args, **kwargs):
         self.min_sim = min_sim
         super().__init__(*args, **kwargs)

-    @property
-    def task_type(self):
-        return ('protein', 'retrieval')
-
-
-    @property
-    def task_in(self):
-        return ('protein')
-
-    @property
-    def task_out(self):
-        return ('retrieval')
-
     @cached_property
     def targets(self):
         """ Precompute the set of similar proteins for each query """
@@ -51,30 +39,16 @@
         return targets

     def target(self, protein):
-        """ The target for a protein is a list of proteins deemed 'relevant'
-        according to `self.min_sim`.
+        """ The target for a protein is a list of proteins deemed 'relevant' according to `self.min_sim`.
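+
+        A sketch of the returned value (the IDs shown are hypothetical):
+
+        .. code-block:: python
+
+            >>> task.target(protein)
+            ['1ABC', '4XYZ']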
""" return self.targets[protein['protein']['ID']] - def _precision_at_k(self, y_true, y_pred, k): + def precision_at_k(self, y_true, y_pred, k): return len(set(y_pred[:k]).intersection(set(y_true))) / len(y_pred) - def _recall_at_k(self, y_true, y_pred, k): + def recall_at_k(self, y_true, y_pred, k): return len(set(y_pred[:k]).intersection(set(y_true))) / len(y_true) - def dummy_output(self): - import random - pred = [] - ids = [p['protein']['ID'] for p in self.proteins] - for query in self.proteins[self.test_index]: - targets = self.target(query) - pred.append(random.sample(ids, len(targets))) - return pred - - @property - def default_metric(self): - return 'precision_at_k' - def evaluate(self, y_true, y_pred, k=5): """ Retrieval metrics. @@ -83,9 +57,16 @@ def evaluate(self, y_true, y_pred, k=5): y_pred: List of indices of items (hits) in the dataset for a query. """ - results = defaultdict(list) - for yt, yp in zip(y_true, y_pred): - results['precision_at_k'].append(self._precision_at_k(yt, yp, k)) - results['recall_at_k'].append(self._recall_at_k(yt, yp, k)) + return { + 'Precision@k': np.mean([self.precision_at_k(yt, yp, k) for yt, yp in zip(y_true, y_pred)]), + 'Recall@k': np.mean([self.recall_at_k(yt, yp, k) for yt, yp in zip(y_true, y_pred)]), + } - return {k: np.mean(v) for k, v in results.items()} + def dummy(self): + import random + pred = [] + ids = [p['protein']['ID'] for p in self.proteins] + for query in self.proteins[self.test_index]: + targets = self.target(query) + pred.append(random.sample(ids, len(targets))) + return pred diff --git a/proteinshake/tasks/structure_similarity.py b/proteinshake/tasks/structure_similarity.py index fdf17254..4094ec6f 100644 --- a/proteinshake/tasks/structure_similarity.py +++ b/proteinshake/tasks/structure_similarity.py @@ -1,4 +1,4 @@ -import itertools +from collections import defaultdict from scipy.stats import spearmanr import numpy as np @@ -19,57 +19,19 @@ class StructureSimilarityTask(Task): type = 'Regression' input = 'Protein and Protein' output = 'Local Distance Difference Test' + default_metric = 'Spearman R' + pairwise = True - def __init__(self, *args, **kwargs): - - super().__init__(*args, **kwargs) - - def update_index(self): - """ Transform to pairwise indexing """ - self.train_index = self.compute_pairs(self.train_index) - self.val_index = self.compute_pairs(self.val_index) - self.test_index = self.compute_pairs(self.test_index) - - def compute_targets(self): - self.train_targets = np.array([self.target(*self.proteins[i]) for i in self.train_index]) - self.val_targets = np.array([self.target(*self.proteins[i]) for i in self.val_index]) - self.test_targets = np.array([self.target(*self.proteins[i]) for i in self.test_index]) - - @property - def task_in(self): - return ('protein', 'protein') - - @property - def task_type(self): - return ('protein_pair', 'regression') - - @property - def task_out(self): - return ('regression') - - @property - def target_dim(self): - return (1) - - def compute_pairs(self, index): - combinations = np.array(list(itertools.combinations(range(len(index)), 2)), dtype=int) - return index[combinations] - - def target(self, protein1, protein2): - pdbid_1 = protein1['protein']['ID'] - pdbid_2 = protein2['protein']['ID'] - return self.dataset.lddt(pdbid_1,pdbid_2) - - def dummy_output(self): - import random - return [random.random() for _ in range(len(self.test_targets))] - - @property - def default_metric(self): - return 'spearman' + def target(self, protein_1, protein_2): + pdbid_1 = 
+        pdbid_2 = protein_2['protein']['ID']
+        return float(self.dataset.lddt(pdbid_1,pdbid_2))

     def evaluate(self, y_true, y_pred):
         return {
-            'mse': metrics.mean_squared_error(y_true, y_pred),
-            'spearman': spearmanr(y_true, y_pred)[0]
+            'Mean Squared Error': metrics.mean_squared_error(y_true, y_pred),
+            'Spearman R': spearmanr(y_true, y_pred)[0]
         }
+
+    @property
+    def y_dummy(self):
+        return np.random.uniform(size=len(self.y_test))
diff --git a/proteinshake/tasks/task.py b/proteinshake/tasks/task.py
index 27b6579e..ccd1b965 100644
--- a/proteinshake/tasks/task.py
+++ b/proteinshake/tasks/task.py
@@ -1,37 +1,23 @@
 import os
-import json
+import requests
 import itertools
 import numpy as np
 from sklearn.model_selection import train_test_split

-from proteinshake.utils import download_url, save, load
+from proteinshake.utils import save, load, progressbar

 class Task:
-    """ Base class for task-related utilities.
-    This class wraps a proteinshake dataset and exposes split indices,
-    integer-coded labels for classification tasks, and an evaluator function.
-
-    Sample usage (assuming you have a model in the namespace):
-
-    .. code-block:: python
-
-        >>> from proteinshake.tasks import EnzymeClassTask
-        >>> task = EnzymeClassTask()
-        >>> data = task.dataset.to_graph(eps=8).pyg()
-        >>> y_pred = model(data[task.train])
-        >>> task.evaluate(y_pred)
-        ... {'roc_auc_score': 0.7}
-
+    """ Base class for a task.

     Arguments
     ----------
-    dataset: pytorch.datasets.Dataset
-        Dataset to use for this task.
-    split: str, default='random'
-        How to split the data. Can be 'random', 'sequence', or 'structure'.
+    root: str, default 'data'
+        The root directory to put the dataset and task data.
+    split: str, default 'random'
+        How to split the data. Can be 'random', 'sequence', 'structure' or 'custom'.
     split_similarity_threshold: float
-        Maximum similarity to allow between train and test samples.
+        Maximum similarity between train and test set.
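+
+    Sample usage, a minimal sketch against the refactored interface (assumes a trained ``model`` in the namespace):
+
+    .. code-block:: python
+
+        >>> from proteinshake.tasks import EnzymeClassTask
+        >>> task = EnzymeClassTask(root='data').to_graph(eps=8).pyg()
+        >>> y_pred = model(task.X_test)
+        >>> task.evaluate(task.y_test, y_pred)
+        {'Precision': ..., 'Recall': ..., 'Accuracy': ...}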
""" DatasetClass = None @@ -39,119 +25,171 @@ class Task: type = None input = None output = None + default_metric = None + pairwise = False + level = 'Protein' def __init__(self, root = 'data', split = 'random', split_similarity_threshold = 0.7, + use_precomputed_task = True, **kwargs ): self.root = root self.dataset = self.DatasetClass(root=root, **kwargs) - proteins = self.dataset.proteins() - self.size = len(proteins) - self.split_similarity_threshold = split_similarity_threshold self.split = split - - class Proteins(): # dummy class to implement __getitem__, could be implemented directly on the task - def __init__(self, proteins): - self.proteins = list(proteins) - - def __len__(self): - return len(self.proteins) - - def __getitem__(self, idx): - try: - idx = int(idx) - except: - return [self.__getitem__(i) for i in idx] - if idx >= len(self.proteins): - raise StopIteration - return self.proteins[idx] - - self.proteins = Proteins(proteins) - self.name = self.__class__.__name__ - - # load split indices - if not self.split == 'none': - self.compute_index() - self.compute_targets() - - def compute_index(self): - split_name = f'{self.split}_split_{self.split_similarity_threshold}' if self.split in ['sequence','structure'] else f'{self.split}_split' - if split_name in self.proteins[0]['protein']: - self.train_index = np.array([i for i,p in enumerate(self.proteins) if p['protein'][split_name] == 'train']) - self.val_index = np.array([i for i,p in enumerate(self.proteins) if p['protein'][split_name] == 'val']) - self.test_index = np.array([i for i,p in enumerate(self.proteins) if p['protein'][split_name] == 'test']) - else: - self.train_index, self.val_index, self.test_index = self.compute_custom_split(self.split) - - self.update_index() - - def update_index(self): - pass + self.split_similarity_threshold = split_similarity_threshold - def compute_targets(self): - # compute targets (e.g. for scaling) - self.train_targets = [self.target(self.proteins[i]) for i in self.train_index] - self.val_targets = [self.target(self.proteins[i]) for i in self.val_index] - self.test_targets = [self.target(self.proteins[i]) for i in self.test_index] - - def compute_custom_split(self, split): - """ Implements random splitting. Only necessary when not using the precomputed splits, e.g. when implementing a custom task. - Note that the random, sequence and structure splits will be automatically computed for your custom task if it is merged into ProteinShake main. - Override this method to implement your own splitting logic. - Compare also the proteinshake_release repository. + if not self.files_exist: + if use_precomputed_task and self.files_hosted: self.download_precomputed() + else: self.compute() + + task_data = load(f'{self.root}/{self.__class__.__name__}.json.gz') + self.load_splits(task_data) + self.load_targets(task_data) - Arguments - ------------ - split: str - Name of the custom split as passed to the task. ('random', 'sequence', 'structure', 'none') + def __getattr__(self, key): + """ Captures method calls and forwards them to the dataset if they are a representation or framework conversion. 
+ """ + if key in ['to_graph','to_point','to_voxel','np','dgl','pyg','torch','nx','tf']: + def proxy(*args, **kwargs): + self.dataset = getattr(self.dataset, key)(*args, **kwargs) + return self + return proxy + else: return object.__getattribute__(self, key) + + @property + def files_exist(self): + return os.path.exists(f'{self.root}/{self.__class__.__name__}.json.gz') - Returns: - -------- - train_index - Numpy array with the index of proteins in the train split. - val_index - Numpy array with the index of proteins in the validation split. - test_index - Numpy array with the index of proteins in the test split. + @property + def files_hosted(self): + return requests.head(f'{self.dataset.repository_url}/{self.__class__.__name__}.npz', timeout=5).status_code == 200 + + def download_precomputed(self): + raise NotImplementedError + + def compute(self): + """ Computes the task file with splits, token maps, and targets. """ - inds = list(range(len(self.dataset.proteins()))) - train, test = train_test_split(inds, test_size=0.2) - val, test = train_test_split(test, test_size=0.5) + self.token_map = self.compute_token_map() + self.targets = self.compute_paired_targets() if self.pairwise else self.compute_targets() + save({ + 'random_split': self.compute_random_split(), + 'sequence_split': self.compute_sequence_split(), + 'structure_split': self.compute_structure_split(), + 'custom_split': self.compute_custom_split(), + 'token_map': self.token_map, + 'targets': self.targets, + }, f'{self.root}/{self.__class__.__name__}.json.gz') + + def compute_random_split(self): + indices = np.arange(len(self.dataset.proteins())) + train_indices, test_val_indices = train_test_split(indices, test_size=0.2, random_state=0) + test_indices, val_indices = train_test_split(test_val_indices, test_size=0.5, random_state=0) + return { + 'train': train_indices.tolist(), + 'test': test_indices.tolist(), + 'val': val_indices.tolist(), + } + + def compute_sequence_split(self): + return None + + def compute_structure_split(self): + return None + + def compute_custom_split(self): + return None + + def compute_token_map(self): + return None - return train, val, test + def compute_targets(self): + return [ + self.target(protein_dict) + for protein_dict in progressbar(self.dataset.proteins(), verbosity=self.dataset.verbosity, desc='Computing targets') + ] + + def compute_paired_targets(self): + print(len(self.dataset.proteins())) + index = self.compute_paired_index(np.arange(len(self.dataset.proteins()))) + print(len(index)) + exit() + return [ + self.target(A,B) + for A in progressbar(self.dataset.proteins(), verbosity=self.dataset.verbosity, desc='Computing targets') + for B in self.dataset.proteins() + ] + + def load_splits(self, task_data): + split_data = task_data[f'{self.split}_split'] + if self.split != 'random': split_data = split_data[f'similarity_{self.split_similarity_threshold}'] + self.train_index, self.test_index, self.val_index = split_data['train'], split_data['test'], split_data['val'] + if self.pairwise: + self.train_index = self.compute_paired_index(self.train_index) + self.val_index = self.compute_paired_index(self.val_index) + self.test_index = self.compute_paired_index(self.test_index) + + def compute_paired_index(self, index): + """ Computes all pairs between each element in the index. 
+ """ + combinations = np.array(list(itertools.combinations(range(len(index)), 2)), dtype=int) + return np.array(index)[combinations] + + def load_targets(self, task_data): + self.token_map = task_data['token_map'] + self.targets = np.array([self.target_transform(target) for target in task_data['targets']], dtype=object) + if self.pairwise: self.targets = self.targets.reshape(int(np.square(len(self.targets))), int(np.square(len(self.targets)))) + + def target_transform(self, target): + return target @property - def task_type(self): - """ Returns a string describing the type of task.""" - raise NotImplementedError + def X_train(self): + return self.dataset[self.train_index] @property - def num_features(self): - """ Number of input features to use for this task """ - raise NotImplementedError + def X_test(self): + return self.dataset[self.test_index] + + @property + def X_val(self): + return self.dataset[self.val_index] + + @property + def y_train(self): + return np.take(self.targets, self.train_index) @property - def num_classes(self): - """ Size of the output dimension for this task """ + def y_test(self): + return np.take(self.targets, self.test_index) + + @property + def y_val(self): + return np.take(self.targets, self.val_index) + + @property + def y_dummy(self): + """ Generates a random data object of the right type and shape to compare against the test set. + """ raise NotImplementedError - @property - def target(self, protein): + def target(self, protein_dict): """ Return the prediction target for one protein in the dataset. + Can be a scalar or a fixed size array. Arguments ------------ - protein: dict - proteinshake protein dictionary - + protein_dict: dict + A protein dictionary. - .. code-block: python + Returns + ------- + target: float + The prediction target. - >>> from proteinshake.tasks import EnzymeCommissionTask - >>> ta = EnzymeCommissionTask() """ raise NotImplementedError @@ -171,51 +209,5 @@ def evaluate(self, y_true, y_pred): Dictionary with evaluation results. Key-value pairs correspond to metric-score pairs. E.g. 
diff --git a/proteinshake/tasks/virtual_screen.py b/proteinshake/tasks/virtual_screen.py
index 6fc1f48f..97fd2a22 100644
--- a/proteinshake/tasks/virtual_screen.py
+++ b/proteinshake/tasks/virtual_screen.py
@@ -6,33 +6,18 @@ class VirtualScreenTask(Task):
     """ Test an affinity scoring model on a virtual screen.
 
-    The goal in a virtual screen is: for a given protein and a library of potential
-    binders, bring the binders to the top of the list.
+    The goal in a virtual screen is: for a given protein and a library of potential binders, bring the binders to the top of the list.
     In this task, the model is given a protein and a list of ligands to score.
-    The model scores each ligand in a library with a score proportional to the likelihood
-    that the protein and ligand will bind. This can be a docking score, energy calculation,
-    or just a probability.
-    Each protein's ligand library contains a certain number of active molecules (ligands)
-    and a certai (larger) number of decoys (non-binders).
-    We use the predicted scores to sort the whole library and calculate the position of each
-    active ligand in the sorted library.
-    Ligands in the topi percentiles which are known to be active contribute a 1 to the score
-    and those below the cutoff contribute a 0.
+    The model scores each ligand in a library with a score proportional to the likelihood that the protein and ligand will bind.
+    This can be a docking score, energy calculation, or just a probability.
+    Each protein's ligand library contains a certain number of active molecules (ligands) and a certain (larger) number of decoys (non-binders).
+    We use the predicted scores to sort the whole library and calculate the position of each active ligand in the sorted library.
+    Ligands in the top percentiles which are known to be active contribute a 1 to the score and those below the cutoff contribute a 0.
 
     .. warning::
-        This is a zero-shot task so we use the whole dataset in
-        evaluation. No train/test split.
-
-    .. code-block:: python
-
-        >>> from proteinshake.tasks import VirtualScreenTask
-        >>> import numpy as np
-        >>> task = VirtualScreenTask()
-        # predict a (random) binding score for each molecule
-        >>> preds = [np.random.rand(len(task.target(p))) for p in task.dataset.proteins()]
-        >>> task.evaluate(preds, cutoff_fraction=.2)
-        {'enrichment_factor-@.2': 0.6}
+        This is a zero-shot task so we use the whole dataset in evaluation.
+        There are no train/test/val splits.
 
     """
@@ -41,28 +26,13 @@ class VirtualScreenTask(Task):
     type = 'Ranking'
     input = 'Protein and Molecule'
     output = 'Affinity Score Ranking'
+    default_metric = 'Enrichment Factor'
 
     def __init__(self, *args, **kwargs):
         kwargs['split'] = 'none'
         super().__init__(*args, **kwargs)
         self.test_targets = [self.target(p) for p in self.proteins]
 
-    @property
-    def task_in(self):
-        return ('protein', 'molecule')
-
-    @property
-    def task_type(self):
-        return ('protein', 'virtual_screen')
-
-    @property
-    def task_out(self):
-        return ('virtual_screen')
-
-    @property
-    def target_dim(self):
-        return (1)
-
     def target(self, protein):
         """ The target here is a sorted list of smiles where the true ligands
         occupy the top of the list and the decoys occupy the rest.
@@ -71,19 +41,6 @@ def target(self, protein):
         """
         return protein['protein']['ligands_smiles'] + protein['protein']['decoys_smiles']
 
-    @property
-    def num_features(self):
-        return 20
-
-    def dummy_output(self):
-        import random
-        return [[random.random() for _ in range(len(self.target(p)))] for p in self.proteins]
-
-    @property
-    def default_metric(self):
-        return 'enrichment_factor'
-        pass
-
     def evaluate(self, y_true, y_pred, cutoff_fraction=.2):
         """ Computing enrichment factor on the whole dataset.
         To compute the EF, for each active ligand we check if it is in the top
@@ -119,5 +76,9 @@ def evaluate(self, y_true, y_pred, cutoff_fraction=.2):
             mean_active_rank = np.mean([ranks_dict[lig_id] for lig_id in active_ids])
             efs.append(mean_active_rank)
 
-        return {f'enrichment_factor': np.mean(efs)}
+        return {'Enrichment Factor': np.mean(efs)}
+
+    def dummy(self):
+        import random
+        return [[random.random() for _ in range(len(self.target(p)))] for p in self.proteins]
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 81e1a46b..70ed730b 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -4,7 +4,6 @@
 import unittest, tempfile
 
 from proteinshake.datasets import *
-from proteinshake.datasets.alphafold import AF_DATASET_NAMES
 
 class TestDatasets(unittest.TestCase):
diff --git a/tests/test_tasks.py b/tests/test_tasks.py
index e8ed05fd..59b69e8d 100644
--- a/tests/test_tasks.py
+++ b/tests/test_tasks.py
@@ -2,7 +2,6 @@
 Tests prediction task classes.
 '''
 
-import random
 import unittest, tempfile
 
 from proteinshake.tasks import *
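
Adapting the doctest that was removed from the VirtualScreenTask docstring to the new signature and metric key, usage would look roughly like this (a sketch; the random baseline comes from the new dummy() method):

    from proteinshake.tasks import VirtualScreenTask

    task = VirtualScreenTask()
    y_pred = task.dummy()  # one random score per ligand in each protein's library
    task.evaluate(task.test_targets, y_pred, cutoff_fraction=0.2)
    # -> {'Enrichment Factor': ...}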