diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 15eed753..22067046 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7] + python-version: [3.9] steps: - uses: actions/checkout@v2 diff --git a/README.md b/README.md index fb5e3d89..2098ba13 100644 --- a/README.md +++ b/README.md @@ -221,11 +221,10 @@ By default, `selfies` is tested against a random subset * 130K molecules from [QM9](https://www.nature.com/articles/sdata201422) * 250K molecules from [ZINC](https://en.wikipedia.org/wiki/ZINC_database) * 50K molecules from a dataset of [non-fullerene acceptors for organic solar cells](https://www.sciencedirect.com/science/article/pii/S2542435117301307) - * 160K+ molecules from various [MoleculeNet](http://moleculenet.ai/datasets-1) datasets - * 36M+ molecules from the [eMolecules Database](https://www.emolecules.com/info/products-data-downloads.html). - Due to its large size, this dataset is not included on the repository. To run tests - on it, please download the dataset into the ``tests/test_sets`` directory - and run the ``tests/run_on_large_dataset.py`` script. + * 160K+ molecules from various [MoleculeNet](https://moleculenet.org/datasets-1) datasets + +In earlier releases, we also tested against the 36M+ molecules from the [eMolecules Database](https://downloads.emolecules.com/free/2024-12-01/). + ## Version History See [CHANGELOG](https://github.com/aspuru-guzik-group/selfies/blob/master/CHANGELOG.md). 
diff --git a/selfies/bond_constraints.py b/selfies/bond_constraints.py index 1854af66..c075731e 100644 --- a/selfies/bond_constraints.py +++ b/selfies/bond_constraints.py @@ -9,9 +9,9 @@ "B": 3, "B+1": 2, "B-1": 4, "O": 2, "O+1": 3, "O-1": 1, "N": 3, "N+1": 4, "N-1": 2, - "C": 4, "C+1": 5, "C-1": 3, - "P": 5, "P+1": 6, "P-1": 4, - "S": 6, "S+1": 7, "S-1": 5, + "C": 4, "C+1": 3, "C-1": 3, + "P": 5, "P+1": 4, "P-1": 6, + "S": 6, "S+1": 5, "S-1": 5, "?": 8 } @@ -48,7 +48,7 @@ def get_preset_constraints(name: str) -> Dict[str, int]: +-----------------+-----------+---+---+-----+-----+---+-----+-----+ | | Cl, Br, I | N | P | P+1 | P-1 | S | S+1 | S-1 | +-----------------+-----------+---+---+-----+-----+---+-----+-----+ - | ``default`` | 1 | 3 | 5 | 6 | 4 | 6 | 7 | 5 | + | ``default`` | 1 | 3 | 5 | 4 | 6 | 6 | 5 | 5 | +-----------------+-----------+---+---+-----+-----+---+-----+-----+ | ``octet_rule`` | 1 | 3 | 3 | 4 | 2 | 2 | 3 | 1 | +-----------------+-----------+---+---+-----+-----+---+-----+-----+ diff --git a/tests/test_specific_cases.py b/tests/test_specific_cases.py index b0706942..22ee652e 100644 --- a/tests/test_specific_cases.py +++ b/tests/test_specific_cases.py @@ -359,4 +359,10 @@ def test_radical_kekulization(): assert roundtrip_eq("c1ccs[n+]1c2ccccc2", "C=1C=CS[N+1]=1C2=CC=CC=C2") assert roundtrip_eq("c1ccs[nH+]1", "C=1C=CS[NH1+1]=1") - \ No newline at end of file + +def test_novel_charged_symbols(): + """Test decoding of updated constraints for charged atoms (update in 2.2.0).""" + assert decode_eq("[N][#C+1][#NH1][#C@H1]", "N#[C+1]") + assert decode_eq("[O+1][=P+1][#P-1][#C@@]", "[O+1]=[P+1]=[P-1]#[C@@]") + assert decode_eq("[=C-1][#S+1][#B]", "[C-1]#[S+1]=B") +