diff --git a/4_Substructure and Similarity Search using rdKit.ipynb b/4_Substructure and Similarity Search using rdKit.ipynb new file mode 100644 index 0000000..bdeef4a --- /dev/null +++ b/4_Substructure and Similarity Search using rdKit.ipynb @@ -0,0 +1,956 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 4_Substructure and Similarity Search using rdKit\n", + "## Substructure and similarity searches against a database of interest\n", + "\n", + "\n", + "#http://rdkit.org/docs/Cookbook.html#highlight-a-substructure-in-a-molecule\n", + "# Credits: hellerm2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from rdkit import Chem\n", + "from rdkit.Chem import PandasTools\n", + "from rdkit.Chem.Draw import MolsToGridImage\n", + "from rdkit.Chem.Draw import IPythonConsole\n", + "from rdkit.Chem import rdDepictor\n", + "rdDepictor.SetPreferCoordGen(True)\n", + "IPythonConsole.ipython_useSVG = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Highlight a Substructure in a Molecule" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "m = Chem.MolFromSmiles('c1cc(C(=O)O)c(OC(=O)C)cc1')\n", + "substructure = Chem.MolFromSmarts('C(=O)O')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO3deVSTd74/8HcSIOygCLIryhZww4Vg664oKLhHLbhVp1PbmTIz7dzBmV/vqXN65lyt17niba+1Mx0Fa3GMuIHKpm0VlcUdCZuAAooSNtkChCS/PxJZIgpIVvJ5Hc9p8s2T5JNz7Nvv83yXhyGTyUAIIeRtMbVdACGE6DeKUUIIGRKKUUIIGRKKUUIIGRIjbRdADFJHB0QiSKUwNoaZGVgsbRdEyNujGCUaIRajpARlZXjyBDU1aG/v9aqVFezt4eaG8ePh6goGQ0tVEvI2GDThiahXfT2uXUNuLjo6BnS8jQ1mzMCMGTAxUXNlhKgGxShRG4kEP/2EGzcglQ76vRYWCA2Fv78ayiJExShGiXq0tuLHH/HkyeDeJZP1OqPncrFkCZ3jEx1HI/VEDTo7cezYoDMUUCRm1z/tWVm4dEmVhRGiBhSjRA1++QVPn/ZqyczErl2KP8XFisa6Ouza1esweUvP7ue1aygvV2+1hAwNjdQTVevoQHZ2r5bMTOTldSfmrl0ICUFQ0EA/MCMDEREqLJAQ1aLeKFG1igrlQfnkZGzf3v00KgrJyYP4wLIy0BV8osMoRomqNTf3elpcDDe3Xi0jR8LNrfvUvl+dnWhrU01thKgBndQTVbO0VG5xcHhTi9Ll0VcZGYHNHmpVhKgNxShRNTc3GBtDLH7TMdXV8PVVPO4Zo3V1OHBA+eCxY8Gk0yaiu+hvJ1E1Fgs2NorHnZ3w8sKtW70OqKtDRQW8vAb6gbNmqbI8QlSNYlQPSaWorUVVFRobtV3KKzo6EB+PmhoAEArx9dfIz0dISK8u54EDCAkZ6AfOnIkxY1ReJiEqRCf1ekUoREYGCgq6h8KtrDBpEt55B+bmWq0MANDSgmPHUFUFAOXliI+HSIRbt7BxI+zsupM0MvK1XVGlEfkZMxAcrMaCCVEFWgyqPzIy8NNPfa9PNzfHqlXw9NR4TT00NODoUdTVAUBhIU6ehFgMX1+sWQNjY+VVnv0yN0dICCZOVFOxhKgQxaieuHQJGRlvOoDJxPr18PbWVEG9VVXh2DG0tADA3bs4dw5SKQICEB4OJhMMBphMSCQD+ihra0yfjsBAGp0n+oJiVB8UFSE+/rWvdnX02Gx8/DGsrTVWl0JZGf79b8UWohkZSE8HgFmzsGgRABgbY906uLvj4cPu/UaVxvHNzeHgoNhv1N2d9iIh+oViVOfJZPj6a8XJcr8CArB8uZoL6i0/H6dOobMTUikuXkRODphMLF2K6dMBwMwMERFwdVV+V1sbRCIAYDJhYQEjukZP9Bj99dV5paXKGZqZ2b2YsudwzcOH6OjAkiWaOx3OykJKCmQydHbi9Gnk5cHICKtWKfYJtbVVDC69ytQUpqYaKpIQNaMY1XklJb2evm6bj/Z2HD8OqRTJydi8GWvXwsVFjVXJZPjlF/zyCwC0tSE+Ho8fw9QU772nmJ/k4ICNG2FlpcYaCNENNG9U5wmFvZ6+bpuP1laMHQsGA7dv4/e/h7s7Zs3C3/+OR49UX5JUisRERYY2NeHIETx+DCsrbN2qyNCxY7FtG2UoMRDUG9V5PXdLesM2H15e2LgRbW0oKUFREfLzce0arl3DZ5/Bzw88HtavB4ejgnrEYvD58o1FGmtqrH/4AQ0NsLfHxo2KxUscDlavpsudxHDQEJPOi49HUZHicXExCgoQHt7rgMRE+PoqT2gXi1FaCoEABQXdt+H080N4OMLC3n55pUiE+HhUVADIefIk/McfM+zsPCUSREYq5v8HBiIkhIbaiUGhLoPOc3DojtE+9dzmo4uxMXx84OODzk48fIj8fBQVQSCAQIA9eyRTp/6/4OA1a9ZMnz6dMfDIa2jADz+gthbA+aKidXx+q1j8Fze3E/IJ9gDmzsW8eYP9fYToO+qN6ryKCvzrX91P5ff
h6CLfEqnfveYASKWorEReHgSCEicnz6IiAG5ubqtWrQoPD583b55Rz9PwZ89QWIiaGrS1wdISzs6wt8fp0/JV/Efv3dt+7pxYItk8efI/ly83ZrHAYCAsDFOnquYnE6JXKEb1wXffKRaq4+Vsp54j9YsXo60Nvr5wchrQ2bRUmldZeSgv71R+/pOXm5s4OjquXLlyzZo18yZONEpJQVnZ694dk5n5h+RkGRDF5e4PCWEwGDA2Bo83iB2bCBleKEb1QWUlDh/uXk1fXIxjxxSPIyMhk+HHHwHAxga+vvDxGfgGnXnV1XyBID43t6i2Vt4ywswszNub5+e3ePx4du9hIplM9qe0tP++fp0B7F28+LN33gEAMzO8957ywBchhoRiVE/cvInz5/t+SShETg4KCrr3zbOygq8vOJzB5ik/L0/wcn6VubHxAg8Pnr//Kl9fKza7QyLZcvr08QcPTFisuFWr1k+YAAA2Nti4EaNGDfnnEaLHKEb1x4MHOH++77sS2dlh7VrU1oLP7zWyb2YGb2/4+WH8+AHOQBIIhacEgoT8/LvPnslbfjV16v+EhKw9cSLl4UNLE5OE9esXjx8PQDHJSfNL+AnRMRSjeqW1FVlZyM9HTQ1kMrBYcHHBpEmYMgUsVvdheXng88HnQyBQtBgbw8MD/v7w9R3gUtHS+voEgSAhPz+Ky913/frtqipHS8sLkZEBTk4AMGYMNmygBZ2EgGJUX0kkEIvBZvczpiQQICEBp07h7l15w7MxY35rYbGGw1nm7W09gDwtq69f8sMPxbW140aMSNm0yXPkSMULGzdC3iclxOBRjBqGkhJ5nh5isXZcvw6AbWQ02909zNt7w4QJo1+9lycA4Elj49RDh6pbWma4uJyPiLC3sOh+LSQEXK5maidEx1GMGpbKysoLFy4kJiYmJyd3dnYCYDGZQa6uPD8/nr+/c+9V8D+VlUVdvMhiMK5u22al1HVdsACzZ2uyckJ0FsWogaqtrT1//jyfz09NTe3o6ADAZDBmurmFe3uv8fOTn7z/z40bn6akfDBt2ndKy08BLFum2FGUEINHMWro6urqEhMTEw4dSsvJaevsBMAApru4rOFwvO3sVv/731McHe/s2KH8tl/9Sr0b8RGiPyhGCQDg2bPWr7++VFrKFwjOFBQ0tbcDOLJy5QeJiVKZ7MXOnRYmJt0H29oiKor2HyFEjrYmIQAAR0dzf/9wY+NwHx+RWJz88OGZgoJVHM6BrKzbVVW3q6pm97xZ/Jw5lKGEdKFtm8lLYWGwtARgZmy8isOJXbXKms3muroCyH7ypPswb29MmaKtGgnRQRSj5CUrK2zapLQqKdDFBUBWV4x6emLtWuqKEtITxSjpwcEBO3Zg2rSuNVFcFxfIe6OWlggNRUSEYmtRQshLNMRE+tLaipISCIWytraR69c3NDc/rahwevU+yUSdOjs729raLF+zOILoDuqNkr6Ym2PiRCxYwFi6dBqXCyDn9m1t12RY2tvb33vvvfDw8LY+N6MhuoRilPSDy+UCyMrK0nYhBqSurm7BggUnT57Mzc0tLS3VdjmkHxSjpB/yGM3OztZ2IYbi6dOn8+fPv379urOz808//eTn56ftikg/6Noo6Ud1dfXo0aNtbGzq6uqYA9sEmrw1gUAQGhpaXl7u5+eXnJzsRrcV0Af0fwXph4ODg7u7+4sXLwoLC7Vdy9uSSFBdjaqq7hsE6KSsrKy5c+eWl5cHBQVduXKFMlRf0Com0j8ul1teXp6VlcXhcLRdyyBVViIjAyUl6OxUtNjYYPJkzJypa3tOJyYmrl+/XiQSrVixIj4+3szMTNsVkYGi3ijpX2BgIPTu8qhMhpQUfP89Cgu7MxTAixe4cgX/938oL9deccqOHDmyevVqkUj0/vvvnzx5kjJUv1CMkv7p5WD92bPIzFRu7BoJaGrC0aM6kqR79uzZtm1bZ2dndHT0v/71L6OB3TWL6A4aYiL9a21ttbGxYTA
YL1680I+O0u3bSEzs/zBLS/zmN1o8u5dIJJ988snBgwdZLNbXX3+949UNCYk+oN4o6Z+5ufmECRPEYvGdO3e0XcsAiMW4dGlARzY3IyNDzdW8Vnt7e0RExMGDB9lsdnx8PGWo/qLTBzIggYGBd+/ezcrKeuedd7RdS38EArS29mrJzERysuJxZCS8vLpfun0bCxZA4xO5mpqa1qxZk5aWZmtre/bs2Tlz5mi4AKJC1BslA6JPl0dLSno9zcxEXh527VL8OXas1zVTkQhPn2q4wGfPns2ZMyctLc3Jyennn3+mDNV3FKNkQPRpLVNNTa+nycnYvr37aVRUd89ULiUFRUWQSDRRG1BSUjJ79uy7d+9yOJzMzMzJkydr5nuJ+tBJPRkQDodjbW1dVlb2/Pnz0aNHa7ucN+ro6H5cXAylSewjR8LNDcXF8PRUbJxaWYn4eJiawtsbPj7w8lLfZoA5OTnLli0TCoWBgYFJSUn29vZq+iKiSdQbJQPCZDKnTZsGICcnR9u19Edp5N3BQfkABwe0tWHvXpw+jbw8Rey2teH+ffD52LsX8fG4d69XHKtCenr6woULhUJhcHDwpUuXKEOHDYpRMlB6c17fb2dZvjC0tRX37oHPx759SEiAQACxGADEYhQV4cyZ7jxVxVZ1R48eXbp0aVNT0+bNm8+fP0+7iA4ndFJPBkq+lkkPRpm8vHD7NqRSiMXw8sKxYwgP7361rg4VFdi+HTNmoLAQAgHKy5Gbi9xcGBlh3Dj4+8PHB6am6OxEURGKisBiwcMDfn7w8YG5ed9fWl6O4mIIhRCLYW0NV1dwOF0Hx8TEfPrpp1KpNCoqav/+/Qy6C8vwQtPvyUBVVVU5Ozvb2trW1tbq9FZPUin278f336OxEZs34/ZtJCdj1y7Fq7t2ISQEQUHdxzc0oKBAkadyTCZcXeHnhwkT0LPbyGDA1RX+/vDzg5WVovHpU5w/38dwv5ERgoJkc+dG/+Uve/fuZTAYe/fu/eyzz9Txi4l2UYySQXBzc6usrCwoKPDx8dF2La9XX4/gYNy6BTMzbN2K0aNRXIxjxxSvKs0b7amxEfn5yM9HeTmkUgBgMjF27IsJE1q9vJy6chMAgwFHR3h7w9QU6emvG+XvkEi2pqTEZ2ebmJjExsZu2LBBlT+T6AyKUTIIa9euTUhIiIuL27Rpk7ZreY2qKoSG4t49jBoFHu9N10ktLMDjoaYGRUV4+FCRm3KtrSguRl4eSkogkRwYM+YP5eUBTk5h3t6REyd62dkNpJDmjg7eiRPJDx9ampomnDmzeMmSIf82oqNYu7pOdgjpz6NHj9LT052dnZcuXartWvqSn49581BUBD8/XL0KX1+UlfXKxy7Ozti4EY6OcHbGxIkIDIS9PaRSNDRAJoOxMRwdFe0ODklicVZ1dcWLF788evR1dnZSUVGdSDTawsLudddJgefNzcFxcVfLyx0tLdM3bZo1dy6cnNT4w4lWUW+UDMIvv/wyb9686dOn6+K0p6wshIWhpgZBQUhMxKhRANDUhOxsFBSgthYyGUxM4OqKgAD4+6PPcR6RCIWFyM9HaWnP7fVEYnF6aSlfIDhbUNDY3i5v9LO3D/fxCfP2nuXu3vMzyurrl/zwQ3Ft7bgRI5I3bvSys4OdHX77W/X9dKJdFKNkEFpaWmxtbXVxq6fERGzYgNZWLF+O48fxam0SCcTiQWzmJBajrAx5eSgo6DmBtK2zM62kJKmo6HRBgbClRd7oMWJEuLc3z9//XTe321VVy3788Xlz83Rn5/ORkQ4WFop3fvwxaKLoMEUxSgZn8uTJ9+/fv3HjRlDPwW7tio3FBx9ALMbWrfjHP6Da/To7O1Fairw8FBbiZT8UQIdEcrmsLEEgOFtY2JWnzlZW9SKRqLMzePz43wQGNra1bepa67lmDSZMUGVhRGfQvFEyOIGBgffv38/KytKVGN2zB3/+M2QyREdj927Vf76REby94e0NmQwVFRAIkJe
H5mYTFivE0zPE0/OQTHa9ooKfl5eQn/+ksTHE09PO3HxPcLDrvn1WbHbkpElM+dUDVa+JIrqDYpQMTkBAAIDY2NglS5b4+vpqsxSZDP/xH9i3DywW/vd/8dFH6v06BgPu7nB3x5IlqKjAiRNoaQHwvLn5T2lpje3t5X/4Q/aTJwFOTiZMJoPBcLexKX/xorCmhiM/l9epayBEpXR4EjXRPfX19YcPH7axsblz5w6Hwxk/fvzvfve7jIwMzV8aam9vF23bhn37YGqKEyfUnqE9yfP05cxZBwuL3OfPBdXVdSJRkKsrm8WSL1LiuroCyHryRPEuZ2fNVUg0i2KUDNTjx4/feeedmzdvWlhYrF69euTIkaWlpQcOHJg9e7aXl1d0dHRWVpZm8rS5uXn58uUbr12TursjJQWrV2vgS5X5+8v/y2Iypzo5yYCcrsQEAAS6uADIqqwEABcX2NhovESiIRSjZEDy8vJmz55dUFDg7++flZWVkJBQXV199erVqKgoJyenkpKSr776KigoyN3d/cMPP0xMTOzseTNOlaqqqpo1a1ZqampmS8vD8+ehrT2PPTy6tuBT7njKG11cAGTLG+fO1XR5RIMoRkn/MjMz586dW1FRMXfu3GvXrrm6ugJgsVizZs2KiYmprKy8evVqdHS0p6dnZWXld999t3z5ckdHx82bNycmJnaodGiltLR0zpw59+7dGz9+/JUrV7y1OPbNYGDFCvkMqsCeifnSNGdnIyYzt7q61d//tctPybBAMUr6cfbs2QULFtTW1q5cufLixYs2r5ycMpnMWbNm7d69u7i4+MGDB1988YWPj09tbe3Ro0e78pTP57e8nBX01m7evDlz5syHDx/OmDHjxo0b48ePH+IHDpWdHSIjYW7OfXn+3vOahrmx8QQHB7FEckfHd7kmQ0YxSt7k8OHDa9euFYlEH3/8cUJCQr9T7v39/Xft2lVQUCDP02nTptXX1x89enTdunUODg7h4eFxcXFNTU1vUcmlS5cWLFhQXV29aNEiHdrz2NUVH33kNmeOs5VVnUhUUl/f80XFyb4OrvgiKkUxSl5rz54927Zt6+zsjI6O/uabbwa1OZ48T2/evFlaWrp///53331XJBIlJSVt2bKlK08bGhoG+Gk//PBDaGhoU1PTpk2bLly4YNVzsyWts7TEypWB8+aha0DpJcXJvu5vdE2GhmKU9EEikXz00Uc7d+5ksVjffvvt7tdMa5fJZK1KtzJ+hYeHh3xS1KNHj+R52tHRIc/TUaNGya+uPnv27A2fEBMTs2XLFrFYHBUVFRsba6y2GyUNReDMmXjl8qjiZF/3N7omQyQjpLe2tjYejweAzWbz+fw3HHnz5k1TU9OwsLDY2NiGhoYBfn51dXVsbGxYWFhXILJYrHfffXf//v2VlZU9j5RKpX/6058AMBiMr7766u1/kvpdunQJANfVVbZrV9cfyRdfWLPZAJ49e6btAokaUYySXurr6+W3TR8xYsSVK1fefPA333zTdT8MNpu9dOnS77//vqamZoDfVVtbK89TExMT+YfIb5z3xRdfFBYWtre3R0READAxMfnxxx+H/MvUq7GxkcVisY2M2j7/vGeSLvDwAHDu3DltF0jUiLYmId2qqqpCQ0Pv3bvn7Ox88eLFSZMm9fsWoVB48eJFPp+fkpIiFosBsFisoKAgHo/H4/GcB7Z0p6GhITEx8dSpUykpKSKRSN5ob28vFAqtra1PnTq1cOHCofwuzZgwYUJeXl7WBx/IL4nK/Tk9fXdGxueff/7ll19qsTaiVhSjRCE/Pz8kJKS8vJzD4SQnJ7v33kOzX7W1tefPn+fz+ampqfK5okwmMyAgICwsLDIy0mtgEydFIlF6ejqfzz9z5oyFhYVIJLp8+fLUqVPf5vdo3LZt2w4fPnwgNPQTLrer8UxBwarjx4ODg1NTU7VYG1ErilECANnZ2WFhYUKhkMvlJiUljZLvefxWGhoa0tLS5L3Lrrm
ifn5+PB5v/fr1HA5nIB/S1NTk4ODQ3t5eV1dna2v71sVo0qFDh3bs2LFx0qSjPRanPm9qyj17doKzs+PPP/e9UTTRfzRST5CUlDR//nyhUBgeHn758uWhZCgAW1tbHo8XFxcnFArPnTu3adMma2trgUDw17/+1c/Pz9/ff+fOnRkZGW/+ECsrqylTpshkslu3bg2lGE3icrnmxsajm5t7No62slokFDpeuYLCQm0VRtSNYtTQxcXFrV69urW1dcuWLadOnTJ//f2FBsvMzEw+P/T58+fnzp379a9/bW9vLxAI9uzZM3v27K6JUK87H+JyudCr2UKTJkxotrT877IyvLy8qyC/VEqzR4cvilGDFhMTs3XrVrFYHB0dffjwYSPV7hv/kqmpaXh4+KFDhyorK5OTkz/44AN7e/tHjx7Jd4caO3ZsTEzMq+8KDAyEXs1dZxoZMSZMgEyG3rNHFTGqP/8ekMGiGDVQMpnsj3/84+9//3smk/nNN9/s3r2bof4rdyYmJkuWLPnuu++ePXsm3x3KxcWlvLy8z+X2etcbBYDAQADKMerqClBvdDijISZD1NHRsWXLluPHj7PZ7Li4uHXr1mmrEplMlpmZOWbMmFenRslkMgcHh5qamsePHw922oDW8PlYtw7e3oiI6G4Ui/Ff/wUWCy9e0B74wxL1Rg1Oc3NzeHj48ePHLS0tExMTtZihABgMxsyZM/ucXspgMPTuvF7RG62sRM/eibEx7O0hFuPuXW3VRdSKYtSwPHv2bM6cOampqY6OjlevXg0ODtZ2RW8ij1F9Oq8fMwZOTmhthdKuK/Lzej36IWQwKEYNSGlp6ezZs+/cuTN+/PirV69OmTJF2xX1Qy8vj86YAQC9t3qiwfrhjWLUUCjteezp6antivoXGBjIYDBu3bqlvluSqF6fo0w0WD+sUYwahEuXLi1cuFDn9jzuz8iRIz09PVtbW/Py8rRdy4DJV4IqxaiDA9hslJaiulorRRG1ohgd/kpKSpYuXdrY2Lh582ad2/O4P/p3eXTGDDCZqKqCRNLdyGDAyQkAbt7UVl1EfShGh7+MjAxHR8fw8PAjR47o5p7HbyC/PKpPg/U2NvDxQWcnnj/v1U7n9cMXxejwV11dXV5e7ubmpoEJ9iqnf71RvHES/qlTtLh++KEYHf70crz7pSlTprDZbIFA0NjYqO1aBqzPy6Py3mhJCeLjkZAAPRo0I/2hGB3+pk+fbmRkdP/+fZHSlhlAe3v7wO8rpxVsNnvy5MlSqVSPtnqCfDWB0pwna2tERuI3vwGDgQcPcOIEaAHhcEExOvyZm5v7+/uLxeI7d+70bD948KC1tfWePXu0VdgA6dnl0ZYWCARwdcW4ccpB6eUFS0vF4+Ji3Lih+eqIOlCMGoQ+V1W6ubl1dHTo/sm+nl0evXoVHR341a8QGtrPPs1XrqCtTVNlETVSy8ZoRNdwudx//OMfSkkk7+XdvHlTIpGwWCwtldY/eZ2ZmZnaLmQAJJLuhfPyDM3MRHKyoiUyEj1vptLejgcPMH26Zkskqke9UYPQ5yiTvb29h4dHU1NTfn6+luoaEE9PTzs7u6qqqkqlq4066OlTtLd3P83MRF4edu1S/Dl2DEr/GJSVabhAog4UowbBz8/P2tq6rKysuvcqGr247MhgMGbMmAGdrxMAamt7PU1Oxvbt3U+jorp7pn0eT/QTxahBYDKZ8vtr5uTk9GzXl8uO+lJnr2lMxcVwc+v16siRcHNDcXF3i1isocKIOlGMGoo+z+v1ojcK/alTeVdmBwflA5RaaBfnYYFi1FD02aGbOnWqsbFxbm5un7fx0B1cLpfBYOTk5Oj6Vk+Ojv0coLQ1Sb/HE31AMWoogoKCAGRnZ/e8bYypqenEiRMlEsnt27e1V1r/7OzsPDw8WlpadHw0DHZ2sLNTPPbygtKSgbo6VFT0Gqz38dFcbURtKEYNhbOzs4uLS0NDQ3HPa3P6s1RUX+rEzJndj0NCsGt
X99MDBxAS0v3U3h76sOsr6RfFqAHR68ujenNfpqlTFbuQAAgKQmRk94SnyEjFcnsADAaWLetnfj7RExSjBqTPJNKXQXC96Y0yGFi3DiNGKJ56eXXHqJdXd26GhmLMGG3VSFSLYtSA9JlEvr6+tra25eXlT58+1VJdAxIQEGBiYpKXl9fc3KztWvpjZYXt23tdA+3J3Bzr1ilu2USGBYpRAzJjxgwjI6O7d+/23OqJwWBMnz4dr0wp1TWmpqaTJk2SSCT6sdWThQUiIrB5MyZNgq0tjIxgbo4xY7B4MaKiwOFouz6iSrSm3oBYWFhwOJzc3Nx79+7JB+7luFxuenp6dnb2ihUrtFhev7hc7s2bN7Ozs+fOnavtWgbGwwMeHtougqgd9UYNS5/n9bp2ebS1tbXPdl2rkxA5ilHD0ucoU9eUUqlUqp2yenj69GlQUNDf//73V1/Sl0kFxNBQjBqWPnujDg4OY8aMaWpqKtT2bYIePHgQGBiYm5sbFxfX0dGh9Kq3t7eVlVVFRYWOj4YRQ0Mxalj8/f0tLS1LS0tramp6tuvC+XJmZua8efOePHkyd+7cn3/+2cTEROmAr776qqmp6ZNPPnF4da06IdpDMWpYWCzW1KlTZTKZ0qmx1mdlnj17dsGCBbW1tStXrrx48aKtrW3PVyUSyY4dO3bu3MlisSZOnGhkREOjRIdQjBqcPq8wavey45EjR9auXSsSibZt28bn881673vU3t6+YcOGQ4cOmZubnzlz5oMPPtBKkYS8DsWowemz4zl16lQjI6Pc3NzXjZKrz549e95///3Ozs7o6Ojvv/9eqadZX1+/aNGikydPjhgxIjU1NSwsTMPlEdI/GTEw5eXlAEaOHCmVSnu2T5kyBUBGRobGKuns7NyxYwcAFov17bffvnrAkydPJk2aBMDZ2fn+/fsaK4yQQaHeqMFxc3Nzdnauq6srKSnp2a7hy6Pt7e3vvffet99+y2azjx8//uGHHyodIMx9+t4AAASESURBVBAIZs6cef/+fT8/v8zMzIkTJ2qmMEIGiy7VG6LAwMAzZ85kZWV59tiobe3ata6urgsXLtRAAQ0NDStWrLhy5cqIESPOnj07e/ZspQOysrLCwsJqamqCgoKSkpLsujbxJET3UG/UEPXZ8Vy0aNHnn38+efJkdX97VVXV/Pnzr1y54uTk9NNPP72aoYmJifPnz6+pqVmxYsXly5cpQ4mOoxg1RFrcuzM/Pz8oKOju3bscDiczM/PV1D5y5Mjq1atFItH7779/8uRJM7pbEdF5DFmPW0oQA9HU1DRixAgjI6MXL16w2WyNfW92dnZYWJhQKORyuUlJSaNGjVI6YM+ePX/+859lMll0dPTu3bs1VhghQ0G9UUNkZWXl6+vb3t5+7949jX1pWlraokWLhEJheHj45cuXlTJUIpF8/PHHO3fuZDKZBw8epAwleoRi1EBpeFw+Li5u2bJlTU1NW7ZsOXXqlLm5ec9X29vbIyIiDh48yGaz4+Pj5bOgCNEXFKMGSn55NCYm5ty5c21tbWr9rpiYmK1bt4rF4ujo6MOHDytNsG9qagoPDz9x4oStrW1qaiqPx1NrMYSonrYnrhLtePr06a6XN600NzcPCwuLjY1tbGxU7bdIpdLPPvsMAIPB2Ldv36sHVFVVyaf9Ozk53blzR7XfTohmUIwarkePHv3tb3+bNm1a17+pZmZmK1euPHr0aH19vUq+4ssvvwTAZrP5fP6rrz58+FA+cZXD4Tx+/Fgl30iI5tFIPUF5efnp06f5fP6NGzfkOzezWKygoCAej7d+/XpHR8e3/uTGxsawsLD//M//DA4OVnopJydn2bJlQqEwMDAwKSnJ3t5+SL+BEO2hGCXdampqLly4wOfzU1JSxGIxeuTp2rVrXVxcVPVF6enpq1evbmpqCg4OTkhIsLKyUtUnE6J5FKOkD3V1dUlJSXw+PzU1tWsXej8/Px6PFxER4e3tPZQPP3r06Pb
t28Vi8ebNm//5z38aGxuromRCtIZilLxJQ0NDWlpaYmLi6dOnu24QL8/T9evXcwZ/o+CYmJhPP/1UKpVGRUXt37+fwWCoumRCNI1ilAyISCRKT0/n8/lnz55tbGyUN44bNy4sLIzH47377rv9BqJMJouOjt67dy+Dwdi7d698BJ+QYYBilAxOW1tbWlpaUlLSmTNnqqur5Y1jx45dvnz5G/K0o6Nj69at8fHxJiYmsbGxGzZs0GzVhKgRxSh5SxKJ5MaNG3w+n8/nV1VVyRvd3NxCQ0PDwsJCQ0O7ptk3NzfzeLzk5GRLS8uEhITFixdrr2pCVI9ilAyVVCq9fv16UlLSyZMnu7aCtrOzW7p0KY/HCwgIWLFixe3btx0dHS9cuBAQEKDdaglROYpRojIymSwrK+vUqVMJCQmlpaXyRgsLi5aWFi8vr5SUFA8PD+1WSIg6UIwStcjLy+Pz+UlJSePGjZNKpQcPHqQJ9mS4ohgl6tXW1mZqaqrtKghRI4pRQggZEtoojxBChoRilBBChoRilBBChoRilBBChuT/A+ufIEhAbfLNAAAAAElFTkSuQmCC\n", + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "O\n", + "OH\n", + "O\n", + "O\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "((3, 4, 5), (8, 9, 7))\n" + ] + } + ], + "source": [ + "print(m.GetSubstructMatches(substructure))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((3, 4, 5), (8, 9, 7))" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "((3, 4, 5), (8, 9, 7))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO3daVRUV7o38P+pYh5kRiaVeXaKERAwEo0GVCQIhTgvkk5M0kZz0/fG9F3rJua+fd9O+vqm2yR21I5T1BgtQBQEFExEERWNGoWSSSQIyDxTTFV13g+nQEBUVKgqqp7fl8jmVNWDC/85Z599ns2wLAtCCCHPi6fsAgghZHyjGCWEkBdCMUoIIS+EYpQQQl4IxSghhLwQilFCCHkhFKOEEPJCKEYJIeSFUIwShQgKAsOAYbBhg3ykpARBQcP8mZDxhmKUjL2gIMTEgGXBPTLXn6SEqAWKUTLGSkoAYPNm+Ze7dmH3biWWQ8iooxglY6y4GL6+g0YCA+XZmpMjv9J3c1NKaYSMCopRojyBgfIr/eJiZZdCyPOjGCVjzM0NeXmDRnJy4OqqpGoIGX0Uo2TMtLYiLg6GhgCwfbt8cMMGvPOOEosiZNRRjJKxUV2NkBDs34+4OFy8iGPH5NOgAHbtUnZxhIwmhto2k9F37x5efx3FxXB2xunTdAlP1BudjZLR9uuvCAhAcTFefhmXLlGGErVHMUpG1c8/Y/581NZiwQKcPQtra2UXRMiYoxgloychAUuWoLUVa9YgLQ0TJii7IEIUgWJUvdTVKe2jv/4aMTHo6sKmTThwANraSquEEMWiGB23Hm32AWDOHDg7Y/NmZGdDYTcPWRZbt2LzZrAsvvgC27eDR79XRIPQr/v4NGyzj8pKtLbi3j18/TXmzoWTEz76CNnZkMnGsBKJBG+/jc8/h5YWvv8eW7aM4WcRopJowdM4VFKC9etx8eLDEYaR56lMhpwcCIVISEBlpfy7lpYIC4NAgNDQ0b3W7uno0BEIkJYGIyMkJGDRolF8c0LGCzobHYee0OyDx0NwMLZvR0UF8vLw2Wdwd0d9PQ4exLJlsLHBunVITkZ394tX0djY+OqiRdfEYpib48wZylCisShG1ZePD7ZuRWEhfv0V//mf8PREY6M8TydOfPDRR4mJiWKx+Pneu6ysLDAwMCcnZ2VFReeVK5gzZ3RrJ2QcoYv6cegJF/VPVlqK5GQIhcjJ+SE4eP2FC/r6+gsWLBAIBBERESYmJiP8/Ly8vLCwsIqKCl9f3/T0dHt7++f6MQhRExSj4xN3i4nrhczdX5o8GQEBmDcPWlpPf/ndu/uSk3f99FNubi73C6Crq7tw4cLly5cvW7bMwsLiCS/NysqKiIhoaWkJCQlJSkoaefgSorZYMk4FBrIAC7DvvMPeu8cyDAuw5ubs2rXsyZNsV9dI3uP+/fu7du1aunSpVl/48vn8oKCgf/zjH5WVlY8ef/z4cX19fQCRkZGdnZ2j/SMRMi7R2ahaqKrCP/+JhAQUFMhHTEywZAmiohAaCgODp75BQ0PDqVOnhELhmTNnenp6APB4vDlz5oSHh0dFRbm6ugLYsWPHpk2bZDLZxo0bt2/fzqPFoYQAoIt6dTNg9lM+W6qvjwULIBAgIgIjuABvbGxMTk5OSEjIyMjo6uoCYGxsXF9f//e///2TTz5hGObTTz/dunXrGP8YhIwnFKNq6vffkZQ0KE91dTF3LpYuRWwsJk586hu0tbWdOnUqMTHRyMhIR0dn165dfD7/u+++e/vtt8e8eELGFYpRdVdRgdRUJCcjPR0SCYDUV1/9vz09AoFAIBDY2dk9+dXd3d1r1qyJj483MDA4evTo0qVLFVI0IeMJxajGqKlBUhISExeWlGSWlgLg8XhBQUHLly9fvnz55MmTh31RbGzs0aNHLSwskpOT59DiUEKGQzGqcZqampKTk1NSUlJTUzs6OrhBb29vgUAQGxvr6enZf2RNTc177733888/X7hwYerUqUqqlxBVRzGqucRi8dmzZ4VCYVJSUltbGzfI5Wl4ePisWbM6OztNTExkMllLS4shtzMdIeQRFKMEnZ2d6enpiYmJycn
JLS0t3KCHh0dsbGxycvL169fPnz8/d+5c5RZJiMqipX8E+vr6kZGRBw8ebGhouHDhwqZNm2xsbAoLC3Nzc/38/ADk5uYqu0ZCVBedjZJhSKXSrKwsXV3d4uLiuLg4gUBw7NgxZRdFiIoawfPXRPPw+fz58+cDMDMzA3DlyhVlV0SI6qKzUfIkLMuam5s3NzdXVVXZ2toquxxCVBHNjZInYRhm1qxZAK5evarsWghRURSj5Cn8/f1B1/WKcuzYsbt37yq7CvJsKEbJU9DNeoX56quvYmNjQ0ND29vblV0LeQYUo+QpuLPR3Nxc2ZjuMKrZWJbdunXrn/70JwBvv/22kZGRsisiz4BuMZGnmzJlSnl5uUgk8vLyUnYtakgikbz77rt79uzR0dHZt2/fqlWrlF0ReTZ0NkqejqZHx05HR8eyZcv27NljaGiYlJREGToeUYySp6Pp0THS2Ni4aNGitLQ0CwuLjIyMsLAwZVdEngctvydPR2ejY6GsrCw0NLSwsNDJySk9Pd3d3V3ZFZHnRHOj5OnEYrGJiQnDMM3NzQYj2NmJPFVeXl5oaGhlZeXUqVPT0tJok+pxjS7qydMZGBj4+vr29vbeuHFD2bWog3PnzgUHB1dWVr766qsXLlygDB3vKEbJiHDTo3Rd/+KOHz8eFhbW0tISGRmZmppqMoJ9BomKoxglI9K/elTZhTyjoCAwDBgGGzYouxQA2LFjR3R0dFdX18aNG+Pj4/X09JRdERkFFKNkRMblXaagIMTEgGXle6MqNUm5BfYbN25kWfazzz775ptveDz616cm6BYTGRGZTGZmZtba2lpdXT1xBPszK19JCdavx8WLD0cYBkr6bZdKpe+///7u3btpk2q1RP8/JCPC4/HGWaun4mL4+g4aCQxESQk6OxVcSHd394oVK3bv3m1gYHDixAnKUPVDMUpGalxe1z8qLg4uLti8GdnZCjg5bWpqeu211xISEszNzc+cObNkyZKx/kSieBSjZKTGWYy6uSEvb9BITg6cnXHlCkpL8fXXmDsXrq74j//A5ctjlKdVVVUhISHZ2dlTpky5ePFiUFDQWHwKUTqKUTJS46bVE8vihx/g7AwA27fLBzdswDvvgMdDSQkuXMCmTbCzQ2kptm3DnDmYOBHr1iE5Gb29o1WFSCQKCAi4deuWj4/PhQsXPD09R+udicphCRkxBwcHAAUFBcou5PF6etjVq1mA3bSJZVk2MJAFWIB9552hR0ql7IUL7JYtrKur/BiAtbBg165lT55ku7tfpIpLly5ZWloCCAgIqK+vf5G3IqqP7tSTEWtujo6ISDh//sDKletmz4a5OVxcMHMmDA2VXVmfjg4IBEhLg5ERhEKEho7oVSyLa9eQkICEBJSUyAfNzBpXrMgOC1u0aNGzru48efJkbGxsZ2dnRETEkSNH9PX1n/HHIOMMXdSTEZDJcPYsvvnGT1cXQO7du2htRVkZzp7F9u24dElZC4kGaWjAwoVIS8PEiTh3bqQZCoBhMHs2vvgCxcXIy8MXXyAoCE1NCfn5ERER5ubm4eHhP/zwQ2tr60jebP/+/VFRUZ2dnXFxcfHx8ZShmoBilDyNTIajR5GdDZnM394ewJWKioff7e3FmTM4dUrJSVpWhsBAXLoEJyecP49Zs57zfXx8sGULsrNRWGgqEMyaNauzszMlJWX9+vU2NjZvvPHGwYMHm5ubH/fqL7/8Mi4uTiKRbNmyZe/evVpa1EFNI9BFPXmazMz+RewdPT2mX3zBMEzLJ5/oa2sPOiwsDH5+SigPQF4eQkNRWYmpU5GeDju7UXzv8vLy48ePp6SknDt3TiKRAODz+QEBAQKBYMWKFTY2NtxhUqn0gw8++O677/h8/rfffvvuu++OYg1ExVGMkidqbMSOHRhwa376d9/dqqnJeeutOZMmDTpSVxcffgjFPyT+yy+IjERLC159FUlJmDBhjD6nvr4+NTVVKBSePn26t7cXAI/HmzNnjkA
gWLZs2SeffHLs2DFdXd2DBw8KBIIxqoGoJopR8kQDTkU57yQn/+vXX/8eGvphQMDQgxV/Qnr8OFatQlcXli/H4cOKCfH6+voTJ04kJiZmZmb29PQAYBiGZVkzM7NPP/30+vXr4eHhlKQaheZGyRPduzdkwM/eHkBuZeUwB9++DUUuKf32W0RHo6sLH3wAoVBhJ8KWlpZvvfXWqVOnrl279tVXX61du9bX1/fDDz88f/48n88/ePBgenq6YiohKoKmwMkTPXI7xdbICEBKUdG648cF3t6hrq7afL78exUV2LYNbm7w8YGLC/rHRx3L4vPP8fnnYBh89hm2bh2rD3q8rKyskJCQ2bNnc88jcO2axGIxxtGDXmSU0EU9eaL//V+Ixf1fXa2sXPLjj3UdHf0j1oaGEZ6eUV5e852ctAfmpp4enJ3h7g4vL+jojGJFEonk440b/3L+vEFJCf71L6xfP4pvPnIdHR2mpqYMw7S0tPSvaurp6ZkwYUJvb29TU9OEMZulJaqGYpQ80a5dqK7m/ni2tHT50aOt3d0LXVz+Mn9+anHx0by8gvp67rumenoLXVyWurtHeXkZDsxNLS04O8PbG56e0NV9wXLEYvGKFStSUlJmT5mSs3On1sgXh46BadOm3b59+9KlSwEDpokDAgKuXLly9uzZ+fPnK7E2okj8rcq4ICLjQ20tcnIglQI4dOtWjFDY2du7bvr0o9HRk01MQhwdN/r5Cby9J5uYdEokxQ0Norq6pIKCv1++fLmiQiKTOZmZ6WppQSZDQwMKCnD5Mn7/HV1dMDN70vmpRIIHD1BVhbY2aGsPPLKpqWnJkiWZmZnm5uY/JCRMefVVBfwdPMH169evX78+bdo0rtsA5/bt21euXPH09Jw7d64SayOKRHOj5DEqKvDjj+jpAbD98uWPTp+Wsewmf/9/hIYyDNN/lI+1tY+19Zbg4HtNTScLC4UiUU55eUpRUUpRkZ6W1mvOzuEeHpGenlaGhpBIUFqK0lKcOQMnJ3h5wdNz0IOkTU3IyoJINKg/iL09goLg5VVVVRUaGnr79m1HR8f09HQPDw/F/VU8hp+f3/fffz9kJpTbtGr87bZCXgBd1JPhFBYiPh4SCcuyn2Rm/u3iRQb426JF/x4Y+NSXlre0HL9zJ6Wo6FxZmUQmA8Dn8QIcHATe3it8fW2MjB4eyjBwcICHB7y9UVHxhAZLIhOT0K++un//vq+vb1paGtchRelu3bo1ffp0V1fX4uLi/sGSkhI3NzdbW9uqqiol1kYUiWKUPOLmTSQnQyaTyGQbkpP33rihw+cfiIyM9fHBgPPQQRwdIZWiomLgI6H1YnFqcbEwP//03bu9UikAHsPMmTRJ4O0d7e1tP+I7MJcrKpb++GODWBwSEpKUlKQ6W2lKpVJTU9OOjo7a2lqunxMAlmWtra3r6+vLy8snDXlCgagpilEy2MWLyMwE0NHTIxAK04qLjXR04mNiXnd1Hf54PT289pr8GXaxGMXFEIlQUjJwAWljZ2dKUZEwPz+jtLRbIuEGva2sBD4+q6ZOdbeweEI5JwoKViYkdPb2vuHp+eOuXfqvvDJKP+foCAkJycrKSk1NDQsL6x9cvHhxWlqaUCiMjo5WYm1EYegWE+nDskhNRXY2gAaxOOzw4V/u3ZtoZJS5bt3cKVMAwNYWs2ZBSws8HgwN4eBQ7+6+JjGxx8pqypQpenp60NaGjQ2mTsXs2bCwgEyGlhawrL629nQbm1VTp340Z06Ag4MWn1/a1FTZ1pZVVvZtbq4wP79OLLY2NLR6pOHevhs31h4/3iOV/tHPb/8bb+jU1iIgAKq0oaZIJMrJyXF1dQ0JCekfLC4uzsrKcnR0XLhwofJKI4pDZ6MEACCVIjERIhGAe01NoYcOFTU0OJuZpa9Z48adLXp4IDoag1sWff/999wGbcN26wCAri6UlqKwEAUF3N0qTmdvb2ZpqVAkOllQ0NLdzQ06m5ktdXcX+PgETZr
EMMyX2dmfZGYC2BIc/MVrr8lfuWoV3NzG8O/hGSUkJERHR4eFhaWmpvYPpqWlLV68+JVXXsnKylJibURhKEYJ0NWFI0dQXg7gdk1N2OHDla2tL9vZnVq92po7Q5wxA+Hhj54GVldXJyYmJiQkZGVlSaVSAHw+/5VXXlm+fHlkZKS9vf3DQ7nb9Pn5KCxEX24C6JJIMktLE0Sik4WFjX17djqbmZnr61+rqtLi8XYuXfrWSy89fJ+QEMybNzZ/C8/j/v37kydPNjc3r6+v71/A0NjYaGlpqa+v39LSQr3yNAHFqMZrb8ehQ6ipAfDLvXuRR4+2dHXNd3I6Hhs7gVstHxSE/pPBx2hsbExJSREKhRkZGd19Kent7S0QCFatWuXu7v7wUC5Pi4pQUIABT0NJZbJLFRXC/HyhSPSgrW2SiUl1e/uRqKgob+9Bn+Tv/wz9mBXC3t6+qqqquLjYdcD0sZubW0lJyc2bN6dPn67E2ohiUIxqtvp6HDqElhYAx+/cWZWQ0CWRrJ42bW9EhA6fD4bB4sV4+eWRv59YLD579qxQKDx+/Hh7ezs3yOVpTEyM98BMZFncv48bN3Dz5sB3kLHssfz8lfHxlgYGdR9/PPQD5s6Fij0dFBkZmZSUdPjw4VWrVvUPvrluXceVK1s+/fSl1auVWBtRDBWarSeKVlmJffu4DP02Nzf62LEuiWSTv/8PkZE6fD74fERFPVOGAjAwMOC23KitrT158uTatWtNTExEItHnn3/u4+Pj4uKyefPm7OxslmXBMJg8GcuWDenMxGOYWF9fa0PDerG47NE+8wMnXlUDt95+yCL8vS+/fLSo6KVfflFSUUShKEY1VVERDhyAWMyy7NZz5z5ITWVZ9rOQkO1hYTyGgZ4e1q6Fj89zv72+vj6XpzU1NRkZGZs2bbK2ti4tLf3666/nzp3r5OQkz1MAw+08PHvYdnw6OnBxee6SxsjKoKAH/v5/KywcNMo9HkrPMmkGuqjXSL/9hpMnIZNJZbL3Tp3616+/avF4u8LD35w5EwCMjLB69aif9/X29p47dy4hISEpKammpoYbdHBwWBMV9VduddQA/ycr69Nffvlozpz/9/rrD0dV74oeANraYGoKbW20tDzsvdLdDRMT9PaiqWnsGvITFUG3EdUay6KkBHfuoLYWYjGMjWFvD6mUO0sS9/bGCIWniooMdXSEAkEYt5DIzAxr18LMbNRr0dbWXrhw4cKFC//5z3/m5OSkpKQkJCSUlJTcrapCTAwyMgYePExzaFtbqNjaezljY3h6QiTCb789bP6vq4tp03D1Kq5fx4AlpUQtUYyqr5oaJCX1t7kDgKYmblUTgMbOzmVHjlwsLzfX109ZtUq+sZKdHVavhoHBmNbF4/GCg4ODg4P/+te/Xr16VVtbGzNnorcX5871H+Pv4MAAvz540CuVavP58hNklV085O8PkQi5uYP2UPH3x9WruHKFYlTt0dyomiorw549gzJ0gN+bm4P27LlYXu5oavpwczpnZ6xfP9YZOhDDMH5+fjO5mYR587B+Pfp6jpjq6blZWHT29ubV1gKAhQUeecZJhXAzoUOa3nORStOjGoBiVB21tuLo0cd1S8qvrQ3eu7egvt7X2jr7zTc9uJ4a06dj9erRbVP/zBwd8dZbiI3lvvJ3cABwhbuur6pS6C5Pz2rYxOSy9fJlJdRDFItiVB1lZqKra9jvXK6omLd/f0Vra4ijY/abb8rbLPn7IyJCVZ5Vd3ODtjaGTI/29qKvzb4qmjoVhoYoLkZDw8NBNzdYWKCqCsNu/0fUiGr8yyGjqKMD+fnDfudEQcH8AwcaxOJIL6+0NWtMuAWboaEIDX1sBzzF4/FgawvA394ewJWKCvl4/x9UkJYWZs4Ey+LatYeDDCNfdUvX9eqOYlTtlJUNe/2778aN6GPHOnt7/+jnFx8To9d/u2a4ZZtKZm8PYLqNjZ6W1p26umbuzFrFz+meMD1KG4WqO4pRtdP
U9OhYvVj872fOSGWy/1mw4NvFi3kDzz2HO17J7O0B6PD5M2xsWOBXro28isfoE6ZH6WxU3VGMaoSjeXntPT3hHh7/OS72Weu7Xz9oerS2dmCrPZXTf0Np4PMsXLZeu8ZtC0jUFcWo2hlujw07Y+MeqbSrr/M8gIf/2lVmT46HTEy45U2DbtazLB48UG5dTzJlCmxt0dCAe/ceDlpZwckJbW24c0d5lZExRzGqdhwdH71fFODgACC3slLWn57cMaamY/HA0iiwt8ejd5lU/Lp+9mzgkZnQYedMiXqhGFU7xsZ4ZPNhW2NjhwkTmru6igeuyAHyzM137NhRVlamuPJGyMEBgLOZmaWBQXV7e3lLC6DaN+vxmOnRf/s3nD2LmBilVEQUg2JUHS1cyC29HGiYp9QtLb88fXrjxo2nT59WZHUjYm8PgGGYQa2eVPxsNDgYc+cO3ePEzw/z58PYWEk1EUWgGFVH5uaIigKfP3Bs0DwjAAMDxMb6+fsDyFXBW8n29ty0w6Dr+tZWtLUpt64nmTcP58/j/feVXQdRNIpRNeXhgXXrBs57Dp1nnDcPFhb+/v54pOWwStDVhYUFHj2JVvET0n5BQWAYMAw2bFB2KWTMUYyqr8mT8cc/IjISpqYAXraz0+Lxfqup6eSetW9sBDBjxgxdXd07d+60trYqt9hhcHeZHBwY4FpVlYR7pmBcxGhQEGJiwLLy5RCUpOqOYlSt8fmYNo3bStNQR8fLyqpXKv2Na5lcWQlAR0dnxowZMpns2sCnGFWEvT0Ac319F3NzcW9vPtfqSfVjtKQEADZvln+5axd271ZiOUQBKEY1QN9Gx4Ou6x884NaED7uVkEroW4Q/aFa3shIqvl9DcTF8fQeNBAbKs5WoKYpRDWBpyW0bx80zyvNIKuU2VfZX2btMEydyfZoHTY/29Kh0qyeikShGNQDDyHsm9S3Cl49XVqIvRlXxbHQ8tnoC4OaGvLxBIzk5GLCFPVE/FKOawcEBgI+VlZGOzt3GxtqODkAeoy4uLhYWFg8ePLh//75yaxyGvT2AGTY2ulpaorq61u5uQOWnR7nE3L5d/uWGDXjnHSWWQxSAYlQz2NsD4PN4s+zsAFzjeiZVVIBb4j57NlTzhNTeHoCultb0iRNlLCtv9VRUpNIP1wO4eBHHjskXPAHYtUvZBZGxRTGqGQbfZZJf1zc0cE3yVXd6dNhWT21t2L0b+/ahb5dmVXTxonzBE2WoBqAY1QxGRlwnJ7/hmn2o7vTozZvcf4c+ggWgvBzff4+CAqXURchAFKMao281O4DcykqWWzbUF6MMw1y7dk0ysJOe0l27hqws7o/cSfTlITeXJBLEx4O70idEeShGNYa9PQCHCRPsjI0bOztLGhsBeYyam5s7OzuLxeL8x2zipATt7cjI6P/K1dzcwsDgQVtbxZCnraRSnDyp6itJibqjGNUYg+cZ5RfIfed3qjM92sO1uL92bWCve4ZhXrazw5AOVZyaGpSWKq4+Qh5BMaoxbG25LZQHrR4Vi9HcDJWZHq2qqpo9e/a+fftQVDTkW0NXjw70yMGEKBLFqMbQ1oaVFR7TUl4VHgnNy8vz8/O7devWt99+K+WeoB9gqrU1htxl6kfPNRGlohjVJA4OAF62s+PzeDerq7u5G0oVFQBmzpypq6srEomU1erp8uXLISEhlZWV8+bNO3v2LH/wHtE17e3/c+GCvrb2n4ODh3mxSt0ZI5qHYlST2NsDMNbV9bS07Bnc6klXV3fatGkymez69euKr+vEiRPz589vaGiIiIhIS0szNTUd2C7+XlPT3H37blZX2xoZuZibD/N66i1PlIpiVJM8rtWTTAblTY/u378/Ojq6s7PzzTffjI+P19fXR2dn/wnm7Zqa4L17ixsaXrazu/SHP7gOG6OTJim0YkIGoxjVJFZW0NXFkJv1Egn3OJBSpke//PLLuLg4iUSyZcuWPXv2aGlpoa0NBw5ALAbw8717wXv3VrW
1LXB2/nn9emtDw2HeQktraGM6QhRLS9kFEAXiWj2VlQ1t9VRRAVtb7mz08uXLiqlFKpVu3Lhx586dfD5/x44dG7gW8XV1OHQIra0AEkSiNYmJXRLJmmnT9kZEaA/eWuqhwEAMG6+EKAqdjWoYBwcAU62tDXV0ShoaGsRiADh9GidOuJmZca2eKse+hVJ3d/fKlSt37typq6v7008/yTO0shL793MZ+vWVKzFCYZdEssnf/8Abbzw2Q11cuN7+hCgRxaiGYVkAfB7vJVtbFrjKPUkpleLmTWbnziP/9V937tyxs7Mb0xKam5sXLVokFArNzMwyMjKio6MBoLAQ+/dDLGZZduu5c5vT0liW/XLhwu1hYTzeY35LZ8xAbCwe911CFIUu6jVJfj5ycrg/+tvbX/j99ysVFaH9HYVlsoXNzaiuhqfn2JXw4MGDxYsX37x509bWNi0tbfr06QBw8yaSkyGTSWSy91JSvr9+XYfP3/fGG6umTgUAPT2Ym6Omhtv1BLq6cHGBvz8mTx67OgkZOYpRjdHcjBMn+h8/H7pxcb+sLDg6wtFxLEooKCh4/fXXy8vLvby80tPTJ3M5ePEiMjMBdPT0xAiFqcXFhjo68TEx8ny3tMSaNTAxgUyG9nbweDA0lPfxJEQ10AWRxjh/HtzWygAGtJ5jH+3rkZk5Fp+fm5v7yiuvlJeX+/v7nz9/fvLkyWBZnDrFfVxjZ+eigwdTi4stDAwy162TZ6i9PeLiuBZ/4PEwYQKMjChDiaqhGNUMUikGd2+abGJia2zcIBaXNjUNPbiyEg0No/v5GRkZr732Wl1dXXh4+M8//2xpaQmpFAkJuHYNQFlzc+CePTn37zuZmeW89VYA10XF3R3r18PAYHQrIWTUUYxqhpqagQ2TOLPt7ACcvXdvmOOzstDWNlof/sMPPyxZsqStrW39+vWJiYkGBgbo6sLBg1yy59XWzt27t7C+furEiRfi4twtLABg+nSsWAFt7dGqgYq+qHIAAAUcSURBVJCxw9+6dauyayBjr7oat28PGasXi6va2vbeuJFRWtrW3T3ZxMRYV1f+vdpaXL6MwkK0tWHCBOjrP/cnb9++/b333pNKpVu2bPnmm2/4fD7a23HwIPcQ6rmyskUHD9Z1dLzq5HRm7VorbgWovz+WLKFb8GS8YIaZGiPqp6wMBw48OvyX8+f/OyurVyoFwGOYwEmTory9l3t5TeamI/tZWcHbG76+sLQc+WeyLPvxxx9v27aNYZht27Z99NFHANDUhIMH0dQEIKmgYGV8fJdEstzL63BUlJ6WFgAsWoQ5c577ByVE8ShGNUNHB7ZtG/Y7TZ2dyUVFKUVFqcXFHX0X/t5WVgIfn1hfX88huWllBXd3uLuPZLERy7JxcXE//fTTgQMHVqxYAQBVVTh8mHvQc0du7qa0NBnLfuDv/4/QUB7DgM9HZCR8fF7sRyVE0ShGNcaePRi253Gfzt7ezNJSoUh0oqBAviM84G1lFe7hsdTdPXhIbpqawsMD3t6YNGn4W+diMUpKequqbhUWzpoxQ957/9gxbor2y+zsTzIzGeDTkJCtISEAoKODmBi4uIzCT0qIYlGMaoyCAhw9OpIDuySSjLt3U4qKjhcU1HV0cINOZmbh7u4CH5+gSZOYgbk5YQLc3ODuDldX+WxmdzfOnsX16/LV8oNJZbL3T53a/euvfB5v59Klf3jpJQAwMsLq1bCxedGfkRBloBjVJPHxeJZN66Qy2aWKCmF+/rH8/Or2dm5wsonJG56e4R4eIY6OWgPvAhkYwNUVU6YgOxuPLqICAHRLJKsTExNEIgNt7WMCwRJ3dwAwM8OaNRi2Ax4h4wHFqCaRSCAUPmnnookTMX8+SkuRn4++3AQgY9mc+/eF+fkJd+5U9rXHtzQwCHNzE3h7h7q6PrZ1SD+WberqWnbkSHZ5ubm+/smVK4O4WQI7O6xaRS2ayLhGMaphWBa5uTh/nrvP85C2Nvz8EBIC7nY
5y+L+fYhEEImGLCDNr60VikRHbt8u6luib66vv8TdXeDtvcjFRVdr+MeLq9rawg4dulVTM8XUNH31ak8rKwBwckJsLHR0Rv/HJESBKEY1klSK0lJUVqKjA3p6sLGBqyv6F40OxLKorkZhIW7fBre1fR8uT4X5+aK6Om7EQFt7vpOTwMdnuZeX0YBwFNXVhR46dL+lxcfaOn3NGocJEwBg2jRERNDiUKIGKEbJiNXVIT8f+flDduIsbWpKLiwUikQ55eXcL5O+tvYCJyeBj0+Eh0dBff3SH3+sF4vnOTomxcaa6ukBAMPgz3+mh5SIeqAYJc+urg5FRSgsxP37A4fvNjYm3LmTIBJdrazkfqt0tLRYlu2VSqO8vQ8tX6438JL//fe5DZ8JGe8oRskLaG5GYSHy84fk6f2WlrSSkuTCwvSSkrXTpjEMsys8XGvI9fvbb2OM+0MTohgUo2Q0tLYiNRWFhUOGa9rbLQwMhgYo58MPMeSRU0LGJ5rgJ6NhwgQEBT06PNHIaPgMNTYGd6OJkPGPYpSMEgeHZ1hCP306dV8maoNilIwShsGCBSM60sAAgYFjXA0hikMxSkaPtzdmz37KMXw+oqJepIEpIaqGbjGRUcWy+OUXZGdj2N8rQ0MsXw5nZ4WXRcgYohglY6C6Gjk5KCx8uHOJqSmmTUNAAJ2HEvVDMUrGDMuipQVdXTA2puYjRI1RjBJCyAuhW0yEEPJCKEYJIeSFUIwSQsgLoRglhJAX8v8BUhClIatgek0AAAAASUVORK5CYII=\n", + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "O\n", + "OH\n", + "O\n", + "O\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you can also manually set the atoms that should be highlighted:\n", + "m.__sssAtoms = [0,1,2,6,11,12]\n", + "m" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Substructure Search against database " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "PandasTools.RenderImagesInAllDataFrames(images=True)\n", + "\n", + "from IPython.core.display import HTML\n", + "\n", + "def show(df):\n", + " return HTML(df.to_html(notebook=True))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "mydf = 
pd.read_pickle('./data/58_molobj.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilesmolport_idmol
0CC(=O)c1ccc(c(c1)OC)OMolPort-000-000-274\"Mol\"/
1c1ccc2c(c1)CCC(C(=O)N2)NMolPort-000-000-286\"Mol\"/
2c1cc2c(cc1N)NC(=O)CCC2MolPort-000-000-287\"Mol\"/
3C1CCCC(CC1)(C(=O)O)NMolPort-000-000-293\"Mol\"/
4Cc1cc(c2c(n1)CCCC2)NMolPort-000-000-304\"Mol\"/
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show(mydf.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "query = Chem.MolFromSmiles('c1ccncc1')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAXHUlEQVR4nO3daVRU5x0G8GcGGBREQDYVVI4IiBt6QCtKYyptEi0u0QaRY5ugxo0oA2GLiIBsYSkz4FYXMKlRIamaKok2uDQu0UYjeIxsLSYulDNR1EJkm+X2w1jSpkbnInfeWf6/T3zwz3kC8fG9c+99XxHHcSCEENJbYtYBCCHEuFGNEkLIc6EaJYSQ50I1Sgghz8WSdQBirJqamu7du8c6hZ74+flJJBLWKYiBoholvVFSUlJZWVleXs46iJ5kZ2eHhYV5eXmxDkIMkYgeeCJ81dbW+vv7A/D29ra0NP1/iRUKhUKhmD9//uHDh1lnIYaIapTwNmvWrOPHj69Zs2br1q2ss+iDQqHw8fFpbW09fvz4yy+/zDoOMTh0i4nwc/To0ePHjzs6Oqanp7POoidubm7vvPMOgNjYWJVKxToOMThUo4SH7u7uuLg4AGlpac7Ozqzj6E9sbKy3t3dNTc2OHTtYZyEGh2qU8FBUVNTQ0ODn57d69WrWWfRKIpHk5uYCSElJaWlpYR2HGBaqUaKr7777LisrC0BhYaGVlRXrOPr26quvvvTSSw8ePDCfTzOIjugWE9HV8uXLS0pK5syZc+TIEdZZ2KipqfH39+c4rqqqavz48azjEENBq1Gik6qqqvfee08ikRQUFLDOwsyYMWNWrlypVqulUinrLMSAUI0SnUilUrVavW7dOh8fH9ZZWMrIyHBycjp16pTZLsnJ/6OLevJsZWVlixcvdnV1bWhosLe3Zx2HseLi4ujoaC8vr+vXr1tbW7OOQ9ij1Sh5ho6OjqSkJABZWVnUoQDWrFkzfvz4xsbGoqIi1lmIQaAaJc+Ql5d38+bNSZMmRUZGss5iECwtLWUyGYDMzMzm5mbWcQh7VKPkae7cuZOfnw9ALpdbWFiwjmMoQkJC5s6d29bWtmHDBtZZCHv02Sh5moiIiAMHDixatKisrIx1FsPS2Ng4duxYpVJ58eLFyZMns45DWKIaJT/pwoUL06dP79evX01NjaenZy++Q3l5ueFf9oaGho4aNaoXgwkJCfn5+UFBQefPnxeJRH0ejBgNjpAnUavV2kVWampqr7/Jz372M9b/gz/bRx991Lv/utbW1iFDhgDYv39/r39ExATQapQ8WWlp6bJly9zd3evr621tbXv3TeRy+bffftunufpeZGSkdvvUXigpKVm+fLmHh0ddXV2vf0rE2FGNkidoa2vz9fVtbm7et29fREQE6ziGS6PRTJ069dKlS6mpqWlpaazjEDaoRskTJCYm5uXl0ad+uuj5BLm2tnbEiBGs4xAG6IEn8mPaB8vFYrFcLqcOfaagoKBFixb1vKRAzBDVKPmx2NjYrq6uN954Y8qUKayzGIf8/HxbW9uysrIzZ86wzkIYoBol/0O76YadnV1mZibrLEbDw8NDeyiAdgMX1nGIvlGNkh/0bAGXnJysfZSH6CgxMXHEiBFVVVV79uxhnYXoG91iIj/YvHnzunX
rRo4cWVNTQ3sX8UX7YJktWo2Sx3qOxygsLKQO7YXw8PAXXnih56gVYj5oNUoeW7t27ZYtW2bOnHny5EnWWYxVVVVVYGCgpaXltWvXzHx/a7NCq1ECANqjgy0sLORyOessRky7nWDPMdTETFCNEgBITX1XqVSuWrWKTmp7TtnZ2fb29kePHj1+/DjrLERPqEYJjhzBJ5/smDUrm44Ofn6urq7JyckAYmNjlUol6zhEH6hGzV13N+Li0NHRf/bsd5ycnFjHMQXR0dE+Pj61tbXbt29nnYXoA91iMne5uUhKgp8frl6FlRXrNKbiyJEj8+bNc3R0bGhocHZ2Zh2HCItWo2ZNoUBODgDIZNShfWnu3LmvvPLKgwcPUlNTWWchgqPVqFlbuhR79mDePHz8MesoJqe2ttbf31+j0Vy5cmXChAms4xAB0WrUfFVV4f33IZEgL491FFPk5+e3evXqnvdriQmjGjVTHIfoaGg0kEpBz4kLJC0tzdnZ+fTp04cPH2adhQiIatRMHTiAs2fh6or161lHMV2Ojo7aLfHj4uI6OztZxyFCoRo1Rx0dj9szJwe0h4agVq1aNWHChBs3bshkMtZZiFDoFpM5Sk3Fpk2YNAmXL0NM/5IK7NSpUyEhIQMGDKivrx86dCjrOKTv0d8hs3PnDgoKIBKhqIg6VB9mzpz56quvfv/99+vpAxQTRatRsxMejvJyLF6M/ftZRzEbN27cGDt2bFdX18WLF+loFtNDqxHz8sUX+PBD9O//+Kl7oh8jR46USqUcx0VHR9PCxfRQjZoRjQbR0eA4JCaCTgLWs+Tk5KFDh168eHHfvn2ss5A+Rhf1ZmTXLqxYAQ8P1NXB1pZ1GvPz3nvvRUZGuru719XVDRgwgHUc0mdoNWou2tqwcSMAFBRQh7Lx+uuvT5kypampKTc3l3UW0pdoNWou4uNRUIBp03DuHEQi1mnM1cWLF6dNm2ZtbV1bW+vp6ck6DukbtBo1C42N2LwZYjHkcupQlqZOnRoREdHZ2ZmQkMA6C+kztBo1C3PmoKICy5dj1y7WUcxeU1OTr6/vo0ePTp8+/eKLL7KOQ/oArUZN34kTqKiAnR02bWIdhQDu7u6JiYkApFKpWq1mHYf0AapRE6dSISYGADZuxJAhrNMQAEB8fLynp+fVq1d3797NOgvpA3RRb+KKiiCVwssL16/D2pp1GvIfH330UVhYmIuLS0NDg4ODA+s45LnQatSU3b+PjAwAkMmoQw3La6+9NmPGjLt372Zof0PEmNFq1JStWYPt2xESghMnWEch/6e6ujowMFAkElVXV48dO5Z1HNJ7tBo1WdevY9cuWFpCLmcdhTzJxIkTly1bplKpYrSfXhOjRTVqsmJioFIhKgrjxrGOQn5CZmamg4NDZWXlp59+yjoL6T2qUdN06BAqKzFoEFJSWEchP83FxSUlJQVAdHR0d3c36zikl6hGTVB3N5KSACAjA05OrNOQp1q7dq2vr+8//vGPrVu3ss5CeoluMZmg9nZkZODECVy4AEtL1mnIs3zyySehoaEDBw6sr68fPHgw6ziEN6pRk6VWw8KCdQiim9mzZx87dmzlypV/+MMfWGchvFGNEsJeXV3dhAkT1Gr1l19+GRAQwDoO4Yc+GyWEvdGjR7/11lsajYZOGTFGVKNGZvp0iETP2OyuqAgiEaZP11cm0hfS0tLc3NzOnz9/8OBB1lkIP1SjxmrlStYJSJ8aOHBgWloagNjY2Pb2dtZxCA9Uo8Zq504cO8Y6BOlTK1asCAgIuH37dmFhIesshAeqUSM2ezbrBKRPicViuVwuEolycnJu3brFOg7RFdWoUVqx4vEXdGlvYoKDgxcuXNje3p6cnMw6C9EV1ahRGjPm8YYjdGlvegoLC21sbPbt23fu3DnWWYhOqEaNVXQ0pk0D6NLe5AwbNiwmJobjOKlUqtFoWMchz0Y1asTef//xF3Rpb2KSkpJcXFxu3Ljx97//nXUW8mxUo0Z
s1Ci6tCeEPapR40aX9gaud9vf5eTk3L17d+TIkd7e3n0eifQ5qlGjR5f2Busvf/mLj4/Pn//8Z15T33zzTWFhoUgkksvlYjH9DTUC9EsyenRpb5hUKlVcXNzNmzf5fr4ZHx/f2dm5ZMmS4OBggbKRvkU1agqiox9/QZf2hmPLli1ff/21l5fX2rVrdZ86ffr0wYMHbWxsMjMzhctG+hbVqInoOcunqIhpDgIAuH//vrYH5XK5tc5nW6vVau3xduvXrx8+fLiA+Uifoho1EbNmPX61SSpFTQ3rNGYvOTm5paXll7/8ZWhoqO5TO3fuvHr16vDhw+msUONCNWo6dux4/MXOnUxzmL3r16/v3r3b0tJSJpPpPvXw4cONGzfiP28xCZaO9D2qUZNCx/QagpiYGJVKFRUVNY7P2dbp6en37t0LDg5esGCBcNmIEKhGTUrPpT1h5eDBg5WVlYMGDUrhc7Z1XV3d1q1bLSwstmzZInr6ptzE8FCNmpqeS3s6ikL/urq6kpKSAGRmZjrxOds6NjZWqVS++eab/v7+gqUjQqEj7UzQ8xywXFsLZ2e4uAiTzNRlZ2cnJyePHTu2urraUucffUVFxZw5cxwcHBoaGlzoR2+EaDVqgiwtcfAgLl/+YWWqo5IS+PtjwwZhYpk6hUKRm5sLQCaT6d6hSqUyLi4OQGpqKnWokaIaNUESCXJzASAlBS0tPAZnzIBIhN278dVXAkUzZQkJCa2trQsWLPjVr36l+1RxcXF9ff3o0aOjoqKEy0aExRET9dJLHMCtW8dvKiaGA7jp0zmNRphYJury5ctisVgikTQ0NOg+pVAoHBwcAHz66afCZSNCo9WoyZLJYGmJbdtw7RqPqbQ0DB6M8+fxpz8JlszkcBwXHR2t0Wji4uJ47cm0YcOGhw8f/vrXv541a5Zw8YjQ6BaTKYuKwrZtCAnBiRM8pnbswKpVGDYMdXWgx8B1sXfv3t/97ndubm4NDQ0DBw7Ucaq6ujowMFAsFl+7ds3X11fQhERQtBo1ZRkZcHLCyZM4coTH1JtvIiAAt2+DTvnVRXt7+4YNGwDk5eXp3qEApFKpWq1et24ddajRY/2pAhFWUREHcF5eXGcnj6mzZzmRiLOx4W7eFCyZqdAe4RkQEKBWq3WfKi8vB+Di4vLgwQPhshH9oNWoiVuzBuPGobERxcU8poKDsXAh2ttBp/w+3a1bt2QyGd8tljs6OhITEwFkZmZqbzER48a6x4ngTpzgAM7OjvvnP3lM3brF2dhwIhF39qxgyYzfwoULAfz2t7/lNZWeng5g4sSJKpVKoGBEn6hGzcKcORzALVvGbyo5mQO4gACOz9WqGTl79qxIJLKxsbnJ57OPO3fu2NraAvjrX/8qXDaiT3RRbxZkMlhbY88eXLrEY2r9egwfjq++wt69giUzWmq1+q233uI4ju8WywkJCY8ePQoLC5sxY4Zw8Yhese5xoifx8RzABQXxe67+j3/kAM7NjfvXvwRLZpy2b98OYNiwYY8ePdJ96osvvhCJRP369fvmm28Ei0b0jVaj5iIlBUOG4MIFlJfzmFqyBMHBUCjw7ruCJTNCvdtiWaPRSKVSjuPi4+M9PT0FzEf0jHWPE/3ZtYsDOA8P7vvveUxdvsyJxZxEwvF5y9HESaVSAMHBwRo+a/vS0lIA7u7u3/P6BRCDR6tRM7J0KSZPxp07yM/nMRUQgCVL0N2NpCTBkhkV7RbLYrFYLpfrvsVyW1ub9gnT3Nxc7S0mYjKoRs2IWAy5HCIR8vJw8yaPwbw8DByIQ4fw2WeChTMePVssBwQE6D6VlZXV3NwcFBQUEREhXDbCBuvlMNG38HAO4MLD+U1lZXEAN2YMp1QKE8tIVFRUABg4cGBzc7PuU42Njf369ROJRH/729+Ey0ZYodWo2cnPh60tyspw5gyPqbg4eHujpga7dgmWzOAplcq3334bQFpa2uDBg3UffPvttzs7O19//fUpU6YIlo6ww7r
HCQOpqRzATZrE77n6gwc5gBs0iLt3T7Bkhq2goADAqFGjurq6dJ86efIkgAEDBjQ1NQmXjTBEq1FzlJiIESNQVYU9e3hMLViAX/wCHMdt385nHWsq7t69m5mZCaC4uFgikeg4pVartbf1k5OThw4dKmA+whDrHids7N/PAZyrK/fwIY+pr7/+3tnZ18rKqqamRrBoBmrFihUAZs+ezWtq8+bNAEaOHNnR0SFQMMIc1aj5euEFDuDi4/lNrV69GkBISIgwoQxUVVWVhYWFlZVVXV2d7lP37993dnYGcPjwYeGyEeaoRs3XlSuPn6uvr+cx1dLSoj2B/ejRo4JFMzja999jY2N5Ta1duxbAzJkzBUpFDATVqFlbupQDuLlz+U3J5XIAXl5enbz2gjZavdtiuaamxsrKysLC4urVq8JlI4aAatSsKRScvT0HcMeO8ZhSKpXjxo0DkJ+fL1g0Q9HR0aF9/33Hjh28Bl9++WUAUVFRAgUjhoNq1Nzl5nIA5+fHdXfzmKqsrARgZ2f3T157QRuhTZs2AfD39+e1xfLHH38MwNHR8e7du8JlIwaCatTcdXVxPj4cwBUX8xsMDQ0FsHz5cmFyGYSeLZZPnz6t+1RXV5ePjw+AYr4/U2Kc6LlRcyeRID8f/fvj2LFTLS0tug/K5XJra+vS0tLLly8LF4+txMTER48evfbaay+++KLuU3K5vKGhwc/Pb9WqVYJFI4aEdY8Tg/Cb36wF/w/ytG9GTps2jdd+ccaid1ssKxQKe3t7AMd4fd5MjBnVKOG43t5Wbm1t1b5aXlZWJlw2JjQajfb995SUFF6DS5cuBTCX79MPxJhRjZLHeveQ486dOwF4eHiY2FbEe/bsAf8tlq9cuSIWiyUSST2vZ3GJkaMaJY/17pUbtVodGBgIID09XbhsetbW1qZ9/33v3r28Bn/+858DSEhIECgYMUxUo+QHvXsB/Pz58yKRqH///t9++61w2fQpKSkJwNSpU3l95rt//34Arq6uD3ntU0CMH9Uo+YFKpRo/fjyAnJwcXoNhYWEAIiIiBAqmT73bYrm9vX3EiBEAdu/eLVw2YpioRsn/6Nkck9dz9bdu3bKxsRGJRGfOnBEum37Mnz8fwBtvvMFrKjU1FcCkSZPUvPZwJSaBapT82Lx58wBERkbymtIeOGzsPdK7LZZv376tfUr/888/Fy4bMVhUo+THGhsbra2txWIx36va4cOHAygtLRUum6BUKtWECRMAZGdn8xpctGgRgMWLFwsUjBg4qlHyBImJiQCCgoJ43WP54IMPALi5uRnpPZZz585ZWVnRHTbCF9UoeYLW1tYhQ4YA+OCDD3Sf0mg02id+EhMThcsmqNra2rNnz+r+59Vq9eTJkwGkpqYKFooYOnqnnjyBnZ1dVlYW/vNSuY5TIpHo97//vUQiUalUQqYT0OjRo4ODg3X/86WlpZcuXfLw8IiPjxcuFTFwIo7jWGcghkij0QQFBX355ZcbN25MT0/XfbC5uVm7kjV5bW1tvr6+zc3NBw4cCA8PZx2HMEM1Sn7ShQsXpk+f3q9fv5qaGu3WxeS/xcfHFxQUTJs27dy5cyKRiHUcwgxd1JOfFBQUtHjx4o6ODu0dJ/LfGhsbN2/eLBaL5XI5daiZoxolT5OXl2dra/vhhx9+/vnnrLMYlpiYmK6ursjISO0tJmLOqEbJ07i7uyckJACQSqVqtZp1HENx8uTJo0eP2tnZZWRksM5C2KMaJc8QHx/v6elZXV1dWlrKOotBUKlUUqkUQEpKipncTCNPR7eYyLOVl5eHh4e7urrW19c7ODiwjsNYUVGRVCr18vK6fv26tbU16ziEPVqNkmdbtGjRjBkzvvvuu8zMTNZZGLt//772Ql4mk1GHEi1ajRKdVFdXBwYGisXia9eu+fr6so7DTFRU1LZt20JCQk6cOME6CzEUtBolOpk4cWJkZKRSqYyLi2OdhZmampqdO3daWlrKZDLWWYgBoRolusr
KyrK3t6+oqDh27BjrLGzExMSoVKo1a9ZoN7cmRItqlOjK1dV1w4YNAGJjY5VKJes4+nbo0KHPPvts0KBB2p1VCelBNUp4WLduna+vb11d3bZt21hn0avu7m7tAU0ZGRlOTk6s4xDDQreYCD8VFRVz5sxxdHRsaGjQniRqDnJyctavXz9mzJirV69aWlqyjkMMC61GCT+hoaGvvPLKgwcPzOfaVqFQvPvuuwBkMhl1KPl/tBolvNXW1vr7+wPw9vY2h1pRKBQKhWL+/PmHDx9mnYUYIqpR0hslJSWVlZXl5eWsg+hJdnZ2WFiYl5cX6yDEEFGNkl5qamq6d+8e6xR64ufnJ5FIWKcgBopqlBBCngvdYiKEkOdCNUoIIc+FapQQQp4L1SghhDyXfwP8hKhUJ/PfiAAAAABJRU5ErkJggg==\n", + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "N\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "hits = mydf[mydf['mol'] >= query]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4, 3)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hits.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smilesmolport_idmol
4Cc1cc(c2c(n1)CCCC2)NMolPort-000-000-304\"Mol\"/
8c1cc2c(ccnc2cc1Cl)NMolPort-000-000-321\"Mol\"/
13c1cc2cc(ccc2nc1)CNMolPort-000-000-390\"Mol\"/
16c1cc(cnc1)C(C(=O)O)NMolPort-000-000-411\"Mol\"/
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show(hits.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "PandasTools.WriteSDF(hits, 'substruct_hits.sdf', molColName='mol', idName='molport_id')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Similarity Searches" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "from rdkit.Chem import AllChem\n", + "from rdkit import DataStructs" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "mydf['fp_mrgn_2'] = mydf['mol'].map(\n", + " lambda mol: AllChem.GetMorganFingerprintAsBitVect(mol, 2)\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAATjUlEQVR4nO3dfUxT9/4H8E+xyKOKAj7jcIDgAyo4HgQfkSWCbMllwbgl1eW3pGzJLF22BJeY1C3LXc32R7tl11v+uendstw0W3ZTEb0BdDpvBRHROREoooDDJywiDwpIv78/Duu4PtH2nPb0tO9X/Edpv+fTRt495/M9329ljDECAAB3BYldAACAtCFGAQB4QYwCAPCCGAUA4AUxCn6CMYb5UhAFYhQkz2azlZWVvf3226tWraqoqLDb7WJXBIFFhg9wkK6xsbG//e1vn3zySV9fn1wuf/z4MRGtX79er9dnZGSIXR0ECpyNglTV1tamp6er1eq+vr5t27adP3/eZDLFxcWdOXMmKytr586dXV1dYtcIgYEBSE1bW1tJSQn3HzgpKclkMjl+NDg4qNFoQkNDiSgiIkKj0Tx8+FDEUiEQIEZBSgYGBjQaTUhIiCMlHz169PTDurq6FAoFl7NxcXFGo9H7pULgQIyCNIyPjxuNxnnz5hFRUFCQQqG4devWi59y/Pjx1atXc2G6devWixcveqdUCDSIUZCAurq6rKwsLhAzMzPPnDnj5BO58I2NjXWE7+3btz1aKgQgxCj4tO7uboVCIZPJiGjRokVGo9Fut7s6iM1mKy8vnz59OhHNnj1bq9WOjIx4oloITIhR8FFDQ0NarTYyMpKIwsPDy8vLBwYG+AzY0tJSWFjIndImJycfOXJEqFIhwOG+UfBFhw8f3rt3b2dnJxEVFRV9/fXX8fHxgoxcU1NTVlbW3NxMRPn5+Xq9fsWKFYKMDIFL7BwH+B+NjY0bNmzg/nOmp6efOnVK8EOMjo7qdLpZs2YRUXBwsEqlun//vuBHgcCBGAVfcffuXZVKNW3aNCKKiYnR6XSPHz/23OF6e3sdh4uOjvb04cCPIUZBfNzp4cyZMx2nh/39/d45dGNj
48aNGz168gt+DzEKIjObzQkJCVyQ5efnNzc3e7kAu93+/fffx8XFEZFMJtu1a1d39z0v1wCShjX1IJrW1tbCwsLXX3/96tWr3NR5dXX18uXLvVyGTCZ78803W1patFptRETEqVPX1q6dvW8fDQ56uRCQLLFzHAKRzWZTqVRyuZwEvZGzq6tr165dHR0dbo/Q2dmpVl+VyRgRW7KE/etfzPW7VCHgIEbBq8bGxgwGA7esSC6XK5XKO3fuCDX4nj17iCg0NPTjjz/mc5NpfT3LzmZEjIhlZjKn10xBgEKMgvfU1tampqZyl0F5eXmCL3Lv6elRKpVBQUFEtGDBAoPBMD4+7t5QdjszGtn8+YyIyWRMoWA3bwpbLPgPxCh4g9VqdWxtl5iYOHlrO8E1NDTk5uZyx1q3bt3p06fdHmpwkGk0LCSEEbGICKbRsGftJwWBDjEKniXKBqB2u91kMi1ZsoSIZDJZSUnJ9evX3R7NamUlJRPX+ImJzJMfASBJiFHwFLvdbjQa58+fz2WZQqG46d0L46GhIUeCh4eH80zwmhqWmjoRpnl57NdfBawUpA0xCh5x9uzZ9evXc1fWGRkZFotFrEq4LZy5PaIWL17s3h5RnLExZjCw2FhGxORyplQy4abHQMIQoyCwGzdu8N/aTnAnTpxYs2YNF+ubN2++cOGC20Pdu8dUKiaXMyI2ezbT6djYmICVgvQgRkEww8PDWq12xowZRBQWFlZeXv7gwQOxi/oTt4Xz3Llzyen981/gyhVWUDBxjZ+SwqqqBKwUJAYxCsIwm81Lly7lTveKior43APvUTabTa1WBwcHE1FUVJRe/83oqPujmc0sIWEiTIuKWHu7cIWCdCBGga/z589v2rSJC9C0tLSTJ0+KXdHUWltbi4qKiGjz5p+WLWOHD7s/1Ogo0+nYzJmMiAUHM5WKeWtbFfAViFFwn9T3mjt8uHLdujHuXHLHDtbS4v5QPT1MqWRBQYyILVjADAYmqXcCeEGMgjv8Zudj7lxy1qw/zyX7+twfbfIq0owM1t3NdDpGxHJynvHgqipGxHQ69w8HPgI7PIHLampq1q5dq1ar+/v78/Pzm5qa9Ho9F6mSExxMZWV09SqpVGS301dfUUIC6fU0Pu7OaJmZZLGQyUQvvUQDAzRv3sS/Wyx09KiAVYNvQYyCC9ra2oqKil599dXm5uZly5ZVVlZWV1evXLlS7Lr4io4mvZ4aGmjTJrLZSK2mV16hkyfdGUomo5ISunyZfvqJgoP//Pc/vkwP/BBiFJxy//79ffv2paamHjlyJCoqSqvVXrp0aceOHWLXJaS0NDp5ksxmWrqULlygLVvotdfo2jV3hoqIoJSUP/+ak0NEVFoqTJ3gaxCjMDWTyZSUlHTw4MHx8fF3333XarU6vvbd/7z2Gl2+TFotzZhBlZWUkkJlZTQwwGvM/fspJ4cqKqi9XaAqwZcgRmEKt27dOnXqVG9v75YtWxobGw8dOhQTEyN2UZ4VFkbl5dTSQgoFjY3RV19RSgpVVJDd7v6Y+/cTEe3ZI1SN4EMQozAFq9X6zTffpKamTl5PGQgWLqR//pPq62n9eurpodJSys4mi8XN0QoKSKkki4X0ekGrBB+AGAWnSHQinr+MDDp9mv7xD1qwgBoaaONGOnjQzaEMBiIitVrA6sAnIEYBphAURG+/TVYraTQUEkJbtrg/lE5HhLkmv4MYBXBKRAQdOECdnZSV5f4gZWUTc024jdSfIEYBXBAby3cEo5GI6LPP+NcCvgIxCuBViYkTc01IUr+BGAXwNm6uye1Jf/A1iFEAEVRViV0BCAcxCuAlfX3097+TyUREVFAwsUIU/ABiFIRx8ODB+Pj4Q4cOiV2IbykrI8aooICIqKeH3nuPPvlk4kf//S8xRmVlIlYHwkCMgjD6+vo6Ozv7+/vFLgTA2xCjAAC8IEYBAHhBjAIA8IIYBQDgBTEKAMALYhQAgBfEKAAAL4hRAABeEKMAALwg
RgEAeEGMAgDwghgFAOAFMQoAwAtiFACAF8QoAAAviFEAAF4QowAAvCBGAQB4QYwCAPCCGAUA4AUxCgDAC2IUAIAXxCgAAC+IUQAAXhCjAF4SFPQ4Lu5xbOwjsQsBgSFGAbzEbm/t7g6+e3ed2IWAwBCjAAC8IEYBAHhBjAIA8IIYBQDgBTEKAMALYhQAgBfEKAAAL4hRAABeEKMAALwgRgEAeEGMAgDwghgFAOAFMQoAwAtiFACAF8QoAAAviFEAAF4QoyCMzMzMd955Z82aNWIXAuBtcrELAD9RXFxcXFwsdhUAIsDZKAAAL4hRAPfV1tbm5eX19fWJXQiICTEK4I6Ojo7i4uL8/PwTJ07o9XqxywExoTcK4JqhoaEvvvji4MGDjx49ioiI+Oijj/bt2yd2USAmxCiAsxhjP/zww4cfftjd3S2TyUpKSr788sslS5aIXReIDDEK4JSGhga1Wm2xWIjolVde0ev1OTk5YhcFPgG9UYAp9PT0lJaWZmdnWyyWhQsXGgyG+vp6ZCg4IEZhCnV1dUTU2dnZ29srdi3eNjo6qtfrU1JSKioq5HK5SqVqaWlRKpVBQe784pw7d46I7t6929PTI3SlICoG8BxtbW0lJSVEFB4eTkRRUVFarXZkZETsurzEbDYvXbqU+zUpKirq6Ohwe6ju7m6FQiGTyUJCQrj3U6PRPHz4UMBqQUSIUXiGvr6+8vJy7nc+MjLy/fffLygo4AJl2bJllZWVYhfoWU1NTZs2beJe7/Lly48dO+b2UENDQxqNJiwsjEvPDz744K233uJGjouLMxqNApYNYkGMwv8YHx83Go3z5s0joqCgIIVCcevWLe5H1dXVK1as4CIgPz//t99+E7dUT7h3755KpZo2bRoRzZkzR6fTPX782L2h7Ha7yWR66aWXiIib1r9+/Tr3o+PHj69evZp7J7ds2XLhwgXhXgGIADEKf/r555/Xrl3L/XpnZWXV1dU98YDR0VGdTjdr1iwiCg4OVqlU9+/fF6VUwXEvLSoqyvHS+vr63B7t3Llzubm53Du5bt26X3755YkHcB9Xc+fOdXxc3b59m98rANEgRoGxSc07Ilq8eLHRaLTb7c97cG9vr+OULTo6ms8pm4+orq5euXKlICfaPT09jjmoBQsWGAyG8fHx5z2Ya55Mnz49AFvP/gQxGuieaN5pNJrh4WFnnnj+/HlHAzEtLe3kyZOeLtUTWltbi4qKuFeRlJR0+PBht4fizmdnzpzpOJ/t7+93soYdO3YETuvZ/yBGA9cLmnfOE3A628smT6NxZ4KPHj1yezSz2ZyQkOB4H9rb210dIRBaz/4KMRqgpmzeOW94eFir1c6YMYOIwsLCysvLHzx4IGCpgnu6L+mYRnPDlStXtm/fzr2TKSkpVVVVbg/lx61n/4YYDThPN+8E6WzeuHHD0V1dtGjRi7urIjpx4oRji/7Nmzc3NTW5PRQ3rS+Xy4lo9uzZOp1ubGyMf4X+13r2e4jRAOJ28855Z8+eXb9+PRdSGRkZFotF2PH5cGka7cXGxsYMBkNsbCwRyeVypVJ5584dYav1j9ZzgECMBgr+zTsn2e12o9E4f/58ruWqUChu3rzpoWM5iZtGCw0NdXUa7ZlqampSU1O5dzIvL+/XX38VsNQnmM3m+Ph4KbaeAwpi1P8J2Lxz3uDgoCO5IiIixFr7KMg0moPVauVWxxJRYmKiyWQSsNTnkVzrOQAhRv3Z5OYdtyZHkOad8ybnTkJCgndyx+GJabTTp0+7PRT3qcBN63OfCnym9d0gldZzYEKM+qexMabXs+3bf+Kad3v37r13755YxdTW1k6+Cr548aKnj+jSPfAv5lM9Cl9uPQcyxKgf+s9/2IoVjIjNnGn/y1/+7/Lly2JX5I05Gc7IyIhjGm369Ok8p9Hq6+uzs7O52MrMzDxz5oyApbrHp2IdOIhRv2K1spISRsSIWGIi8+419NRsNtvkO4Q8sfbxvffe41KvuLj4
6tWrbo/j4xfRPtJ6Bg5i1E8MDjKNhoWEMCIWEcE0Gubd3p0LWlpaHNvuJScnHzlyRMDBOzo6MjIyamtr3R6Bm9KJjIx0TOkMDAwIWKGARJnygqchRiXPbmdGI5s/nxExmYwpFEwSF3mTb8DKz89vbm4WuyLGnrrB6Nq1a2JXNDXvt57hCYhRaauvZ9nZE1fxmZnMB3p3LvDCcgDnNTY2bty4kQuj9PT0U6dOiVWJG7zWeoZnQoxK1Y0bTKFgMhkjYosWMaOR+VLvzgV37951rH2MiYnx/tpHv1l8+UTr2fv3twUsxKj0DA8zrZZFRjIiFhbGysuZr/buXNDY2LhhwwYvnwz65VYgV65c8VzrGZ4JMSoxZjOLj5+4ii8qYlLo3bnAbDZzK4680Jp8YmM6X7gtTEC+2Xr2V4hRyWhsZBs3TgRoejqTVO/OBUNDQ46J8vDwcE9MlLe0tBQWFvr9+ZpPtZ79G2JUAnp7mUrFpk1jRCw6mul0TJq9OxdM3o1JwNs2bTab40s7PHTjqq/hFnSJ2HoOBIhRnzY6ynQ6NmsWI2LBwUylYtLv3bmgrq4uKytLkEVE3FbN3Fx2AH6F3BOtZz67dMPTEKO+q7p6Yk0nEcvPZ/7Vu3PWC77w2XmTv9B469atAXtnpTdbzwEFMeoCnY4RsWfuM1dVxYiYTjfFc3NynHpuSwsrLJwI0ORk5qe9OxcMDAy4t8FSV1eXQqHggiMuLs5oNHq6VB/nhdZzAEKMuoB/jD7z6U8/99NPGRGbPZtptczfe3cuaGtrc6x9TEpKevHaR6w6fwEBvwgAGGLUJYLEKD31lj/93OFh9vHHrLdXiKL9Tk1NzapVq7gw3bZt26VLl554ALdVc1xcHP2xVXNnZ6copfq4ya3nrKwsX9i/SqKCCLwoJ4eIqLR0ioeFhdFf/0rR0V6oSHq2bdvW1NRkMBhiYmJqa2vT0tJKS0t7e3u5nzY0NOTm5u7cubO7uzsjI+P06dMmk2nJkiXi1uybsrKyLBYL13qur6/Pzc3dvXv37du3xa5LehCjXrV/P+XkUEUFtbeLXYqUccvGW1tbVSoVEVVUVCQnJ3/66acKhYI7q1q4cKHBYKirq8vhPrjgOYKCgnbv3t3e3q7RaIKDg7/99tvExMQDBw6MjIyIXZqUIEa9bf9+IqI9e8SuQ/rmzJmj1+sbGxu3bt1qs9k+//zz7777LjQ0dP/+/W1tbY7d72FKkZGRBw4cuHjx4o4dOwYHB7Va7e+//y52UVIiF7sA6flj/YubCgpIqaSKCtLrqaxMoJoC2OrVq48fP/7jjz/abLbLly+r1WrHTnfgkuTk5MrKymPHjlmt1pdfflnscqQEMSoCg4EqKkitRowK5o033hC7BD+xfft2x/fIgpNw1eOyqipi7Mk/VVWuDaLTETkx1wQAvg8xKo6ysom5pqNHxS4FAPhBjIrGaCQi+uwzsesAAH4Qo6JJTCSlkiwWJCmAtCFGPSU3l2SyiT96/bMfYzAQEVks3qwLAASGGPWI3FyyWMhqJcZIpyO1mkymZz/S1bkpAPA1iFHhtbeTxUJVVZSYSERUVkY63XNPOQsKCAttACRNxhgTuwb/d/QoFRaS1ToRrADgT3A26g3//jcRIUMB/BNi1OOOHqWKion77QHA/+Ci3rO4y3mlcmJSHgD8D85GPYjLUJ0OGQrgzxCjnqLXT2Qo9h8B8G/Y4ckj9HpSq6mqigoKxC4FADzs/wF5iyEZi2/JJAAAAABJRU5ErkJggg==\n", + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "N\n", + "H2N\n", + "\n" + ], + "text/plain": 
[ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qmol = Chem.MolFromSmiles(\"Cc1cc(ccn1)N\")\n", + "qmol" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "q_fp_mrgn_2 = AllChem.GetMorganFingerprintAsBitVect(qmol, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "mydf['fp_mrgn_2_sim'] = [DataStructs.TanimotoSimilarity(q_fp_mrgn_2, x) for x in mydf.fp_mrgn_2]" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "mydf.sort_values(\"fp_mrgn_2_sim\", inplace=True, ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "top10_df = mydf.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "N\n", + "Cl\n", + "NH2\n", + "0.29\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "N\n", + "NH2\n", + "0.26\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "OH\n", + "S\n", + "N\n", + "H2N\n", + "0.22\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "N\n", + "H2N\n", + "0.22\n", + "\n", + 
"\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "NH2\n", + "N\n", + "H\n", + "N\n", + "0.21\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "OH\n", + "H2N\n", + "0.21\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "NH2\n", + "N\n", + "H\n", + "O\n", + "0.21\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "O\n", + "O\n", + "NH2\n", + "Br\n", + "0.20\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "O\n", + "O\n", + "Br\n", + "H2N\n", + "0.20\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "O\n", + "Cl\n", + "NH2\n", + "0.19\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MolsToGridImage(top10_df.mol, molsPerRow=5, legends=[\"%.2f\" % x for x in top10_df.fp_mrgn_2_sim])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", 
+ "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}