jvfe · jvfe · May 1, 2024 · Apr 30, 2024 · May 1, 2024 · May 1, 2024
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -28,3 +28,14 @@ History
 ------------------
 
 * Add min_rnk option to get_study_fields - #12
+
+
+1.0.0 (2024-05-01)
+------------------
+
+Migrates to version 2.0 of the ClinicalTrials API
+
+* Add support for the new API version
+* Add support for the new API fields
+* **Remove get_study_count function and the min_rnk option of get_study_fields**
+* Allow CSV format in get_full_studies
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -4,6 +4,7 @@ include HISTORY.rst
 include LICENSE
 include README.rst
 
+recursive-include pytrials *.csv
 recursive-include tests *
 recursive-exclude * __pycache__
 recursive-exclude * *.py[co]

diff --git a/README.rst b/README.rst
@@ -41,23 +41,17 @@ Basic Usage
 
     ct = ClinicalTrials()
 
-    # Get 50 full studies related to Coronavirus and COVID in json format.
+    # Get 50 full studies related to Coronavirus and COVID in csv format.
     ct.get_full_studies(search_expr="Coronavirus+COVID", max_studies=50)
 
-    # Get the NCTId, Condition and Brief title fields from 500 studies related to Coronavirus and Covid, in csv format.
+    # Get the NCT Number, Conditions and Study Title fields from 1000 studies related to Coronavirus and COVID, in csv format.
     corona_fields = ct.get_study_fields(
         search_expr="Coronavirus+COVID",
-        fields=["NCTId", "Condition", "BriefTitle"],
-        max_studies=500,
+        fields=["NCT Number", "Conditions", "Study Title"],
+        max_studies=1000,
         fmt="csv",
     )
 
-    # Get the count of studies related to Coronavirus and COVID.
-    # ClinicalTrials limits API queries to 1000 records
-    # Count of studies may be useful to build loops when you want to retrieve more than 1000 records
-
-    ct.get_study_count(search_expr="Coronavirus+COVID")
-
     # Read the csv data in Pandas
     import pandas as pd
 

diff --git a/docs/conf.py b/docs/conf.py
@@ -19,8 +19,7 @@
 #
 import os
 import sys
-
-sys.path.insert(0, os.path.abspath(".."))
+sys.path.insert(0, os.path.abspath('..'))
 
 import pytrials
 
@@ -32,24 +31,24 @@
 
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode", "sphinxcontrib.napoleon"]
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode']
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ["_templates"]
+templates_path = ['_templates']
 
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 #
 # source_suffix = ['.rst', '.md']
-source_suffix = ".rst"
+source_suffix = '.rst'
 
 # The master toctree document.
-master_doc = "index"
+master_doc = 'index'
 
 # General information about the project.
-project = "pytrials"
-copyright = "2020, João Vitor F. Cavalcante"
-author = "João Vitor F. Cavalcante"
+project = 'Pytrials'
+copyright = "2024, jvfe"
+author = "jvfe"
 
 # The version info for the project you're documenting, acts as replacement
 # for |version| and |release|, also used in various other places throughout
@@ -70,10 +69,10 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 
 # The name of the Pygments (syntax highlighting) style to use.
-pygments_style = "sphinx"
+pygments_style = 'sphinx'
 
 # If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = False
@@ -84,7 +83,7 @@
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = "alabaster"
+html_theme = 'alabaster'
 
 # Theme options are theme-specific and customize the look and feel of a
 # theme further.  For a list of options available for each theme, see the
@@ -95,13 +94,13 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ["_static"]
+html_static_path = ['_static']
 
 
 # -- Options for HTMLHelp output ---------------------------------------
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = "pytrialsdoc"
+htmlhelp_basename = 'pytrialsdoc'
 
 
 # -- Options for LaTeX output ------------------------------------------
@@ -110,12 +109,15 @@
     # The paper size ('letterpaper' or 'a4paper').
     #
     # 'papersize': 'letterpaper',
+
     # The font size ('10pt', '11pt' or '12pt').
     #
     # 'pointsize': '10pt',
+
     # Additional stuff for the LaTeX preamble.
     #
     # 'preamble': '',
+
     # Latex figure (float) alignment
     #
     # 'figure_align': 'htbp',
@@ -125,21 +127,21 @@
 # (source start file, target name, title, author, documentclass
 # [howto, manual, or own class]).
 latex_documents = [
-    (
-        master_doc,
-        "pytrials.tex",
-        "pytrials Documentation",
-        "João Vitor F. Cavalcante",
-        "manual",
-    ),
+    (master_doc, 'pytrials.tex',
+     'Pytrials Documentation',
+     'jvfe', 'manual'),
 ]
 
 
 # -- Options for manual page output ------------------------------------
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [(master_doc, "pytrials", "pytrials Documentation", [author], 1)]
+man_pages = [
+    (master_doc, 'pytrials',
+     'Pytrials Documentation',
+     [author], 1)
+]
 
 
 # -- Options for Texinfo output ----------------------------------------
@@ -148,14 +150,14 @@
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (
-        master_doc,
-        "pytrials",
-        "pytrials Documentation",
-        author,
-        "pytrials",
-        "One line description of project.",
-        "Miscellaneous",
-    ),
+    (master_doc, 'pytrials',
+     'Pytrials Documentation',
+     author,
+     'pytrials',
+     'One line description of project.',
+     'Miscellaneous'),
 ]
 
+
+
+
diff --git a/pytrials/__init__.py b/pytrials/__init__.py
@@ -1,5 +1,11 @@
 """Top-level package for pytrials."""
 
+from pathlib import Path
+
 __author__ = """João Vitor F. Cavalcante"""
 __email__ = "[email protected]"
 __version__ = "0.3.0"
+
+HERE = Path(__file__).parent.resolve()
+
+study_fields = Path(HERE, "fields.csv")
diff --git a/pytrials/client.py b/pytrials/client.py
@@ -1,4 +1,6 @@
 from pytrials.utils import json_handler, csv_handler
+from pytrials import study_fields
+import csv
 
 
 class ClinicalTrials:
@@ -14,34 +16,40 @@ class ClinicalTrials:
         time the database was updated.
     """
 
-    _BASE_URL = "https://classic.clinicaltrials.gov/api/"
-    _INFO = "info/"
-    _QUERY = "query/"
-    _JSON = "fmt=json"
-    _CSV = "fmt=csv"
+    _BASE_URL = "https://clinicaltrials.gov/api/v2/"
+    _JSON = "format=json"
+    _CSV = "format=csv"
 
     def __init__(self):
         self.api_info = self.__api_info()
 
     @property
     def study_fields(self):
-        fields_list = json_handler(
-            f"{self._BASE_URL}{self._INFO}study_fields_list?{self._JSON}"
-        )
-        return fields_list["StudyFields"]["Fields"]
+        """List of all study fields you can use in your query."""
+
+        csv_fields = []
+        json_fields = []
+        with open(study_fields, "r") as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                csv_fields.append(row["Column Name"])
+                json_fields.append(row["Included Data Fields"].split("|"))
+
+        return {
+            "csv": csv_fields,
+            "json": [item for sublist in json_fields for item in sublist],
+        }
 
     def __api_info(self):
         """Returns information about the API"""
-        last_updated = json_handler(
-            f"{self._BASE_URL}{self._INFO}data_vrs?{self._JSON}"
-        )["DataVrs"]
-        api_version = json_handler(f"{self._BASE_URL}{self._INFO}api_vrs?{self._JSON}")[
-            "APIVrs"
-        ]
+        req = json_handler(f"{self._BASE_URL}version")
+        last_updated = req["dataTimestamp"]
+
+        api_version = req["apiVersion"]
 
         return api_version, last_updated
 
-    def get_full_studies(self, search_expr, max_studies=50):
+    def get_full_studies(self, search_expr, max_studies=50, fmt="csv"):
         """Returns all content for a maximum of 100 study records.
 
         Retrieves information from the full studies endpoint, which gets all study fields.
@@ -60,16 +68,27 @@ def get_full_studies(self, search_expr, max_studies=50):
         Raises:
             ValueError: The number of studies can only be between 1 and 100
         """
-        if max_studies > 100 or max_studies < 1:
-            raise ValueError("The number of studies can only be between 1 and 100")
+        if fmt == "csv":
+            format = self._CSV
+            handler = csv_handler
+        elif fmt == "json":
+            format = self._JSON
+            handler = json_handler
+        else:
+            raise ValueError("Format argument has to be either 'csv' or 'json'")
+
+        if max_studies > 1000 or max_studies < 1:
+            raise ValueError("The number of studies can only be between 1 and 1000")
 
-        req = f"full_studies?expr={search_expr}&max_rnk={max_studies}&{self._JSON}"
+        req = f"studies?{format}&markupFormat=legacy&query.term={search_expr}&pageSize={max_studies}"
 
-        full_studies = json_handler(f"{self._BASE_URL}{self._QUERY}{req}")
+        full_studies = handler(f"{self._BASE_URL}{req}")
 
         return full_studies
 
-    def get_study_fields(self, search_expr, fields, max_studies=50, min_rnk=1, fmt="csv"):
+    def get_study_fields(
+        self, search_expr, fields, max_studies=50, fmt="csv"
+    ):
         """Returns study content for specified fields
 
         Retrieves information from the study fields endpoint, which acquires specified information
@@ -96,49 +115,28 @@ def get_study_fields(self, search_expr, fields, max_studies=50, min_rnk=1, fmt="
                 for a list of valid ones.
             ValueError: Format argument has to be either 'csv' or 'json'
         """
+        if fmt == "csv":
+            format = self._CSV
+            handler = csv_handler
+        elif fmt == "json":
+            format = self._JSON
+            handler = json_handler
+        else:
+            raise ValueError("Format argument has to be either 'csv' or 'json'")
+
         if max_studies > 1000 or max_studies < 1:
             raise ValueError("The number of studies can only be between 1 and 1000")
-        elif not set(fields).issubset(self.study_fields):
+        elif not set(fields).issubset(self.study_fields[fmt]):
             raise ValueError(
-                "One of the fields is not valid! Check the study_fields attribute for a list of valid ones."
+                "One of the fields is not valid! "
+                "Check the study_fields attribute for a list of valid ones. "
+                "They are different depending on the return format, json or csv."
             )
         else:
-            concat_fields = ",".join(fields)
-            req = f"study_fields?expr={search_expr}&min_rnk={min_rnk}&max_rnk={max_studies+min_rnk-1}&fields={concat_fields}"
-            if fmt == "csv":
-                url = f"{self._BASE_URL}{self._QUERY}{req}&{self._CSV}"
-                return csv_handler(url)
-
-            elif fmt == "json":
-                url = f"{self._BASE_URL}{self._QUERY}{req}&{self._JSON}"
-                return json_handler(url)
-
-            else:
-                raise ValueError("Format argument has to be either 'csv' or 'json'")
-
-    def get_study_count(self, search_expr):
-        """Returns study count for specified search expression
-
-        Retrieves the count of studies matching the text entered in search_expr.
-
-        Args:
-            search_expr (str): A string containing a search expression as specified by
-                `their documentation <https://clinicaltrials.gov/api/gui/ref/syntax#searchExpr>`_.
-
-        Returns:
-            An integer
-
-        Raises:
-            ValueError: The search expression cannot be blank.
-        """
-        if not set(search_expr):
-            raise ValueError("The search expression cannot be blank.")
-        else:
-            req = f"study_fields?expr={search_expr}&max_rnk=1&fields=NCTId"
-            url = f"{self._BASE_URL}{self._QUERY}{req}&{self._JSON}"
-            returned_data = json_handler(url)
-            study_count = returned_data["StudyFieldsResponse"]["NStudiesFound"]
-            return study_count
+            concat_fields = "|".join(fields)
+            req = f"&query.term={search_expr}&markupFormat=legacy&fields={concat_fields}&pageSize={max_studies}"
+            url = f"{self._BASE_URL}studies?{format}{req}"
+            return handler(url)
 
     def __repr__(self):
         return f"ClinicalTrials.gov client v{self.api_info[0]}, database last updated {self.api_info[1]}"