Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Update to API v2 #15

Merged
merged 10 commits into from
May 1, 2024
11 changes: 11 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,14 @@ History
------------------

* Add min_rnk option to get_study_fields - #12


1.0.0 (2024-05-01)
------------------

Migrates to version 2.0 of the ClinicalTrials API

* Add support for the new API version
* Add support for the new API fields
* **Remove get_study_count function**
* Allow CSV format in full_studies
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ include HISTORY.rst
include LICENSE
include README.rst

recursive-include pytrials *.csv
recursive-include tests *
recursive-exclude * __pycache__
recursive-exclude * *.py[co]
Expand Down
14 changes: 4 additions & 10 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,23 +41,17 @@ Basic Usage

ct = ClinicalTrials()

# Get 50 full studies related to Coronavirus and COVID in json format.
# Get 50 full studies related to Coronavirus and COVID in csv format.
ct.get_full_studies(search_expr="Coronavirus+COVID", max_studies=50)

# Get the NCTId, Condition and Brief title fields from 500 studies related to Coronavirus and Covid, in csv format.
# Get the NCTId, Condition and Brief title fields from 1000 studies related to Coronavirus and Covid, in csv format.
corona_fields = ct.get_study_fields(
search_expr="Coronavirus+COVID",
fields=["NCTId", "Condition", "BriefTitle"],
max_studies=500,
fields=["NCT Number", "Conditions", "Study Title"],
max_studies=1000,
fmt="csv",
)

# Get the count of studies related to Coronavirus and COVID.
# ClinicalTrials limits API queries to 1000 records
# Count of studies may be useful to build loops when you want to retrieve more than 1000 records

ct.get_study_count(search_expr="Coronavirus+COVID")

# Read the csv data in Pandas
import pandas as pd

Expand Down
64 changes: 33 additions & 31 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
#
import os
import sys

sys.path.insert(0, os.path.abspath(".."))
sys.path.insert(0, os.path.abspath('..'))

import pytrials

Expand All @@ -32,24 +31,24 @@

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode", "sphinxcontrib.napoleon"]
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode']

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = ".rst"
source_suffix = '.rst'

# The master toctree document.
master_doc = "index"
master_doc = 'index'

# General information about the project.
project = "pytrials"
copyright = "2020, João Vitor F. Cavalcante"
author = "João Vitor F. Cavalcante"
project = 'Pytrials'
copyright = "2024, jvfe"
author = "jvfe"

# The version info for the project you're documenting, acts as replacement
# for |version| and |release|, also used in various other places throughout
Expand All @@ -70,10 +69,10 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"
pygments_style = 'sphinx'

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
Expand All @@ -84,7 +83,7 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "alabaster"
html_theme = 'alabaster'

# Theme options are theme-specific and customize the look and feel of a
# theme further. For a list of options available for each theme, see the
Expand All @@ -95,13 +94,13 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
html_static_path = ['_static']


# -- Options for HTMLHelp output ---------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = "pytrialsdoc"
htmlhelp_basename = 'pytrialsdoc'


# -- Options for LaTeX output ------------------------------------------
Expand All @@ -110,12 +109,15 @@
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',

# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
Expand All @@ -125,21 +127,21 @@
# (source start file, target name, title, author, documentclass
# [howto, manual, or own class]).
latex_documents = [
(
master_doc,
"pytrials.tex",
"pytrials Documentation",
"João Vitor F. Cavalcante",
"manual",
),
(master_doc, 'pytrials.tex',
'Pytrials Documentation',
'jvfe', 'manual'),
]


# -- Options for manual page output ------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, "pytrials", "pytrials Documentation", [author], 1)]
man_pages = [
(master_doc, 'pytrials',
'Pytrials Documentation',
[author], 1)
]


# -- Options for Texinfo output ----------------------------------------
Expand All @@ -148,14 +150,14 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(
master_doc,
"pytrials",
"pytrials Documentation",
author,
"pytrials",
"One line description of project.",
"Miscellaneous",
),
(master_doc, 'pytrials',
'Pytrials Documentation',
author,
'pytrials',
'One line description of project.',
'Miscellaneous'),
]




6 changes: 6 additions & 0 deletions pytrials/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
"""Top-level package for pytrials."""

from pathlib import Path

__author__ = """João Vitor F. Cavalcante"""
__email__ = "[email protected]"
__version__ = "0.3.0"

HERE = Path(__file__).parent.resolve()

study_fields = Path(HERE, "fields.csv")
116 changes: 57 additions & 59 deletions pytrials/client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from pytrials.utils import json_handler, csv_handler
from pytrials import study_fields
import csv


class ClinicalTrials:
Expand All @@ -14,34 +16,40 @@ class ClinicalTrials:
time the database was updated.
"""

_BASE_URL = "https://classic.clinicaltrials.gov/api/"
_INFO = "info/"
_QUERY = "query/"
_JSON = "fmt=json"
_CSV = "fmt=csv"
_BASE_URL = "https://clinicaltrials.gov/api/v2/"
_JSON = "format=json"
_CSV = "format=csv"

def __init__(self):
self.api_info = self.__api_info()

@property
def study_fields(self):
fields_list = json_handler(
f"{self._BASE_URL}{self._INFO}study_fields_list?{self._JSON}"
)
return fields_list["StudyFields"]["Fields"]
"""List of all study fields you can use in your query."""

csv_fields = []
json_fields = []
with open(study_fields, "r") as f:
reader = csv.DictReader(f)
for row in reader:
csv_fields.append(row["Column Name"])
json_fields.append(row["Included Data Fields"].split("|"))

return {
"csv": csv_fields,
"json": [item for sublist in json_fields for item in sublist],
}

def __api_info(self):
"""Returns information about the API"""
last_updated = json_handler(
f"{self._BASE_URL}{self._INFO}data_vrs?{self._JSON}"
)["DataVrs"]
api_version = json_handler(f"{self._BASE_URL}{self._INFO}api_vrs?{self._JSON}")[
"APIVrs"
]
req = json_handler(f"{self._BASE_URL}version")
last_updated = req["dataTimestamp"]

api_version = req["apiVersion"]

return api_version, last_updated

def get_full_studies(self, search_expr, max_studies=50):
def get_full_studies(self, search_expr, max_studies=50, fmt="csv"):
"""Returns all content for a maximum of 1000 study records.

Retrieves information from the full studies endpoint, which gets all study fields.
Expand All @@ -60,16 +68,27 @@ def get_full_studies(self, search_expr, max_studies=50):
Raises:
ValueError: The number of studies can only be between 1 and 1000
"""
if max_studies > 100 or max_studies < 1:
raise ValueError("The number of studies can only be between 1 and 100")
if fmt == "csv":
format = self._CSV
handler = csv_handler
elif fmt == "json":
format = self._JSON
handler = json_handler
else:
raise ValueError("Format argument has to be either 'csv' or 'json")

if max_studies > 1000 or max_studies < 1:
raise ValueError("The number of studies can only be between 1 and 1000")

req = f"full_studies?expr={search_expr}&max_rnk={max_studies}&{self._JSON}"
req = f"studies?{format}&markupFormat=legacy&query.term={search_expr}&pageSize={max_studies}"

full_studies = json_handler(f"{self._BASE_URL}{self._QUERY}{req}")
full_studies = handler(f"{self._BASE_URL}{req}")

return full_studies

def get_study_fields(self, search_expr, fields, max_studies=50, min_rnk=1, fmt="csv"):
def get_study_fields(
self, search_expr, fields, max_studies=50, fmt="csv"
):
"""Returns study content for specified fields

Retrieves information from the study fields endpoint, which acquires specified information
Expand All @@ -96,49 +115,28 @@ def get_study_fields(self, search_expr, fields, max_studies=50, min_rnk=1, fmt="
for a list of valid ones.
ValueError: Format argument has to be either 'csv' or 'json'
"""
if fmt == "csv":
format = self._CSV
handler = csv_handler
elif fmt == "json":
format = self._JSON
handler = json_handler
else:
raise ValueError("Format argument has to be either 'csv' or 'json")

if max_studies > 1000 or max_studies < 1:
raise ValueError("The number of studies can only be between 1 and 1000")
elif not set(fields).issubset(self.study_fields):
elif not set(fields).issubset(self.study_fields[fmt]):
raise ValueError(
"One of the fields is not valid! Check the study_fields attribute for a list of valid ones."
"One of the fields is not valid!"
"Check the study_fields attribute for a list of valid ones."
"They are different depending on the return format, json or csv."
)
else:
concat_fields = ",".join(fields)
req = f"study_fields?expr={search_expr}&min_rnk={min_rnk}&max_rnk={max_studies+min_rnk-1}&fields={concat_fields}"
if fmt == "csv":
url = f"{self._BASE_URL}{self._QUERY}{req}&{self._CSV}"
return csv_handler(url)

elif fmt == "json":
url = f"{self._BASE_URL}{self._QUERY}{req}&{self._JSON}"
return json_handler(url)

else:
raise ValueError("Format argument has to be either 'csv' or 'json'")

def get_study_count(self, search_expr):
"""Returns study count for specified search expression

Retrieves the count of studies matching the text entered in search_expr.

Args:
search_expr (str): A string containing a search expression as specified by
`their documentation <https://clinicaltrials.gov/api/gui/ref/syntax#searchExpr>`_.

Returns:
An integer

Raises:
ValueError: The search expression cannot be blank.
"""
if not set(search_expr):
raise ValueError("The search expression cannot be blank.")
else:
req = f"study_fields?expr={search_expr}&max_rnk=1&fields=NCTId"
url = f"{self._BASE_URL}{self._QUERY}{req}&{self._JSON}"
returned_data = json_handler(url)
study_count = returned_data["StudyFieldsResponse"]["NStudiesFound"]
return study_count
concat_fields = "|".join(fields)
req = f"&query.term={search_expr}&markupFormat=legacy&fields={concat_fields}&pageSize={max_studies}"
url = f"{self._BASE_URL}studies?{format}{req}"
return handler(url)

def __repr__(self):
return f"ClinicalTrials.gov client v{self.api_info[0]}, database last updated {self.api_info[1]}"
Loading
Loading