Skip to content

Commit

Permalink
Rxnorm auto pipeline/extractor progress
Browse files Browse the repository at this point in the history
Co-authored-by: @Panthevm <[email protected]>
  • Loading branch information
ApricotLace and Panthevm committed Oct 18, 2023
1 parent 893faff commit 83bb104
Show file tree
Hide file tree
Showing 4 changed files with 398 additions and 2 deletions.
142 changes: 142 additions & 0 deletions src/ftr/ci_pipelines/rxnorm/core.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
(ns ftr.ci-pipelines.rxnorm.core
(:require
[clj-http.client :as client]
[clojure.java.io :as io]
[clojure.pprint]
[ftr.ci-pipelines.utils]
[ftr.utils.unifn.core :as u]
[clojure.string :as str])
(:import [java.io File]))


(def config-defaults
  "Default configuration values for the RxNorm FTR pipeline.
  Intended mainly for dev environments; `pipeline` merges caller-supplied
  args over these values."
  {:automated-downloads-url    "https://uts-ws.nlm.nih.gov/download"
   :rxnorm-current-version-url "https://download.nlm.nih.gov/umls/kss/rxnorm/RxNorm_full_current.zip"
   :db                         "jdbc:postgresql://localhost:5125/ftr?user=ftr&password=password"
   :ftr-path                   "/tmp/ftr/"
   :working-dir-path           "/tmp/rxnorm_work_dir"})


(defn extract-rxnorm-version
  "Derives the RxNorm release version from the name of the bundle's Readme file.

  The Readme is expected to be named `Readme_full_<version>.txt`. The version is
  taken as the text after the last underscore, up to the first following dot.

  Parameters:
  - `extract-destination`: A File object pointing to the unpacked RxNorm directory.

  Returns:
  - A string representing the extracted RxNorm version.

  Throws:
  - `clojure.lang.ExceptionInfo` when the directory cannot be listed, holds no
    file whose name starts with \"Readme\", or that name yields no version."
  [^File extract-destination]
  (let [;; `.listFiles` returns nil when the path is not a readable directory;
        ;; mapping over nil simply yields an empty seq.
        readme-file-name (->> (.listFiles extract-destination)
                              (map (fn [^File f] (.getName f)))
                              (filter #(str/starts-with? % "Readme"))
                              (first))
        ;; `some->` short-circuits on a missing Readme instead of letting
        ;; `str/split` fail with an uninformative NullPointerException.
        rxnorm-version   (some-> readme-file-name
                                 (str/split #"_")
                                 (last)
                                 (str/split #"\.")
                                 (first))]
    (when (str/blank? rxnorm-version)
      (throw (ex-info "Unable to determine RxNorm version: expected a 'Readme_full_<version>.txt' file in the bundle directory"
                      {:extract-destination (str extract-destination)
                       :readme-file-name    readme-file-name})))
    rxnorm-version))


;; Downloads the current RxNorm bundle and unpacks it into the working directory.
;;
;; Reads from ctx:
;; - `automated-downloads-url`     endpoint that is requested
;; - `rxnorm-current-version-url`  sent as the "url" query parameter
;; - `api-key`                     sent as the "apiKey" query parameter
;; - `working-dir-path`            directory (created if missing) holding the
;;                                 downloaded zip and the extracted bundle
;;
;; Returns a map with `:rxnorm-version`, `:extract-destination` and
;; `:download-destination` (the latter two are File objects).
(defmethod u/*fn ::get-rxnorm-bundle!
  [{:as _ctx,
    :keys [automated-downloads-url rxnorm-current-version-url
           api-key working-dir-path]}]
  (let [_                    (.mkdirs (io/file working-dir-path))
        download-destination (io/file working-dir-path "rxnorm-bundle.zip")
        extract-destination  (io/file working-dir-path "uncompessed-rxnorm-bundle")
        response             (client/get automated-downloads-url
                                         {:query-params {"url"    rxnorm-current-version-url
                                                         "apiKey" api-key}
                                          :as           :byte-array})
        ^bytes response-body (:body response)]
    ;; Write the archive and let `with-open` flush and close the stream BEFORE
    ;; unzipping: the stream is buffered, so reading the file while it is still
    ;; open can observe a truncated archive.
    (with-open [^java.io.OutputStream w (io/output-stream download-destination)]
      (.write w response-body))
    (ftr.ci-pipelines.utils/unzip-file! download-destination extract-destination)

    {:rxnorm-version       (extract-rxnorm-version extract-destination)
     :extract-destination  extract-destination
     :download-destination download-destination}))


;; Assembles the FTR configuration for an RxNorm extraction run.
;; Reads `db`, `ftr-path`, `rxnorm-version`, `extract-destination` and the
;; optional `module` (falls back to "rxnorm") from ctx and returns the
;; configuration under the `:cfg` key.
(defmethod u/*fn ::build-ftr-cfg
  [{:keys [db ftr-path rxnorm-version extract-destination module]}]
  (let [rxnorm-system "http://www.nlm.nih.gov/research/umls/rxnorm"
        code-system   {:resourceType "CodeSystem"
                       :id           "rxnorm-cs"
                       :url          rxnorm-system
                       :description  "RxNorm provides normalized names for clinical drugs and links its names to many of the drug vocabularies commonly used in pharmacy management and drug interaction software, including those of First Databank, Micromedex, and Gold Standard Drug Database. By providing links between these vocabularies, RxNorm can mediate messages between systems not using the same software and vocabulary. RxNorm now includes the United States Pharmacopeia (USP) Compendial Nomenclature from the United States Pharmacopeial Convention. USP is a cumulative data set of all Active Pharmaceutical Ingredients (API)."
                       :content      "not-present"
                       :version      rxnorm-version
                       :name         "RxNorm"
                       :publisher    "National Library of Medicine (NLM)"
                       :status       "active"}
        value-set     {:id           "rxnorm-vs"
                       :resourceType "ValueSet"
                       :version      rxnorm-version
                       :compose      {:include [{:system rxnorm-system}]}
                       :status       "active"
                       :name         "RxNorm"
                       :url          "http://www.nlm.nih.gov/research/umls/rxnorm/valueset"}
        module-name   (or module "rxnorm")]
    {:cfg {:module            module-name
           :source-url        extract-destination
           :ftr-path          ftr-path
           :tag               "prod"
           :source-type       :rxnorm
           :extractor-options {:db          db
                               :code-system code-system
                               :value-set   value-set}}}))


;; Writes a zen package skeleton for RxNorm into the working directory:
;; `<working-dir-path>/zen-package.edn` with the zen.fhir dependency and
;; `<working-dir-path>/zrc/rxnorm.edn` with the pretty-printed `rxnorm`
;; namespace describing the value set. Does nothing when `working-dir-path`
;; is absent from ctx.
(defmethod u/*fn ::generate-rxnorm-zen-package
  [{:keys [working-dir-path rxnorm-version module]}]
  (when working-dir-path
    (let [package-path (str working-dir-path "/zen-package.edn")
          zrc-file     (io/file (str working-dir-path "/zrc/rxnorm.edn"))]
      (io/make-parents package-path)
      (spit package-path
            {:deps {'zen.fhir "https://github.com/zen-fhir/zen.fhir.git"}})
      ;; Make sure the zrc/ directory exists before writing into it.
      (.mkdirs (.getParentFile zrc-file))
      (spit zrc-file
            (with-out-str
              (clojure.pprint/pprint
               {'ns     'rxnorm
                'import #{'zen.fhir}
                'value-set
                {:zen/tags         #{'zen.fhir/value-set}
                 :zen/desc         "Includes all concepts from RxNorm."
                 :zen.fhir/version (ftr.ci-pipelines.utils/get-zen-fhir-version!)
                 :fhir/code-systems
                 #{{:fhir/url         "http://www.nlm.nih.gov/research/umls/rxnorm"
                    :zen.fhir/content :bundled}}
                 :uri              "http://www.nlm.nih.gov/research/umls/rxnorm/valueset"
                 :version          rxnorm-version
                 :ftr
                 {:module      (or module "rxnorm")
                  :source-url  "https://storage.googleapis.com"
                  :ftr-path    "ftr"
                  :source-type :cloud-storage
                  :tag         "prod"}}}))))))


(defn pipeline
  "Runs the RxNorm FTR pipeline and pretty-prints the resulting context.

  `args` is merged over `config-defaults`, and the `:ftr.logger.core/log-step`
  tracer is attached before the steps are applied. Steps prefixed with `#_`
  are currently disabled."
  [args]
  (let [steps  [#_:ftr.ci-pipelines.utils/download-previous-ftr-version!
                ::get-rxnorm-bundle!
                ::build-ftr-cfg
                :ftr.core/apply-cfg
                #_::clear-working-dir
                #_:ftr.ci-pipelines.utils/upload-to-gcp-bucket
                #_::generate-rxnorm-zen-package
                #_:ftr.ci-pipelines.utils/push-zen-package
                #_:ftr.ci-pipelines.utils/send-tg-notification]
        cfg    (assoc (merge config-defaults args)
                      :ftr.utils.unifn.core/tracers [:ftr.logger.core/log-step])
        result (u/*apply steps cfg)]
    (clojure.pprint/pprint result)))


(comment
;; REPL helper: runs the pipeline with an empty args map, i.e. using only the
;; values from `config-defaults`.
(pipeline {})

)
11 changes: 11 additions & 0 deletions src/ftr/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,14 @@
::write-tag-index-hash])


;; Ordered step keys for processing an RxNorm source; `::select-ftr-pipeline`
;; picks this vector for the `[:append :rxnorm]` commit-type/source-type pair.
(def rxnorm-pipeline
[::extract-terminology
::write-terminology-file
::shape-ftr-layout
:ftr.post-write-coordination.core/coordinate
::write-tag-index-hash])


(defmethod u/*fn ::select-ftr-pipeline [{:as _ctx,
::keys [commit-type]
{:keys [source-type]} :cfg}]
Expand All @@ -165,6 +173,9 @@
[:append :loinc]
loinc-pipeline

[:append :rxnorm]
rxnorm-pipeline

[:tag-merge nil]
tag-merge-pipeline)})

Expand Down
7 changes: 5 additions & 2 deletions src/ftr/extraction/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
[ftr.extraction.serialized-objects-array]
[ftr.extraction.ftr]
[ftr.extraction.icd10]
[ftr.extraction.loinc]))
[ftr.extraction.loinc]
[ftr.extraction.rxnorm]))


(defn extract [cfg]
(let [{:keys [source-type source-url source-urls extractor-options]} cfg
Expand All @@ -21,4 +23,5 @@
:serialized-objects-array (ftr.extraction.serialized-objects-array/import-from-cfg extractor-cfg)
:ftr (ftr.extraction.ftr/import-from-cfg extractor-cfg)
:icd10 (ftr.extraction.icd10/import-from-cfg extractor-cfg)
:loinc (ftr.extraction.loinc/import-from-cfg extractor-cfg))))
:loinc (ftr.extraction.loinc/import-from-cfg extractor-cfg)
:rxnorm (ftr.extraction.rxnorm/import-from-cfg extractor-cfg))))
Loading

0 comments on commit 83bb104

Please sign in to comment.