diff --git a/endpointmanager/pkg/chplendpointquerier/chntechsolutionswebscraper.go b/endpointmanager/pkg/chplendpointquerier/chntechsolutionswebscraper.go
new file mode 100644
index 000000000..175fa75bd
--- /dev/null
+++ b/endpointmanager/pkg/chplendpointquerier/chntechsolutionswebscraper.go
@@ -0,0 +1,51 @@
+package chplendpointquerier
+
+import (
+	"regexp"
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/onc-healthit/lantern-back-end/endpointmanager/pkg/helpers"
+	log "github.com/sirupsen/logrus"
+)
+
+// chntechMetadataURLRe matches an https URL that runs, without whitespace, up
+// to a trailing "metadata". It is compiled once at package scope rather than
+// once per scraped <code> element.
+var chntechMetadataURLRe = regexp.MustCompile(`https[^\s]*metadata`)
+
+// ChntechsolutionsWebscraper loads the page at CHPLURL, extracts a FHIR base
+// URL from each <code> element nested in a <p> element, and writes the
+// resulting endpoint list to fileToWriteTo.
+//
+// The base URL is the first https...metadata match in the element's text with
+// a trailing "/metadata" removed. A failure to load the page or to write the
+// file is reported through log.Fatal.
+func ChntechsolutionsWebscraper(CHPLURL string, fileToWriteTo string) {
+	var endpointEntryList EndpointList
+
+	doc, err := helpers.ChromedpQueryEndpointList(CHPLURL, "")
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	doc.Find("p").Each(func(_ int, paragraph *goquery.Selection) {
+		paragraph.Find("code").Each(func(_ int, code *goquery.Selection) {
+			text := strings.ReplaceAll(code.Text(), "\n", " ")
+			match := chntechMetadataURLRe.FindString(text)
+			if match == "" {
+				return
+			}
+
+			// The pattern cannot match whitespace, so the match needs no trimming.
+			var entry LanternEntry
+			entry.URL = strings.TrimSuffix(match, "/metadata")
+			endpointEntryList.Endpoints = append(endpointEntryList.Endpoints, entry)
+		})
+	})
+
+	err = WriteCHPLFile(endpointEntryList, fileToWriteTo)
+	if err != nil {
+		log.Fatal(err)
+	}
+}
diff --git a/endpointmanager/pkg/chplendpointquerier/chplendpointquerier.go b/endpointmanager/pkg/chplendpointquerier/chplendpointquerier.go index 856d8d033..ba48e6ae6 100644 --- a/endpointmanager/pkg/chplendpointquerier/chplendpointquerier.go +++ b/endpointmanager/pkg/chplendpointquerier/chplendpointquerier.go @@ -39,7 +39,7 @@ var carefluenceURL = "https://carefluence.com/carefluence-fhir-endpoints/" var practiceSuiteURL = "https://academy.practicesuite.com/fhir-server-links/" var bizmaticsURL = "https://prognocis.com/fhir/index.html" var
indianHealthServiceURL = "https://www.ihs.gov/cis/" -var geniusSolutionsURL = "https://gsehrwebapi.geniussolutions.com/Help/html/ServiceUrl.html" +var geniusSolutionsURL = "http://www.media.geniussolutions.com/ehrTHOMAS/ehrWebApi/Help/html/ServiceUrl.html" var assureCareURL = "https://ipatientcare.com/onc-acb-certified-2015-edition/" var intelichartURL = "https://fhirtest.intelichart.com/Help/BaseUrl" var healthCare2000URL = "https://www.provider.care/FHIR/MDVitaFHIRUrls.csv" @@ -52,7 +52,7 @@ var qualifactsInsyncURL = "https://qualifacts.com/api-page/platform/insync/insyn var qualifactsCredibleURL = "https://qualifacts.com/api-page/_downloads/credible-fhir-org-list.json" var medinfoengineeringURL = "https://docs.webchartnow.com/resources/system-specifications/fhir-application-programming-interface-api/endpoints/" var relimedsolutionsURL = "https://help.relimedsolutions.com/fhir/fhir-service-urls.csv" -var eclinicalworksURL = "https://fhir.eclinicalworks.com/ecwopendev" +var eclinicalworksURL = "https://fhir.eclinicalworks.com/ecwopendev/external/practiceList" var integraconnectURL = "https://portal.minerva.integracloud.com/minerva/fhir/r4/us-core/bundle" var streamlinemdURL = "https://patientportal.streamlinemd.com/FHIRReg/Practice%20Service%20based%20URL%20List.csv" @@ -62,7 +62,6 @@ var microfourURL = "https://oauth.patientwebportal.com/Fhir/Documentation#servic var magilenenterprisesURL = "https://www.qsmartcare.com/api-documentation.html" var interopxURL = "https://demo.interopx.com/ix-auth-server/#/endpoints" var mphrxURL = "https://www.mphrx.com/fhir-service-base-url-directory/" -var correctekURL = "https://ulrichmedicalconcepts.com/home/the-ehr/meaningful-use/disclosure-and-transparency/" var varianmedicalURL = "https://varian.dynamicfhir.com/" var caretrackerURL = "https://hag-fhir.amazingcharts.com/ac/endpoints" var zhhealthcareURL = "https://blueehr.com/fhir-urls/" @@ -93,7 +92,7 @@ var carecloudURL = "https://api-datamanager.carecloud.com/" var 
ethizoURL = "https://fhir-api.ethizo.com/#55b1b3d2-fd9a-4afa-8d17-5bf78943702d" var hmsfirstURL = "https://fhir-api.hmsfirst.com/r4/EndPoints" var praxisemrURL = "https://www.praxisemr.com/applicationaccess/api/help/" -var escribeHOSTURL = "https://ehr.escribe.com/ehr/api/fhir" +var escribeHOSTURL = "https://ehr.escribe.com/ehr/api/fhir/swagger-ui/" var mdlogicEHRURL = "https://www.mdlogic.com/solutions/standard-api-documentation" var altheaURL = "https://altheafhir.mdsynergy.com" var webchartnowURL = "https://docs.webchartnow.com/resources/system-specifications/fhir-application-programming-interface-api/endpoints/" @@ -198,6 +197,8 @@ var emdscloudURL = "https://identity.emdscloud.com/api/api-resource/fhir" var betaAfoundriaURL = "https://beta.afoundria.com/api/fhir/urls" var ehealthlineURL = "http://ehealthline.com/dev/pdf/FHIR%20API%20Endpoints.htm" var fhirptURL = "https://fhirpt-stage.officeally.com/fhir/r4/endpoints" +var chntechURL = "https://onc.chntechsolutions.com/ic-ehr-fhir-api/" +var zoobooksystemsURL = "https://zoobooksystems.com/api-documentation/" var bundleQuerierArray = [30]string{"https://ac-fhir.harrisambulatory.com/endpoints/r4", "https://dynamicfhirpresentation.dynamicfhirsandbox.com/fhir/r4/endpoints", "https://ct-fhir.harrisambulatory.com/Endpoints/R4", "https://kantime.com/wp-content/uploads/2024/03/fhir-base-urls.json", @@ -206,7 +207,7 @@ var bundleQuerierArray = [30]string{"https://ac-fhir.harrisambulatory.com/endpoi "https://smartserver.novoclinical.com/.well-known/bundle", "https://fhir.prod.flatiron.io/fhir", "https://public-files.oystehr.com/endpoint.json", "https://fhirpresentation.pcsdataxchg.com/fhir/r4/endpoints", "https://pic-fhir.harrisambulatory.com/endpoints/r4", "https://pul-fhir.harrisambulatory.com/endpoints/r4", "https://fhir.qsmartcare.com:8000", "https://appstudio.interopengine.com/partner/fhirR4endpoints-umc.json", "https://testauth.strateqhealth.com/SmartOnFHIR/ValidURLs.json", - 
"https://fhir.ethizo.com/api/4.0.0/service_based_url"} + "https://fhir.ethizo.com/api/4.0.0/service_based_url", "https://fhir.maximus.care/api/service_based_url"} var ontadaURL = "https://g2fhir-int.mckesson.com/docs/index.html" var mdlandURL = "https://api-fhir-proxy-2.mdland.net/" @@ -314,8 +315,6 @@ func QueryCHPLEndpointList(chplURL string, fileToWriteTo string) { InteropxWebscraper(chplURL, fileToWriteTo) } else if chplURL == mphrxURL { SwaggerUIWebscraper("https://atdevsandbox.mphrx.com/", fileToWriteTo) - } else if chplURL == correctekURL { - CorrecTekWebscraper(chplURL, fileToWriteTo) } else if chplURL == varianmedicalURL { VarianMedicalWebscraper(chplURL+"dhit/basepractice/r4/Home/ApiDocumentation", fileToWriteTo) } else if chplURL == caretrackerURL { @@ -480,7 +479,7 @@ func QueryCHPLEndpointList(chplURL string, fileToWriteTo string) { } else if URLsEqual(chplURL, NexusURL) { CSVParser("https://www.nexusclinical.net/nexusehr-fhirapi-base-urls.csv", fileToWriteTo, "./nexusehr-fhirapi-base-urls.csv", 1, 0, true, 2, 1) } else if URLsEqual(chplURL, MEDENTURL) { - CSVParser(MEDENTURL, fileToWriteTo, "./ServiceBaseURL.csv", 1, 0, true, 1, 0) + CSVParser(MEDENTURL, fileToWriteTo, "./ServiceBaseURL.csv", -1, 2, true, 1, 0) } else if URLsEqual(chplURL, canvasMedicalURL) { CanvasMedicalURLWebscraper(chplURL, fileToWriteTo) } else if URLsEqual(chplURL, maximusURL) { @@ -591,6 +590,10 @@ func QueryCHPLEndpointList(chplURL string, fileToWriteTo string) { EhealthlineWebscraper(ehealthlineURL, fileToWriteTo) } else if URLsEqual(chplURL, fhirptURL) { BundleQuerierParser(chplURL, fileToWriteTo) + } else if URLsEqual(chplURL, chntechURL) { + ChntechsolutionsWebscraper(chplURL, fileToWriteTo) + } else if URLsEqual(chplURL, zoobooksystemsURL) { + err = ZoobooksystemsWebscraper(zoobooksystemsURL, fileToWriteTo) } else { log.Warnf("Handler is required for url %s", chplURL) } diff --git a/endpointmanager/pkg/chplendpointquerier/commomwebscrapers_test.go 
b/endpointmanager/pkg/chplendpointquerier/commomwebscrapers_test.go index 13d9a0c34..ea5346f21 100644 --- a/endpointmanager/pkg/chplendpointquerier/commomwebscrapers_test.go +++ b/endpointmanager/pkg/chplendpointquerier/commomwebscrapers_test.go @@ -39,6 +39,11 @@ func TestWebScrapers(t *testing.T) { url: "https://www.ezemrx.com/fhir", fileName: "ezEMRx_Inc_EndpointSources.json", }, + { + scraperFunc: ZoobooksystemsWebscraper, + url: "https://zoobooksystems.com/api-documentation/", + fileName: "Zoobook_Systems_LLC_EndpointSources.json", + }, } for _, tc := range testCases { diff --git a/endpointmanager/pkg/chplendpointquerier/correctekwebscraper.go b/endpointmanager/pkg/chplendpointquerier/correctekwebscraper.go deleted file mode 100644 index 8fd727a0f..000000000 --- a/endpointmanager/pkg/chplendpointquerier/correctekwebscraper.go +++ /dev/null @@ -1,59 +0,0 @@ -package chplendpointquerier - -import ( - "strings" - - "github.com/PuerkitoBio/goquery" - "github.com/onc-healthit/lantern-back-end/endpointmanager/pkg/helpers" - log "github.com/sirupsen/logrus" -) - -func CorrecTekWebscraper(chplURL string, fileToWriteTo string) { - - var lanternEntryList []LanternEntry - var endpointEntryList EndpointList - - stu3EndpointList := "https://www.interopengine.com/2017/open-api-documentation.html" - r4EndpointList := "https://www.interopengine.com/2021/open-api-documentation.html" - count := 0 - - for count <= 1 { - endpointListURL := stu3EndpointList - if count == 1 { - endpointListURL = r4EndpointList - } - - doc, err := helpers.ChromedpQueryEndpointList(endpointListURL, "article") - if err != nil { - log.Fatal(err) - } - - doc.Find("article").Each(func(index int, articleElem *goquery.Selection) { - articleElem.Find("h4").Each(func(index int, h4Elem *goquery.Selection) { - if strings.Contains(h4Elem.Text(), "General Concepts") { - pElemURL := h4Elem.Next().Next() - aElems := pElemURL.Find("a") - if aElems.Length() > 0 { - hrefText, exists := aElems.Eq(0).Attr("href") - if 
exists { - var entry LanternEntry - - fhirURL := strings.TrimSpace(hrefText) - entry.URL = fhirURL - lanternEntryList = append(lanternEntryList, entry) - } - } - } - }) - }) - - endpointEntryList.Endpoints = append(endpointEntryList.Endpoints, lanternEntryList...) - - count++ - } - - err := WriteCHPLFile(endpointEntryList, fileToWriteTo) - if err != nil { - log.Fatal(err) - } -} diff --git a/endpointmanager/pkg/chplendpointquerier/curemdwebscraper.go b/endpointmanager/pkg/chplendpointquerier/curemdwebscraper.go index 40a561370..f83b3e3b9 100644 --- a/endpointmanager/pkg/chplendpointquerier/curemdwebscraper.go +++ b/endpointmanager/pkg/chplendpointquerier/curemdwebscraper.go @@ -26,6 +26,7 @@ func CuremdWebscraper(CHPLURL string, fileToWriteTo string) { if pTag.Length() > 0 { pTag.Find("a").Each(func(i int, link *goquery.Selection) { url, exists := link.Attr("href") + url = strings.TrimSuffix(url, "/metadata") if exists { entry.URL = url lanternEntryList = append(lanternEntryList, entry) diff --git a/endpointmanager/pkg/chplendpointquerier/unifywebscraper.go b/endpointmanager/pkg/chplendpointquerier/unifywebscraper.go index 84396453f..bc4fd9b29 100644 --- a/endpointmanager/pkg/chplendpointquerier/unifywebscraper.go +++ b/endpointmanager/pkg/chplendpointquerier/unifywebscraper.go @@ -19,23 +19,27 @@ func UnifyWebscraper(unifyURL string, fileToWriteTo string) { } doc.Find(".box").Each(func(index int, boxElems *goquery.Selection) { - h3Elem := boxElems.Find("h3") - if h3Elem.Length() > 0 && h3Elem.Text() == "Request" { - pEntries := boxElems.Find("p") + boxElems.Find(".tbl-container").Each(func(index int, tblElems *goquery.Selection) { + table := tblElems.Find("table") + tbody := table.Find("tbody") + tr := tbody.Find("tr").First() + td1 := tr.Find("td").First() - if pEntries.Length() > 0 && strings.Contains(pEntries.Text(), "FHIR Base URL: ") { - var entry LanternEntry + if td1.Length() > 0 && strings.Contains(td1.Text(), "Sandbox Base URL") { + 
tr.Find("td").Each(func(index int, tdElems *goquery.Selection) { + if tdElems.Length() > 0 && strings.Contains(tdElems.Text(), "https") { + var entry LanternEntry - aElem := pEntries.Find("a") + entryURL := strings.TrimSpace(tdElems.Text()) + entry.URL = entryURL - entryURL := strings.TrimSpace(aElem.Text()) - entry.URL = entryURL + lanternEntryList = append(lanternEntryList, entry) - lanternEntryList = append(lanternEntryList, entry) - - return + return + } + }) } - } + }) }) endpointEntryList.Endpoints = lanternEntryList diff --git a/endpointmanager/pkg/chplendpointquerier/unitedhealthwebscraper.go b/endpointmanager/pkg/chplendpointquerier/unitedhealthwebscraper.go index 1c35d0fd2..1728d8f4a 100644 --- a/endpointmanager/pkg/chplendpointquerier/unitedhealthwebscraper.go +++ b/endpointmanager/pkg/chplendpointquerier/unitedhealthwebscraper.go @@ -21,7 +21,7 @@ func UnitedHealthURLWebscraper(CHPLURL string, fileToWriteTo string) { count := 1 doc.Find("div").Each(func(index int, divhtml *goquery.Selection) { dataOpen, exists := divhtml.Attr("data-opensnewwindow") - if exists && dataOpen != "" && count == 237 { + if exists && dataOpen != "" && count == 240 { pElem := divhtml.Find("p").First() if pElem.Length() > 0 && strings.Contains(pElem.Text(), ".fhir.") { var entry LanternEntry
diff --git a/endpointmanager/pkg/chplendpointquerier/zoobooksystemswebscraper.go b/endpointmanager/pkg/chplendpointquerier/zoobooksystemswebscraper.go
new file mode 100644
index 000000000..8c19ef0e2
--- /dev/null
+++ b/endpointmanager/pkg/chplendpointquerier/zoobooksystemswebscraper.go
@@ -0,0 +1,59 @@
+package chplendpointquerier
+
+import (
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/onc-healthit/lantern-back-end/endpointmanager/pkg/helpers"
+)
+
+// ZoobooksystemsWebscraper loads the page at CHPLURL and writes the production
+// FHIR endpoints found on it to fileToWriteTo.
+//
+// The ".col-lg-6" elements are visited in document order. Links are collected
+// from the element whose text is exactly "PRODUCTION" and from every
+// ".col-lg-6" element after it. A link is kept when its text contains "https"
+// and does not contain "oauth"; the trimmed text becomes the endpoint URL.
+//
+// The endpoint file is written once, after the scan. When no link is kept,
+// nothing is written and nil is returned. An error from loading the page or
+// from writing the file is returned to the caller instead of ending the
+// process.
+func ZoobooksystemsWebscraper(CHPLURL string, fileToWriteTo string) error {
+	var endpointEntryList EndpointList
+
+	doc, err := helpers.ChromedpQueryEndpointList(CHPLURL, ".col-lg-6.text-secondary.fw-bold")
+	if err != nil {
+		return err
+	}
+
+	inProduction := false
+
+	doc.Find(".col-lg-6").Each(func(_ int, divhtml *goquery.Selection) {
+		if divhtml.Text() == "PRODUCTION" {
+			inProduction = true
+		}
+		if !inProduction {
+			return
+		}
+
+		divhtml.Find("a").Each(func(_ int, ahtml *goquery.Selection) {
+			linkText := ahtml.Text()
+			if !strings.Contains(linkText, "https") || strings.Contains(linkText, "oauth") {
+				return
+			}
+
+			var entry LanternEntry
+			entry.URL = strings.TrimSpace(linkText)
+			endpointEntryList.Endpoints = append(endpointEntryList.Endpoints, entry)
+		})
+	})
+
+	// Leave the output file untouched when the page yields no endpoints.
+	if len(endpointEntryList.Endpoints) == 0 {
+		return nil
+	}
+
+	return WriteCHPLFile(endpointEntryList, fileToWriteTo)
+}
diff --git a/scripts/automatic_populatedb_prod.sh b/scripts/automatic_populatedb_prod.sh index da21cd547..06aec4f06 100755 --- a/scripts/automatic_populatedb_prod.sh +++ b/scripts/automatic_populatedb_prod.sh @@ -56,3 +56,5 @@ rm -f npidata_pfile.csv rm -f npidata_pfile echo "$current_datetime - done" >> $log_file + +docker cp lantern-back-end_endpoint_manager_1:/etc/lantern/populatedb_logs.txt /etc/lantern/logs/ \ No newline at end of file diff --git a/scripts/populatedb.sh b/scripts/populatedb.sh index 83d9df04d..31597dde3 100755 --- a/scripts/populatedb.sh +++ b/scripts/populatedb.sh @@ -1,12 +1,15 @@ #!/bin/sh +log_file="/etc/lantern/populatedb_logs.txt" +current_datetime=$(date +"%Y-%m-%d %H:%M:%S") + set -e # get endpoint data cd cmd/endpointpopulator # Populates the database with State Medicaid endpoints -go run main.go /etc/lantern/resources/MedicaidState_EndpointSources.json Lantern StateMedicaid false StateMedicaid +go run main.go /etc/lantern/resources/MedicaidState_EndpointSources.json Lantern StateMedicaid false StateMedicaid >> $log_file 2>&1 jq -c '.[]' /etc/lantern/resources/MedicareStateEndpointResourcesList.json | while read endpoint; do NAME=$(echo
$endpoint | jq -c -r '.EndpointName') @@ -15,7 +18,7 @@ jq -c '.[]' /etc/lantern/resources/MedicareStateEndpointResourcesList.json | whi LISTURL=$(echo $endpoint | jq -c -r '.URL') if [ -f "/etc/lantern/resources/$FILENAME" ]; then - go run main.go /etc/lantern/resources/$FILENAME $FORMAT "${NAME}" true $LISTURL + go run main.go /etc/lantern/resources/$FILENAME $FORMAT "${NAME}" true $LISTURL >> $log_file 2>&1 fi done @@ -25,7 +28,7 @@ jq -c '.[]' /etc/lantern/resources/EndpointResourcesList.json | while read endpo FILENAME=$(echo $endpoint | jq -c -r '.FileName') LISTURL=$(echo $endpoint | jq -c -r '.URL') - go run main.go /etc/lantern/resources/$FILENAME $FORMAT $NAME false $LISTURL + go run main.go /etc/lantern/resources/$FILENAME $FORMAT $NAME false $LISTURL >> $log_file 2>&1 done jq -c '.[]' /etc/lantern/resources/CHPLEndpointResourcesList.json | while read endpoint; do @@ -35,7 +38,7 @@ jq -c '.[]' /etc/lantern/resources/CHPLEndpointResourcesList.json | while read e LISTURL=$(echo $endpoint | jq -c -r '.URL') if [ -f "/etc/lantern/resources/$FILENAME" ]; then - go run main.go /etc/lantern/resources/$FILENAME $FORMAT "${NAME}" true $LISTURL + go run main.go /etc/lantern/resources/$FILENAME $FORMAT "${NAME}" true $LISTURL >> $log_file 2>&1 fi done @@ -46,7 +49,7 @@ cd .. # get CHPL info into db cd chplquerier -go run main.go +go run main.go >> $log_file 2>&1 cd .. # get NPPES contact (endpoint) pfile into db @@ -64,5 +67,5 @@ cd .. # run data validation to ensure number of endpoints does not exceed maximum for query interval cd datavalidation -go run main.go +go run main.go >> $log_file 2>&1 cd .. \ No newline at end of file