Skip to content

Commit

Permalink
Merge branch 'main' into LANTERN-804-Add-webscraper-fhirpt-url
Browse files Browse the repository at this point in the history
  • Loading branch information
rishi-salunkhe-mettle authored Dec 16, 2024
2 parents d7938e0 + 81d9061 commit 93faccd
Show file tree
Hide file tree
Showing 10 changed files with 140 additions and 86 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package chplendpointquerier

import (
"regexp"
"strings"

"github.com/PuerkitoBio/goquery"
"github.com/onc-healthit/lantern-back-end/endpointmanager/pkg/helpers"
log "github.com/sirupsen/logrus"
)

// ChntechsolutionsWebscraper scrapes the endpoint listing page at CHPLURL and
// writes the FHIR base URLs it finds to fileToWriteTo via WriteCHPLFile.
//
// The page is loaded through helpers.ChromedpQueryEndpointList. Every <code>
// element nested inside a <p> element is searched for the first token that
// starts with "https" and ends with "metadata"; a trailing "/metadata" is
// stripped from that token and the remainder is recorded as an endpoint URL.
//
// A failure to load the page or to write the output file terminates the
// process through log.Fatal.
func ChntechsolutionsWebscraper(CHPLURL string, fileToWriteTo string) {

	var lanternEntryList []LanternEntry
	var endpointEntryList EndpointList

	doc, err := helpers.ChromedpQueryEndpointList(CHPLURL, "")
	if err != nil {
		log.Fatal(err)
	}

	// Compiled once per call instead of once per <code> element. The class
	// [^\s] already excludes newlines, so the element text can be matched
	// as-is and the resulting match never contains whitespace.
	metadataURLPattern := regexp.MustCompile(`https[^\s]*metadata`)

	doc.Find("p").Each(func(_ int, phtml *goquery.Selection) {
		phtml.Find("code").Each(func(_ int, codehtml *goquery.Selection) {
			match := metadataURLPattern.FindString(codehtml.Text())
			if match == "" {
				return
			}

			var entry LanternEntry
			entry.URL = strings.TrimSuffix(match, "/metadata")
			lanternEntryList = append(lanternEntryList, entry)
		})
	})

	// Publish the collected entries once, after the traversal is complete.
	endpointEntryList.Endpoints = lanternEntryList

	err = WriteCHPLFile(endpointEntryList, fileToWriteTo)
	if err != nil {
		log.Fatal(err)
	}

}
19 changes: 11 additions & 8 deletions endpointmanager/pkg/chplendpointquerier/chplendpointquerier.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ var carefluenceURL = "https://carefluence.com/carefluence-fhir-endpoints/"
var practiceSuiteURL = "https://academy.practicesuite.com/fhir-server-links/"
var bizmaticsURL = "https://prognocis.com/fhir/index.html"
var indianHealthServiceURL = "https://www.ihs.gov/cis/"
var geniusSolutionsURL = "https://gsehrwebapi.geniussolutions.com/Help/html/ServiceUrl.html"
var geniusSolutionsURL = "http://www.media.geniussolutions.com/ehrTHOMAS/ehrWebApi/Help/html/ServiceUrl.html"
var assureCareURL = "https://ipatientcare.com/onc-acb-certified-2015-edition/"
var intelichartURL = "https://fhirtest.intelichart.com/Help/BaseUrl"
var healthCare2000URL = "https://www.provider.care/FHIR/MDVitaFHIRUrls.csv"
Expand All @@ -52,7 +52,7 @@ var qualifactsInsyncURL = "https://qualifacts.com/api-page/platform/insync/insyn
var qualifactsCredibleURL = "https://qualifacts.com/api-page/_downloads/credible-fhir-org-list.json"
var medinfoengineeringURL = "https://docs.webchartnow.com/resources/system-specifications/fhir-application-programming-interface-api/endpoints/"
var relimedsolutionsURL = "https://help.relimedsolutions.com/fhir/fhir-service-urls.csv"
var eclinicalworksURL = "https://fhir.eclinicalworks.com/ecwopendev"
var eclinicalworksURL = "https://fhir.eclinicalworks.com/ecwopendev/external/practiceList"

var integraconnectURL = "https://portal.minerva.integracloud.com/minerva/fhir/r4/us-core/bundle"
var streamlinemdURL = "https://patientportal.streamlinemd.com/FHIRReg/Practice%20Service%20based%20URL%20List.csv"
Expand All @@ -62,7 +62,6 @@ var microfourURL = "https://oauth.patientwebportal.com/Fhir/Documentation#servic
var magilenenterprisesURL = "https://www.qsmartcare.com/api-documentation.html"
var interopxURL = "https://demo.interopx.com/ix-auth-server/#/endpoints"
var mphrxURL = "https://www.mphrx.com/fhir-service-base-url-directory/"
var correctekURL = "https://ulrichmedicalconcepts.com/home/the-ehr/meaningful-use/disclosure-and-transparency/"
var varianmedicalURL = "https://varian.dynamicfhir.com/"
var caretrackerURL = "https://hag-fhir.amazingcharts.com/ac/endpoints"
var zhhealthcareURL = "https://blueehr.com/fhir-urls/"
Expand Down Expand Up @@ -93,7 +92,7 @@ var carecloudURL = "https://api-datamanager.carecloud.com/"
var ethizoURL = "https://fhir-api.ethizo.com/#55b1b3d2-fd9a-4afa-8d17-5bf78943702d"
var hmsfirstURL = "https://fhir-api.hmsfirst.com/r4/EndPoints"
var praxisemrURL = "https://www.praxisemr.com/applicationaccess/api/help/"
var escribeHOSTURL = "https://ehr.escribe.com/ehr/api/fhir"
var escribeHOSTURL = "https://ehr.escribe.com/ehr/api/fhir/swagger-ui/"
var mdlogicEHRURL = "https://www.mdlogic.com/solutions/standard-api-documentation"
var altheaURL = "https://altheafhir.mdsynergy.com"
var webchartnowURL = "https://docs.webchartnow.com/resources/system-specifications/fhir-application-programming-interface-api/endpoints/"
Expand Down Expand Up @@ -198,6 +197,8 @@ var emdscloudURL = "https://identity.emdscloud.com/api/api-resource/fhir"
var betaAfoundriaURL = "https://beta.afoundria.com/api/fhir/urls"
var ehealthlineURL = "http://ehealthline.com/dev/pdf/FHIR%20API%20Endpoints.htm"
var fhirptURL = "https://fhirpt-stage.officeally.com/fhir/r4/endpoints"
var chntechURL = "https://onc.chntechsolutions.com/ic-ehr-fhir-api/"
var zoobooksystemsURL = "https://zoobooksystems.com/api-documentation/"

var bundleQuerierArray = [30]string{"https://ac-fhir.harrisambulatory.com/endpoints/r4", "https://dynamicfhirpresentation.dynamicfhirsandbox.com/fhir/r4/endpoints",
"https://ct-fhir.harrisambulatory.com/Endpoints/R4", "https://kantime.com/wp-content/uploads/2024/03/fhir-base-urls.json",
Expand All @@ -206,7 +207,7 @@ var bundleQuerierArray = [30]string{"https://ac-fhir.harrisambulatory.com/endpoi
"https://smartserver.novoclinical.com/.well-known/bundle", "https://fhir.prod.flatiron.io/fhir", "https://public-files.oystehr.com/endpoint.json",
"https://fhirpresentation.pcsdataxchg.com/fhir/r4/endpoints", "https://pic-fhir.harrisambulatory.com/endpoints/r4", "https://pul-fhir.harrisambulatory.com/endpoints/r4", "https://fhir.qsmartcare.com:8000",
"https://appstudio.interopengine.com/partner/fhirR4endpoints-umc.json", "https://testauth.strateqhealth.com/SmartOnFHIR/ValidURLs.json",
"https://fhir.ethizo.com/api/4.0.0/service_based_url"}
"https://fhir.ethizo.com/api/4.0.0/service_based_url", "https://fhir.maximus.care/api/service_based_url"}

var ontadaURL = "https://g2fhir-int.mckesson.com/docs/index.html"
var mdlandURL = "https://api-fhir-proxy-2.mdland.net/"
Expand Down Expand Up @@ -314,8 +315,6 @@ func QueryCHPLEndpointList(chplURL string, fileToWriteTo string) {
InteropxWebscraper(chplURL, fileToWriteTo)
} else if chplURL == mphrxURL {
SwaggerUIWebscraper("https://atdevsandbox.mphrx.com/", fileToWriteTo)
} else if chplURL == correctekURL {
CorrecTekWebscraper(chplURL, fileToWriteTo)
} else if chplURL == varianmedicalURL {
VarianMedicalWebscraper(chplURL+"dhit/basepractice/r4/Home/ApiDocumentation", fileToWriteTo)
} else if chplURL == caretrackerURL {
Expand Down Expand Up @@ -480,7 +479,7 @@ func QueryCHPLEndpointList(chplURL string, fileToWriteTo string) {
} else if URLsEqual(chplURL, NexusURL) {
CSVParser("https://www.nexusclinical.net/nexusehr-fhirapi-base-urls.csv", fileToWriteTo, "./nexusehr-fhirapi-base-urls.csv", 1, 0, true, 2, 1)
} else if URLsEqual(chplURL, MEDENTURL) {
CSVParser(MEDENTURL, fileToWriteTo, "./ServiceBaseURL.csv", 1, 0, true, 1, 0)
CSVParser(MEDENTURL, fileToWriteTo, "./ServiceBaseURL.csv", -1, 2, true, 1, 0)
} else if URLsEqual(chplURL, canvasMedicalURL) {
CanvasMedicalURLWebscraper(chplURL, fileToWriteTo)
} else if URLsEqual(chplURL, maximusURL) {
Expand Down Expand Up @@ -591,6 +590,10 @@ func QueryCHPLEndpointList(chplURL string, fileToWriteTo string) {
EhealthlineWebscraper(ehealthlineURL, fileToWriteTo)
} else if URLsEqual(chplURL, fhirptURL) {
BundleQuerierParser(chplURL, fileToWriteTo)
} else if URLsEqual(chplURL, chntechURL) {
ChntechsolutionsWebscraper(chplURL, fileToWriteTo)
} else if URLsEqual(chplURL, zoobooksystemsURL) {
err = ZoobooksystemsWebscraper(zoobooksystemsURL, fileToWriteTo)
} else {
log.Warnf("Handler is required for url %s", chplURL)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ func TestWebScrapers(t *testing.T) {
url: "https://www.ezemrx.com/fhir",
fileName: "ezEMRx_Inc_EndpointSources.json",
},
{
scraperFunc: ZoobooksystemsWebscraper,
url: "https://zoobooksystems.com/api-documentation/",
fileName: "Zoobook_Systems_LLC_EndpointSources.json",
},
}

for _, tc := range testCases {
Expand Down
59 changes: 0 additions & 59 deletions endpointmanager/pkg/chplendpointquerier/correctekwebscraper.go

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ func CuremdWebscraper(CHPLURL string, fileToWriteTo string) {
if pTag.Length() > 0 {
pTag.Find("a").Each(func(i int, link *goquery.Selection) {
url, exists := link.Attr("href")
url = strings.TrimSuffix(url, "/metadata")
if exists {
entry.URL = url
lanternEntryList = append(lanternEntryList, entry)
Expand Down
28 changes: 16 additions & 12 deletions endpointmanager/pkg/chplendpointquerier/unifywebscraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,27 @@ func UnifyWebscraper(unifyURL string, fileToWriteTo string) {
}

doc.Find(".box").Each(func(index int, boxElems *goquery.Selection) {
h3Elem := boxElems.Find("h3")
if h3Elem.Length() > 0 && h3Elem.Text() == "Request" {
pEntries := boxElems.Find("p")
boxElems.Find(".tbl-container").Each(func(index int, tblElems *goquery.Selection) {
table := tblElems.Find("table")
tbody := table.Find("tbody")
tr := tbody.Find("tr").First()
td1 := tr.Find("td").First()

if pEntries.Length() > 0 && strings.Contains(pEntries.Text(), "FHIR Base URL: ") {
var entry LanternEntry
if td1.Length() > 0 && strings.Contains(td1.Text(), "Sandbox Base URL") {
tr.Find("td").Each(func(index int, tdElems *goquery.Selection) {
if tdElems.Length() > 0 && strings.Contains(tdElems.Text(), "https") {
var entry LanternEntry

aElem := pEntries.Find("a")
entryURL := strings.TrimSpace(tdElems.Text())
entry.URL = entryURL

entryURL := strings.TrimSpace(aElem.Text())
entry.URL = entryURL
lanternEntryList = append(lanternEntryList, entry)

lanternEntryList = append(lanternEntryList, entry)

return
return
}
})
}
}
})
})

endpointEntryList.Endpoints = lanternEntryList
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func UnitedHealthURLWebscraper(CHPLURL string, fileToWriteTo string) {
count := 1
doc.Find("div").Each(func(index int, divhtml *goquery.Selection) {
dataOpen, exists := divhtml.Attr("data-opensnewwindow")
if exists && dataOpen != "" && count == 237 {
if exists && dataOpen != "" && count == 240 {
pElem := divhtml.Find("p").First()
if pElem.Length() > 0 && strings.Contains(pElem.Text(), ".fhir.") {
var entry LanternEntry
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package chplendpointquerier

import (
"strings"

"github.com/PuerkitoBio/goquery"
"github.com/onc-healthit/lantern-back-end/endpointmanager/pkg/helpers"
log "github.com/sirupsen/logrus"
)

// ZoobooksystemsWebscraper scrapes the API documentation page at CHPLURL and
// writes the endpoint URLs it finds to fileToWriteTo via WriteCHPLFile.
//
// The page is loaded through helpers.ChromedpQueryEndpointList, waiting on the
// ".col-lg-6.text-secondary.fw-bold" selector. The ".col-lg-6" elements are
// walked in document order; collection starts at the first one whose text is
// exactly "PRODUCTION" and continues through every element after it. Within
// those elements, each <a> whose text contains "https" but not "oauth" is
// recorded, with surrounding whitespace trimmed.
//
// The output file is written once, after the traversal, and only when at
// least one endpoint was found. It returns any error from loading the page or
// writing the file, and nil otherwise (including when nothing was found).
func ZoobooksystemsWebscraper(CHPLURL string, fileToWriteTo string) error {

	var lanternEntryList []LanternEntry
	var endpointEntryList EndpointList
	doc, err := helpers.ChromedpQueryEndpointList(CHPLURL, ".col-lg-6.text-secondary.fw-bold")
	if err != nil {
		return err
	}

	inProduction := false

	doc.Find(".col-lg-6").Each(func(_ int, divhtml *goquery.Selection) {
		if divhtml.Text() == "PRODUCTION" {
			inProduction = true
		}

		// Everything before the PRODUCTION marker is ignored.
		if !inProduction {
			return
		}

		divhtml.Find("a").Each(func(_ int, ahtml *goquery.Selection) {
			linkText := ahtml.Text()
			if !strings.Contains(linkText, "https") || strings.Contains(linkText, "oauth") {
				return
			}

			var entry LanternEntry
			entry.URL = strings.TrimSpace(linkText)
			lanternEntryList = append(lanternEntryList, entry)
		})
	})

	// Leave any existing output file untouched when the scrape found nothing,
	// but make the empty result visible in the logs.
	if len(lanternEntryList) == 0 {
		log.Warnf("No production endpoints found at %s", CHPLURL)
		return nil
	}

	endpointEntryList.Endpoints = lanternEntryList

	// Write the complete list once and hand any failure back to the caller
	// rather than terminating the process.
	return WriteCHPLFile(endpointEntryList, fileToWriteTo)
}
2 changes: 2 additions & 0 deletions scripts/automatic_populatedb_prod.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,5 @@ rm -f npidata_pfile.csv
rm -f npidata_pfile

echo "$current_datetime - done" >> $log_file

docker cp lantern-back-end_endpoint_manager_1:/etc/lantern/populatedb_logs.txt /etc/lantern/logs/
15 changes: 9 additions & 6 deletions scripts/populatedb.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
#!/bin/sh

log_file="/etc/lantern/populatedb_logs.txt"
current_datetime=$(date +"%Y-%m-%d %H:%M:%S")

set -e

# get endpoint data
cd cmd/endpointpopulator

# Populates the database with State Medicaid endpoints
go run main.go /etc/lantern/resources/MedicaidState_EndpointSources.json Lantern StateMedicaid false StateMedicaid
go run main.go /etc/lantern/resources/MedicaidState_EndpointSources.json Lantern StateMedicaid false StateMedicaid >> $log_file 2>&1

jq -c '.[]' /etc/lantern/resources/MedicareStateEndpointResourcesList.json | while read endpoint; do
NAME=$(echo $endpoint | jq -c -r '.EndpointName')
Expand All @@ -15,7 +18,7 @@ jq -c '.[]' /etc/lantern/resources/MedicareStateEndpointResourcesList.json | whi
LISTURL=$(echo $endpoint | jq -c -r '.URL')

if [ -f "/etc/lantern/resources/$FILENAME" ]; then
go run main.go /etc/lantern/resources/$FILENAME $FORMAT "${NAME}" true $LISTURL
go run main.go /etc/lantern/resources/$FILENAME $FORMAT "${NAME}" true $LISTURL >> $log_file 2>&1
fi
done

Expand All @@ -25,7 +28,7 @@ jq -c '.[]' /etc/lantern/resources/EndpointResourcesList.json | while read endpo
FILENAME=$(echo $endpoint | jq -c -r '.FileName')
LISTURL=$(echo $endpoint | jq -c -r '.URL')

go run main.go /etc/lantern/resources/$FILENAME $FORMAT $NAME false $LISTURL
go run main.go /etc/lantern/resources/$FILENAME $FORMAT $NAME false $LISTURL >> $log_file 2>&1
done

jq -c '.[]' /etc/lantern/resources/CHPLEndpointResourcesList.json | while read endpoint; do
Expand All @@ -35,7 +38,7 @@ jq -c '.[]' /etc/lantern/resources/CHPLEndpointResourcesList.json | while read e
LISTURL=$(echo $endpoint | jq -c -r '.URL')

if [ -f "/etc/lantern/resources/$FILENAME" ]; then
go run main.go /etc/lantern/resources/$FILENAME $FORMAT "${NAME}" true $LISTURL
go run main.go /etc/lantern/resources/$FILENAME $FORMAT "${NAME}" true $LISTURL >> $log_file 2>&1
fi
done

Expand All @@ -46,7 +49,7 @@ cd ..

# get CHPL info into db
cd chplquerier
go run main.go
go run main.go >> $log_file 2>&1
cd ..

# get NPPES contact (endpoint) pfile into db
Expand All @@ -64,5 +67,5 @@ cd ..

# run data validation to ensure number of endpoints does not exceed maximum for query interval
cd datavalidation
go run main.go
go run main.go >> $log_file 2>&1
cd ..

0 comments on commit 93faccd

Please sign in to comment.