From 4319987affa59d381b6bf1ac22e903063994592d Mon Sep 17 00:00:00 2001 From: archita-ekkirala Date: Wed, 4 Dec 2024 22:52:25 -0600 Subject: [PATCH 1/2] LANTERN-803: Add webscraper chntechsolns --- .../chntechsolutionswebscraper.go | 43 +++++++++++++++++++ .../chplendpointquerier.go | 3 ++ 2 files changed, 46 insertions(+) create mode 100644 endpointmanager/pkg/chplendpointquerier/chntechsolutionswebscraper.go diff --git a/endpointmanager/pkg/chplendpointquerier/chntechsolutionswebscraper.go b/endpointmanager/pkg/chplendpointquerier/chntechsolutionswebscraper.go new file mode 100644 index 000000000..c6f0259b5 --- /dev/null +++ b/endpointmanager/pkg/chplendpointquerier/chntechsolutionswebscraper.go @@ -0,0 +1,43 @@ +package chplendpointquerier + +import ( + "regexp" + "strings" + + "github.com/PuerkitoBio/goquery" + "github.com/onc-healthit/lantern-back-end/endpointmanager/pkg/helpers" + log "github.com/sirupsen/logrus" +) + +func ChntechsolutionsWebscraper(CHPLURL string, fileToWriteTo string) { + + var lanternEntryList []LanternEntry + var endpointEntryList EndpointList + var entry LanternEntry + + doc, err := helpers.ChromedpQueryEndpointList(CHPLURL, "") + if err != nil { + log.Fatal(err) + } + + doc.Find("p").Each(func(index int, phtml *goquery.Selection) { + phtml.Find("code").Each(func(index int, phtml *goquery.Selection) { + urlString := strings.ReplaceAll(phtml.Text(), "\n", " ") + pattern := `https[^\s]*metadata` + re := regexp.MustCompile(pattern) + match := re.FindString(urlString) + if len(match) != 0 { + entry.URL = strings.TrimSpace(match) + lanternEntryList = append(lanternEntryList, entry) + endpointEntryList.Endpoints = lanternEntryList + } + + }) + }) + + err = WriteCHPLFile(endpointEntryList, fileToWriteTo) + if err != nil { + log.Fatal(err) + } + +} diff --git a/endpointmanager/pkg/chplendpointquerier/chplendpointquerier.go b/endpointmanager/pkg/chplendpointquerier/chplendpointquerier.go index efad48361..250ba9b77 100644 --- a/endpointmanager/pkg/chplendpointquerier/chplendpointquerier.go +++ b/endpointmanager/pkg/chplendpointquerier/chplendpointquerier.go @@ -197,6 +197,7 @@ var curemdURL = "https://www.curemd.com/developer/base-fhir-urls/" var emdscloudURL = "https://identity.emdscloud.com/api/api-resource/fhir" var betaAfoundriaURL = "https://beta.afoundria.com/api/fhir/urls" var ehealthlineURL = "http://ehealthline.com/dev/pdf/FHIR%20API%20Endpoints.htm" +var chntechURL = "https://onc.chntechsolutions.com/ic-ehr-fhir-api/" var bundleQuerierArray = [30]string{"https://ac-fhir.harrisambulatory.com/endpoints/r4", "https://dynamicfhirpresentation.dynamicfhirsandbox.com/fhir/r4/endpoints", "https://ct-fhir.harrisambulatory.com/Endpoints/R4", "https://kantime.com/wp-content/uploads/2024/03/fhir-base-urls.json", @@ -588,6 +589,8 @@ func QueryCHPLEndpointList(chplURL string, fileToWriteTo string) { CSVParser(chplURL, fileToWriteTo, "./endpoints.csv", -1, 0, true, 1, 0) } else if URLsEqual(chplURL, ehealthlineURL) { EhealthlineWebscraper(ehealthlineURL, fileToWriteTo) + } else if URLsEqual(chplURL, chntechURL) { + ChntechsolutionsWebscraper(chplURL, fileToWriteTo) } else { log.Warnf("Handler is required for url %s", chplURL) } From 980c79d111870d19d7999e198db40551d97123ba Mon Sep 17 00:00:00 2001 From: rishi-salunkhe-mettle Date: Fri, 13 Dec 2024 14:13:35 -0600 Subject: [PATCH 2/2] Removed /metadata from the FHIR endpoint URL for CHN Tech Solutions --- .../pkg/chplendpointquerier/chntechsolutionswebscraper.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/endpointmanager/pkg/chplendpointquerier/chntechsolutionswebscraper.go b/endpointmanager/pkg/chplendpointquerier/chntechsolutionswebscraper.go index c6f0259b5..175fa75bd 100644 --- a/endpointmanager/pkg/chplendpointquerier/chntechsolutionswebscraper.go +++ b/endpointmanager/pkg/chplendpointquerier/chntechsolutionswebscraper.go @@ -27,7 +27,8 @@ func ChntechsolutionsWebscraper(CHPLURL string, fileToWriteTo string) { re := regexp.MustCompile(pattern) match := re.FindString(urlString) if len(match) != 0 { - entry.URL = strings.TrimSpace(match) + match = strings.TrimSpace(match) + entry.URL = strings.TrimSuffix(match, "/metadata") lanternEntryList = append(lanternEntryList, entry) endpointEntryList.Endpoints = lanternEntryList }