diff --git a/DESCRIPTION b/DESCRIPTION index a0b6730..8f7eff7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Type: Package Package: inbolims Title: Utilities to query the INBO lab analysis datawarehouse -Version: 0.2.9 -Date: 2023-12-08 +Version: 0.2.10 +Date: 2024-06-17 Authors@R: person("Pieter", "Verschelde", , "pieter.verschelde@inbo.be", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9199-421X", affiliation = "Research Institute for Nature and Forest (INBO)")) @@ -14,27 +14,26 @@ License: GPL-3 URL: https://github.com/inbo/inbolims, https://inbo.github.io/inbolims/ BugReports: https://github.com/inbo/inbolims/issues Depends: - R (>= 3.5.0) + R (>= 4.0.0) Imports: DBI, dplyr, ggplot2, magrittr, odbc, - purrr, readr, RODBC, stats, - tidyr, - tidyverse + stringr, + tidyr Suggests: knitr, rmarkdown, - testthat (>= 2.1.0) + testthat (>= 3.0.0) VignetteBuilder: knitr Config/checklist/communities: inbo Config/checklist/keywords: lims; lab results; COA; Certificate of Analysis Encoding: UTF-8 Language: nl-BE -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 diff --git a/NAMESPACE b/NAMESPACE index 928e1b6..2b22d30 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,15 +3,12 @@ export(create_archief_labels_bat) export(date_as_text) export(get_all_report_fields) -export(get_available_report_fields) export(get_report_config_info) -export(interprate_texture_content) +export(interprete_texture_content) export(lims_connect) -export(lims_measured_parameters) export(lims_report_export) export(lims_report_samples) export(lims_report_xtab) -export(lims_report_xtab_old) export(lims_result_statistics) export(link_labo_id) export(parse_sql_report_query) @@ -23,7 +20,6 @@ import(dplyr) import(magrittr) importFrom(DBI,dbGetQuery) importFrom(RODBC,odbcDriverConnect) -importFrom(dplyr,"%>%") importFrom(dplyr,filter) importFrom(dplyr,lead) importFrom(dplyr,mutate) @@ -40,11 +36,11 @@ importFrom(ggplot2,rel) importFrom(ggplot2,scale_y_log10) importFrom(ggplot2,theme) importFrom(ggplot2,xlab) -importFrom(purrr,"%||%") importFrom(readr,read_delim) importFrom(readr,read_tsv) importFrom(readr,write_excel_csv2) importFrom(stats,quantile) +importFrom(stringr,str_split_1) importFrom(tidyr,pivot_longer) importFrom(tidyr,pivot_wider) importFrom(tidyr,separate) diff --git a/R/inbolims-package.R b/R/inbolims-package.R new file mode 100644 index 0000000..fce5932 --- /dev/null +++ b/R/inbolims-package.R @@ -0,0 +1,10 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +## usethis namespace: end +NULL + +## quiets concerns of R CMD check re: the .'s that appear in pipelines +if (getRversion() >= "2.15.1") utils::globalVariables(c(".")) + diff --git a/R/inbolims.R b/R/inbolims.R deleted file mode 100644 index b766254..0000000 --- a/R/inbolims.R +++ /dev/null @@ -1,12 +0,0 @@ -#' \code{inbolims} package -#' -#' Google spreadsheets R API -#' -#' @docType package -#' @name inbolims -#' @importFrom dplyr %>% -#' @importFrom purrr %||% -NULL - -## quiets concerns of R CMD check re: the .'s that appear in pipelines -if (getRversion() >= "2.15.1") utils::globalVariables(c(".")) diff --git a/R/lims_report.R b/R/lims_report.R deleted file mode 100644 index 807dd98..0000000 --- a/R/lims_report.R +++ /dev/null @@ -1,386 +0,0 @@ -#' Show available fields -#' -#' -#' @return dataset containing 2 colums: the name of a field in the database -#' and the corresponding description -#' @export -#' @importFrom readr read_tsv -#' @importFrom dplyr filter select -#' -#' @examples{ -#' get_available_report_fields() -#' } - -get_available_report_fields <- function() { - read_tsv(file.path( - system.file(package = "inbolims"), - "report_config", - "template_fields.tsv" - )) %>% - filter(.data$Type == "select") %>% - select(.data$Veldnaam, .data$Beschrijving) -} - -################################################# - - -#' Get report config information -#' -#' @param template a character vector of templates you want info from. -#' If "all" then all templates are shown -#' -#' @return dataset wih report template information -#' @export -#' -#' @examples{ -#' get_report_config_info(template = "default") -#' } - -get_report_config_info <- function(template = "default") { - data <- read_tsv(file.path( - system.file(package = "inbolims"), - "report_config", - "template_fields.tsv" - )) - names <- colnames(data) - - if (template == "all") { - template_cols <- names[substring(names, 1, 9) == "template:"] - } else { - template_cols <- paste0("template:", template) - } - - base_cols <- c( - "Type", "Veldnaam", "Tabel", "Afkorting", - "Kolom", "Beschrijving" - ) - - data %>% select(all_of(c(base_cols, template_cols))) -} - - -################################################# - -#' Haal rapportdata uit LIMS DWH -#' -#' @param connection DBI connection object (see odbc::dbConnect()) -#' @param project charactervector met projectnamen -#' @param sql_template indien "default" wordt de standaardquery uitgevoerd -#' @param show_query indien TRUE toon de query op het scherm -#' net voordat deze uitgevoerd wordt, -#' je kan deze eventueel kopiƫren en aanpassen en doorgeven aan custom_sql_query -#' @param custom_fields charactervector die controleert -#' welke veldnamen je in de output ziet. -#' De velden ORIGINAL_SAMPLE, ANALYSIS, COMPONENT en ENTRY -#' worden altijd standaard meegeleverd. -#' Via de functie get_available_report_fields kan je alle namen -#' van de velden zien. -#' @param custom_where_clause custom SQL where clause dat geplakt wordt aan -#' "where PROJECT in ('project_x')", -#' dus een where clause dat je specificeerd als -#' " ResultNumeric > 0 OR ResultNumeric < 100, -#' zal vertaald worden naar de sql where clause: -#' "where PROJECT in ('project_x') -#' AND (ResultNumeric > 0 OR ResultNumeric < 100)" -#' @param custom_sql_query geldige Sql Query bruikbaar voor de connectie. -#' Indien dit veld actief is, wordt sql_template, -#' custom_fields en custom_where_clause genegeerd -#' @param deployment default "prd", "uat" indien op de ontwikkelomgeving -#' gewerkt wordt. -#' Voorlopig is enkel "prd" actief ondersteund -#' @return data.frame met minstens de velden ORIGINAL_SAMPLE, -#' ANALYSIS, COMPONENT en ENTRY -#' @export -#' -#' @examples -#' \dontrun{ -#' conn <- lims_connect() -#' reportdata <- read_lims_data(conn, project = c("I-19W001-01")) -#' } -read_lims_data <- function(connection, - project, - sql_template = "default", - show_query = FALSE, - custom_fields = NULL, - custom_where_clause = NULL, - custom_sql_query = NULL, - deployment = "prd") { - # if custom query defined, just execute the query and exit - if (!is.null(custom_sql_query)) { - rv <- DBI::dbGetQuery(connection, custom_sql_query) - return(rv) - } - - template_information <- get_report_config_info(template = sql_template) %>% - select(.data$Type, .data$Veldnaam, .data$Afkorting, .data$Kolom, - template = contains(sql_template) - ) %>% - filter(.data$template > 0) %>% - arrange(.data$template) - - sql_query <- parse_sql_report_query(template_information, project) - if (show_query) { - cat(sql_query) - } - - rv <- DBI::dbGetQuery(connection, sql_query) -} - -######################################################################### - -#' Parse the query based on the template information and the chosen projects -#' -#' @param template dataset containing the template_information. -#' The structure should be exact as the file shipped with the package -#' @param project character string of projects to filter -#' -#' @return SQL server query -#' @export -#' -parse_sql_report_query <- function(template, project) { - projects <- paste0("('", paste(project, collapse = "','"), "')") - template <- template %>% arrange(.data$template) - - fields <- template %>% - filter(.data$Type == "select") %>% - mutate(ss = paste0( - .data$Veldnaam, " = ", - .data$Afkorting, ".[", .data$Kolom, "]" - )) %>% - pull(.data$ss) %>% - paste(collapse = ",\n") - - tables <- template %>% - filter(.data$Type == "tabel") %>% - pull(.data$Kolom) %>% - paste(collapse = " \n") - - filters <- template %>% - filter(.data$Type == "filter") %>% - mutate(flt = paste0(.data$Afkorting, ".", .data$Kolom)) %>% - pull(.data$flt) %>% - paste(collapse = " AND \n") - filters <- gsub("NA.where 1 = 1 AND", "", filters) - filters <- gsub("<>", projects, filters) - - qry <- paste("select ", fields, "from ", tables, "where ", filters) - return(qry) -} - - -#' Maak kruistabel van de ingelezen rapportdata -#' -#' @param reportdata data verkregen uit de functie lims_report_data -#' @return kruistabel met resultaten -#' @export -#' @importFrom dplyr mutate -#' @importFrom tidyr pivot_wider -#' @examples -#' \dontrun{ -#' long_format <- lims_report_data(project = c("I-19W001-01")) -#' XTAB_format <- lims_report_xtab(long_format) -#' } -lims_report_xtab <- function(reportdata) { - sampledata <- lims_report_samples(reportdata) - reportdata <- reportdata %>% - dplyr::mutate(COMBI = paste(.data$LimsAnalyseNaam, - .data$Component, - paste(.data$TestReplicaat, - .data$ResultaatReplicaat, - sep = "." - ), - sep = "__" - )) - xtab <- reportdata %>% - tidyr::pivot_wider( - id_cols = .data$OrigineelStaal, - names_from = .data$COMBI, - values_from = .data$WaardeRuw - ) - xtab <- sampledata %>% - inner_join(xtab, by = "OrigineelStaal") - - xtab -} - - -#' Verkrijg de sample metadata -#' -#' @param reportdata data verkregen uit de functie lims_report_data -#' -#' @return dataset met sample informatie -#' @export -#' @examples -#' \dontrun{ -#' reportdata <- lims_report_data(project = c("I-19W001-01")) -#' sampledata <- lims_report_samples(reportdata) -#' } -#' -lims_report_samples <- function(reportdata) { - df_samples_on_orig <- reportdata %>% - group_by(.data$Project, .data$OrigineelStaal, .data$ExternSampleID) %>% - summarize( - FirstSample = min(.data$LimsStaalNummer), - Aantal_stalen = n_distinct(.data$LimsStaalNummer), - Aantal_analyses = n_distinct(.data$LimsAnalyseNaam), - Aantal_resultaten = n_distinct(paste0( - .data$LimsAnalyseNaam, - .data$Component - )), - .groups = "drop_last" - ) %>% - ungroup() - - df_parent <- reportdata %>% - select(.data$OrigineelStaal, - .data$LimsStaalNummer, - HoofdLaboCode = .data$LaboCode - ) %>% - filter(.data$OrigineelStaal == .data$LimsStaalNummer) %>% - distinct(.data$OrigineelStaal, .data$HoofdLaboCode) - - df_samples_on_orig <- df_samples_on_orig %>% - left_join(df_parent, by = "OrigineelStaal") - - df_samples_all <- reportdata %>% - group_by( - .data$OrigineelStaal, .data$ContractID, .data$Klant, - .data$Project, .data$VerantwoordelijkLabo, - .data$LimsStaalNummer, .data$ExternSampleID, - .data$LaboCode, .data$SampleProduct, - .data$ProductGrade, .data$Matrix, .data$Monsternamedatum, - .data$Monsternemer, .data$Toestand, - .data$VoorbehandelingExtern, .data$Opmerking - ) %>% - summarise( - Aantal_records = n(), - ArchiefStaal = max(.data$ArchiefStaal), - Xcoord = max(.data$Xcoord), - Ycoord = max(.data$Ycoord), - Diepte = max(.data$Diepte), - Toponiem = max(.data$Toponiem), .groups = "drop_last" - ) %>% - ungroup() %>% - select(-.data$OrigineelStaal, -.data$ExternSampleID, -.data$Project) - - df_samples <- df_samples_on_orig %>% - inner_join(df_samples_all, - by = c("FirstSample" = "LimsStaalNummer") - ) %>% - select( - .data$Project, .data$OrigineelStaal, .data$HoofdLaboCode, - .data$LaboCode, .data$ExternSampleID, - .data$ProductGrade, .data$Matrix, - .data$Monsternemer, .data$Monsternamedatum, .data$Toestand, - .data$VoorbehandelingExtern, .data$Opmerking, - .data$ArchiefStaal, .data$Xcoord, .data$Ycoord, - .data$Diepte, .data$Toponiem, - .data$Aantal_analyses, - .data$Aantal_resultaten, - .data$Aantal_stalen - ) %>% - arrange(.data$Project, .data$ExternSampleID) - - df_samples -} - -#' Maak kruistabel van de ingelezen rapportdata -#' -#' @param reportdata data verkregen uit de functie lims_report_data -#' @return kruistabel met resultaten -#' @export -#' @importFrom dplyr mutate -#' @importFrom tidyr pivot_wider -lims_report_xtab_old <- function(reportdata) { - sampledata <- lims_report_samples(reportdata) - xtab <- reportdata %>% - tidyr::pivot_wider(id_cols = .data$OrigineelStaal, - names_from = .data$resultaatcode, - values_from = .data$WaardeRuw) - xtab <- sampledata %>% - inner_join(xtab, by = "OrigineelStaal") - - for (i in 16:ncol(xtab)) { - results <- xtab[, i, drop = TRUE] - results <- ifelse(results %in% c("OFL", "NA"), NA, results) - nas <- sum(is.na(results)) - suppressWarnings(new <- as.numeric(xtab[, i, drop = TRUE])) - newnas <- sum(is.na(new)) - if (newnas == nas) { - xtab[, i] <- new - } - } - xtab -} - -#' Kruistabel naar csv wegschtrijven met toevoeging header -#' -#' @param data kruistabeldata uit lims_report_xtab -#' @param path pad waar de file geschreven moet worden -#' @return csv file -#' @export -#' -lims_report_export <- function(data, path) { - namen <- tibble(Naam = colnames(data)) - - header <- separate(namen, - col = .data$Naam, sep = "___", - into = c("Analyse", "Component", "Iteratie"), - fill = "right" - ) - header$COMBI <- namen - newdata <- as.data.frame(t(header)) - newdata <- newdata %>% - mutate(across(.cols = everything(), .fns = as.character)) - colnames(newdata) <- make.names(header %>% pull(.data$COMBI) %>% unlist()) - - datach <- as.data.frame(data) # for loop (accross probleem) - for (i in seq_len(ncol(datach))) { - datach[, i] <- as.character(datach[, i]) - datach[, i] <- gsub("\\.", ",", datach[, i]) - } - - newdata <- bind_rows(newdata, datach) - newdata[is.na(newdata)] <- "" - write_excel_csv2(newdata, file = path) # niet csv2 want alles is character -} - - -#' Title -#' -#' @param data lims report data (from lims_report_data) -#' @param plot logical if values are to be put in a graph -#' @param log when plot, use the log-scale? -#' @importFrom ggplot2 scale_y_log10 geom_boxplot facet_wrap ggplot aes -#' @importFrom stats quantile -#' -#' @return list with measured parameters and some base statistics -#' @export -#' -#' @examples -#' \dontrun{ -#' view(lims_measured_parameters(reportdata)) -#' } -lims_measured_parameters <- function(data, plot = TRUE, log = TRUE) { - rv <- data %>% - group_by(.data$AnalyseNaam, .data$Component, .data$Eenheid) %>% - summarise( - min = min(.data$NumeriekeWaarde, na.rm = TRUE), - q25 = quantile(.data$NumeriekeWaarde, 0.25, na.rm = TRUE), - med = quantile(.data$NumeriekeWaarde, 0.50, na.rm = TRUE), - avg = round(mean(.data$NumeriekeWaarde, na.rm = TRUE), 5), - q75 = quantile(.data$NumeriekeWaarde, 0.75, na.rm = TRUE), - max = max(.data$NumeriekeWaarde, na.rm = TRUE), - aantal = n(), - aantalstalen = n_distinct(.data$OrigineelStaal), - aantalmissend = sum(is.na(.data$NumeriekeWaarde)), - ) - if (plot) { - ggobj <- ggplot(data, aes(x = .data$Component, y = .data$NumeriekeWaarde)) + - geom_boxplot() + - facet_wrap(~AnalyseNaam, scales = "free") - if (log) ggobj <- ggobj + scale_y_log10() - print(ggobj) - } - rv -} diff --git a/R/lims_result_statistics.R b/R/lims_result_statistics.R index 3d3286f..9dbaaeb 100644 --- a/R/lims_result_statistics.R +++ b/R/lims_result_statistics.R @@ -1,22 +1,4 @@ -################################################# - - - - -######################################################################### - - -################################################# - - -############################################################################## - - -########################################################################## - - - #' Title #' #' @param data lims report data (from lims_report_data) diff --git a/R/texture_parsing.R b/R/texture_parsing.R deleted file mode 100644 index d0de1aa..0000000 --- a/R/texture_parsing.R +++ /dev/null @@ -1,209 +0,0 @@ -#' Parste texture output file to R readable format -#' -#' @param filename path to the output file to be parsed -#' @param delim delimiter of the file -#' @param verbose moet output getoond worden tijdens de uitvoering? -#' -#' @return parsed dataset with clear column names and data -#' @import magrittr -#' @importFrom readr read_delim -#' @importFrom dplyr lead -#' @export -parse_texture_content <- function(filename, delim = "\t", verbose = TRUE) { - header <- readLines(con = filename, n = 7) # lees de 6 headerrijen + 1 datarij - textuur <- read_delim( - file = filename, - delim = "\t", - skip = 6, - col_names = FALSE - ) - - headersplitted <- unlist(strsplit(header[3], split = "\t")) - headersplitted[1] <- "lower_bound" # niet nodig maar voor duidelijkheid - - samplename_pos <- seq(2, length(headersplitted), by = 3) - samples <- gsub("\\.\\$av", "", headersplitted[samplename_pos]) - last_underscores <- sapply(gregexpr("\\_", samples), max) - samples <- substring(samples, 1, last_underscores - 1) - - header_names <- rep("", length(headersplitted)) # init lege rijen - header_names[1] <- "lower_boundary" - header_names[samplename_pos] <- paste(samples, "value", sep = "___") - header_names[samplename_pos + 1] <- paste(samples, "LCL1S", sep = "___") - header_names[samplename_pos + 2] <- paste(samples, "UCL1S", sep = "___") - - colnames(textuur) <- header_names - - textuur <- textuur %>% - mutate(upper_boundary = c(lead(.data$lower_boundary, 1))) - if (verbose) { - cat( - "\nOK\nDataset geimporteerd: \n", - "dimensies: ", - nrow(textuur), "x", ncol(textuur), "\n" - ) - } - return(textuur) -} - - -############## - - -#' Interpreteer de geparste textuurfile -#' -#' @param textuurdata geparste textuurdata -#' @param verbose moet output getoond worden tijdens de routine -#' @param digits aantal digits voor de waarde en sd -#' @import dplyr -#' @importFrom tidyr pivot_longer separate -#' @import magrittr -#' -#' @return tidy dataset met alle gegevens -#' @export -#' -interprate_texture_content <- - function(textuurdata, verbose = TRUE, digits = 3) { - textuur_long <- textuurdata %>% - pivot_longer(cols = -ends_with("boundary")) %>% - separate(.data$name, into = c("sample", "param"), sep = "___") - - textuur_wide <- textuur_long %>% - pivot_wider( - id_cols = c( - .data$lower_boundary, - .data$upper_boundary, - .data$sample - ), - names_from = .data$param, - values_from = .data$value - ) %>% - mutate( - value = round(.data$value, digits), - sd = round(.data$UCL1S - .data$value, digits), - lower_boundary = round(.data$lower_boundary, 2), - upper_boundary = round(.data$upper_boundary, 2) - ) %>% - select(c(1:4, 7, 5, 6)) - - if (verbose) { - cat( - "\nOK\ndataset geinterpreteerd:\n", - "dimensies: ", nrow(textuur_wide), "x", ncol(textuur_wide), "\n", - "aantal stalen: ", length(unique(textuur_wide %>% pull(sample))), "\n" - ) - } - return(textuur_wide) -} - -#################### - -#' Link het labo_id met het externe id -#' -#' @param conn db connectie -#' @param data dataset waarvan het labo_id gelinkt moet worden aan het extern id -#' @param labo_id_col naam van de kolom die het labo_id bevat -#' @param extern_id_col naam van de kolom waar je het externe id in wil -#' @importFrom DBI dbGetQuery -#' -#' @return dezelfde dataset met een extra kolom die het extern staalid bevat -#' @export -#' @import dplyr -#' -link_labo_id <- function(conn, - data, - labo_id_col = "sample", - extern_id_col = "FieldSampleID") { - data$sample <- data[[labo_id_col]] - - # aanpassing omdat soms ---testnummer aan de sample gehangen wordt - found_indices <- unlist(gregexpr("---", data$sample)) - last_index <- ifelse(found_indices < 0, nchar(data$sample), found_indices - 1) - data$sample <- substring(data$sample, 1, last_index) - - qry <- paste0( - "select sample = LabSampleID, \n", - extern_id_col, " = FieldSampleID, \n", - "datum = AnalysisDate\n", - " from dimSample s inner join factResult r ", - "on s.SampleKey = r.SampleKey \n", - " where s.LabSampleID in ", - "('", paste(unique(data %>% pull(sample)), - collapse = "','" - ), - "') and LimsAnalysisName like 'TEXTUUR_LD%' \n" - ) - - linktable <- dbGetQuery(conn, qry) - - returndata <- data %>% - left_join(linktable, by = c("sample" = "sample")) - - if (labo_id_col != "sample") { - returndata <- returndata %>% select(-sample) - } - - returndata -} - -################### - -#' Converteer datumvector of POSIXct vector naar text in formaat yyyy-mm-dd -#' -#' @param x vector met datums (character) -#' @param na_result resultaat indien geen geldige datum -#' -#' @return character vector with datums -#' @export -#' -date_as_text <- function(x, na_result = "NODATE") { - x[is.na(x) | substring(x, 1, 2) != "20"] <- NA - x <- paste0(substring(x, 1, 10)) - x[is.na(x)] <- na_result - x -} - - -################### - - -#' Schrijf textuurdata weg per staal in de target directory -#' -#' @param target_path locatie waar de files moeten terechtkomen -#' @param data textuurdata die weggeschreven moet worden -#' @param verbose toon output terwijl de routine loopt -#' -#' @return er wordt een file weggeschreven per aanwezig staal -#' @export -#' @importFrom dplyr filter select pull -#' @importFrom readr write_excel_csv2 -#' -write_texture_files <- function(target_path, data, verbose = TRUE) { - for (samp in unique(data %>% pull(.data$sample))) { - tmp <- data %>% - filter(.data$sample == samp) %>% - transmute( - .data$sample, .data$FieldSampleID, - .data$lower_boundary, .data$upper_boundary, - .data$value, .data$sd, - .data$datum - ) - if (any(is.na(tmp %>% pull(.data$FieldSampleID)))) { - if (verbose) { - print(paste0(samp, "niet weggeschreven want geen extern id")) - } - next - } - fn <- paste0( - max(tmp %>% pull(.data$FieldSampleID)), - "_", - date_as_text(max(tmp %>% pull(.data$datum))), - ".csv" - ) - if (verbose) print(fn) - write_excel_csv2(tmp %>% select(-.data$datum), - file = file.path(target_path, fn) - ) - } - return() -} diff --git a/R/txtp_date_as_text.R b/R/txtp_date_as_text.R new file mode 100644 index 0000000..76e1d4a --- /dev/null +++ b/R/txtp_date_as_text.R @@ -0,0 +1,18 @@ + +################### + +#' Converteer datumvector of POSIXct vector naar text in formaat yyyy-mm-dd +#' +#' @param x vector met datums (character) +#' @param na_result resultaat indien geen geldige datum +#' +#' @return character vector with datums +#' @export +#' +date_as_text <- function(x, na_result = "NODATE") { + x[is.na(x) | substring(x, 1, 2) != "20"] <- NA + x <- paste0(substring(x, 1, 10)) + x[is.na(x)] <- na_result + x +} + diff --git a/R/txtp_interpret_texture_content.R b/R/txtp_interpret_texture_content.R new file mode 100644 index 0000000..04bcb81 --- /dev/null +++ b/R/txtp_interpret_texture_content.R @@ -0,0 +1,48 @@ + +#' Interpreteer de geparste textuurfile +#' +#' @param textuurdata geparste textuurdata +#' @param verbose moet output getoond worden tijdens de routine +#' @param digits aantal digits voor de waarde en sd +#' @import dplyr +#' @importFrom tidyr pivot_longer separate +#' @import magrittr +#' +#' @return tidy dataset met alle gegevens +#' @export +#' +interprete_texture_content <- + function(textuurdata, verbose = TRUE, digits = 3) { + textuur_long <- textuurdata %>% + pivot_longer(cols = -ends_with("boundary")) %>% + separate(.data$name, into = c("sample", "param"), sep = "___") + + textuur_wide <- textuur_long %>% + pivot_wider( + id_cols = c( + .data$lower_boundary, + .data$upper_boundary, + .data$sample + ), + names_from = .data$param, + values_from = .data$value + ) %>% + mutate( + value = round(.data$value, digits), + sd = round(.data$UCL1S - .data$value, digits), + lower_boundary = round(.data$lower_boundary, 2), + upper_boundary = round(.data$upper_boundary, 2) + ) %>% + select(c(1:4, 7, 5, 6)) + + if (verbose) { + cat( + "\nOK\ndataset geinterpreteerd:\n", + "dimensies: ", nrow(textuur_wide), "x", ncol(textuur_wide), "\n", + "aantal stalen: ", length(unique(textuur_wide %>% pull(sample))), "\n" + ) + } + return(textuur_wide) +} + +#################### diff --git a/R/txtp_link_labo_id.R b/R/txtp_link_labo_id.R new file mode 100644 index 0000000..ba2de05 --- /dev/null +++ b/R/txtp_link_labo_id.R @@ -0,0 +1,63 @@ +#' Link het labo_id met het externe id +#' +#' De wetenschappers en labo gebruiken een ander ID voor stalen te identificeren, hiermee worden deze gekoppeld. +#' @param conn db connectie +#' @param data dataset waarvan het labo_id gelinkt moet worden aan het extern id +#' @param analysis naam van de textuuranalyse. Indien de exacte naam niet gekend is kan met jokertekens gewerkt worden, bv "TEXTUUR\%" wat zoekt in alle analyses die begint met TEXTUUR +#' @param labo_id_col naam van de kolom die het labo_id bevat +#' @param extern_id_col naam van de kolom waar je het externe id in wil +#' @importFrom DBI dbGetQuery +#' @import dplyr +#' @return dezelfde dataset met een extra kolom die het extern staalid bevat +#' @export +#' @examples +#' \dontrun{ +#' # example code +#' #conn <- #zorg dat je een connectie-object met de databank hebt +#' data <- data.frame(sample = c(('24-004722','24-004723','24-004724','24-004725'))) +#' link_labo_id(conn, data, analysis = "TEXTUUR%") +#' } +link_labo_id <- function(conn, + data, + analysis = "TEXTUUR_LD%", + labo_id_col = "sample", + extern_id_col = "FieldSampleID") { + data$sample <- data[[labo_id_col]] + + # aanpassing omdat soms ---testnummer aan de sample gehangen wordt + found_indices <- unlist(gregexpr("---", data$sample)) + last_index <- ifelse(found_indices < 0, nchar(data$sample), found_indices - 1) + data$sample <- substring(data$sample, 1, last_index) + + qry <- paste0( + "select sample = LabSampleID, \n", + extern_id_col, " = FieldSampleID, \n", + "datum = AnalysisDate, \n", + "analyse = LimsAnalysisName \n", + " from dimSample s inner join factResult r ", + "on s.SampleKey = r.SampleKey \n", + " where s.LabSampleID in ", + "('", paste(unique(data %>% pull(sample)), + collapse = "','" + ), + "') and LimsAnalysisName like '", analysis, "' \n" + ) + + linktable <- dbGetQuery(conn, qry) + unique_analyses <- unique(linktable$analysis) + if (length(unique_analyses) > 1) { + cat("Verschillende analyses:\n") + print(unique_analyses) + stop("Meer dan 1 analyse die voldoet aan de voorwaarde. Kies er 1 van") + } + + returndata <- data %>% + left_join(linktable, by = c("sample" = "sample")) + + if (labo_id_col != "sample") { + returndata <- returndata %>% select(-sample) + } + + returndata +} + diff --git a/R/txtp_parse_texture_content.R b/R/txtp_parse_texture_content.R new file mode 100644 index 0000000..1cf96bd --- /dev/null +++ b/R/txtp_parse_texture_content.R @@ -0,0 +1,68 @@ +#' Parste texture output file to R readable format +#' +#' @param filename path to the output file to be parsed +#' @param delim delimiter of the file +#' @param verbose moet output getoond worden tijdens de uitvoering? +#' +#' @return parsed dataset with clear column names and data +#' @import magrittr +#' @importFrom readr read_delim +#' @importFrom dplyr lead +#' @importFrom stringr str_split_1 +#' @export +parse_texture_content <- function(filename, delim = "\t", verbose = TRUE) { + header <- readLines(con = filename, n = 7) # lees de 6 headerrijen + 1 datarij + textuur <- read_delim( + file = filename, + delim = "\t", + skip = 6, + col_names = FALSE + ) + #formaat verschilt als er 1 staal is of als er meerdere zijn + + #MEERDERE STALEN + if (substring(header[3],1,7) != "Channel") { + headersplitted <- str_split_1(header[3], pattern = "\t") + headersplitted[1] <- "lower_bound" # niet nodig maar voor duidelijkheid + + samplename_pos <- seq(2, length(headersplitted), by = 3) + samples <- gsub("\\.\\$av", "", headersplitted[samplename_pos]) + last_underscores <- sapply(gregexpr("\\_", samples), max) + samples <- substring(samples, 1, last_underscores - 1) + + header_names <- rep("", length(headersplitted)) # init lege rijen + header_names[1] <- "lower_boundary" + header_names[samplename_pos] <- paste(samples, "value", sep = "___") + header_names[samplename_pos + 1] <- paste(samples, "LCL1S", sep = "___") + header_names[samplename_pos + 2] <- paste(samples, "UCL1S", sep = "___") + + #ENKEL 1 STAAL + } else { + filenamerecord <- str_split_1(header[2], pattern = "\t") + sample <- gsub("\\.\\$av", "", filenamerecord[2]) + last_underscores <- sapply(gregexpr("\\_", sample), max) + sample <- substring(sample, 1, last_underscores - 1) + + header_names <- str_split_1(header[3], pattern = "\t") + header_names[1] <- "lower_boundary" + header_names[2] <- paste(sample, "value", sep = "___") + header_names[3] <- paste(sample, "LCL1S", sep = "___") + header_names[4] <- paste(sample, "UCL1S", sep = "___") + } + + colnames(textuur) <- header_names + + textuur <- textuur %>% + mutate(upper_boundary = c(lead(.data$lower_boundary, 1))) + if (verbose) { + cat( + "\nOK\nDataset geimporteerd: \n", + "dimensies: ", + nrow(textuur), "x", ncol(textuur), "\n" + ) + } + return(textuur) +} + + +############## diff --git a/R/txtp_write_texture_files.R b/R/txtp_write_texture_files.R new file mode 100644 index 0000000..0858595 --- /dev/null +++ b/R/txtp_write_texture_files.R @@ -0,0 +1,43 @@ +################### + + +#' Schrijf textuurdata weg per staal in de target directory +#' +#' @param target_path locatie waar de files moeten terechtkomen +#' @param data textuurdata die weggeschreven moet worden +#' @param verbose toon output terwijl de routine loopt +#' +#' @return er wordt een file weggeschreven per aanwezig staal +#' @export +#' @importFrom dplyr filter select pull +#' @importFrom readr write_excel_csv2 +#' +write_texture_files <- function(target_path, data, verbose = TRUE) { + for (samp in unique(data %>% pull(.data$sample))) { + tmp <- data %>% + filter(.data$sample == samp) %>% + transmute( + .data$sample, .data$FieldSampleID, + .data$lower_boundary, .data$upper_boundary, + .data$value, .data$sd, + .data$datum + ) + if (any(is.na(tmp %>% pull(.data$FieldSampleID)))) { + if (verbose) { + print(paste0(samp, "niet weggeschreven want geen extern id")) + } + next + } + fn <- paste0( + max(tmp %>% pull(.data$FieldSampleID)), + "_", + date_as_text(max(tmp %>% pull(.data$datum))), + ".csv" + ) + if (verbose) print(fn) + write_excel_csv2(tmp %>% select(-.data$datum), + file = file.path(target_path, fn) + ) + } + return() +} diff --git a/README.md b/README.md index e41a4a8..e889963 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ system2("mkdir", target_dir) textuur_parsed <- parse_texture_content(filename, delim = "\t") #interpreteer de dataset tot een inhoudelijk bruikbaar formaat -textuur_interpreted <- interprate_texture_content(textuur_parsed) +textuur_interpreted <- interpret_texture_content(textuur_parsed) #maak een connectie met het LIMS datawarehouse conn <- lims_connect() #connect to dwh diff --git a/man/date_as_text.Rd b/man/date_as_text.Rd index 313d472..f4d5952 100644 --- a/man/date_as_text.Rd +++ b/man/date_as_text.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/texture_parsing.R +% Please edit documentation in R/txtp_date_as_text.R \name{date_as_text} \alias{date_as_text} \title{Converteer datumvector of POSIXct vector naar text in formaat yyyy-mm-dd} diff --git a/man/get_available_report_fields.Rd b/man/get_available_report_fields.Rd deleted file mode 100644 index 4dc024a..0000000 --- a/man/get_available_report_fields.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lims_report.R -\name{get_available_report_fields} -\alias{get_available_report_fields} -\title{Show available fields} -\usage{ -get_available_report_fields() -} -\value{ -dataset containing 2 colums: the name of a field in the database -and the corresponding description -} -\description{ -Show available fields -} -\examples{ -{ -get_available_report_fields() -} -} diff --git a/man/get_report_config_info.Rd b/man/get_report_config_info.Rd index 566c763..d4f776b 100644 --- a/man/get_report_config_info.Rd +++ b/man/get_report_config_info.Rd @@ -1,11 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_report_config_info.R, R/lims_report.R +% Please edit documentation in R/get_report_config_info.R \name{get_report_config_info} \alias{get_report_config_info} \title{Get report config information} \usage{ -get_report_config_info(template = "default") - get_report_config_info(template = "default") } \arguments{ @@ -13,20 +11,13 @@ get_report_config_info(template = "default") If "all" then all templates are shown} } \value{ -dataset wih report template information - dataset wih report template information } \description{ -Get report config information - Get report config information } \examples{ { get_report_config_info(template = "default") } -{ -get_report_config_info(template = "default") -} } diff --git a/man/inbolims-package.Rd b/man/inbolims-package.Rd new file mode 100644 index 0000000..b4bb5f5 --- /dev/null +++ b/man/inbolims-package.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inbolims-package.R +\docType{package} +\name{inbolims-package} +\alias{inbolims} +\alias{inbolims-package} +\title{inbolims: Utilities to query the INBO lab analysis datawarehouse} +\description{ +Some utilities that expand the Lims program in INBO. It mainly focuses on post-processing in connection with Quality Assurance. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/inbo/inbolims} + \item \url{https://inbo.github.io/inbolims/} + \item Report bugs at \url{https://github.com/inbo/inbolims/issues} +} + +} +\author{ +\strong{Maintainer}: Pieter Verschelde \email{pieter.verschelde@inbo.be} (\href{https://orcid.org/0000-0002-9199-421X}{ORCID}) (Research Institute for Nature and Forest (INBO)) + +} +\keyword{internal} diff --git a/man/inbolims.Rd b/man/inbolims.Rd deleted file mode 100644 index 23c6171..0000000 --- a/man/inbolims.Rd +++ /dev/null @@ -1,9 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inbolims.R -\docType{package} -\name{inbolims} -\alias{inbolims} -\title{\code{inbolims} package} -\description{ -Google spreadsheets R API -} diff --git a/man/interprate_texture_content.Rd b/man/interprete_texture_content.Rd similarity index 64% rename from man/interprate_texture_content.Rd rename to man/interprete_texture_content.Rd index 619f8c6..172c629 100644 --- a/man/interprate_texture_content.Rd +++ b/man/interprete_texture_content.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/texture_parsing.R -\name{interprate_texture_content} -\alias{interprate_texture_content} +% Please edit documentation in R/txtp_interpret_texture_content.R +\name{interprete_texture_content} +\alias{interprete_texture_content} \title{Interpreteer de geparste textuurfile} \usage{ -interprate_texture_content(textuurdata, verbose = TRUE, digits = 3) +interprete_texture_content(textuurdata, verbose = TRUE, digits = 3) } \arguments{ \item{textuurdata}{geparste textuurdata} diff --git a/man/lims_measured_parameters.Rd b/man/lims_measured_parameters.Rd deleted file mode 100644 index a3a4d97..0000000 --- a/man/lims_measured_parameters.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lims_report.R -\name{lims_measured_parameters} -\alias{lims_measured_parameters} -\title{Title} -\usage{ -lims_measured_parameters(data, plot = TRUE, log = TRUE) -} -\arguments{ -\item{data}{lims report data (from lims_report_data)} - -\item{plot}{logical if values are to be put in a graph} - -\item{log}{when plot, use the log-scale?} -} -\value{ -list with measured parameters and some base statistics -} -\description{ -Title -} -\examples{ -\dontrun{ -view(lims_measured_parameters(reportdata)) -} -} diff --git a/man/lims_report_export.Rd b/man/lims_report_export.Rd index 3727b7a..903cdb5 100644 --- a/man/lims_report_export.Rd +++ b/man/lims_report_export.Rd @@ -1,11 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lims_report.R, R/lims_report_export.R +% Please edit documentation in R/lims_report_export.R \name{lims_report_export} \alias{lims_report_export} \title{Kruistabel naar csv wegschtrijven met toevoeging header} \usage{ -lims_report_export(data, path) - lims_report_export(data, path) } \arguments{ @@ -14,12 +12,8 @@ lims_report_export(data, path) \item{path}{pad waar de file geschreven moet worden} } \value{ -csv file - csv file } \description{ -Kruistabel naar csv wegschtrijven met toevoeging header - Kruistabel naar csv wegschtrijven met toevoeging header } diff --git a/man/lims_report_samples.Rd b/man/lims_report_samples.Rd index 654bffc..2c58096 100644 --- a/man/lims_report_samples.Rd +++ b/man/lims_report_samples.Rd @@ -1,32 +1,21 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lims_report.R, R/lims_report_samples.R +% Please edit documentation in R/lims_report_samples.R \name{lims_report_samples} \alias{lims_report_samples} \title{Verkrijg de sample metadata} \usage{ -lims_report_samples(reportdata) - lims_report_samples(reportdata) } \arguments{ \item{reportdata}{data verkregen uit de functie lims_report_data} } \value{ -dataset met sample informatie - dataset met sample informatie } \description{ -Verkrijg de sample metadata - Verkrijg de sample metadata } \examples{ -\dontrun{ -reportdata <- lims_report_data(project = c("I-19W001-01")) -sampledata <- lims_report_samples(reportdata) -} - \dontrun{ conn <- lims_connect() reportdata <- read_lims_data(conn, project = c("I-19W001-01")) diff --git a/man/lims_report_xtab.Rd b/man/lims_report_xtab.Rd index a411429..03258fa 100644 --- a/man/lims_report_xtab.Rd +++ b/man/lims_report_xtab.Rd @@ -1,15 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lims_report.R, R/lims_report_xtab.R +% Please edit documentation in R/lims_report_xtab.R \name{lims_report_xtab} \alias{lims_report_xtab} \title{Maak kruistabel van de ingelezen rapportdata} \usage{ -lims_report_xtab( - reportdata, - resulttype = "measured", - sample_fields = c("Project", "ExternSampleID") -) - lims_report_xtab( reportdata, resulttype = "measured", @@ -26,21 +20,13 @@ of "measured" de originele waarde bv 1.185455} in the xtab file which are present in the report data} } \value{ -kruistabel met resultaten - kruistabel met resultaten } \description{ -Maak kruistabel van de ingelezen rapportdata - Maak kruistabel van de ingelezen rapportdata } \examples{ \dontrun{ -long_format <- lims_report_data(project = c("I-19W001-01")) -XTAB_format <- lims_report_xtab(long_format) -} -\dontrun{ conn <- lims_connect() reportdata <- read_lims_data(conn, project = c("I-19W001-01")) xtab <- lims_report_xtab(reportdata) diff --git a/man/lims_report_xtab_old.Rd b/man/lims_report_xtab_old.Rd deleted file mode 100644 index 2a83bbf..0000000 --- a/man/lims_report_xtab_old.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lims_report.R -\name{lims_report_xtab_old} -\alias{lims_report_xtab_old} -\title{Maak kruistabel van de ingelezen rapportdata} -\usage{ -lims_report_xtab_old(reportdata) -} -\arguments{ -\item{reportdata}{data verkregen uit de functie lims_report_data} -} -\value{ -kruistabel met resultaten -} -\description{ -Maak kruistabel van de ingelezen rapportdata -} diff --git a/man/link_labo_id.Rd b/man/link_labo_id.Rd index 221d247..d9f8cd6 100644 --- a/man/link_labo_id.Rd +++ b/man/link_labo_id.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/texture_parsing.R +% Please edit documentation in R/txtp_link_labo_id.R \name{link_labo_id} \alias{link_labo_id} \title{Link het labo_id met het externe id} @@ -7,6 +7,7 @@ link_labo_id( conn, data, + analysis = "TEXTUUR_LD\%", labo_id_col = "sample", extern_id_col = "FieldSampleID" ) @@ -16,6 +17,8 @@ link_labo_id( \item{data}{dataset waarvan het labo_id gelinkt moet worden aan het extern id} +\item{analysis}{naam van de textuuranalyse. Indien de exacte naam niet gekend is kan met jokertekens gewerkt worden, bv "TEXTUUR\%" wat zoekt in alle analyses die begint met TEXTUUR} + \item{labo_id_col}{naam van de kolom die het labo_id bevat} \item{extern_id_col}{naam van de kolom waar je het externe id in wil} @@ -24,5 +27,13 @@ link_labo_id( dezelfde dataset met een extra kolom die het extern staalid bevat } \description{ -Link het labo_id met het externe id +De wetenschappers en labo gebruiken een ander ID voor stalen te identificeren, hiermee worden deze gekoppeld. +} +\examples{ +\dontrun{ + # example code + #conn <- #zorg dat je een connectie-object met de databank hebt + data <- data.frame(sample = c(('24-004722','24-004723','24-004724','24-004725'))) + link_labo_id(conn, data, analysis = "TEXTUUR\%") +} } diff --git a/man/parse_sql_report_query.Rd b/man/parse_sql_report_query.Rd index 6da1913..150fa89 100644 --- a/man/parse_sql_report_query.Rd +++ b/man/parse_sql_report_query.Rd @@ -1,11 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lims_report.R, R/parse_sql_report_query.R +% Please edit documentation in R/parse_sql_report_query.R \name{parse_sql_report_query} \alias{parse_sql_report_query} \title{Parse the query based on the template information and the chosen projects} \usage{ -parse_sql_report_query(template, project) - parse_sql_report_query(template, project) } \arguments{ @@ -15,13 +13,9 @@ The structure should be exact as the file shipped with the package} \item{project}{character string of projects to filter} } \value{ -SQL server query - SQL server query } \description{ -Parse the query based on the template information and the chosen projects - Parse the query based on the template information and the chosen projects } \examples{ diff --git a/man/parse_texture_content.Rd b/man/parse_texture_content.Rd index 04cb1bd..6ed9b7a 100644 --- a/man/parse_texture_content.Rd +++ b/man/parse_texture_content.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/texture_parsing.R +% Please edit documentation in R/txtp_parse_texture_content.R \name{parse_texture_content} \alias{parse_texture_content} \title{Parste texture output file to R readable format} diff --git a/man/read_lims_data.Rd b/man/read_lims_data.Rd index 2b13c16..a8efdcb 100644 --- a/man/read_lims_data.Rd +++ b/man/read_lims_data.Rd @@ -1,20 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lims_report.R, R/read_lims_data.R +% Please edit documentation in R/read_lims_data.R \name{read_lims_data} \alias{read_lims_data} \title{Haal rapportdata uit LIMS DWH} \usage{ -read_lims_data( - connection, - project, - sql_template = "default", - show_query = FALSE, - custom_fields = NULL, - custom_where_clause = NULL, - custom_sql_query = NULL, - deployment = "prd" -) - read_lims_data( connection, project, @@ -63,24 +52,15 @@ gewerkt wordt. Voorlopig is enkel "prd" actief ondersteund} } \value{ -data.frame met minstens de velden ORIGINAL_SAMPLE, -ANALYSIS, COMPONENT en ENTRY - data.frame met minstens de velden ORIGINAL_SAMPLE, ANALYSIS, COMPONENT en ENTRY } \description{ -Haal rapportdata uit LIMS DWH - Haal rapportdata uit LIMS DWH } \examples{ \dontrun{ conn <- lims_connect() -reportdata <- read_lims_data(conn, project = c("I-19W001-01")) -} -\dontrun{ -conn <- lims_connect() reportdata <- read_lims_data(conn, project = c("I-19W001-01"), sql_template = "default") diff --git a/man/write_texture_files.Rd b/man/write_texture_files.Rd index 1b5f133..bfb6b9e 100644 --- a/man/write_texture_files.Rd +++ b/man/write_texture_files.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/texture_parsing.R +% Please edit documentation in R/txtp_write_texture_files.R \name{write_texture_files} \alias{write_texture_files} \title{Schrijf textuurdata weg per staal in de target directory}