Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

textuur voor files met 1 staal en voor staalnamen waar geen appendix … #16

Merged
merged 1 commit into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# inbolims 0.2.11

* some improvements in texture_parsing
* handle files with only one sample (slightly different format)
* handle sample_file_names that have no underscore behind the sample name

# inbolims 0.2.9

* Change the minimal necessary fields
Expand Down
1 change: 0 additions & 1 deletion R/inbolims-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,3 @@ NULL

## quiets concerns of R CMD check re: the .'s that appear in pipelines
if (getRversion() >= "2.15.1") utils::globalVariables(c("."))

16 changes: 7 additions & 9 deletions R/txtp_parse_texture_content.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,28 @@ parse_texture_content <- function(filename, delim = "\t", verbose = TRUE) {
header <- readLines(con = filename, n = 7) # lees de 6 headerrijen + 1 datarij
textuur <- read_delim(
file = filename,
delim = "\t",
delim = delim,
skip = 6,
col_names = FALSE
)
# formaat verschilt als er 1 staal is of als er meerdere zijn

# MEERDERE STALEN
if (substring(header[3], 1, 7) != "Channel") {
if (substring(header[3], 1, 7) != "Channel") { # MEERDERE STALEN
headersplitted <- str_split_1(header[3], pattern = "\t")
headersplitted[1] <- "lower_bound" # niet nodig maar voor duidelijkheid
samplename_pos <- seq(2, length(headersplitted), by = 3)
samples <- gsub("\\.\\$av", "", headersplitted[samplename_pos])
last_underscores <- sapply(gregexpr("\\_", samples), max)
samples <- substring(samples, 1, last_underscores - 1)
if (!all(last_underscores == -1)) {
samples <- substring(samples, 1, last_underscores - 1)
}
header_names <- rep("", length(headersplitted)) # init lege rijen
header_names[1] <- "lower_boundary"
header_names[samplename_pos] <- paste(samples, "value", sep = "___")
header_names[samplename_pos + 1] <- paste(samples, "LCL1S", sep = "___")
header_names[samplename_pos + 2] <- paste(samples, "UCL1S", sep = "___")
# ENKEL 1 STAAL
} else {

} else { # ENKEL 1 STAAL
filenamerecord <- str_split_1(header[2], pattern = "\t")
sample <- gsub("\\.\\$av", "", filenamerecord[2])
last_underscores <- sapply(gregexpr("\\_", sample), max)
Expand All @@ -58,6 +59,3 @@ parse_texture_content <- function(filename, delim = "\t", verbose = TRUE) {
}
return(textuur)
}


##############
18 changes: 8 additions & 10 deletions R/txtp_tex_csv_2_json.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,21 @@ tex_csv_2_json <- function(fullfilename) {
nc <- nchar(fullfilename)
obsdate <- substr(fullfilename, nc - 13, nc - 4)
# making a list for json data export
# names(TEXTUUR.CSV)
SID <- unique(textuur_csv$FieldSampleID)
LabSampleCode <- unique(textuur_csv$sample)
ObservationDate <- obsdate
AnalyseVariabele <- "FRAC.0.2000\u00B5m.ld.c0"
s_id <- unique(textuur_csv$FieldSampleID)
lab_sample_code <- unique(textuur_csv$sample)
observation_date <- obsdate
analyse_variabele <- "FRAC.0.2000\u00B5m.ld.c0"
metingen <- textuur_csv[, c(3:6)]

textuur_list <- list(
SID = SID,
LabSampleCode = LabSampleCode,
ObservationDate = ObservationDate,
AnalyseVariabele = AnalyseVariabele,
s_id = s_id,
lab_sample_code = lab_sample_code,
observation_date = observation_date,
analyse_variabele = analyse_variabele,
metingen = metingen
)

# convert to json in compact format
# TEXTUUR.json.pretty<-toJSON(TEXTUUR.list, pretty=TRUE)
textuur_json <- toJSON(textuur_list, pretty = FALSE) ### INBOdem readin
textuur_json
# write output in same folder but with json extension
Expand Down
108 changes: 40 additions & 68 deletions inst/textuurbatch/texture_parsing_testscript.R
Original file line number Diff line number Diff line change
@@ -1,84 +1,56 @@
## LD Texture_processor COULTER files from LAB
## Programmed by Pieter Verschelde 9/06/2022
## adapted by Bruno De Vos
## readapted by Pieter Verschelde 28/08/2024

### IMPORTANT
### !!! Be sure to have VPN connection to link to LIMS system !!!


#load necessary libraries

library(dplyr)
library(jsonlite)
library(tidyverse) #package met veel datafunctionaliteit
library(DBI) #package voor DB communicatie
library(tidyverse)
library(DBI)
library(readxl)

# Download/update met laatste versie

remotes::install_github('inbo/inbolims')
library(inbolims) #package die de verwerking van de textuurfiles regelt
getwd() #gewoon om te tonen in welke werkdirectory je zit

## CENTRAL LOOP

## load raw filenames in folder "C:/R/IN/LDTEX/" voor labproject V-22V057 (Cmon)

source_path <- "tests/testdata/"
source_pattern <- "sample"
target_path <- "tests/testdata/result/"

list_fn <- list.files(path = source_path,
pattern = source_pattern,
full.names = TRUE)
n_list_fn <- length(list_fn)

## Loop to process all files serially ####
for (i in 1:n_list_fn) {
filename <- list_fn[i]

#parse de file naar een geldige R dataset
# Download/update inbolims (core texture parsing functionalities)
remotes::install_github("inbo/inbolims")
library(inbolims)
getwd()

# get input files
file_input_path <- "."
files_list <- list.files(file_input_path,
pattern = "V-24V057",
full.names = TRUE)
n_files <- length(files_list)

#output path
target_dir <- "./output"
dir.create(target_dir)

#db connection
conn <- lims_connect() #connect to dwh to link lab id

# main loop parsing
for (i in 1:n_files) {
filename <- files_list[i]
print(filename)
textuur_parsed <- parse_texture_content(filename, delim = "\t")
View(textuur_parsed)

#interpreteer de dataset tot een inhoudelijk bruikbaar formaat
textuur_interpreted <- interpret_texture_content(textuur_parsed)
View(textuur_interpreted)

#maak een connectie met het LIMS datawarehouse
conn <- lims_connect() #connect to dwh
textuur_linked <- link_labo_id(conn, textuur_interpreted)
dim(textuur_linked)

#schrijf de files weg in /R/OUT/LDTEX/
write_texture_files(target_path, textuur_linked)

} # loop end

#### Process files and save to CSV and json ####


# listFNOUT<-list.files(path="C:/R_scripts/_GIT_REPO/Cmon/out/LDTEX/2023/deel2", pattern=".csv", full.names = TRUE)
# nlist<-length(listFNOUT)
#
#
# for (j in 1:nlist) {
# TEMPfile<-read.csv2(listFNOUT[j])
# dim(TEMPfile)
# UNIfile<-distinct(TEMPfile) ## remove all duplicate rows
# dim(UNIfile)
# # write output csv file
# write.csv2(UNIfile,listFNOUT[j], row.names = FALSE)
#
# # run function to convert to json and write in same directory
# TEX_CSV2JSON(listFNOUT[j])
# }
#









write_texture_files(target_dir, textuur_linked)
}

#conversion output to json
files_list_out <- list.files(target_dir, pattern = ".csv", full.names = TRUE)
n_files_out <- length(files_list_out)

for (j in 1:n_files_out) {
tmp <- read.csv2(files_list_out[j])
tmp_uni <- distinct(tmp) #remove all duplicate rows
write.csv2(tmp_uni, files_list_out[j], row.names = FALSE)
tex_csv_2_json(files_list_out[j])
}
Loading