diff --git a/NEWS.md b/NEWS.md index 75d8acd..a82f172 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# inbolims 0.2.11 + +* some improvements in texture_parsing + * handle files with only one sample (slightly different format) + * handle sample_file_names that have no underscore behind the sample name + # inbolims 0.2.9 * Change the minimal necessary fields diff --git a/R/inbolims-package.R b/R/inbolims-package.R index fce5932..3370c78 100644 --- a/R/inbolims-package.R +++ b/R/inbolims-package.R @@ -7,4 +7,3 @@ NULL ## quiets concerns of R CMD check re: the .'s that appear in pipelines if (getRversion() >= "2.15.1") utils::globalVariables(c(".")) - diff --git a/R/txtp_parse_texture_content.R b/R/txtp_parse_texture_content.R index be96279..c73ab33 100644 --- a/R/txtp_parse_texture_content.R +++ b/R/txtp_parse_texture_content.R @@ -14,27 +14,28 @@ parse_texture_content <- function(filename, delim = "\t", verbose = TRUE) { header <- readLines(con = filename, n = 7) # lees de 6 headerrijen + 1 datarij textuur <- read_delim( file = filename, - delim = "\t", + delim = delim, skip = 6, col_names = FALSE ) # formaat verschilt als er 1 staal is of als er meerdere zijn - # MEERDERE STALEN - if (substring(header[3], 1, 7) != "Channel") { + if (substring(header[3], 1, 7) != "Channel") { # MEERDERE STALEN headersplitted <- str_split_1(header[3], pattern = "\t") headersplitted[1] <- "lower_bound" # niet nodig maar voor duidelijkheid samplename_pos <- seq(2, length(headersplitted), by = 3) samples <- gsub("\\.\\$av", "", headersplitted[samplename_pos]) last_underscores <- sapply(gregexpr("\\_", samples), max) - samples <- substring(samples, 1, last_underscores - 1) + if (!all(last_underscores == -1)) { + samples <- substring(samples, 1, last_underscores - 1) + } header_names <- rep("", length(headersplitted)) # init lege rijen header_names[1] <- "lower_boundary" header_names[samplename_pos] <- paste(samples, "value", sep = "___") header_names[samplename_pos + 1] <- paste(samples, "LCL1S", sep = "___") header_names[samplename_pos + 2] <- paste(samples, "UCL1S", sep = "___") - # ENKEL 1 STAAL - } else { + + } else { # ENKEL 1 STAAL filenamerecord <- str_split_1(header[2], pattern = "\t") sample <- gsub("\\.\\$av", "", filenamerecord[2]) last_underscores <- sapply(gregexpr("\\_", sample), max) @@ -58,6 +59,3 @@ parse_texture_content <- function(filename, delim = "\t", verbose = TRUE) { } return(textuur) } - - -############## diff --git a/R/txtp_tex_csv_2_json.R b/R/txtp_tex_csv_2_json.R index efd8a40..a96d3d8 100644 --- a/R/txtp_tex_csv_2_json.R +++ b/R/txtp_tex_csv_2_json.R @@ -17,23 +17,21 @@ tex_csv_2_json <- function(fullfilename) { nc <- nchar(fullfilename) obsdate <- substr(fullfilename, nc - 13, nc - 4) # making a list for json data export - # names(TEXTUUR.CSV) - SID <- unique(textuur_csv$FieldSampleID) - LabSampleCode <- unique(textuur_csv$sample) - ObservationDate <- obsdate - AnalyseVariabele <- "FRAC.0.2000\u00B5m.ld.c0" + s_id <- unique(textuur_csv$FieldSampleID) + lab_sample_code <- unique(textuur_csv$sample) + observation_date <- obsdate + analyse_variabele <- "FRAC.0.2000\u00B5m.ld.c0" metingen <- textuur_csv[, c(3:6)] textuur_list <- list( - SID = SID, - LabSampleCode = LabSampleCode, - ObservationDate = ObservationDate, - AnalyseVariabele = AnalyseVariabele, + s_id = s_id, + lab_sample_code = lab_sample_code, + observation_date = observation_date, + analyse_variabele = analyse_variabele, metingen = metingen ) # convert to json in compact format - # TEXTUUR.json.pretty<-toJSON(TEXTUUR.list, pretty=TRUE) textuur_json <- toJSON(textuur_list, pretty = FALSE) ### INBOdem readin textuur_json # write output in same folder but with json extension diff --git a/inst/textuurbatch/texture_parsing_testscript.R b/inst/textuurbatch/texture_parsing_testscript.R index 4b0442a..7d5cb81 100644 --- a/inst/textuurbatch/texture_parsing_testscript.R +++ b/inst/textuurbatch/texture_parsing_testscript.R @@ -1,84 +1,56 @@ ## LD Texture_processor COULTER files from LAB ## Programmed by Pieter Verschelde 9/06/2022 ## adapted by Bruno De Vos +## readapted by Pieter Verschelde 28/08/2024 ### IMPORTANT ### !!! Be sure to have VPN connection to link to LIMS system !!! - +#load necessary libraries library(dplyr) library(jsonlite) -library(tidyverse) #package met veel datafunctionaliteit -library(DBI) #package voor DB communicatie +library(tidyverse) +library(DBI) library(readxl) -# Download/update met laatste versie - -remotes::install_github('inbo/inbolims') -library(inbolims) #package die de verwerking van de textuurfiles regelt -getwd() #gewoon om te tonen in welke werkdirectory je zit - -## CENTRAL LOOP - -## load raw filenames in folder "C:/R/IN/LDTEX/" voor labproject V-22V057 (Cmon) - -source_path <- "tests/testdata/" -source_pattern <- "sample" -target_path <- "tests/testdata/result/" - -list_fn <- list.files(path = source_path, - pattern = source_pattern, - full.names = TRUE) -n_list_fn <- length(list_fn) - -## Loop to process all files serially #### -for (i in 1:n_list_fn) { - filename <- list_fn[i] - - #parse de file naar een geldige R dataset +# Download/update inbolims (core texture parsing functionalities) +remotes::install_github("inbo/inbolims") +library(inbolims) +getwd() + +# get input files +file_input_path <- "." +files_list <- list.files(file_input_path, + pattern = "V-24V057", + full.names = TRUE) +n_files <- length(files_list) + +#output path +target_dir <- "./output" +dir.create(target_dir) + +#db connection +conn <- lims_connect() #connect to dwh to link lab id + +# main loop parsing +for (i in 1:n_files) { + filename <- files_list[i] + print(filename) textuur_parsed <- parse_texture_content(filename, delim = "\t") - View(textuur_parsed) - - #interpreteer de dataset tot een inhoudelijk bruikbaar formaat textuur_interpreted <- interpret_texture_content(textuur_parsed) - View(textuur_interpreted) - - #maak een connectie met het LIMS datawarehouse conn <- lims_connect() #connect to dwh textuur_linked <- link_labo_id(conn, textuur_interpreted) - dim(textuur_linked) - - #schrijf de files weg in /R/OUT/LDTEX/ - write_texture_files(target_path, textuur_linked) - -} # loop end - -#### Process files and save to CSV and json #### - - -# listFNOUT<-list.files(path="C:/R_scripts/_GIT_REPO/Cmon/out/LDTEX/2023/deel2", pattern=".csv", full.names = TRUE) -# nlist<-length(listFNOUT) -# -# -# for (j in 1:nlist) { -# TEMPfile<-read.csv2(listFNOUT[j]) -# dim(TEMPfile) -# UNIfile<-distinct(TEMPfile) ## remove all duplicate rows -# dim(UNIfile) -# # write output csv file -# write.csv2(UNIfile,listFNOUT[j], row.names = FALSE) -# -# # run function to convert to json and write in same directory -# TEX_CSV2JSON(listFNOUT[j]) -# } -# - - - - - - - - - + write_texture_files(target_dir, textuur_linked) +} + +#conversion output to json +files_list_out <- list.files(target_dir, pattern = ".csv", full.names = TRUE) +n_files_out <- length(files_list_out) + +for (j in 1:n_files_out) { + tmp <- read.csv2(files_list_out[j]) + tmp_uni <- distinct(tmp) #remove all duplicate rows + write.csv2(tmp_uni, files_list_out[j], row.names = FALSE) + tex_csv_2_json(files_list_out[j]) +}