textuur voor files met 1 staal en voor staalnamen waar geen appendix …

…aanhangt in de vorm van _xx (#16)
inbo · Aug 28, 2024 · 9c1e872 · 9c1e872
1 parent a6ed5ca
commit 9c1e872
Show file tree

Hide file tree

Showing 5 changed files with 61 additions and 88 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# inbolims 0.2.11
+
+* some improvements in texture_parsing
+    * handle files with only one sample (slightly different format)
+    * handle sample_file_names that have no underscore behind the sample name
+
 # inbolims 0.2.9
 
 * Change the minimal necessary fields

diff --git a/R/inbolims-package.R b/R/inbolims-package.R
@@ -7,4 +7,3 @@ NULL
 
 ## quiets concerns of R CMD check re: the .'s that appear in pipelines
 if (getRversion() >= "2.15.1") utils::globalVariables(c("."))
-
diff --git a/R/txtp_parse_texture_content.R b/R/txtp_parse_texture_content.R
@@ -14,27 +14,28 @@ parse_texture_content <- function(filename, delim = "\t", verbose = TRUE) {
   header <- readLines(con = filename, n = 7) # lees de 6 headerrijen + 1 datarij
   textuur <- read_delim(
     file = filename,
-    delim = "\t",
+    delim = delim,
     skip = 6,
     col_names = FALSE
   )
   # formaat verschilt als er 1 staal is of als er meerdere zijn
 
-  # MEERDERE STALEN
-  if (substring(header[3], 1, 7) != "Channel") {
+  if (substring(header[3], 1, 7) != "Channel") { # MEERDERE STALEN
     headersplitted <- str_split_1(header[3], pattern = "\t")
     headersplitted[1] <- "lower_bound" # niet nodig maar voor duidelijkheid
     samplename_pos <- seq(2, length(headersplitted), by = 3)
     samples <- gsub("\\.\\$av", "", headersplitted[samplename_pos])
     last_underscores <- sapply(gregexpr("\\_", samples), max)
-    samples <- substring(samples, 1, last_underscores - 1)
+    if (!all(last_underscores == -1)) {
+      samples <- substring(samples, 1, last_underscores - 1)
+    }
     header_names <- rep("", length(headersplitted)) # init lege rijen
     header_names[1] <- "lower_boundary"
     header_names[samplename_pos] <- paste(samples, "value", sep = "___")
     header_names[samplename_pos + 1] <- paste(samples, "LCL1S", sep = "___")
     header_names[samplename_pos + 2] <- paste(samples, "UCL1S", sep = "___")
-    # ENKEL 1 STAAL
-  } else {
+
+  } else { # ENKEL 1 STAAL
     filenamerecord <- str_split_1(header[2], pattern = "\t")
     sample <- gsub("\\.\\$av", "", filenamerecord[2])
     last_underscores <- sapply(gregexpr("\\_", sample), max)
@@ -58,6 +59,3 @@ parse_texture_content <- function(filename, delim = "\t", verbose = TRUE) {
   }
   return(textuur)
 }
-
-
-##############
diff --git a/R/txtp_tex_csv_2_json.R b/R/txtp_tex_csv_2_json.R
@@ -17,23 +17,21 @@ tex_csv_2_json <- function(fullfilename) {
   nc <- nchar(fullfilename)
   obsdate <- substr(fullfilename, nc - 13, nc - 4)
   # making a list for json data export
-  # names(TEXTUUR.CSV)
-  SID <- unique(textuur_csv$FieldSampleID)
-  LabSampleCode <- unique(textuur_csv$sample)
-  ObservationDate <- obsdate
-  AnalyseVariabele <- "FRAC.0.2000\u00B5m.ld.c0"
+  s_id <- unique(textuur_csv$FieldSampleID)
+  lab_sample_code <- unique(textuur_csv$sample)
+  observation_date <- obsdate
+  analyse_variabele <- "FRAC.0.2000\u00B5m.ld.c0"
   metingen <- textuur_csv[, c(3:6)]
 
   textuur_list <- list(
-    SID = SID,
-    LabSampleCode = LabSampleCode,
-    ObservationDate = ObservationDate,
-    AnalyseVariabele = AnalyseVariabele,
+    s_id = s_id,
+    lab_sample_code = lab_sample_code,
+    observation_date = observation_date,
+    analyse_variabele = analyse_variabele,
     metingen = metingen
   )
 
   # convert to json in compact format
-  #  TEXTUUR.json.pretty<-toJSON(TEXTUUR.list, pretty=TRUE)
   textuur_json <- toJSON(textuur_list, pretty = FALSE) ###  INBOdem readin
   textuur_json
   # write output in same folder but with json extension

diff --git a/inst/textuurbatch/texture_parsing_testscript.R b/inst/textuurbatch/texture_parsing_testscript.R
@@ -1,84 +1,56 @@
 ## LD Texture_processor COULTER files from LAB
 ## Programmed by Pieter Verschelde 9/06/2022
 ## adapted by Bruno De Vos
+## readapted by Pieter Verschelde 28/08/2024
 
 ### IMPORTANT
 ### !!! Be sure to have VPN connection to link to LIMS system !!!
 
-
+#load necessary libraries
 
 library(dplyr)
 library(jsonlite)
-library(tidyverse) #package met veel datafunctionaliteit
-library(DBI) #package voor DB communicatie
+library(tidyverse)
+library(DBI)
 library(readxl)
 
-# Download/update met laatste versie 
-
-remotes::install_github('inbo/inbolims')
-library(inbolims) #package die de verwerking van de textuurfiles regelt
-getwd() #gewoon om te tonen in welke werkdirectory je zit
-
-## CENTRAL LOOP
-
-## load raw filenames in folder  "C:/R/IN/LDTEX/" voor labproject V-22V057 (Cmon)
-
-source_path <- "tests/testdata/"
-source_pattern <- "sample"
-target_path <- "tests/testdata/result/"
-
-list_fn <- list.files(path = source_path,
-                      pattern = source_pattern,
-                      full.names = TRUE)
-n_list_fn <- length(list_fn)
-
-## Loop to process all files serially   ####
-for (i in 1:n_list_fn) {
-  filename <- list_fn[i]
-
-  #parse de file naar een geldige R dataset
+# Download/update inbolims (core texture parsing functionalities)
+remotes::install_github("inbo/inbolims")
+library(inbolims)
+getwd()
+
+# get input files
+file_input_path <- "."
+files_list <- list.files(file_input_path,
+                         pattern = "V-24V057",
+                         full.names = TRUE)
+n_files <- length(files_list)
+
+#output path
+target_dir <- "./output"
+dir.create(target_dir)
+
+#db connection
+conn <- lims_connect() #connect to dwh to link lab id
+
+# main loop parsing
+for (i in 1:n_files) {
+  filename <- files_list[i]
+  print(filename)
   textuur_parsed <- parse_texture_content(filename, delim = "\t")
-  View(textuur_parsed)
-
-  #interpreteer de dataset tot een inhoudelijk bruikbaar formaat
   textuur_interpreted <- interpret_texture_content(textuur_parsed)
-  View(textuur_interpreted)
-
-  #maak een connectie met het LIMS datawarehouse
   conn <- lims_connect() #connect to dwh
   textuur_linked <- link_labo_id(conn, textuur_interpreted)
-  dim(textuur_linked)
-
-  #schrijf de files weg in /R/OUT/LDTEX/
-  write_texture_files(target_path, textuur_linked)
-
-}   # loop end
-
-#### Process files and save to CSV and json ####
-
-
-# listFNOUT<-list.files(path="C:/R_scripts/_GIT_REPO/Cmon/out/LDTEX/2023/deel2", pattern=".csv", full.names = TRUE)
-# nlist<-length(listFNOUT)
-# 
-# 
-# for (j in 1:nlist) {
-#   TEMPfile<-read.csv2(listFNOUT[j]) 
-#   dim(TEMPfile)
-#   UNIfile<-distinct(TEMPfile)     ## remove all duplicate rows
-#   dim(UNIfile)
-#   # write output csv file
-#   write.csv2(UNIfile,listFNOUT[j], row.names = FALSE)
-#   
-#   # run function to convert to json and write in same directory
-#   TEX_CSV2JSON(listFNOUT[j])
-# }
-# 
-
-
-
-
-
-
-
-
-
+  write_texture_files(target_dir, textuur_linked)
+}
+
+#conversion output to json
+files_list_out <- list.files(target_dir, pattern = ".csv", full.names = TRUE)
+n_files_out <- length(files_list_out)
+
+for (j in 1:n_files_out) {
+  tmp <- read.csv2(files_list_out[j])
+  tmp_uni <- distinct(tmp) #remove all duplicate rows
+  write.csv2(tmp_uni, files_list_out[j], row.names = FALSE)
+  tex_csv_2_json(files_list_out[j])
+}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -7,4 +7,3 @@ NULL

		## quiets concerns of R CMD check re: the .'s that appear in pipelines
		if (getRversion() >= "2.15.1") utils::globalVariables(c("."))