Skip to content

Commit

Permalink
textuur voor files met 1 staal en voor staalnamen waar geen appendix …
Browse files Browse the repository at this point in the history
…aanhangt in de vorm van _xx (#16)
  • Loading branch information
pietervsd authored Aug 28, 2024
1 parent a6ed5ca commit 9c1e872
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 88 deletions.
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# inbolims 0.2.11

* some improvements in texture_parsing
* handle files with only one sample (slightly different format)
* handle sample_file_names that have no underscore behind the sample name

# inbolims 0.2.9

* Change the minimal necessary fields
Expand Down
1 change: 0 additions & 1 deletion R/inbolims-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,3 @@ NULL

## quiets concerns of R CMD check re: the .'s that appear in pipelines
if (getRversion() >= "2.15.1") utils::globalVariables(c("."))

16 changes: 7 additions & 9 deletions R/txtp_parse_texture_content.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,28 @@ parse_texture_content <- function(filename, delim = "\t", verbose = TRUE) {
header <- readLines(con = filename, n = 7) # lees de 6 headerrijen + 1 datarij
textuur <- read_delim(
file = filename,
delim = "\t",
delim = delim,
skip = 6,
col_names = FALSE
)
# formaat verschilt als er 1 staal is of als er meerdere zijn

# MEERDERE STALEN
if (substring(header[3], 1, 7) != "Channel") {
if (substring(header[3], 1, 7) != "Channel") { # MEERDERE STALEN
headersplitted <- str_split_1(header[3], pattern = "\t")
headersplitted[1] <- "lower_bound" # niet nodig maar voor duidelijkheid
samplename_pos <- seq(2, length(headersplitted), by = 3)
samples <- gsub("\\.\\$av", "", headersplitted[samplename_pos])
last_underscores <- sapply(gregexpr("\\_", samples), max)
samples <- substring(samples, 1, last_underscores - 1)
if (!all(last_underscores == -1)) {
samples <- substring(samples, 1, last_underscores - 1)
}
header_names <- rep("", length(headersplitted)) # init lege rijen
header_names[1] <- "lower_boundary"
header_names[samplename_pos] <- paste(samples, "value", sep = "___")
header_names[samplename_pos + 1] <- paste(samples, "LCL1S", sep = "___")
header_names[samplename_pos + 2] <- paste(samples, "UCL1S", sep = "___")
# ENKEL 1 STAAL
} else {

} else { # ENKEL 1 STAAL
filenamerecord <- str_split_1(header[2], pattern = "\t")
sample <- gsub("\\.\\$av", "", filenamerecord[2])
last_underscores <- sapply(gregexpr("\\_", sample), max)
Expand All @@ -58,6 +59,3 @@ parse_texture_content <- function(filename, delim = "\t", verbose = TRUE) {
}
return(textuur)
}


##############
18 changes: 8 additions & 10 deletions R/txtp_tex_csv_2_json.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,21 @@ tex_csv_2_json <- function(fullfilename) {
nc <- nchar(fullfilename)
obsdate <- substr(fullfilename, nc - 13, nc - 4)
# making a list for json data export
# names(TEXTUUR.CSV)
SID <- unique(textuur_csv$FieldSampleID)
LabSampleCode <- unique(textuur_csv$sample)
ObservationDate <- obsdate
AnalyseVariabele <- "FRAC.0.2000\u00B5m.ld.c0"
s_id <- unique(textuur_csv$FieldSampleID)
lab_sample_code <- unique(textuur_csv$sample)
observation_date <- obsdate
analyse_variabele <- "FRAC.0.2000\u00B5m.ld.c0"
metingen <- textuur_csv[, c(3:6)]

textuur_list <- list(
SID = SID,
LabSampleCode = LabSampleCode,
ObservationDate = ObservationDate,
AnalyseVariabele = AnalyseVariabele,
s_id = s_id,
lab_sample_code = lab_sample_code,
observation_date = observation_date,
analyse_variabele = analyse_variabele,
metingen = metingen
)

# convert to json in compact format
# TEXTUUR.json.pretty<-toJSON(TEXTUUR.list, pretty=TRUE)
textuur_json <- toJSON(textuur_list, pretty = FALSE) ### INBOdem readin
textuur_json
# write output in same folder but with json extension
Expand Down
108 changes: 40 additions & 68 deletions inst/textuurbatch/texture_parsing_testscript.R
Original file line number Diff line number Diff line change
@@ -1,84 +1,56 @@
## LD Texture_processor COULTER files from LAB
## Programmed by Pieter Verschelde 9/06/2022
## adapted by Bruno De Vos
## readapted by Pieter Verschelde 28/08/2024

### IMPORTANT
### !!! Be sure to have VPN connection to link to LIMS system !!!


#load necessary libraries

library(dplyr)
library(jsonlite)
library(tidyverse) #package met veel datafunctionaliteit
library(DBI) #package voor DB communicatie
library(tidyverse)
library(DBI)
library(readxl)

# Download/update met laatste versie

remotes::install_github('inbo/inbolims')
library(inbolims) #package die de verwerking van de textuurfiles regelt
getwd() #gewoon om te tonen in welke werkdirectory je zit

## CENTRAL LOOP

## load raw filenames in folder "C:/R/IN/LDTEX/" voor labproject V-22V057 (Cmon)

source_path <- "tests/testdata/"
source_pattern <- "sample"
target_path <- "tests/testdata/result/"

list_fn <- list.files(path = source_path,
pattern = source_pattern,
full.names = TRUE)
n_list_fn <- length(list_fn)

## Loop to process all files serially ####
for (i in 1:n_list_fn) {
filename <- list_fn[i]

#parse de file naar een geldige R dataset
# Download/update inbolims (core texture parsing functionalities)
remotes::install_github("inbo/inbolims")
library(inbolims)
getwd()

# get input files
file_input_path <- "."
files_list <- list.files(file_input_path,
pattern = "V-24V057",
full.names = TRUE)
n_files <- length(files_list)

#output path
target_dir <- "./output"
dir.create(target_dir)

#db connection
conn <- lims_connect() #connect to dwh to link lab id

# main loop parsing
for (i in 1:n_files) {
filename <- files_list[i]
print(filename)
textuur_parsed <- parse_texture_content(filename, delim = "\t")
View(textuur_parsed)

#interpreteer de dataset tot een inhoudelijk bruikbaar formaat
textuur_interpreted <- interpret_texture_content(textuur_parsed)
View(textuur_interpreted)

#maak een connectie met het LIMS datawarehouse
conn <- lims_connect() #connect to dwh
textuur_linked <- link_labo_id(conn, textuur_interpreted)
dim(textuur_linked)

#schrijf de files weg in /R/OUT/LDTEX/
write_texture_files(target_path, textuur_linked)

} # loop end

#### Process files and save to CSV and json ####


# listFNOUT<-list.files(path="C:/R_scripts/_GIT_REPO/Cmon/out/LDTEX/2023/deel2", pattern=".csv", full.names = TRUE)
# nlist<-length(listFNOUT)
#
#
# for (j in 1:nlist) {
# TEMPfile<-read.csv2(listFNOUT[j])
# dim(TEMPfile)
# UNIfile<-distinct(TEMPfile) ## remove all duplicate rows
# dim(UNIfile)
# # write output csv file
# write.csv2(UNIfile,listFNOUT[j], row.names = FALSE)
#
# # run function to convert to json and write in same directory
# TEX_CSV2JSON(listFNOUT[j])
# }
#









write_texture_files(target_dir, textuur_linked)
}

#conversion output to json
files_list_out <- list.files(target_dir, pattern = ".csv", full.names = TRUE)
n_files_out <- length(files_list_out)

for (j in 1:n_files_out) {
tmp <- read.csv2(files_list_out[j])
tmp_uni <- distinct(tmp) #remove all duplicate rows
write.csv2(tmp_uni, files_list_out[j], row.names = FALSE)
tex_csv_2_json(files_list_out[j])
}

0 comments on commit 9c1e872

Please sign in to comment.