Merge pull request #192 from Nanostring-Biostats/bioc3.19

Bioc3.19
Nanostring-Biostats · Feb 5, 2024 · 09f3a07 · 09f3a07
2 parents bc74cef + 53b6c20
commit 09f3a07
Show file tree

Hide file tree

Showing 11 changed files with 355 additions and 301 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: GeomxTools
 Title: NanoString GeoMx Tools
 Description: Tools for NanoString Technologies GeoMx Technology.  Package provides functions for reading in DCC and
          PKC files based on an ExpressionSet derived object.  Normalization and QC functions are also included.
-Version: 3.7.1
+Version: 3.7.2
 Encoding: UTF-8
 Authors@R: c(person("Maddy", "Griswold", email = "mgriswold@nanostring.com", role = c("cre", "aut")),
              person("Nicole", "Ortogero", email = "nortogero@nanostring.com", role = c("aut")), 
@@ -47,5 +47,5 @@ biocViews: GeneExpression, Transcription, CellBasedAssays, DataImport,
            Normalization, Spatial
 VignetteEngine: knitr::rmarkdown
 VignetteBuilder: knitr
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.3
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -25,7 +25,7 @@ importFrom(dplyr, bind_rows)
 importFrom(utils, capture.output)
 importFrom(utils, write.table)
 importFrom("utils", "packageVersion")
-importFrom(data.table, data.table, .SD)
+importFrom(data.table, data.table, .SD, as.data.table)
 importFrom(lmerTest, lmer)
 importFrom(lmerTest, ls_means)
 importFrom(parallel, mclapply)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# GeomxTools 3.7.2
+* Seurat v5 coercion
+* IPA data loading
+* improved Proteogenomics error handling
+* bug fix on area filtering
+
 # GeomxTools 3.1.1
 * Documentation correction
 

diff --git a/R/NanoStringGeoMxSet-autoplot.R b/R/NanoStringGeoMxSet-autoplot.R
@@ -103,13 +103,20 @@ qcProteinSignal <- function(object, neg.names=NULL) {
     snr <- snrOrder(object, neg.names)
 
     protnames <- rownames(snr)
+    ylim <- range(log2(snr))
+    if(ylim[1L] == -Inf){
+      ylim[1L] <- 0
+    }
+    if(ylim[2L] == -Inf){
+      ylim[2L] <- 0
+    }
 
     fig <- function(){
         par(mar = c(11, 4, 2, 1))
         boxplot(t(log2(snr)),
                 las = 2,
                 outline = FALSE,
-                ylim = range(log2(snr)),
+                ylim = ylim,
                 names = protnames,
                 ylab = "Log2 signal-to-background ratio",
                 cex.axis = .85 - 0.3 * (nrow(snr) > 60)

diff --git a/R/coercions.R b/R/coercions.R
@@ -26,6 +26,7 @@ NULL
 #' 
 #' @importFrom Seurat CreateSeuratObject 
 #' @importFrom Seurat AddMetaData 
+#' @importFrom data.table as.data.table
 #' 
 #' @export 
 #' @rdname as.Seurat
@@ -66,6 +67,8 @@ as.Seurat.NanoStringGeoMxSet <- function(x, ident = NULL, normData = NULL,
        forceRaw == FALSE){
         stop("It is NOT recommended to use Seurat's normalization for GeoMx data. 
              Normalize using GeomxTools::normalize() or set forceRaw to TRUE if you want to continue with Raw data")
+    }else if(length(grep(pattern = normFactor_names, names(sData(x)))) == 0){
+      message("Coercing raw data, it is NOT recommended to use Seurat's normalization for GeoMx data.")
     }
 
 
@@ -77,25 +80,54 @@ as.Seurat.NanoStringGeoMxSet <- function(x, ident = NULL, normData = NULL,
 
     QCMetrics <- "QCFlags"
 
-    seuratConvert <- suppressWarnings(Seurat::CreateSeuratObject(counts = assayDataElement(x, normData), 
-                                                                 assay = "GeoMx", 
-                                        project = expinfo(experimentData(x))[["title"]]))
-    seuratConvert <- suppressWarnings(Seurat::AddMetaData(object = seuratConvert, 
-                                                  metadata = sData(x)[,!colnames(sData(x)) %in% 
-                                                                               c(sequencingMetrics,
-                                                                                 QCMetrics)]))
-    seuratConvert@assays$GeoMx <- Seurat::AddMetaData(object = seuratConvert@assays$GeoMx, 
-                                                      metadata = fData(x))
+    meta <- as.data.frame(as.data.table(sData(x)[,!colnames(sData(x)) %in%  c(sequencingMetrics, QCMetrics)]))
 
-    if(!is.null(ident)){
+    if(any(grepl("_", rownames(x)))){
+      rownames(x) <- gsub("_", "-", rownames(x))
+      message("Feature names cannot have underscores ('_'), replacing with dashes ('-')")
+    }
+
+    if(packageVersion("Seurat") < 5){
+      seuratConvert <- suppressWarnings(Seurat::CreateSeuratObject(counts = assayDataElement(x, normData), 
+                                                                   assay = "GeoMx", 
+                                                                   project = expinfo(experimentData(x))[["title"]]))
+      seuratConvert <- suppressWarnings(Seurat::AddMetaData(object = seuratConvert, 
+                                                            metadata = meta))
+      seuratConvert@assays$GeoMx <- Seurat::AddMetaData(object = seuratConvert@assays$GeoMx, 
+                                                        metadata = fData(x))
+
+      if(!is.null(ident)){
         if(!ident %in% colnames(seuratConvert@meta.data)){
-            stop(paste0("ident \"", ident, "\" not found in GeoMxSet Object"))
+          stop(paste0("ident \"", ident, "\" not found in GeoMxSet Object"))
         }
 
         Seurat::Idents(seuratConvert) <- seuratConvert[[ident]]
+      }
+    }else{
+      projectName <- expinfo(experimentData(x))[["title"]]
+      if(projectName == ""){
+        projectName <- "GeoMx"
+      }
+
+      seuratConvert <- suppressWarnings(Seurat::CreateSeuratObject(counts = assayDataElement(x, "exprs"), 
+                                                                   assay = "GeoMx", 
+                                                                   project = projectName))
+      seuratConvert <- Seurat::SetAssayData(seuratConvert, layer = "data", 
+                                            new.data = assayDataElement(x, normData))
+      seuratConvert <- suppressWarnings(Seurat::AddMetaData(object = seuratConvert, 
+                                                            metadata = meta))
+      seuratConvert@assays$GeoMx <- Seurat::AddMetaData(object = seuratConvert@assays$GeoMx, 
+                                                        metadata = fData(x))
+
+      if(!is.null(ident)){
+        if(!ident %in% colnames(seuratConvert@meta.data)){
+          stop(paste0("ident \"", ident, "\" not found in GeoMxSet Object"))
+        }
+
+        Seurat::Idents(seuratConvert) <- as.factor(seuratConvert@meta.data[[ident]])
+      }
     }
-
-
+
     seuratConvert@misc <- otherInfo(experimentData(x)) 
     seuratConvert@misc[["sequencingMetrics"]] <- sData(x)[colnames(sData(x)) %in% 
                                                                    sequencingMetrics]
@@ -220,6 +252,8 @@ as.SpatialExperiment.NanoStringGeoMxSet <- function(x, normData = NULL,
        forceRaw == FALSE){
         stop("It is NOT recommended to use Seurat's normalization for GeoMx data. 
              Normalize using GeomxTools::normalize() or set forceRaw to TRUE if you want to continue with Raw data")
+    }else if(length(grep(pattern = normFactor_names, names(sData(x)))) == 0){
+      warning("Coercing raw data, it is NOT recommended to use Seurat's normalization for GeoMx data.")
     }
 
     sequencingMetrics <- c("FileVersion", "SoftwareVersion", "Date", "Plate_ID", 

diff --git a/R/readNanoStringGeoMxSet.R b/R/readNanoStringGeoMxSet.R
@@ -21,12 +21,12 @@ function(dccFiles,
   }
   # Read data rccFiles
   data <- structure(lapply(dccFiles, readDccFile), names = basename(dccFiles))
-
+  
   # Create assayData
   assay <- lapply(data, function(x)
     structure(x[["Code_Summary"]][["Count"]],
               names = rownames(x[["Code_Summary"]])))
-
+  
   # Create phenoData
   if (is.null(phenoDataFile)) {
     stop("Please specify an input for phenoDataFile.")
@@ -41,7 +41,7 @@ function(dccFiles,
     }
     # check protocolDataColNames
     if (!(all(protocolDataColNames %in% colnames(pheno))) &
-          !(is.null(protocolDataColNames))) {
+        !(is.null(protocolDataColNames))) {
       stop("Columns specified in `protocolDataColNames` are not found in `phenoDataFile`")
     }
     # check experimentDataColNames
@@ -53,44 +53,44 @@ function(dccFiles,
     pheno[[j]] <- ifelse(grepl(".dcc", pheno[[j]]), paste0(pheno[[j]]),
                          paste0(pheno[[j]], ".dcc"))
     if ("slide name" %in% colnames(pheno)) {
-        ntcs <- which(tolower(pheno[["slide name"]]) == "no template control")
-        if (length(ntcs) > 0) {
-            ntcData <- lapply(seq_along(ntcs), function(x) {
-                ntcID <- pheno[ntcs[x], j]
-                if(!is.na(ntcs[x + 1L])) {
-                    ntcNames <- rep(ntcID, ntcs[x + 1L] - ntcs[x])
-                    ntcCounts <-
-                        rep(sum(assay[[ntcID]]), ntcs[x + 1L] - ntcs[x])
-                    ntcDF <- data.frame("NTC_ID"=ntcNames, "NTC"=ntcCounts)
-                } else {
-                    ntcNames <- rep(ntcID, dim(pheno)[1L] - ntcs[x] + 1L)
-                    ntcCounts <-
-                        rep(sum(assay[[ntcID]]), dim(pheno)[1L] - ntcs[x] + 1L)
-                    ntcDF <- data.frame("NTC_ID"=ntcNames, "NTC"=ntcCounts)
-                }
-                return(ntcDF)
-            })
-            if (length(ntcs) > 1L) {
-                ntcData <- do.call(rbind, ntcData)
-            } else {
-                ntcData <- ntcData[[1L]]
-            }
-            pheno <- cbind(pheno, ntcData)
-            pheno <- pheno[!rownames(pheno) %in% ntcs, ]
-            assay <- assay[!names(assay) %in% unique(pheno[["NTC_ID"]])]
-            data <- data[!names(data) %in% unique(pheno[["NTC_ID"]])]
-            protocolDataColNames <- c(protocolDataColNames, "NTC_ID", "NTC")
+      ntcs <- which(tolower(pheno[["slide name"]]) == "no template control")
+      if (length(ntcs) > 0) {
+        ntcData <- lapply(seq_along(ntcs), function(x) {
+          ntcID <- pheno[ntcs[x], j]
+          if(!is.na(ntcs[x + 1L])) {
+            ntcNames <- rep(ntcID, ntcs[x + 1L] - ntcs[x])
+            ntcCounts <-
+              rep(sum(assay[[ntcID]]), ntcs[x + 1L] - ntcs[x])
+            ntcDF <- data.frame("NTC_ID"=ntcNames, "NTC"=ntcCounts)
+          } else {
+            ntcNames <- rep(ntcID, dim(pheno)[1L] - ntcs[x] + 1L)
+            ntcCounts <-
+              rep(sum(assay[[ntcID]]), dim(pheno)[1L] - ntcs[x] + 1L)
+            ntcDF <- data.frame("NTC_ID"=ntcNames, "NTC"=ntcCounts)
+          }
+          return(ntcDF)
+        })
+        if (length(ntcs) > 1L) {
+          ntcData <- do.call(rbind, ntcData)
+        } else {
+          ntcData <- ntcData[[1L]]
         }
+        pheno <- cbind(pheno, ntcData)
+        pheno <- pheno[!rownames(pheno) %in% ntcs, ]
+        assay <- assay[!names(assay) %in% unique(pheno[["NTC_ID"]])]
+        data <- data[!names(data) %in% unique(pheno[["NTC_ID"]])]
+        protocolDataColNames <- c(protocolDataColNames, "NTC_ID", "NTC")
+      }
     }
     rownames(pheno) <- pheno[[j]]
     zeroReads <- names(which(lapply(assay, length) == 0L))
     if (length(zeroReads) > 0L) {
-        warning("The following DCC files had no counts: ",
-                paste0(zeroReads, sep=", "),
-                "These will be excluded from the GeoMxSet object.")
-        pheno <- pheno[!rownames(pheno) %in% zeroReads, ]
-        assay <- assay[!names(assay) %in% zeroReads]
-        data <- data[!names(data) %in% zeroReads]
+      warning("The following DCC files had no counts: ",
+              paste0(zeroReads, sep=", "),
+              "These will be excluded from the GeoMxSet object.")
+      pheno <- pheno[!rownames(pheno) %in% zeroReads, ]
+      assay <- assay[!names(assay) %in% zeroReads]
+      data <- data[!names(data) %in% zeroReads]
     }
     missingDCCFiles <- pheno[[j]][!pheno[[j]] %in% names(assay)]
     missingPhenoData <- names(assay)[!names(assay) %in% pheno[[j]]]
@@ -127,24 +127,24 @@ function(dccFiles,
     }
 
     pheno <- Biobase::AnnotatedDataFrame(pheno,
-                                dimLabels = c("sampleNames", "sampleColumns"))
+                                         dimLabels = c("sampleNames", "sampleColumns"))
   }
-
+  
   #stopifnot(all(sapply(feature, function(x) identical(feature[[1L]], x))))
   if (is.null(pkcFiles)) {
     stop("Please specify an input for pkcFiles")
   } else if (!is.null(pkcFiles)) {
     pkcData <- readPKCFile(pkcFiles, default_pkc_vers=defaultPKCVersions)
-
+    
     pkcHeader <- S4Vectors::metadata(pkcData)
     # pkcHeader[["PKCFileDate"]] <- as.character(pkcHeader[["PKCFileDate"]])
-
+    
     pkcData$RTS_ID <- gsub("RNA", "RTS00", pkcData$RTS_ID)
-
+    
     pkcData <- as.data.frame(pkcData)
     rownames(pkcData) <- pkcData[["RTS_ID"]]
   }
-
+  
   probeAssay <- lapply(names(data), function(x)
     data.frame(data[[x]][["Code_Summary"]],
                Sample_ID = x))
@@ -153,10 +153,10 @@ function(dccFiles,
   missingProbes <- setdiff(unique(probeAssay[["RTS_ID"]]), rownames(pkcData))
   if (length(missingProbes) > 0L){
     warning("Not all probes are found within PKC probe metadata.",
-    " The following probes are ignored from analysis",
-    " and were most likely removed from metadata while",
-    " resolving multiple module PKC version conflicts.\n",
-    paste(missingProbes, sep=", "))
+            " The following probes are ignored from analysis",
+            " and were most likely removed from metadata while",
+            " resolving multiple module PKC version conflicts.\n",
+            paste(missingProbes, sep=", "))
   }
 
   if(!is.null(configFile)){
@@ -187,6 +187,13 @@ function(dccFiles,
     pkcHeader[[i]] <- pkcHeader[[i]][names(pkcHeader[[i]]) %in% pkcs]
   }
 
+  # Handle older assay probe labels
+  if (any(startsWith(pkcData[["RTS_ID"]][1L], "RTS")) &
+      any(startsWith(probeAssay[["RTS_ID"]][1L], "RNA"))) {
+    # replace RNA with RTS00 in probeAssay[["RTS_ID"]]
+    probeAssay[["RTS_ID"]] <- gsub("^RNA", "RTS00", probeAssay[["RTS_ID"]])
+  }
+
   probeAssay <- probeAssay[probeAssay[["RTS_ID"]] %in% pkcData[["RTS_ID"]],]
 
   zeroProbes <- setdiff(rownames(pkcData), unique(probeAssay[["RTS_ID"]]))
@@ -206,8 +213,7 @@ function(dccFiles,
 
     data <- data[which(names(data) %in% names(probeAssay))]
 
-    pheno <- pheno[which(names(data) %in% 
-                           sampleNames(pheno)),]
+    pheno <- pheno[names(data),]
   }
 
   assay <- as.matrix(probeAssay[, names(data)])