Skip to content

Commit

Permalink
Merge pull request #192 from Nanostring-Biostats/bioc3.19
Browse files Browse the repository at this point in the history
Bioc3.19
  • Loading branch information
mgrout81 authored Feb 5, 2024
2 parents bc74cef + 53b6c20 commit 09f3a07
Show file tree
Hide file tree
Showing 11 changed files with 355 additions and 301 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: GeomxTools
Title: NanoString GeoMx Tools
Description: Tools for NanoString Technologies GeoMx Technology. Package provides functions for reading in DCC and
PKC files based on an ExpressionSet derived object. Normalization and QC functions are also included.
Version: 3.7.1
Version: 3.7.2
Encoding: UTF-8
Authors@R: c(person("Maddy", "Griswold", email = "mgriswold@nanostring.com", role = c("cre", "aut")),
person("Nicole", "Ortogero", email = "nortogero@nanostring.com", role = c("aut")),
Expand Down Expand Up @@ -47,5 +47,5 @@ biocViews: GeneExpression, Transcription, CellBasedAssays, DataImport,
Normalization, Spatial
VignetteEngine: knitr::rmarkdown
VignetteBuilder: knitr
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
Config/testthat/edition: 3
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ importFrom(dplyr, bind_rows)
importFrom(utils, capture.output)
importFrom(utils, write.table)
importFrom("utils", "packageVersion")
importFrom(data.table, data.table, .SD)
importFrom(data.table, data.table, .SD, as.data.table)
importFrom(lmerTest, lmer)
importFrom(lmerTest, ls_means)
importFrom(parallel, mclapply)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# GeomxTools 3.7.2
* Seurat v5 coercion
* IPA data loading
* improved Proteogenomics error handling
* bug fix on area filtering

# GeomxTools 3.1.1
* Documentation correction

Expand Down
9 changes: 8 additions & 1 deletion R/NanoStringGeoMxSet-autoplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,20 @@ qcProteinSignal <- function(object, neg.names=NULL) {
snr <- snrOrder(object, neg.names)

protnames <- rownames(snr)
ylim <- range(log2(snr))
if(ylim[1L] == -Inf){
ylim[1L] <- 0
}
if(ylim[2L] == -Inf){
ylim[2L] <- 0
}

fig <- function(){
par(mar = c(11, 4, 2, 1))
boxplot(t(log2(snr)),
las = 2,
outline = FALSE,
ylim = range(log2(snr)),
ylim = ylim,
names = protnames,
ylab = "Log2 signal-to-background ratio",
cex.axis = .85 - 0.3 * (nrow(snr) > 60)
Expand Down
60 changes: 47 additions & 13 deletions R/coercions.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ NULL
#'
#' @importFrom Seurat CreateSeuratObject
#' @importFrom Seurat AddMetaData
#' @importFrom data.table as.data.table
#'
#' @export
#' @rdname as.Seurat
Expand Down Expand Up @@ -66,6 +67,8 @@ as.Seurat.NanoStringGeoMxSet <- function(x, ident = NULL, normData = NULL,
forceRaw == FALSE){
stop("It is NOT recommended to use Seurat's normalization for GeoMx data.
Normalize using GeomxTools::normalize() or set forceRaw to TRUE if you want to continue with Raw data")
}else if(length(grep(pattern = normFactor_names, names(sData(x)))) == 0){
message("Coercing raw data, it is NOT recommended to use Seurat's normalization for GeoMx data.")
}


Expand All @@ -77,25 +80,54 @@ as.Seurat.NanoStringGeoMxSet <- function(x, ident = NULL, normData = NULL,

QCMetrics <- "QCFlags"

seuratConvert <- suppressWarnings(Seurat::CreateSeuratObject(counts = assayDataElement(x, normData),
assay = "GeoMx",
project = expinfo(experimentData(x))[["title"]]))
seuratConvert <- suppressWarnings(Seurat::AddMetaData(object = seuratConvert,
metadata = sData(x)[,!colnames(sData(x)) %in%
c(sequencingMetrics,
QCMetrics)]))
seuratConvert@assays$GeoMx <- Seurat::AddMetaData(object = seuratConvert@assays$GeoMx,
metadata = fData(x))
meta <- as.data.frame(as.data.table(sData(x)[,!colnames(sData(x)) %in% c(sequencingMetrics, QCMetrics)]))

if(!is.null(ident)){
if(any(grepl("_", rownames(x)))){
rownames(x) <- gsub("_", "-", rownames(x))
message("Feature names cannot have underscores ('_'), replacing with dashes ('-')")
}

if(packageVersion("Seurat") < 5){
seuratConvert <- suppressWarnings(Seurat::CreateSeuratObject(counts = assayDataElement(x, normData),
assay = "GeoMx",
project = expinfo(experimentData(x))[["title"]]))
seuratConvert <- suppressWarnings(Seurat::AddMetaData(object = seuratConvert,
metadata = meta))
seuratConvert@assays$GeoMx <- Seurat::AddMetaData(object = seuratConvert@assays$GeoMx,
metadata = fData(x))

if(!is.null(ident)){
if(!ident %in% colnames(seuratConvert@meta.data)){
stop(paste0("ident \"", ident, "\" not found in GeoMxSet Object"))
stop(paste0("ident \"", ident, "\" not found in GeoMxSet Object"))
}

Seurat::Idents(seuratConvert) <- seuratConvert[[ident]]
}
}else{
projectName <- expinfo(experimentData(x))[["title"]]
if(projectName == ""){
projectName <- "GeoMx"
}

seuratConvert <- suppressWarnings(Seurat::CreateSeuratObject(counts = assayDataElement(x, "exprs"),
assay = "GeoMx",
project = projectName))
seuratConvert <- Seurat::SetAssayData(seuratConvert, layer = "data",
new.data = assayDataElement(x, normData))
seuratConvert <- suppressWarnings(Seurat::AddMetaData(object = seuratConvert,
metadata = meta))
seuratConvert@assays$GeoMx <- Seurat::AddMetaData(object = seuratConvert@assays$GeoMx,
metadata = fData(x))

if(!is.null(ident)){
if(!ident %in% colnames(seuratConvert@meta.data)){
stop(paste0("ident \"", ident, "\" not found in GeoMxSet Object"))
}

Seurat::Idents(seuratConvert) <- as.factor(seuratConvert@meta.data[[ident]])
}
}



seuratConvert@misc <- otherInfo(experimentData(x))
seuratConvert@misc[["sequencingMetrics"]] <- sData(x)[colnames(sData(x)) %in%
sequencingMetrics]
Expand Down Expand Up @@ -220,6 +252,8 @@ as.SpatialExperiment.NanoStringGeoMxSet <- function(x, normData = NULL,
forceRaw == FALSE){
stop("It is NOT recommended to use Seurat's normalization for GeoMx data.
Normalize using GeomxTools::normalize() or set forceRaw to TRUE if you want to continue with Raw data")
}else if(length(grep(pattern = normFactor_names, names(sData(x)))) == 0){
warning("Coercing raw data, it is NOT recommended to use Seurat's normalization for GeoMx data.")
}

sequencingMetrics <- c("FileVersion", "SoftwareVersion", "Date", "Plate_ID",
Expand Down
102 changes: 54 additions & 48 deletions R/readNanoStringGeoMxSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ function(dccFiles,
}
# Read data rccFiles
data <- structure(lapply(dccFiles, readDccFile), names = basename(dccFiles))

# Create assayData
assay <- lapply(data, function(x)
structure(x[["Code_Summary"]][["Count"]],
names = rownames(x[["Code_Summary"]])))

# Create phenoData
if (is.null(phenoDataFile)) {
stop("Please specify an input for phenoDataFile.")
Expand All @@ -41,7 +41,7 @@ function(dccFiles,
}
# check protocolDataColNames
if (!(all(protocolDataColNames %in% colnames(pheno))) &
!(is.null(protocolDataColNames))) {
!(is.null(protocolDataColNames))) {
stop("Columns specified in `protocolDataColNames` are not found in `phenoDataFile`")
}
# check experimentDataColNames
Expand All @@ -53,44 +53,44 @@ function(dccFiles,
pheno[[j]] <- ifelse(grepl(".dcc", pheno[[j]]), paste0(pheno[[j]]),
paste0(pheno[[j]], ".dcc"))
if ("slide name" %in% colnames(pheno)) {
ntcs <- which(tolower(pheno[["slide name"]]) == "no template control")
if (length(ntcs) > 0) {
ntcData <- lapply(seq_along(ntcs), function(x) {
ntcID <- pheno[ntcs[x], j]
if(!is.na(ntcs[x + 1L])) {
ntcNames <- rep(ntcID, ntcs[x + 1L] - ntcs[x])
ntcCounts <-
rep(sum(assay[[ntcID]]), ntcs[x + 1L] - ntcs[x])
ntcDF <- data.frame("NTC_ID"=ntcNames, "NTC"=ntcCounts)
} else {
ntcNames <- rep(ntcID, dim(pheno)[1L] - ntcs[x] + 1L)
ntcCounts <-
rep(sum(assay[[ntcID]]), dim(pheno)[1L] - ntcs[x] + 1L)
ntcDF <- data.frame("NTC_ID"=ntcNames, "NTC"=ntcCounts)
}
return(ntcDF)
})
if (length(ntcs) > 1L) {
ntcData <- do.call(rbind, ntcData)
} else {
ntcData <- ntcData[[1L]]
}
pheno <- cbind(pheno, ntcData)
pheno <- pheno[!rownames(pheno) %in% ntcs, ]
assay <- assay[!names(assay) %in% unique(pheno[["NTC_ID"]])]
data <- data[!names(data) %in% unique(pheno[["NTC_ID"]])]
protocolDataColNames <- c(protocolDataColNames, "NTC_ID", "NTC")
ntcs <- which(tolower(pheno[["slide name"]]) == "no template control")
if (length(ntcs) > 0) {
ntcData <- lapply(seq_along(ntcs), function(x) {
ntcID <- pheno[ntcs[x], j]
if(!is.na(ntcs[x + 1L])) {
ntcNames <- rep(ntcID, ntcs[x + 1L] - ntcs[x])
ntcCounts <-
rep(sum(assay[[ntcID]]), ntcs[x + 1L] - ntcs[x])
ntcDF <- data.frame("NTC_ID"=ntcNames, "NTC"=ntcCounts)
} else {
ntcNames <- rep(ntcID, dim(pheno)[1L] - ntcs[x] + 1L)
ntcCounts <-
rep(sum(assay[[ntcID]]), dim(pheno)[1L] - ntcs[x] + 1L)
ntcDF <- data.frame("NTC_ID"=ntcNames, "NTC"=ntcCounts)
}
return(ntcDF)
})
if (length(ntcs) > 1L) {
ntcData <- do.call(rbind, ntcData)
} else {
ntcData <- ntcData[[1L]]
}
pheno <- cbind(pheno, ntcData)
pheno <- pheno[!rownames(pheno) %in% ntcs, ]
assay <- assay[!names(assay) %in% unique(pheno[["NTC_ID"]])]
data <- data[!names(data) %in% unique(pheno[["NTC_ID"]])]
protocolDataColNames <- c(protocolDataColNames, "NTC_ID", "NTC")
}
}
rownames(pheno) <- pheno[[j]]
zeroReads <- names(which(lapply(assay, length) == 0L))
if (length(zeroReads) > 0L) {
warning("The following DCC files had no counts: ",
paste0(zeroReads, sep=", "),
"These will be excluded from the GeoMxSet object.")
pheno <- pheno[!rownames(pheno) %in% zeroReads, ]
assay <- assay[!names(assay) %in% zeroReads]
data <- data[!names(data) %in% zeroReads]
warning("The following DCC files had no counts: ",
paste0(zeroReads, sep=", "),
"These will be excluded from the GeoMxSet object.")
pheno <- pheno[!rownames(pheno) %in% zeroReads, ]
assay <- assay[!names(assay) %in% zeroReads]
data <- data[!names(data) %in% zeroReads]
}
missingDCCFiles <- pheno[[j]][!pheno[[j]] %in% names(assay)]
missingPhenoData <- names(assay)[!names(assay) %in% pheno[[j]]]
Expand Down Expand Up @@ -127,24 +127,24 @@ function(dccFiles,
}

pheno <- Biobase::AnnotatedDataFrame(pheno,
dimLabels = c("sampleNames", "sampleColumns"))
dimLabels = c("sampleNames", "sampleColumns"))
}

#stopifnot(all(sapply(feature, function(x) identical(feature[[1L]], x))))
if (is.null(pkcFiles)) {
stop("Please specify an input for pkcFiles")
} else if (!is.null(pkcFiles)) {
pkcData <- readPKCFile(pkcFiles, default_pkc_vers=defaultPKCVersions)

pkcHeader <- S4Vectors::metadata(pkcData)
# pkcHeader[["PKCFileDate"]] <- as.character(pkcHeader[["PKCFileDate"]])

pkcData$RTS_ID <- gsub("RNA", "RTS00", pkcData$RTS_ID)

pkcData <- as.data.frame(pkcData)
rownames(pkcData) <- pkcData[["RTS_ID"]]
}

probeAssay <- lapply(names(data), function(x)
data.frame(data[[x]][["Code_Summary"]],
Sample_ID = x))
Expand All @@ -153,10 +153,10 @@ function(dccFiles,
missingProbes <- setdiff(unique(probeAssay[["RTS_ID"]]), rownames(pkcData))
if (length(missingProbes) > 0L){
warning("Not all probes are found within PKC probe metadata.",
" The following probes are ignored from analysis",
" and were most likely removed from metadata while",
" resolving multiple module PKC version conflicts.\n",
paste(missingProbes, sep=", "))
" The following probes are ignored from analysis",
" and were most likely removed from metadata while",
" resolving multiple module PKC version conflicts.\n",
paste(missingProbes, sep=", "))
}

if(!is.null(configFile)){
Expand Down Expand Up @@ -187,6 +187,13 @@ function(dccFiles,
pkcHeader[[i]] <- pkcHeader[[i]][names(pkcHeader[[i]]) %in% pkcs]
}

# Handle older assay probe labels
if (any(startsWith(pkcData[["RTS_ID"]][1L], "RTS")) &
any(startsWith(probeAssay[["RTS_ID"]][1L], "RNA"))) {
# replace RNA with RTS00 in probeAssay[["RTS_ID"]]
probeAssay[["RTS_ID"]] <- gsub("^RNA", "RTS00", probeAssay[["RTS_ID"]])
}

probeAssay <- probeAssay[probeAssay[["RTS_ID"]] %in% pkcData[["RTS_ID"]],]

zeroProbes <- setdiff(rownames(pkcData), unique(probeAssay[["RTS_ID"]]))
Expand All @@ -206,8 +213,7 @@ function(dccFiles,

data <- data[which(names(data) %in% names(probeAssay))]

pheno <- pheno[which(names(data) %in%
sampleNames(pheno)),]
pheno <- pheno[names(data),]
}

assay <- as.matrix(probeAssay[, names(data)])
Expand Down
Loading

0 comments on commit 09f3a07

Please sign in to comment.