|
7 | 7 | #' @param integerResponse if T, noise will be added to the residuals to maintain a uniform expectation for integer responses (such as Poisson or Binomial). Usually, the model will automatically detect the appropriate setting, so there is no need to adjust this setting.
|
8 | 8 | #' @param plot if T, \code{\link{plotSimulatedResiduals}} will be directly run after the simulations have terminated
|
9 | 9 | #' @param ... parameters to pass to the simulate function of the model object. An important use of this is to specify whether simulations should be conditional on the current random effect estimates. See details.
|
10 |
| -#' @param seed the random seed. The default setting, recommended for any type of data analysis, is to reset the random number generator each time the function is run, meaning that you will always get the same result when running the same code. Setting seed = NA avoids the reset. This is only recommended for simulation experiments. See vignette for details. |
11 |
| -#' @return A list with various objects. The most important are scaledResiduals, which contain the scaled residuals, and scaledResidualsNormal, which are the the scaled residuals transformed to a normal distribution |
| 10 | +#' @param seed the random seed. The default setting, recommended for any type of data analysis, is to reset the random number generator each time the function is run, meaning that you will always get the same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. F = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details. |
| 11 | +#' @return A list with various objects. The most important are scaledResiduals, which contain the scaled residuals, and scaledResidualsNormal, which are the scaled residuals transformed to a normal distribution. |
12 | 12 | #' @details There are a number of important considerations when simulating from a more complex (hierarchical) model.
|
13 | 13 | #'
|
14 | 14 | #' \strong{Re-simulating random effects / hierarchical structure}: the first is that in a hierarchical model, several layers of stochasticity are aligned on top of each other. Specifically, in a GLMM, we have a lower level stochastic process (random effect), whose result enters into a higher level (e.g. Poisson distribution). For other hierarchical models such as state-space models, similar considerations apply. When simulating, we have to decide if we want to re-simulate all stochastic levels, or only a subset of those. For example, in a GLMM, it is common to only simulate the last stochastic level (e.g. Poisson) conditional on the fitted random effects.
|
|
29 | 29 | #'
|
30 | 30 | #' \strong{How many simulations}: about the choice of n: my simulations didn't show major problems with a small n (if you get down to the order of a few 10, you will start seeing discretization artifacts from the empirical cumulative density estimates though). The default of 250 seems safe to me. If you want to be on the safe side, choose a high value (e.g. 1000) for producing your definitive results.
|
31 | 31 | #'
|
32 |
| -#' @seealso \code{\link{testSimulatedResiduals}}, \code{\link{plotSimulatedResiduals}} |
| 32 | +#' @seealso \code{\link{testResiduals}}, \code{\link{plot.DHARMa}}, \code{\link{print.DHARMa}}, \code{\link{recalculateResiduals}} |
33 | 33 | #' @example inst/examples/simulateResidualsHelp.R
|
34 | 34 | #' @import stats
|
35 | 35 | #' @export
|
@@ -194,7 +194,7 @@ simulateResiduals <- function(fittedModel, n = 250, refit = F, integerResponse =
|
194 | 194 | }
|
195 | 195 |
|
196 | 196 | }
|
197 |
| - |
| 197 | + |
198 | 198 | ########### Wrapup ############
|
199 | 199 |
|
200 | 200 | out$scaledResidualsNormal = qnorm(out$scaledResiduals + 0.00 )
|
@@ -299,6 +299,64 @@ securityAssertion <- function(context = "Not provided", stop = F){
|
299 | 299 | }
|
300 | 300 |
|
301 | 301 |
|
#' Recalculate residuals with grouping
#'
#' The purpose of this function is to recalculate scaled residuals per group, based on the simulations done by \code{\link{simulateResiduals}}
#'
#' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}}
#' @param group group of each data point
#' @param aggregateBy function for the aggregation. Default is sum. This should only be changed if you know what you are doing. Note in particular that the expected residual distribution might not be flat any more if you choose general functions, such as sd etc.
#'
#' @return an object of class DHARMa, similar to what is returned by \code{\link{simulateResiduals}}, but with additional outputs for the new grouped calculations. Note that the relevant outputs are present twice in the object: first the grouped calculations (which is what named $ access returns), and later, under identical names, the original outputs. Moreover, there is a function 'aggregateByGroup', which can be used to aggregate predictor variables in the same way as the variables calculated here
#'
#' @example inst/examples/simulateResidualsHelp.R
#' @export
recalculateResiduals <- function(simulationOutput, group = NULL, aggregateBy = sum){

  # If residuals were already recalculated, start over from the original
  # (ungrouped) simulation output so that groupings do not stack
  if(!is.null(simulationOutput$original)) simulationOutput = simulationOutput$original

  out = list()

  # no grouping requested -> return the ungrouped output unchanged
  if(is.null(group)) return(simulationOutput)
  else group = as.factor(group)
  out$nGroups = nlevels(group)

  # aggregates a per-observation vector to one value per group
  aggregateByGroup <- function(x) aggregate(x, by = list(group), FUN = aggregateBy)[,2]

  out$observedResponse = aggregateByGroup(simulationOutput$observedResponse)
  out$fittedPredictedResponse = aggregateByGroup(simulationOutput$fittedPredictedResponse)
  # rows = groups, columns = simulations
  # NOTE(review): with a single group, apply() would return a plain vector and
  # the row indexing below would fail - presumably nGroups >= 2 in practice
  out$simulatedResponse = apply(simulationOutput$simulatedResponse, 2, aggregateByGroup)
  out$scaledResiduals = rep(NA, out$nGroups)

  if (simulationOutput$refit == FALSE){
    if(simulationOutput$integerResponse == TRUE){
      # Add uniform noise to break ties for integer responses. The noise vector
      # must have one entry per simulation (i.e. per column); using out$nGroups
      # here was a bug that caused silent recycling whenever the number of
      # groups differed from the number of simulations.
      nSim = ncol(out$simulatedResponse)
      for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$simulatedResponse[i,] + runif(nSim, -0.5, 0.5))(out$observedResponse[i] + runif(1, -0.5, 0.5))
    } else {
      for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$simulatedResponse[i,])(out$observedResponse[i])
    }
  ######## refit = TRUE ##################
  } else {

    out$refittedPredictedResponse <- apply(simulationOutput$refittedPredictedResponse, 2, aggregateByGroup)
    out$fittedResiduals = aggregateByGroup(simulationOutput$fittedResiduals)
    out$refittedResiduals = apply(simulationOutput$refittedResiduals, 2, aggregateByGroup)
    out$refittedPearsonResiduals = apply(simulationOutput$refittedPearsonResiduals, 2, aggregateByGroup)

    if(simulationOutput$integerResponse == TRUE){
      # same fix as above: noise length must match the number of refits (columns)
      nSim = ncol(out$refittedResiduals)
      for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$refittedResiduals[i,] + runif(nSim, -0.5, 0.5))(out$fittedResiduals[i] + runif(1, -0.5, 0.5))
    } else {
      for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$refittedResiduals[i,])(out$fittedResiduals[i])
    }
  }
  # hack - concatenating here leaves both the new (grouped) and the old outputs
  # in the result; named access returns the first match, i.e. the grouped values
  out$aggregateByGroup = aggregateByGroup
  out = c(out, simulationOutput)
  out$original = simulationOutput
  class(out) = "DHARMa"
  return(out)
}
| 358 | + |
| 359 | + |
302 | 360 |
|
303 | 361 |
|
304 | 362 |
|
|
0 commit comments