|
7 | 7 | #' @param integerResponse if T, noise will be added to the residuals to maintain a uniform expectation for integer responses (such as Poisson or Binomial). Usually, the model will automatically detect the appropriate setting, so there is no need to adjust this setting.
|
8 | 8 | #' @param plot if T, \code{\link{plotSimulatedResiduals}} will be directly run after the simulations have terminated
|
9 | 9 | #' @param ... parameters to pass to the simulate function of the model object. An important use of this is to specify whether simulations should be conditional on the current random effect estimates. See details.
|
10 |
| -#' @param seed the random seed. The default setting, recommended for any type of data analysis, is to reset the random number generator each time the function is run, meaning that you will always get the same result when running the same code. Setting seed = NA avoids the reset. This is only recommended for simulation experiments. See vignette for details. |
11 |
| -#' @return A list with various objects. The most important are scaledResiduals, which contain the scaled residuals, and scaledResidualsNormal, which are the the scaled residuals transformed to a normal distribution |
| 10 | +#' @param seed the random seed. The default setting, recommended for any type of data analysis, is to reset the random number generator each time the function is run, meaning that you will always get the same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. F = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details. |
| 11 | +#' @return A list with various objects. The most important are scaledResiduals, which contain the scaled residuals, and scaledResidualsNormal, which are the scaled residuals transformed to a normal distribution. |
12 | 12 | #' @details There are a number of important considerations when simulating from a more complex (hierarchical) model.
|
13 | 13 | #'
|
14 | 14 | #' \strong{Re-simulating random effects / hierarchical structure}: the first is that in a hierarchical model, several layers of stochasticity are aligned on top of each other. Specifically, in a GLMM, we have a lower level stochastic process (random effect), whose result enters into a higher level (e.g. Poisson distribution). For other hierarchical models such as state-space models, similar considerations apply. When simulating, we have to decide if we want to re-simulate all stochastic levels, or only a subset of those. For example, in a GLMM, it is common to only simulate the last stochastic level (e.g. Poisson) conditional on the fitted random effects.
|
|
29 | 29 | #'
|
30 | 30 | #' \strong{How many simulations}: about the choice of n: my simulations didn't show major problems with a small n (if you get down to the order of a few 10, you will start seeing discretization artifacts from the empirical cumulative density estimates though). The default of 250 seems safe to me. If you want to be on the safe side, choose a high value (e.g. 1000) for producing your definitive results.
|
31 | 31 | #'
|
32 |
| -#' @seealso \code{\link{testSimulatedResiduals}}, \code{\link{plotSimulatedResiduals}} |
| 32 | +#' @seealso \code{\link{testResiduals}}, \code{\link{plot.DHARMa}}, \code{\link{print.DHARMa}}, \code{\link{recalculateResiduals}} |
33 | 33 | #' @example inst/examples/simulateResidualsHelp.R
|
34 | 34 | #' @import stats
|
35 | 35 | #' @export
|
@@ -194,7 +194,7 @@ simulateResiduals <- function(fittedModel, n = 250, refit = F, integerResponse =
|
194 | 194 | }
|
195 | 195 |
|
196 | 196 | }
|
197 |
| - |
| 197 | + |
198 | 198 | ########### Wrapup ############
|
199 | 199 |
|
200 | 200 | out$scaledResidualsNormal = qnorm(out$scaledResiduals + 0.00 )
|
@@ -299,6 +299,64 @@ securityAssertion <- function(context = "Not provided", stop = F){
|
299 | 299 | }
|
300 | 300 |
|
301 | 301 |
|
#' Recalculate residuals with grouping
#'
#' The purpose of this function is to recalculate scaled residuals per group, based on the simulations done by \code{\link{simulateResiduals}}
#'
#' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}}
#' @param group group of each data point
#' @param aggregateBy function for the aggregation. Default is sum. This should only be changed if you know what you are doing. Note in particular that the expected residual distribution might not be flat any more if you choose general functions, such as sd etc.
#'
#' @return an object of class DHARMa, similar to what is returned by \code{\link{simulateResiduals}}, but with additional outputs for the new grouped calculations. Note that the relevant outputs are present twice in the object: first the grouped calculations (which is what named $ access returns), and later, under identical names, the original outputs. Moreover, there is a function 'aggregateByGroup', which can be used to aggregate predictor variables in the same way as the variables calculated here
#'
#' @example inst/examples/simulateResidualsHelp.R
#' @export
recalculateResiduals <- function(simulationOutput, group = NULL, aggregateBy = sum){

  # If residuals were already recalculated, start over from the original
  # (ungrouped) simulation output so that groupings do not stack
  if(!is.null(simulationOutput$original)) simulationOutput = simulationOutput$original

  out = list()

  # no grouping requested -> return the ungrouped output unchanged
  if(is.null(group)) return(simulationOutput)
  else group = as.factor(group)
  out$nGroups = nlevels(group)

  # aggregates a per-observation vector to one value per group
  aggregateByGroup <- function(x) aggregate(x, by = list(group), FUN = aggregateBy)[,2]

  out$observedResponse = aggregateByGroup(simulationOutput$observedResponse)
  out$fittedPredictedResponse = aggregateByGroup(simulationOutput$fittedPredictedResponse)
  # rows = groups, columns = simulations
  # NOTE(review): with a single group, apply() would return a plain vector and
  # the row indexing below would fail - presumably nGroups >= 2 in practice
  out$simulatedResponse = apply(simulationOutput$simulatedResponse, 2, aggregateByGroup)
  out$scaledResiduals = rep(NA, out$nGroups)

  if (simulationOutput$refit == FALSE){
    if(simulationOutput$integerResponse == TRUE){
      # Add uniform noise to break ties for integer responses. The noise vector
      # must have one entry per simulation (i.e. per column); using out$nGroups
      # here was a bug that caused silent recycling whenever the number of
      # groups differed from the number of simulations.
      nSim = ncol(out$simulatedResponse)
      for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$simulatedResponse[i,] + runif(nSim, -0.5, 0.5))(out$observedResponse[i] + runif(1, -0.5, 0.5))
    } else {
      for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$simulatedResponse[i,])(out$observedResponse[i])
    }
  ######## refit = TRUE ##################
  } else {

    out$refittedPredictedResponse <- apply(simulationOutput$refittedPredictedResponse, 2, aggregateByGroup)
    out$fittedResiduals = aggregateByGroup(simulationOutput$fittedResiduals)
    out$refittedResiduals = apply(simulationOutput$refittedResiduals, 2, aggregateByGroup)
    out$refittedPearsonResiduals = apply(simulationOutput$refittedPearsonResiduals, 2, aggregateByGroup)

    if(simulationOutput$integerResponse == TRUE){
      # same fix as above: noise length must match the number of refits (columns)
      nSim = ncol(out$refittedResiduals)
      for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$refittedResiduals[i,] + runif(nSim, -0.5, 0.5))(out$fittedResiduals[i] + runif(1, -0.5, 0.5))
    } else {
      for (i in 1:out$nGroups) out$scaledResiduals[i] <- ecdf(out$refittedResiduals[i,])(out$fittedResiduals[i])
    }
  }
  # hack - concatenating here leaves both the new (grouped) and the old outputs
  # in the result; named access returns the first match, i.e. the grouped values
  out$aggregateByGroup = aggregateByGroup
  out = c(out, simulationOutput)
  out$original = simulationOutput
  class(out) = "DHARMa"
  return(out)
}
| 358 | + |
| 359 | + |
302 | 360 |
|
303 | 361 |
|
304 | 362 |
|
|
0 commit comments