Skip to contents

We compare the Monte Carlo (MC) method with nonparametric bootstrapping (NB) using the simple mediation model with missing data. One advantage of MC over NB is speed. This is because the model is only fitted once in MC whereas it is fitted many times in NB.

Data

n <- 1000
a <- 0.50
b <- 0.50
cp <- 0.25
s2_em <- 1 - a^2
s2_ey <- 1 - cp^2 - a^2 * b^2 - b^2 * s2_em - 2 * cp * a * b
em <- rnorm(n = n, mean = 0, sd = sqrt(s2_em))
ey <- rnorm(n = n, mean = 0, sd = sqrt(s2_ey))
X <- rnorm(n = n)
M <- a * X + em
Y <- cp * X + b * M + ey
df <- data.frame(X, M, Y)

# Create data set with missing values.

miss <- sample(1:dim(df)[1], 300)
df[miss[1:100], "X"] <- NA
df[miss[101:200], "M"] <- NA
df[miss[201:300], "Y"] <- NA

Model Specification

The indirect effect is defined by the product of the slopes of paths X to M labeled as a and M to Y labeled as b. In this example, we are interested in the confidence intervals of indirect defined as the product of a and b using the := operator in the lavaan model syntax.

model <- "
  Y ~ cp * X + b * M
  M ~ a * X
  indirect := a * b
  direct := cp
  total := cp + (a * b)
"

Model Fitting

We can now fit the model using the sem() function from lavaan. We are using missing = "fiml" to handle missing data in lavaan. Since there are missing values in x, we also set fixed.x = FALSE.

fit <- sem(data = df, model = model, missing = "fiml", fixed.x = FALSE)

Monte Carlo Confidence Intervals

The fit lavaan object can then be passed to the MC() function from semmcci to generate Monte Carlo confidence intervals.

MC(fit, R = 5000L, alpha = c(0.001, 0.01, 0.05))
#> Monte Carlo Confidence Intervals
#>              est     se    R   0.05%    0.5%    2.5%  97.5%  99.5% 99.95%
#> cp        0.2335 0.0292 5000  0.1392  0.1575  0.1762 0.2907 0.3082 0.3251
#> b         0.5112 0.0293 5000  0.4213  0.4364  0.4526 0.5680 0.5858 0.6041
#> a         0.4809 0.0291 5000  0.3869  0.4057  0.4241 0.5378 0.5557 0.5776
#> Y~~Y      0.5542 0.0272 5000  0.4648  0.4816  0.5020 0.6078 0.6239 0.6439
#> M~~M      0.7564 0.0369 5000  0.6297  0.6595  0.6813 0.8289 0.8526 0.8816
#> X~~X      1.0591 0.0500 5000  0.9004  0.9322  0.9621 1.1564 1.1831 1.2139
#> Y~1      -0.0127 0.0252 5000 -0.0938 -0.0794 -0.0609 0.0370 0.0531 0.0669
#> M~1      -0.0223 0.0294 5000 -0.1170 -0.0999 -0.0794 0.0347 0.0503 0.0681
#> X~1       0.0025 0.0338 5000 -0.1118 -0.0862 -0.0637 0.0678 0.0870 0.1070
#> indirect  0.2458 0.0203 5000  0.1820  0.1951  0.2067 0.2863 0.2999 0.3164
#> direct    0.2335 0.0292 5000  0.1392  0.1575  0.1762 0.2907 0.3082 0.3251
#> total     0.4794 0.0285 5000  0.3905  0.4058  0.4230 0.5342 0.5492 0.5712

Nonparametric Bootstrap Confidence Intervals

Nonparametric bootstrap confidence intervals can be generated in lavaan using the following.

parameterEstimates(
  sem(
    data = df,
    model = model,
    missing = "fiml",
    fixed.x = FALSE,
    se = "bootstrap",
    bootstrap = 5000L
  )
)
#>         lhs op      rhs    label    est    se      z pvalue ci.lower ci.upper
#> 1         Y  ~        X       cp  0.234 0.031  7.623  0.000    0.174    0.294
#> 2         Y  ~        M        b  0.511 0.031 16.393  0.000    0.449    0.572
#> 3         M  ~        X        a  0.481 0.029 16.660  0.000    0.424    0.536
#> 4         Y ~~        Y           0.554 0.027 20.425  0.000    0.500    0.607
#> 5         M ~~        M           0.756 0.037 20.459  0.000    0.683    0.828
#> 6         X ~~        X           1.059 0.052 20.548  0.000    0.957    1.162
#> 7         Y ~1                   -0.013 0.026 -0.496  0.620   -0.062    0.038
#> 8         M ~1                   -0.022 0.030 -0.752  0.452   -0.079    0.037
#> 9         X ~1                    0.002 0.033  0.075  0.940   -0.063    0.067
#> 10 indirect :=      a*b indirect  0.246 0.021 11.866  0.000    0.205    0.286
#> 11   direct :=       cp   direct  0.234 0.031  7.623  0.000    0.174    0.294
#> 12    total := cp+(a*b)    total  0.479 0.030 16.240  0.000    0.420    0.538

Benchmark

Arguments

Variables Values Notes
R 5000 Number of Monte Carlo replications.
B 5000 Number of bootstrap samples.
benchmark01 <- microbenchmark(
  MC = {
    fit <- sem(
      data = df,
      model = model,
      missing = "fiml",
      fixed.x = FALSE
    )
    MC(
      fit,
      R = R,
      decomposition = "chol",
      pd = FALSE
    )
  },
  NB = sem(
    data = df,
    model = model,
    missing = "fiml",
    fixed.x = FALSE,
    se = "bootstrap",
    bootstrap = B
  ),
  times = 10
)

Summary of Benchmark Results

summary(benchmark01, unit = "ms")
#>   expr        min          lq        mean      median          uq         max
#> 1   MC    151.262    154.3726    162.7734    161.6727    173.1449    174.6402
#> 2   NB 110357.367 112509.2133 113260.2814 113023.2633 113432.4098 116819.5372
#>   neval
#> 1    10
#> 2    10

Summary of Benchmark Results Relative to the Faster Method

summary(benchmark01, unit = "relative")
#>   expr      min       lq     mean  median       uq      max neval
#> 1   MC   1.0000   1.0000   1.0000   1.000   1.0000   1.0000    10
#> 2   NB 729.5775 728.8161 695.8157 699.087 655.1299 668.9157    10

Plot

Benchmark - Monte Carlo Method with Precalculated Estimates

fit <- sem(
  data = df,
  model = model,
  missing = "fiml",
  fixed.x = FALSE
)
benchmark02 <- microbenchmark(
  MC = MC(
    fit,
    R = R,
    decomposition = "chol",
    pd = FALSE
  ),
  NB = sem(
    data = df,
    model = model,
    missing = "fiml",
    fixed.x = FALSE,
    se = "bootstrap",
    bootstrap = B
  ),
  times = 10
)

Summary of Benchmark Results

summary(benchmark02, unit = "ms")
#>   expr         min           lq        mean       median           uq
#> 1   MC     78.1039     79.45192     81.9369     81.42359     84.04895
#> 2   NB 105536.3963 105773.98855 106017.0123 105872.57857 106152.12996
#>            max neval
#> 1     87.29001    10
#> 2 107049.30634    10

Summary of Benchmark Results Relative to the Faster Method

summary(benchmark02, unit = "relative")
#>   expr      min       lq     mean   median      uq      max neval
#> 1   MC    1.000    1.000    1.000    1.000    1.00    1.000    10
#> 2   NB 1351.231 1331.296 1293.886 1300.269 1262.98 1226.364    10

Plot