Growth Curve Analysis: Exercise Solutions

Dan Mirman

28-29 November 2019

Exercise Solutions

Exercise 1A solution: WISQARS suicide data

# Center time so that Time = 0 corresponds to 1999, the first year in the
# data, making the intercept the estimated baseline suicide rate.
wisqars.suicide$Time <- wisqars.suicide$Year - 1999
# Base model: linear change over Time with by-State random intercepts and
# slopes. REML=F (maximum likelihood) so models differing in fixed effects
# can be compared with likelihood ratio tests below.
m.base <- lmer(Crude.Rate ~ Time + (Time | State), data = wisqars.suicide, REML=F)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0128235
## (tol = 0.002, component 1)
# Add a Region main effect: tests whether regions differ in baseline rate.
m.0 <- lmer(Crude.Rate ~ Time + Region + (Time | State), data = wisqars.suicide, REML=F)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00641891
## (tol = 0.002, component 1)
# Add the Time-by-Region interaction: tests whether regions differ in
# their rate of change over time.
m.1 <- lmer(Crude.Rate ~ Time * Region + (Time | State), data = wisqars.suicide, REML=F)
# Likelihood ratio tests of the nested model sequence: Region improves fit
# (m.0 vs. m.base), the interaction does not (m.1 vs. m.0).
anova(m.base, m.0, m.1) 
## Data: wisqars.suicide
## Models:
## m.base: Crude.Rate ~ Time + (Time | State)
## m.0: Crude.Rate ~ Time + Region + (Time | State)
## m.1: Crude.Rate ~ Time * Region + (Time | State)
##        Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)    
## m.base  6 1426 1451   -707     1414                            
## m.0     9 1400 1437   -691     1382 32.43      3    4.3e-07 ***
## m.1    12 1402 1452   -689     1378  3.57      3       0.31    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Regions differed in baseline suicide rate, but did not differ in rate of change in suicide rate.

Exercise 1A solution: WISQARS suicide data plot

# Observed data (points: mean +/- SE per year) with model-based fit lines
# (means of fitted values from the full model m.1), colored by region.
ggplot(wisqars.suicide, aes(Year, Crude.Rate, color=Region)) + 
  stat_summary(fun.data=mean_se, geom="pointrange") + 
  # `fun` replaces `fun.y`, which was deprecated in ggplot2 3.3.0
  stat_summary(aes(y=fitted(m.1)), fun=mean, geom="line") + 
  scale_x_continuous(breaks=1999:2007)

Exercise 1B solution: Naming recovery data

# Unconditional model: no fixed effect of time, but the full random-effect
# structure (by-subject TestTime slopes) is kept so all models in the
# sequence share the same random effects. REML=F for LRT comparison.
nr.null <- lmer(Correct ~ 1 + (TestTime | SubjectID), data = NamingRecovery, REML=F)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0022 (tol
## = 0.002, component 1)
# Add a fixed effect of TestTime: tests for overall recovery over time.
nr.base <- lmer(Correct ~ TestTime + (TestTime | SubjectID), data = NamingRecovery, REML=F)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00403936
## (tol = 0.002, component 1)
# Add a Diagnosis main effect: tests for baseline differences between
# aphasia sub-types.
nr.0 <- lmer(Correct ~ TestTime + Diagnosis + (TestTime | SubjectID), data = NamingRecovery, REML=F)
# Add the TestTime-by-Diagnosis interaction: tests for sub-type
# differences in amount of recovery.
nr.1 <- lmer(Correct ~ TestTime * Diagnosis + (TestTime | SubjectID), data = NamingRecovery, REML=F)
# Likelihood ratio tests: recovery (nr.base) and baseline differences
# (nr.0) improve fit; the interaction (nr.1) does not.
anova(nr.null, nr.base, nr.0, nr.1) 
## Data: NamingRecovery
## Models:
## nr.null: Correct ~ 1 + (TestTime | SubjectID)
## nr.base: Correct ~ TestTime + (TestTime | SubjectID)
## nr.0: Correct ~ TestTime + Diagnosis + (TestTime | SubjectID)
## nr.1: Correct ~ TestTime * Diagnosis + (TestTime | SubjectID)
##         Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)    
## nr.null  5 -126 -112   67.8     -136                            
## nr.base  6 -146 -130   79.2     -158 22.84      1    1.8e-06 ***
## nr.0     8 -151 -129   83.6     -167  8.72      2      0.013 *  
## nr.1    10 -147 -120   83.7     -167  0.26      2      0.880    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

There was overall recovery and the sub-types differed in baseline naming ability, but the sub-types did not differ in amount of recovery.

Exercise 1B solution: Naming recovery plot

# Observed data (points: mean +/- SE) with fit lines from the full model
# (nr.1), colored by diagnosis sub-type.
ggplot(NamingRecovery, aes(TestTime, Correct, color=Diagnosis)) + 
  stat_summary(fun.data=mean_se, geom="pointrange") + 
  # `fun` replaces `fun.y`, which was deprecated in ggplot2 3.3.0
  stat_summary(aes(y=fitted(nr.1)), fun=mean, geom="line")

Exercise 2 Solution

CP (d’ peak at category boundary): Compare categorical perception along spectral vs. temporal dimensions using second-order orthogonal polynomials.

# Descriptive summary: two continuum Types (Temporal, Spectral; 128 rows
# each), stimulus steps 1-8, and d' as the outcome. The counts imply
# 8 observations per participant -- presumably 32 participants; confirm
# against the raw data.
summary(CP)
##   Participant        Type        Stimulus       d.prime     
##  1      :  8   Temporal:128   Min.   :1.00   Min.   :-1.41  
##  2      :  8   Spectral:128   1st Qu.:2.75   1st Qu.: 0.40  
##  3      :  8                  Median :4.50   Median : 1.18  
##  4      :  8                  Mean   :4.50   Mean   : 1.34  
##  5      :  8                  3rd Qu.:6.25   3rd Qu.: 2.22  
##  6      :  8                  Max.   :8.00   Max.   : 4.77  
##  (Other):208
# Observed data: mean +/- SE of d' at each stimulus step, by continuum type.
ggplot(CP, aes(Stimulus, d.prime, color=Type)) + 
  stat_summary(fun.data=mean_se, geom="pointrange")

Exercise 2 Solution: Fit the models

# prep for analysis
# code_poly adds second-order orthogonal polynomial predictors of Stimulus
# (poly1 = linear, poly2 = quadratic) as columns of CP.
CP <- code_poly(CP, predictor="Stimulus", poly.order = 2, draw.poly = FALSE)
# fit the models
# Base: linear + quadratic stimulus effects, no Type effects; by-participant
# random effects on all polynomial terms. REML=F for LRT comparison.
m.base <- lmer(d.prime ~ (poly1 + poly2) + (poly1 + poly2 | Participant), data=CP, REML=F)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0201756
## (tol = 0.002, component 1)
# Add a Type main effect: overall d' difference between dimensions.
m.0 <- lmer(d.prime ~ (poly1 + poly2) + Type + (poly1 + poly2 | Participant), data=CP, REML=F)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00438586
## (tol = 0.002, component 1)
# Add Type on the linear term only.
m.1 <- lmer(d.prime ~ poly1*Type + poly2 + (poly1 + poly2 | Participant), data=CP, REML=F)
## boundary (singular) fit: see ?isSingular
# Add Type on the quadratic term as well -- the quadratic (peaked) shape
# is the signature of categorical perception, so this is the key test.
m.2 <- lmer(d.prime ~ (poly1 + poly2)*Type + (poly1 + poly2 | Participant), data=CP, REML=F)
# Likelihood ratio tests: only the quadratic-by-Type effect (m.2)
# significantly improves fit.
anova(m.base, m.0, m.1, m.2)
## Data: CP
## Models:
## m.base: d.prime ~ (poly1 + poly2) + (poly1 + poly2 | Participant)
## m.0: d.prime ~ (poly1 + poly2) + Type + (poly1 + poly2 | Participant)
## m.1: d.prime ~ poly1 * Type + poly2 + (poly1 + poly2 | Participant)
## m.2: d.prime ~ (poly1 + poly2) * Type + (poly1 + poly2 | Participant)
##        Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)  
## m.base 10 817 852   -398      797                          
## m.0    11 819 858   -398      797  0.09      1      0.767  
## m.1    12 821 863   -398      797  0.30      1      0.583  
## m.2    13 817 863   -396      791  5.30      1      0.021 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Exercise 2 Solution: Get p-values

# Coefficient table for the full model; method = "all" reports p-values
# from both the normal approximation (p.normal) and Kenward-Roger
# approximate degrees of freedom (df.KR, p.KR). The poly2:TypeSpectral
# term shows the quadratic (peak) shape differs between dimensions.
get_pvalues(m.2, method = "all")
##                    Estimate Std..Error t.value  p.normal p.normal.star
## (Intercept)         1.40491     0.1087 12.9248 0.000e+00           ***
## poly1              -0.43154     0.2753 -1.5678 1.169e-01              
## poly2              -1.58482     0.3848 -4.1185 3.814e-05           ***
## TypeSpectral       -0.12523     0.1537 -0.8147 4.153e-01              
## poly1:TypeSpectral -0.06156     0.3893 -0.1581 8.744e-01              
## poly2:TypeSpectral  1.30526     0.5442  2.3985 1.646e-02             *
##                    df.KR      p.KR p.KR.star
## (Intercept)           30 8.527e-14       ***
## poly1                 30 1.274e-01          
## poly2                 30 2.754e-04       ***
## TypeSpectral          30 4.217e-01          
## poly1:TypeSpectral    30 8.754e-01          
## poly2:TypeSpectral    30 2.288e-02         *

Exercise 2 Solution: Plot model fit

# Observed data (points: mean +/- SE) with fit lines from the full model
# (m.2), colored by continuum type.
ggplot(CP, aes(Stimulus, d.prime, color=Type)) + 
  stat_summary(fun.data=mean_se, geom="pointrange") + 
  # `fun` replaces `fun.y`, which was deprecated in ggplot2 3.3.0
  stat_summary(aes(y=fitted(m.2)), fun=mean, geom="line")

Exercise 3 Solution

Az: Use natural (not orthogonal) polynomials to analyze decline in performance of 30 individuals with probable Alzheimer’s disease on three different kinds of tasks.

# Descriptive summary: Performance on three Tasks (cADL, sADL, Memory; 300
# rows each) measured at Time 1-10. The counts imply 30 rows per subject
# (3 tasks x 10 time points) for 30 subjects -- confirm against raw data.
summary(Az)
##     Subject         Time          Task      Performance  
##  1      : 30   Min.   : 1.0   cADL  :300   Min.   : 2.0  
##  2      : 30   1st Qu.: 3.0   sADL  :300   1st Qu.:40.0  
##  3      : 30   Median : 5.5   Memory:300   Median :52.0  
##  4      : 30   Mean   : 5.5                Mean   :49.3  
##  5      : 30   3rd Qu.: 8.0                3rd Qu.:61.0  
##  6      : 30   Max.   :10.0                Max.   :85.0  
##  (Other):720
# Observed data: mean +/- SE ribbons with mean lines, by task.
ggplot(Az, aes(Time, Performance, color=Task, fill=Task)) + 
  stat_summary(fun.data=mean_se, geom="ribbon", color=NA, alpha=0.5) +
  # `fun` replaces `fun.y`, which was deprecated in ggplot2 3.3.0
  stat_summary(fun=mean, geom="line")

Exercise 3 Solution: Fit the models

# prep for analysis
# prep for analysis
# Natural (orthogonal=F) second-order polynomial of Time, so the poly1
# (linear) and poly2 (quadratic) coefficients are interpretable on the
# raw Time scale rather than as orthogonal contrasts.
Az <- code_poly(Az, predictor="Time", poly.order=2, orthogonal=F, draw.poly = F)
# fit the full model
# The incremental model sequence is kept below for reference but commented
# out; only the full model is fit here.
# m.base <- lmer(Performance ~ (poly1 + poly2) + 
#                  (poly1 + poly2 | Subject) + (poly1 + poly2 | Subject:Task), 
#                data=Az, REML=F)
# m.0 <- lmer(Performance ~ (poly1 + poly2) + Task + 
#               (poly1 + poly2 | Subject) + (poly1 + poly2 | Subject:Task), 
#             data=Az, REML=F)
# m.1 <- lmer(Performance ~ poly1*Task + poly2 + 
#               (poly1 + poly2 | Subject) + (poly1 + poly2 | Subject:Task), 
#             data=Az, REML=F)
# Full model: Task effects on intercept, linear, and quadratic terms, with
# polynomial random effects both by Subject and by Task-within-Subject
# (Subject:Task).
m.Az.full <- lmer(Performance ~ (poly1 + poly2)*Task + 
                  (poly1 + poly2 | Subject) + (poly1 + poly2 | Subject:Task), 
                data=Az, REML=F)
## boundary (singular) fit: see ?isSingular
# anova(m.base, m.0, m.1, m.Az.full)

Exercise 3 Solution: Get p-values

# Coefficient table with normal-approximation p-values (default method).
# cADL is the reference task level (only sADL and Memory terms appear), so
# Task terms test differences relative to cADL.
get_pvalues(m.Az.full)
##                  Estimate Std..Error  t.value  p.normal p.normal.star
## (Intercept)      67.16167    0.93074  72.1595 0.000e+00           ***
## poly1            -3.28780    0.34179  -9.6193 0.000e+00           ***
## poly2             0.00947    0.01243   0.7615 4.463e-01              
## TasksADL          0.09500    0.81172   0.1170 9.068e-01              
## TaskMemory        1.24000    0.81172   1.5276 1.266e-01              
## poly1:TasksADL    1.36220    0.26754   5.0916 3.550e-07           ***
## poly1:TaskMemory -3.97707    0.26754 -14.8655 0.000e+00           ***
## poly2:TasksADL   -0.01326    0.01748  -0.7585 4.482e-01              
## poly2:TaskMemory  0.33889    0.01748  19.3885 0.000e+00           ***

Exercise 3 Solution: Plot model fit

# Observed data (points: mean +/- SE) with fit lines from the full model.
ggplot(Az, aes(Time, Performance, color=Task)) + 
  stat_summary(fun.data=mean_se, geom="pointrange") + 
  # `fun` replaces `fun.y`, which was deprecated in ggplot2 3.3.0
  stat_summary(fun=mean, geom="line", aes(y=fitted(m.Az.full)))

Exercise 4 Solution

# Refit the full wisqars model (Time-by-Region interaction) and inspect
# the coefficient table. Northeast is the reference region (only South,
# Midwest, and West terms appear); pairwise region comparisons follow.
m.wisqars <- lmer(Crude.Rate ~ Time * Region + (Time | State), data = wisqars.suicide, REML=F)
get_pvalues(m.wisqars)
##                    Estimate Std..Error t.value  p.normal p.normal.star
## (Intercept)         9.07573    0.76130 11.9214 0.000e+00           ***
## Time                0.08428    0.04725  1.7838 7.446e-02             .
## RegionSouth         2.22908    0.94149  2.3676 1.790e-02             *
## RegionMidwest       1.59681    1.00710  1.5855 1.128e-01              
## RegionWest          6.10760    0.99036  6.1670 6.958e-10           ***
## Time:RegionSouth    0.01468    0.05843  0.2513 8.016e-01              
## Time:RegionMidwest  0.10122    0.06250  1.6195 1.053e-01              
## Time:RegionWest     0.05906    0.06146  0.9608 3.366e-01

Exercise 4 Solution: Differences in baseline suicide rate

# Pairwise comparisons of baseline (intercept) differences between regions.
# Each row weights the 8 fixed effects in model order: (Intercept), Time,
# RegionSouth, RegionMidwest, RegionWest, Time:RegionSouth,
# Time:RegionMidwest, Time:RegionWest. A single 1 on a Region term
# compares that region to the reference (Northeast); a -1/1 pair compares
# two non-reference regions.
# NOTE(review): the sign convention is not uniform -- "S vs. MW" computes
# MW - S while "MW vs. W" computes MW - W. Only the sign of the estimate
# is affected, not the test itself.
contrast.matrix.int = rbind(
  "NE vs. S" = c(0, 0, 1, 0, 0, 0, 0, 0), 
  "NE vs. MW" = c(0, 0, 0, 1, 0, 0, 0, 0),
  "NE vs. W" = c(0, 0, 0, 0, 1, 0, 0, 0),
  "S vs. MW" = c(0, 0, -1, 1, 0, 0, 0, 0),
  "S vs. W" = c(0, 0, -1, 0, 1, 0, 0, 0),
  "MW vs. W" = c(0, 0, 0, 1, -1, 0, 0, 0))
# glht tests each linear combination of fixed effects; adjusted("none")
# reports unadjusted (per-comparison) p-values.
comps.int <- glht(m.wisqars, contrast.matrix.int)
summary(comps.int, test = adjusted("none"))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: lmer(formula = Crude.Rate ~ Time * Region + (Time | State), data = wisqars.suicide, 
##     REML = F)
## 
## Linear Hypotheses:
##                Estimate Std. Error z value Pr(>|z|)    
## NE vs. S == 0     2.229      0.941    2.37    0.018 *  
## NE vs. MW == 0    1.597      1.007    1.59    0.113    
## NE vs. W == 0     6.108      0.990    6.17  7.0e-10 ***
## S vs. MW == 0    -0.632      0.861   -0.73    0.463    
## S vs. W == 0      3.879      0.841    4.61  4.0e-06 ***
## MW vs. W == 0    -4.511      0.914   -4.93  8.1e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- none method)

Exercise 4 Solution: Differences in rate of change of suicide rate

# Pairwise comparisons of linear rate of change between regions: same
# logic as the intercept contrasts, but weights go on the Time:Region
# interaction terms (positions 6-8 of the fixed-effect vector:
# Time:RegionSouth, Time:RegionMidwest, Time:RegionWest).
contrast.matrix.lin = rbind(
  "NE vs. S" = c(0, 0, 0, 0, 0, 1, 0, 0), 
  "NE vs. MW" = c(0, 0, 0, 0, 0, 0, 1, 0),
  "NE vs. W" = c(0, 0, 0, 0, 0, 0, 0, 1),
  "S vs. MW" = c(0, 0, 0, 0, 0, -1, 1, 0),
  "S vs. W" = c(0, 0, 0, 0, 0, -1, 0, 1),
  "MW vs. W" = c(0, 0, 0, 0, 0, 0, 1, -1))
# Unadjusted p-values for each slope comparison; none are significant,
# consistent with the non-significant interaction in the anova above.
comps.lin <- glht(m.wisqars, contrast.matrix.lin)
summary(comps.lin, test = adjusted("none"))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: lmer(formula = Crude.Rate ~ Time * Region + (Time | State), data = wisqars.suicide, 
##     REML = F)
## 
## Linear Hypotheses:
##                Estimate Std. Error z value Pr(>|z|)
## NE vs. S == 0    0.0147     0.0584    0.25     0.80
## NE vs. MW == 0   0.1012     0.0625    1.62     0.11
## NE vs. W == 0    0.0591     0.0615    0.96     0.34
## S vs. MW == 0    0.0865     0.0534    1.62     0.11
## S vs. W == 0     0.0444     0.0522    0.85     0.40
## MW vs. W == 0    0.0422     0.0567    0.74     0.46
## (Adjusted p values reported -- none method)

Exercise 5 Solution

Re-analyze the word learning data using logistic GCA.

# calculate number correct and incorrect
# calculate number correct and incorrect
# Convert proportion-correct Accuracy into binomial counts; assumes 6
# trials per block (hence Accuracy * 6) -- TODO confirm against the task.
WordLearnEx$Correct <- round(WordLearnEx$Accuracy * 6)
WordLearnEx$Error <- 6 - WordLearnEx$Correct
# prep for GCA
# Second-order orthogonal polynomial of Block (poly1, poly2 columns).
WordLearnEx <- code_poly(WordLearnEx, predictor="Block", poly.order=2, draw.poly=F)
# fit logistic model
# cbind(Correct, Error) supplies the binomial successes/failures;
# glmer with family=binomial fits the logistic GCA.
wl.logit <- glmer(cbind(Correct, Error) ~ (poly1+poly2)*TP + 
                    (poly1+poly2 | Subject),
                  data=WordLearnEx, family=binomial)
## boundary (singular) fit: see ?isSingular
# The glmer coefficient table already includes z-based p-values, so no
# separate p-value helper is needed here.
coef(summary(wl.logit))
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept)   1.58261     0.1982  7.9861 1.392e-15
## poly1         2.21221     0.3234  6.8403 7.904e-12
## poly2        -0.08209     0.2318 -0.3542 7.232e-01
## TPHigh        0.56565     0.2842  1.9904 4.654e-02
## poly1:TPHigh  0.34726     0.4474  0.7762 4.376e-01
## poly2:TPHigh -0.98955     0.3254 -3.0407 2.360e-03

Exercise 5 Solution: Compare with linear GCA

# Linear GCA of the same data on proportion-correct Accuracy, with the
# same fixed- and random-effect structure, for comparison with the
# logistic model above.
wl.lin <- lmer(Accuracy ~ (poly1+poly2)*TP +
                 (poly1+poly2 | Subject),
               data=WordLearnEx, REML=F)
## boundary (singular) fit: see ?isSingular
# Normal-approximation p-values; note the TPHigh and poly2:TPHigh effects
# are weaker here than in the logistic model.
get_pvalues(wl.lin)
##               Estimate Std..Error  t.value  p.normal p.normal.star
## (Intercept)   0.778525    0.02173 35.82968 0.000e+00           ***
## poly1         0.286315    0.03779  7.57631 3.553e-14           ***
## poly2        -0.050849    0.03319 -1.53223 1.255e-01              
## TPHigh        0.052961    0.03073  1.72349 8.480e-02             .
## poly1:TPHigh  0.001075    0.05344  0.02012 9.839e-01              
## poly2:TPHigh -0.116455    0.04693 -2.48132 1.309e-02             *

Exercise 5 Solution: Plot model fits

# Observed data (points: mean +/- SE) overlaid with both model fits:
# solid lines = logistic model, dashed lines = linear model.
ggplot(WordLearnEx, aes(Block, Accuracy, color=TP)) + 
  stat_summary(fun.data = mean_se, geom="pointrange") + 
  # `fun` replaces `fun.y`, which was deprecated in ggplot2 3.3.0
  stat_summary(aes(y=fitted(wl.logit)), fun = mean, geom="line") + 
  stat_summary(aes(y=fitted(wl.lin)), fun = mean, geom="line", linetype="dashed") +
  theme_bw() + expand_limits(y=c(0.5, 1.0)) + scale_x_continuous(breaks=1:10) +
  scale_color_manual(values=c("red", "blue")) + labs(y="Accuracy")