Growth Curve Analysis: Exercise Solutions

Dan Mirman

22 May 2020

Exercise Solutions

Exercise 1A solution: WISQARS suicide data

wisqars.suicide$Time <- wisqars.suicide$Year - 1999
m.base <- lmer(Crude.Rate ~ Time + (Time | State), data = wisqars.suicide, REML=F)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0128235 (tol = 0.002, component 1)
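# Note (a check, not part of the original solution): the warning above shows
# max|grad| only slightly over tolerance; refitting with another optimizer
# and confirming the estimates barely change is a quick diagnostic, e.g.:
# m.base.chk <- update(m.base, control = lmerControl(optimizer = "bobyqa"))
# cbind(fixef(m.base), fixef(m.base.chk))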
m.0 <- lmer(Crude.Rate ~ Time + Region + (Time | State), data = wisqars.suicide, REML=F)
m.1 <- lmer(Crude.Rate ~ Time * Region + (Time | State), data = wisqars.suicide, REML=F)
anova(m.base, m.0, m.1) 
## Data: wisqars.suicide
## Models:
## m.base: Crude.Rate ~ Time + (Time | State)
## m.0: Crude.Rate ~ Time + Region + (Time | State)
## m.1: Crude.Rate ~ Time * Region + (Time | State)
##        npar  AIC  BIC logLik deviance Chisq Df Pr(>Chisq)    
## m.base    6 1426 1451   -707     1414                        
## m.0       9 1400 1437   -691     1382 32.43  3    4.3e-07 ***
## m.1      12 1402 1452   -689     1378  3.57  3       0.31    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Regions differed in baseline suicide rate, but did not differ in the rate of change of suicide rate.
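To see which regions differ at baseline, one can inspect the fixed effects of the additive model (a sketch, not part of the original solution; each Region coefficient is the baseline difference from the reference region):

coef(summary(m.0))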

Exercise 1A solution: WISQARS suicide data plot

ggplot(wisqars.suicide, aes(Year, Crude.Rate, color=Region)) + 
  stat_summary(fun.data=mean_se, geom="pointrange") + 
  stat_summary(aes(y=fitted(m.1)), fun=mean, geom="line") + 
  scale_x_continuous(breaks=1999:2007)

Exercise 1B solution: Naming recovery data

nr.null <- lmer(Correct ~ 1 + (TestTime | SubjectID), data = NamingRecovery, REML=F)
nr.base <- lmer(Correct ~ TestTime + (TestTime | SubjectID), data = NamingRecovery, REML=F)
nr.0 <- lmer(Correct ~ TestTime + Diagnosis + (TestTime | SubjectID), data = NamingRecovery, REML=F)
nr.1 <- lmer(Correct ~ TestTime * Diagnosis + (TestTime | SubjectID), data = NamingRecovery, REML=F)
anova(nr.null, nr.base, nr.0, nr.1) 
## Data: NamingRecovery
## Models:
## nr.null: Correct ~ 1 + (TestTime | SubjectID)
## nr.base: Correct ~ TestTime + (TestTime | SubjectID)
## nr.0: Correct ~ TestTime + Diagnosis + (TestTime | SubjectID)
## nr.1: Correct ~ TestTime * Diagnosis + (TestTime | SubjectID)
##         npar  AIC  BIC logLik deviance Chisq Df Pr(>Chisq)    
## nr.null    5 -126 -112   67.8     -136                        
## nr.base    6 -146 -130   79.2     -158 22.84  1    1.8e-06 ***
## nr.0       8 -151 -129   83.6     -167  8.72  2      0.013 *  
## nr.1      10 -147 -120   83.7     -167  0.26  2      0.880    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

There was overall recovery and the sub-types differed in baseline naming ability, but the sub-types did not differ in amount of recovery.

Exercise 1B solution: Naming recovery plot

ggplot(NamingRecovery, aes(TestTime, Correct, color=Diagnosis)) + 
  stat_summary(fun.data=mean_se, geom="pointrange") + 
  stat_summary(aes(y=fitted(nr.1)), fun=mean, geom="line") 

Exercise 2 Solution

CP (d’ peak at category boundary): Compare categorical perception along spectral vs. temporal dimensions using second-order orthogonal polynomials.

summary(CP)
##   Participant        Type        Stimulus       d.prime     
##  1      :  8   Temporal:128   Min.   :1.00   Min.   :-1.41  
##  2      :  8   Spectral:128   1st Qu.:2.75   1st Qu.: 0.40  
##  3      :  8                  Median :4.50   Median : 1.18  
##  4      :  8                  Mean   :4.50   Mean   : 1.34  
##  5      :  8                  3rd Qu.:6.25   3rd Qu.: 2.22  
##  6      :  8                  Max.   :8.00   Max.   : 4.77  
##  (Other):208
ggplot(CP, aes(Stimulus, d.prime, color=Type)) + 
  stat_summary(fun.data=mean_se, geom="pointrange")

Exercise 2 Solution: Fit the models

# prep for analysis
CP <- code_poly(CP, predictor="Stimulus", poly.order = 2, draw.poly = FALSE)
# fit the models
m.base <- lmer(d.prime ~ (poly1 + poly2) + (poly1 + poly2 | Participant), data=CP, REML=F)
## boundary (singular) fit: see ?isSingular
m.0 <- lmer(d.prime ~ (poly1 + poly2) + Type + (poly1 + poly2 | Participant), data=CP, REML=F)
m.1 <- lmer(d.prime ~ poly1*Type + poly2 + (poly1 + poly2 | Participant), data=CP, REML=F)
## boundary (singular) fit: see ?isSingular
m.2 <- lmer(d.prime ~ (poly1 + poly2)*Type + (poly1 + poly2 | Participant), data=CP, REML=F)
## boundary (singular) fit: see ?isSingular
anova(m.base, m.0, m.1, m.2)
## Data: CP
## Models:
## m.base: d.prime ~ (poly1 + poly2) + (poly1 + poly2 | Participant)
## m.0: d.prime ~ (poly1 + poly2) + Type + (poly1 + poly2 | Participant)
## m.1: d.prime ~ poly1 * Type + poly2 + (poly1 + poly2 | Participant)
## m.2: d.prime ~ (poly1 + poly2) * Type + (poly1 + poly2 | Participant)
##        npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)  
## m.base   10 817 852   -398      797                      
## m.0      11 819 858   -398      797  0.09  1      0.767  
## m.1      12 821 863   -398      797  0.30  1      0.583  
## m.2      13 817 863   -396      791  5.30  1      0.021 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
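The boundary (singular) fit messages indicate that the random-effects structure is richer than the data support. One common simplification (a sketch, not what the original solution did) is to drop the random-effect correlations with lme4's double-bar syntax and check whether fit degrades:

# zero-correlation-parameter version of the full model
m.2.zcp <- lmer(d.prime ~ (poly1 + poly2)*Type + (poly1 + poly2 || Participant), 
                data=CP, REML=F)
anova(m.2.zcp, m.2)  # comparable fits would justify the simpler structure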

Exercise 2 Solution: Get p-values

summary(m.2)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: d.prime ~ (poly1 + poly2) * Type + (poly1 + poly2 | Participant)
##    Data: CP
## 
##      AIC      BIC   logLik deviance df.resid 
##    817.3    863.4   -395.6    791.3      243 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.276 -0.660 -0.136  0.647  2.875 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev. Corr       
##  Participant (Intercept) 0.0473   0.218               
##              poly1       0.0788   0.281    -0.20      
##              poly2       1.2353   1.111    -0.56  0.93
##  Residual                1.1336   1.065               
## Number of obs: 256, groups:  Participant, 32
## 
## Fixed effects:
##                    Estimate Std. Error t value
## (Intercept)          1.4049     0.1087   12.93
## poly1               -0.4315     0.2753   -1.57
## poly2               -1.5848     0.3848   -4.12
## TypeSpectral        -0.1252     0.1537   -0.81
## poly1:TypeSpectral  -0.0616     0.3893   -0.16
## poly2:TypeSpectral   1.3053     0.5442    2.40
## 
## Correlation of Fixed Effects:
##             (Intr) poly1  poly2  TypSpc pl1:TS
## poly1       -0.026                            
## poly2       -0.202  0.170                     
## TypeSpectrl -0.707  0.018  0.143              
## ply1:TypSpc  0.018 -0.707 -0.120 -0.026       
## ply2:TypSpc  0.143 -0.120 -0.707 -0.202  0.170
## convergence code: 0
## boundary (singular) fit: see ?isSingular
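Summaries of lmerMod fits do not include p-values. A quick normal approximation, which treats the t-values as z-statistics (the same idea behind the get_pvalues() helper used in Exercise 3), is:

coefs <- coef(summary(m.2))
# two-tailed p-values from the normal approximation
cbind(coefs, p.normal = 2*(1 - pnorm(abs(coefs[, "t value"]))))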

Exercise 2 Solution: Plot model fit

ggplot(CP, aes(Stimulus, d.prime, color=Type)) + 
  stat_summary(fun.data=mean_se, geom="pointrange") + 
  stat_summary(aes(y=fitted(m.2)), fun=mean, geom="line")

Exercise 3 Solution

Az: Use natural (not orthogonal) polynomials to analyze decline in performance of 30 individuals with probable Alzheimer’s disease on three different kinds of tasks.

summary(Az)
##     Subject         Time          Task      Performance  
##  1      : 30   Min.   : 1.0   cADL  :300   Min.   : 2.0  
##  2      : 30   1st Qu.: 3.0   sADL  :300   1st Qu.:40.0  
##  3      : 30   Median : 5.5   Memory:300   Median :52.0  
##  4      : 30   Mean   : 5.5                Mean   :49.3  
##  5      : 30   3rd Qu.: 8.0                3rd Qu.:61.0  
##  6      : 30   Max.   :10.0                Max.   :85.0  
##  (Other):720
ggplot(Az, aes(Time, Performance, color=Task, fill=Task)) + 
  stat_summary(fun.data=mean_se, geom="ribbon", color=NA, alpha=0.5) +
  stat_summary(fun=mean, geom="line")

Exercise 3 Solution: Fit the models

# prep for analysis
Az <- code_poly(Az, predictor="Time", poly.order=2, orthogonal=F, draw.poly = F)
# fit the full model
# m.base <- lmer(Performance ~ (poly1 + poly2) + 
#                  (poly1 + poly2 | Subject) + (poly1 + poly2 | Subject:Task), 
#                data=Az, REML=F)
# m.0 <- lmer(Performance ~ (poly1 + poly2) + Task + 
#               (poly1 + poly2 | Subject) + (poly1 + poly2 | Subject:Task), 
#             data=Az, REML=F)
# m.1 <- lmer(Performance ~ poly1*Task + poly2 + 
#               (poly1 + poly2 | Subject) + (poly1 + poly2 | Subject:Task), 
#             data=Az, REML=F)
m.Az.full <- lmer(Performance ~ (poly1 + poly2)*Task + 
                  (poly1 + poly2 | Subject) + (poly1 + poly2 | Subject:Task), 
                data=Az, REML=F)
## boundary (singular) fit: see ?isSingular
# anova(m.base, m.0, m.1, m.Az.full)

Exercise 3 Solution: Get p-values

get_pvalues(m.Az.full)
##                  Estimate Std..Error  t.value  p.normal p.normal.star
## (Intercept)      67.16167    0.92668  72.4754 0.000e+00           ***
## poly1            -3.28780    0.34106  -9.6399 0.000e+00           ***
## poly2             0.00947    0.01243   0.7615 4.463e-01              
## TasksADL          0.09500    0.81186   0.1170 9.068e-01              
## TaskMemory        1.24000    0.81186   1.5274 1.267e-01              
## poly1:TasksADL    1.36220    0.26751   5.0921 3.540e-07           ***
## poly1:TaskMemory -3.97707    0.26751 -14.8670 0.000e+00           ***
## poly2:TasksADL   -0.01326    0.01748  -0.7585 4.481e-01              
## poly2:TaskMemory  0.33889    0.01748  19.3892 0.000e+00           ***
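Because these are natural polynomials, the coefficients live on the recoded Time scale: for the reference task (cADL), the fitted curve is approximately 67.16 − 3.29·poly1 + 0.009·poly1². To extract population-level fitted values that ignore the subject-level random effects, lme4's predict() can be used (a sketch):

# population-level predictions (random effects set to zero)
Az$fit.pop <- predict(m.Az.full, re.form = NA)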

Exercise 3 Solution: Plot model fit

ggplot(Az, aes(Time, Performance, color=Task)) + 
  stat_summary(fun.data=mean_se, geom="pointrange") + 
  stat_summary(fun=mean, geom="line", aes(y=fitted(m.Az.full)))

Exercise 4 Solution

Re-analyze the word learning data using logistic GCA.

# calculate number correct and incorrect
WordLearnEx <- mutate(WordLearnEx, 
                      Correct = round(Accuracy*6),
                      Error = 6-Correct)
# prep for GCA
WordLearnEx <- code_poly(WordLearnEx, predictor="Block", poly.order=2, draw.poly=F)
# fit logistic model
wl.logit <- glmer(cbind(Correct, Error) ~ (poly1+poly2)*TP + 
                    (poly1+poly2 | Subject),
                  data=WordLearnEx, family=binomial)
## boundary (singular) fit: see ?isSingular
coef(summary(wl.logit))
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept)   1.58261     0.1982  7.9861 1.392e-15
## poly1         2.21221     0.3234  6.8403 7.904e-12
## poly2        -0.08209     0.2318 -0.3542 7.232e-01
## TPHigh        0.56565     0.2842  1.9904 4.654e-02
## poly1:TPHigh  0.34726     0.4474  0.7762 4.376e-01
## poly2:TPHigh -0.98955     0.3254 -3.0407 2.360e-03
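The logistic estimates are on the log-odds scale, so they can be mapped back to proportions with plogis(): for example, the intercept implies an average accuracy of about plogis(1.58) ≈ 0.83 for the Low TP group, averaged over the orthogonal time terms (a quick sanity check):

plogis(fixef(wl.logit)["(Intercept)"])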

Exercise 4 Solution: Compare with linear GCA

wl.lin <- lmer(Accuracy ~ (poly1+poly2)*TP +
                 (poly1+poly2 | Subject),
               data=WordLearnEx, REML=F)
## boundary (singular) fit: see ?isSingular
summary(wl.lin)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: Accuracy ~ (poly1 + poly2) * TP + (poly1 + poly2 | Subject)
##    Data: WordLearnEx
## 
##      AIC      BIC   logLik deviance df.resid 
##   -332.6   -276.4    179.3   -358.6      547 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.618 -0.536  0.126  0.567  2.616 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr       
##  Subject  (Intercept) 0.01076  0.1037              
##           poly1       0.01542  0.1242   -0.33      
##           poly2       0.00628  0.0792   -0.28 -0.82
##  Residual             0.02456  0.1567              
## Number of obs: 560, groups:  Subject, 56
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)   0.77853    0.02173   35.83
## poly1         0.28632    0.03779    7.58
## poly2        -0.05085    0.03319   -1.53
## TPHigh        0.05296    0.03073    1.72
## poly1:TPHigh  0.00108    0.05344    0.02
## poly2:TPHigh -0.11645    0.04693   -2.48
## 
## Correlation of Fixed Effects:
##             (Intr) poly1  poly2  TPHigh p1:TPH
## poly1       -0.183                            
## poly2       -0.114 -0.229                     
## TPHigh      -0.707  0.129  0.081              
## poly1:TPHgh  0.129 -0.707  0.162 -0.183       
## poly2:TPHgh  0.081  0.162 -0.707 -0.114 -0.229
## convergence code: 0
## boundary (singular) fit: see ?isSingular

Exercise 4 Solution: Plot model fits

ggplot(WordLearnEx, aes(Block, Accuracy, color=TP)) + 
  stat_summary(fun.data = mean_se, geom="pointrange") + 
  stat_summary(aes(y=fitted(wl.logit)), fun = mean, geom="line") + 
  stat_summary(aes(y=fitted(wl.lin)), fun = mean, geom="line", linetype="dashed") +
  theme_bw() + expand_limits(y=c(0.5, 1.0)) + scale_x_continuous(breaks=1:10) +
  scale_color_manual(values=c("red", "blue")) + labs(y="Accuracy")

Exercise 5 Solution

Cohort and rhyme competition in three groups of participants.

Make a multi-panel plot showing each kind of competition for each group.

ggplot(CohortRhyme, aes(Time, FixProp, color=Object)) + 
  facet_grid(Type ~ Group) + stat_summary(fun=mean, geom="line") +
  stat_summary(fun=mean, geom="point") + theme_bw()

Exercise 5 Solution: Group comparisons

Use fourth-order orthogonal polynomials to analyze (separately) the cohort and rhyme competition effects. Do the diagnosis groups differ in competition effect sizes?

# prep for GCA
CohortRhyme.gca <- code_poly(df=CohortRhyme, predictor="timeBin", poly.order=4, draw.poly = FALSE)
# group effects on cohort competition
cohort.base <- lmer(FixProp ~ (poly1+poly2+poly3+poly4)*Object + 
                      (1+poly1+poly2+poly3+poly4 | subjID) + 
                      (1+poly1+poly2 | subjID:Object), 
                    data=subset(CohortRhyme.gca, Type == "Cohort"), REML=F)
## boundary (singular) fit: see ?isSingular
cohort.group <- lmer(FixProp ~ (poly1+poly2+poly3+poly4)*Object*Group + 
                       (1+poly1+poly2+poly3+poly4 | subjID) + 
                       (1+poly1+poly2 | subjID:Object), 
                     data=subset(CohortRhyme.gca, Type == "Cohort"), REML=F)
## boundary (singular) fit: see ?isSingular
gt(anova(cohort.base, cohort.group), rownames_to_stub = TRUE) %>% 
  tab_header(title = "Cohort model comparisons")
Cohort model comparisons

               npar   AIC   BIC logLik deviance Chisq Df Pr(>Chisq)
cohort.base      32 -2278 -2128   1171    -2342    NA NA         NA
cohort.group     52 -2304 -2060   1204    -2408    66 20  8.089e-07
#group effects on rhyme competition
rhyme.base <- lmer(FixProp ~ (poly1+poly2+poly3+poly4)*Object + 
                     (1+poly1+poly2+poly3+poly4 | subjID) + 
                     (1+poly1+poly2 | subjID:Object), 
                   data=subset(CohortRhyme.gca, Type == "Rhyme"), REML=F)
## boundary (singular) fit: see ?isSingular
rhyme.group <- lmer(FixProp ~ (poly1+poly2+poly3+poly4)*Object*Group + 
                      (1+poly1+poly2+poly3+poly4 | subjID) + 
                      (1+poly1+poly2 | subjID:Object),
                    data=subset(CohortRhyme.gca, Type == "Rhyme"), REML=F)
## boundary (singular) fit: see ?isSingular
gt(anova(rhyme.base, rhyme.group), rownames_to_stub = TRUE) %>% 
  tab_header(title = "Rhyme model comparisons")
Rhyme model comparisons

              npar   AIC   BIC logLik deviance Chisq Df Pr(>Chisq)
rhyme.base      32 -2270 -2120   1167    -2334    NA NA         NA
rhyme.group     52 -2263 -2020   1184    -2367 33.65 20    0.02862
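Adding the Group terms improved model fit for both effects: cohort competition (χ²(20) = 66, p < .001) and rhyme competition (χ²(20) = 33.65, p = .029), so the diagnosis groups differ in both kinds of competition.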

Exercise 5 Solution: Individual competition effect sizes

Use random effects to compute individual competition effect sizes (note: you’ll need to use a model without any group fixed effects to get random effects relative to the overall sample rather than the diagnosis sub-group).
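The get_ranef() helper, like get_pvalues(), is a companion function rather than part of lme4. If it is unavailable, a rough equivalent can be assembled from lme4::ranef() (a sketch; assumes the helper returns one row per subjID-by-Object unit with an Intercept column):

# approximate get_ranef(cohort.base, 'subjID:Object') with lme4 + tidyverse
re <- ranef(cohort.base)$`subjID:Object` %>% 
  tibble::rownames_to_column("Unit") %>% 
  tidyr::separate(Unit, c("subjID", "Object"), sep=":") %>% 
  dplyr::rename(Intercept = `(Intercept)`)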

# get cohort effect sizes
ES.coh <- get_ranef(cohort.base, 'subjID:Object') %>%
  group_by(subjID) %>%
  summarize(subjID = unique(subjID),
            Cohort.Intercept = Intercept[Object=="Competitor"] - Intercept[Object=="Unrelated"],
            Cohort.Linear = poly1[Object=="Competitor"] - poly1[Object=="Unrelated"], 
            Cohort.Quadratic = poly2[Object=="Competitor"] - poly2[Object=="Unrelated"])
#head(ES.coh)
# get rhyme effect sizes
ES.rhy <- get_ranef(rhyme.base, 'subjID:Object') %>%
  group_by(subjID) %>%
  summarize(subjID = unique(subjID),
            Rhyme.Intercept = Intercept[Object=="Competitor"] - Intercept[Object=="Unrelated"], 
            Rhyme.Linear = poly1[Object=="Competitor"] - poly1[Object=="Unrelated"], 
            Rhyme.Quadratic = poly2[Object=="Competitor"] - poly2[Object=="Unrelated"])
#head(ES.rhy)
# combine cohort and rhyme individual effect sizes
ES <- merge(ES.coh, ES.rhy, by="subjID")
# get grouping info from original data frame
group <- unique(subset(CohortRhyme, select=c(subjID, Group))) 
ES <- merge(ES, group, by="subjID")
summary(ES)
##      subjID    Cohort.Intercept   Cohort.Linear      Cohort.Quadratic 
##  Min.   :101   Min.   :-0.04923   Min.   :-0.15156   Min.   :-0.3099  
##  1st Qu.:106   1st Qu.:-0.01712   1st Qu.:-0.05346   1st Qu.:-0.0832  
##  Median :110   Median :-0.00985   Median :-0.00336   Median : 0.0263  
##  Mean   :110   Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.:115   3rd Qu.: 0.02199   3rd Qu.: 0.06445   3rd Qu.: 0.0896  
##  Max.   :120   Max.   : 0.06869   Max.   : 0.13670   Max.   : 0.1444  
##  Rhyme.Intercept     Rhyme.Linear     Rhyme.Quadratic        Group   
##  Min.   :-0.04664   Min.   :-0.2445   Min.   :-0.1237   Control :12  
##  1st Qu.:-0.01774   1st Qu.:-0.1184   1st Qu.:-0.0522   Broca   : 5  
##  Median :-0.00936   Median : 0.0543   Median : 0.0168   Wernicke: 3  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000                
##  3rd Qu.: 0.01676   3rd Qu.: 0.1060   3rd Qu.: 0.0541                
##  Max.   : 0.06739   Max.   : 0.1273   Max.   : 0.1193
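Note that every effect-size column has a mean of (essentially) zero by construction: the random effects are centered at zero across the whole sample, so these values are individual deviations from the overall sample average.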

Correlations among individual competition effect sizes

Test correlations between cohort and rhyme competition effects for patients and controls.

# compute correlations, tidy results, and bind into rows of a data frame
es_corrs <- rbind(
  #effect size correlations: Intercept
  tidy(cor.test(ES$Cohort.Intercept, ES$Rhyme.Intercept)),
  tidy(cor.test(ES$Cohort.Intercept[ES$Group != "Control"], ES$Rhyme.Intercept[ES$Group != "Control"])),
  tidy(cor.test(ES$Cohort.Intercept[ES$Group == "Control"], ES$Rhyme.Intercept[ES$Group == "Control"])),
  #effect size correlations: Linear
  tidy(cor.test(ES$Cohort.Linear, ES$Rhyme.Linear)),
  tidy(cor.test(ES$Cohort.Linear[ES$Group != "Control"], ES$Rhyme.Linear[ES$Group != "Control"])),
  tidy(cor.test(ES$Cohort.Linear[ES$Group == "Control"], ES$Rhyme.Linear[ES$Group == "Control"])),
  #effect size correlations: Quadratic
  tidy(cor.test(ES$Cohort.Quadratic, ES$Rhyme.Quadratic)),
  tidy(cor.test(ES$Cohort.Quadratic[ES$Group != "Control"], ES$Rhyme.Quadratic[ES$Group != "Control"])),
  tidy(cor.test(ES$Cohort.Quadratic[ES$Group == "Control"], ES$Rhyme.Quadratic[ES$Group == "Control"])))
# combine with labels
tab_df <- cbind(Term = rep(c("Intercept", "Linear", "Quadratic"), each=3), 
                Group = rep(c("Overall", "Aphasia", "Control"), 3),
                es_corrs)
#print correlations
gt(tab_df[, 1:8]) %>% 
  fmt_number(columns = c(3:5, 7:8), decimals = 3) %>% 
  tab_header(title = "Competition effect size correlations")
Competition effect size correlations

Term       Group    estimate statistic p.value parameter conf.low conf.high
Intercept  Overall    −0.383    −1.757   0.096        18   −0.706     0.072
Intercept  Aphasia    −0.861    −4.138   0.006         6   −0.974    −0.396
Intercept  Control     0.338     1.135   0.283        10   −0.293     0.764
Linear     Overall    −0.022    −0.094   0.926        18   −0.460     0.425
Linear     Aphasia    −0.438    −1.195   0.277         6   −0.873     0.385
Linear     Control     0.473     1.696   0.121        10   −0.139     0.823
Quadratic  Overall    −0.022    −0.092   0.928        18   −0.460     0.425
Quadratic  Aphasia     0.056     0.137   0.895         6   −0.675     0.732
Quadratic  Control    −0.170    −0.545   0.598        10   −0.678     0.448

Scatterplot

Make a multi-panel scatterplot showing these correlations.

ES.p <- ES %>% 
  pivot_longer(cols=2:7, # pivot the effect size estimates into long form
               names_to="key", values_to="EffectSize") %>% 
  separate(key, c("Type", "Term"), sep = "\\.") %>% # separate column labels for plotting
  pivot_wider(names_from = Type, values_from = EffectSize) # pivot Cohort and Rhyme effects into separate columns

ggplot(ES.p, aes(Cohort, Rhyme, color=Group)) + 
  facet_wrap(~ Term, scales="free") + geom_point() + 
  scale_color_manual(values=c("grey", "red", "blue")) +
  theme_bw()