Dan Mirman
24 January 2019
ggplot(fortify(m), aes(.fitted, .resid)) + geom_point()
Logistic power peak function (Scheepers, Keller, & Lapata, 2008) fit to semantic competition data (Mirman & Magnuson, 2009).
Fits: Statistical models
Forecasts: Theoretical models
Intercept (\(\beta_0\)): Overall average
Intercept (\(\beta_0\)): Overall average
Linear (\(\beta_1\)): Overall slope
Quadratic (\(\beta_2\)): Centered rise and fall rate
Cubic, Quartic, … (\(\beta_3, \beta_4, ...\)): Inflection steepness
Effect of transitional probability on word learning
summary(WordLearnEx)
## Subject TP Block Accuracy
## 244 : 10 Low :280 Min. : 1.0 Min. :0.000
## 253 : 10 High:280 1st Qu.: 3.0 1st Qu.:0.667
## 302 : 10 Median : 5.5 Median :0.833
## 303 : 10 Mean : 5.5 Mean :0.805
## 305 : 10 3rd Qu.: 8.0 3rd Qu.:1.000
## 306 : 10 Max. :10.0 Max. :1.000
## (Other):500
ggplot(WordLearnEx, aes(Block, Accuracy, color=TP)) +
stat_summary(fun.data=mean_se, geom="pointrange") +
stat_summary(fun.y=mean, geom="line")
Create 2nd-order orthogonal polynomial
WordLearn.gca <- code_poly(WordLearnEx, predictor="Block", poly.order=2, orthogonal=TRUE)
summary(WordLearn.gca)
## Subject TP Block Accuracy Block.Index
## 244 : 10 Low :280 Min. : 1.0 Min. :0.000 Min. : 1.0
## 253 : 10 High:280 1st Qu.: 3.0 1st Qu.:0.667 1st Qu.: 3.0
## 302 : 10 Median : 5.5 Median :0.833 Median : 5.5
## 303 : 10 Mean : 5.5 Mean :0.805 Mean : 5.5
## 305 : 10 3rd Qu.: 8.0 3rd Qu.:1.000 3rd Qu.: 8.0
## 306 : 10 Max. :10.0 Max. :1.000 Max. :10.0
## (Other):500
## poly1 poly2
## Min. :-0.495 Min. :-0.348
## 1st Qu.:-0.275 1st Qu.:-0.261
## Median : 0.000 Median :-0.087
## Mean : 0.000 Mean : 0.000
## 3rd Qu.: 0.275 3rd Qu.: 0.174
## Max. : 0.495 Max. : 0.522
##
library(lme4)
#fit base model
m.base <- lmer(Accuracy ~ (poly1+poly2) + (poly1+poly2 | Subject),
data=WordLearn.gca, REML=F)
#add effect of TP on intercept
m.0 <- lmer(Accuracy ~ (poly1+poly2) + TP + (poly1+poly2 | Subject),
data=WordLearn.gca, REML=F)
#add effect on slope
m.1 <- lmer(Accuracy ~ (poly1+poly2) + TP + TP:poly1 + (poly1+poly2 | Subject),
data=WordLearn.gca, REML=F)
#add effect on quadratic
m.2 <- lmer(Accuracy ~ (poly1+poly2)*TP + (poly1+poly2 | Subject),
data=WordLearn.gca, REML=F)
anova(m.base, m.0, m.1, m.2)
## Data: WordLearn.gca
## Models:
## m.base: Accuracy ~ (poly1 + poly2) + (poly1 + poly2 | Subject)
## m.0: Accuracy ~ (poly1 + poly2) + TP + (poly1 + poly2 | Subject)
## m.1: Accuracy ~ (poly1 + poly2) + TP + TP:poly1 + (poly1 + poly2 |
## m.1: Subject)
## m.2: Accuracy ~ (poly1 + poly2) * TP + (poly1 + poly2 | Subject)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m.base 10 -331 -288 175 -351
## m.0 11 -330 -283 176 -352 1.55 1 0.213
## m.1 12 -329 -277 176 -353 0.36 1 0.550
## m.2 13 -333 -276 179 -359 5.95 1 0.015 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
get_pvalues(m.2)
## Estimate Std..Error t.value p.normal p.normal.star
## (Intercept) 0.778525 0.02173 35.83141 0.000e+00 ***
## poly1 0.286315 0.03779 7.57682 3.531e-14 ***
## poly2 -0.050849 0.03319 -1.53217 1.255e-01
## TPHigh 0.052961 0.03073 1.72357 8.478e-02 .
## poly1:TPHigh 0.001075 0.05344 0.02012 9.839e-01
## poly2:TPHigh -0.116455 0.04693 -2.48121 1.309e-02 *
ggplot(WordLearn.gca, aes(Block, Accuracy, color=TP)) +
stat_summary(fun.data=mean_se, geom="pointrange") +
stat_summary(aes(y=fitted(m.2)), fun.y=mean, geom="line") +
theme_bw() + expand_limits(y=c(0.5, 1)) + scale_x_continuous(breaks = 1:10)
Example: Target fixation in spoken word-to-picure matching (VWP)
ggplot(TargetFix, aes(Time, meanFix, color=Condition, fill=Condition)) +
stat_summary(fun.y=mean, geom="line") +
stat_summary(fun.data=mean_se, geom="ribbon", color=NA, alpha=0.3) +
theme_bw() + expand_limits(y=c(0,1)) + legend_positioning(c(1,1)) +
labs(y="Fixation Proportion", x="Time since word onset (ms)")
TargetFix.gca <- code_poly(TargetFix, predictor="Time", poly.order=3)
str(TargetFix.gca)
## 'data.frame': 300 obs. of 11 variables:
## $ Subject : Factor w/ 10 levels "708","712","715",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Time : num 300 300 350 350 400 400 450 450 500 500 ...
## $ timeBin : num 1 1 2 2 3 3 4 4 5 5 ...
## $ Condition : Factor w/ 2 levels "High","Low": 1 2 1 2 1 2 1 2 1 2 ...
## $ meanFix : num 0.1944 0.0286 0.25 0.1143 0.2778 ...
## $ sumFix : num 7 1 9 4 10 5 13 5 14 6 ...
## $ N : int 36 35 36 35 36 35 36 35 36 35 ...
## $ Time.Index: num 1 1 2 2 3 3 4 4 5 5 ...
## $ poly1 : num -0.418 -0.418 -0.359 -0.359 -0.299 ...
## $ poly2 : num 0.4723 0.4723 0.2699 0.2699 0.0986 ...
## $ poly3 : num -0.4563 -0.4563 -0.0652 -0.0652 0.1755 ...
m.full <- lmer(meanFix ~ (poly1+poly2+poly3)*Condition + #fixed effects
(poly1+poly2+poly3 | Subject) + #random effects of Subject
(poly1+poly2+poly3 | Subject:Condition), #random effects of Subj by Cond
data=TargetFix.gca, REML=F)
## singular fit
get_pvalues(m.full)
## Estimate Std..Error t.value p.normal p.normal.star
## (Intercept) 0.4773228 0.01385 34.457777 0.000e+00 ***
## poly1 0.6385604 0.05994 10.654180 0.000e+00 ***
## poly2 -0.1095979 0.03849 -2.847573 4.405e-03 **
## poly3 -0.0932612 0.02330 -4.002200 6.276e-05 ***
## ConditionLow -0.0581122 0.01879 -3.093227 1.980e-03 **
## poly1:ConditionLow 0.0003188 0.06579 0.004846 9.961e-01
## poly2:ConditionLow 0.1635455 0.05393 3.032545 2.425e-03 **
## poly3:ConditionLow -0.0020869 0.02704 -0.077168 9.385e-01
head(ranef(m.full)$"Subject")
## (Intercept) poly1 poly2 poly3
## 708 -0.000158 0.01086 -0.0035599 -0.004931
## 712 0.011022 0.10692 -0.0093299 -0.036514
## 715 0.011375 0.11482 -0.0109608 -0.039651
## 720 -0.002071 -0.01300 -0.0003585 0.003742
## 722 0.013828 0.16224 -0.0200789 -0.058174
## 725 -0.017827 -0.20734 0.0253442 0.074200
head(ranef(m.full)$"Subject:Condition")
## (Intercept) poly1 poly2 poly3
## 708:High 0.012278 -0.13121 -0.12985 0.01515
## 708:Low -0.061217 0.17038 0.06228 0.01231
## 712:High 0.021228 0.08290 0.02803 0.02000
## 712:Low -0.014453 0.04484 0.03255 -0.01730
## 715:High 0.012363 0.05972 0.05517 -0.02500
## 715:Low -0.008684 0.10599 0.13031 -0.03947
# str(ranef(m.full))
VarCorr(m.full)
## Groups Name Std.Dev. Corr
## Subject:Condition (Intercept) 0.0405
## poly1 0.1404 -0.43
## poly2 0.1124 -0.33 0.72
## poly3 0.0417 0.13 -0.49 -0.43
## Subject (Intercept) 0.0124
## poly1 0.1195 0.91
## poly2 0.0165 -0.42 -0.76
## poly3 0.0421 -0.85 -0.99 0.83
## Residual 0.0438
What is being estimated?
This is why df for parameter estimates are poorly defined in MLR
m.left <- lmer(meanFix ~ (poly1+poly2+poly3)*Condition + #fixed effects
((poly1+poly2+poly3)*Condition | Subject), #random effects
data=TargetFix.gca, REML=F)
## Warning in commonArgs(par, fn, control, environment()): maxfun < 10 *
## length(par)^2 is not recommended.
## singular fit
get_pvalues(m.left)
## Estimate Std..Error t.value p.normal p.normal.star
## (Intercept) 0.4773228 0.01679 28.434031 0.000e+00 ***
## poly1 0.6385604 0.05146 12.409015 0.000e+00 ***
## poly2 -0.1095979 0.03717 -2.948228 3.196e-03 **
## poly3 -0.0932612 0.02062 -4.523841 6.073e-06 ***
## ConditionLow -0.0581122 0.02110 -2.753958 5.888e-03 **
## poly1:ConditionLow 0.0003188 0.07477 0.004264 9.966e-01
## poly2:ConditionLow 0.1635455 0.06274 2.606891 9.137e-03 **
## poly3:ConditionLow -0.0020869 0.03334 -0.062598 9.501e-01
# str(ranef(m.left))
# head(ranef(m.left)$"Subject")
VarCorr(m.left)
## Groups Name Std.Dev. Corr
## Subject (Intercept) 0.0519
## poly1 0.1570 0.18
## poly2 0.1094 -0.29 0.03
## poly3 0.0490 -0.28 -0.37 0.63
## ConditionLow 0.0649 -0.89 -0.13 0.49 0.34
## poly1:ConditionLow 0.2285 0.38 -0.46 -0.62 0.10 -0.56
## poly2:ConditionLow 0.1889 0.20 0.08 -0.81 -0.22 -0.43 0.74
## poly3:ConditionLow 0.0862 -0.08 -0.40 -0.06 -0.47 0.06 -0.27 -0.43
## Residual 0.0430
This random effect structure makes fewer assumptions:
Warning message:
In (function (fn, par, lower = rep.int(-Inf, n), upper = rep.int(Inf, :
failure to converge in 10000 evaluations
Need to simplify random effects
Remove random effects of higher-order terms
Outcome ~ (poly1+poly2+poly3)*Condition + (poly1+poly2+poly3 | Subject)
Outcome ~ (poly1+poly2+poly3)*Condition + (poly1+poly2 | Subject)
Remove correlation between random effects
Outcome ~ (poly1+poly2+poly3)*Condition + (1 | Subject) +
(0+poly1 | Subject) + (0+poly2 | Subject) + (0+poly3 | Subject)
In general, participants should be treated as random effects.
This captures the typical assumption of random sampling from some population to which we wish to generalize.
Pooling
In general, participants should be treated as random effects.
This captures the typical assumption of random sampling from some population to which we wish to generalize.
Shrinkage
In general, participants should be treated as random effects.
This captures the typical assumption of random sampling from some population to which we wish to generalize.
Exceptions are possible: e.g., neurological/neuropsychological case studies where the goal is to characterize the pattern of performance for each participant, not generalize to a population.
FunctThemePts
: semantic competition in 17 participants with left hemisphere stroke. Do the participants exhibit statistically significant semantic competition?
summary(FunctThemePts)
## subj Condition Object Time
## 206 : 486 Function:4113 Target :2733 Min. :-1000
## 281 : 486 Thematic:4086 Competitor:2733 1st Qu.: 0
## 419 : 486 Unrelated :2733 Median : 1000
## 1088 : 486 Mean : 1000
## 1238 : 486 3rd Qu.: 2000
## 1392 : 486 Max. : 3000
## (Other):5283
## timeBin meanFix sumFix N
## Min. : 0 Min. :0.0000 Min. : 0.00 Min. :12.0
## 1st Qu.:20 1st Qu.:0.0312 1st Qu.: 1.00 1st Qu.:15.0
## Median :40 Median :0.1250 Median : 2.00 Median :16.0
## Mean :40 Mean :0.1777 Mean : 3.26 Mean :15.4
## 3rd Qu.:60 3rd Qu.:0.2500 3rd Qu.: 5.00 3rd Qu.:16.0
## Max. :80 Max. :1.0000 Max. :16.00 Max. :16.0
##
ggplot(subset(FunctThemePts, Time >= 0 & Time <= 2200 & Object != "Target"),
aes(Time, meanFix, color=Object, fill=Object)) +
stat_summary(fun.y=mean, geom="line") +
stat_summary(fun.data=mean_se, geom="ribbon", color=NA, alpha=0.3) +
geom_vline(xintercept=c(500, 2000)) + theme_bw() +
labs(y="Fixation Proportion", x="Time since word onset (ms)") +
legend_positioning(c(1,1))
FunctThemePts
: semantic competition in 17 participants with left hemisphere stroke. Do the participants exhibit statistically significant semantic competition?
# Prep the data for GCA of the 500-2000ms time window
FunctThemePts.gca <- code_poly(subset(FunctThemePts, Time >= 500 & Time <= 2000 &
Object != "Target"),
predictor="Time", poly.order=4, draw.poly = FALSE)
# Fit the GCA model(s)
m.ft <- lmer(meanFix ~ (poly1+poly2+poly3+poly4)*Object +
(poly1+poly2+poly3+poly4 | subj) +
(poly1+poly2+poly3 | subj:Object),
data=FunctThemePts.gca, REML=F)
## singular fit
# Which terms show significant effects of experimental factors?
get_pvalues(m.ft)
## Estimate Std..Error t.value p.normal p.normal.star
## (Intercept) 0.116256 0.007823 14.8609 0.00000 ***
## poly1 -0.242303 0.025423 -9.5310 0.00000 ***
## poly2 0.003990 0.022221 0.1796 0.85749
## poly3 -0.018664 0.022769 -0.8197 0.41238
## poly4 -0.034287 0.013851 -2.4755 0.01330 *
## ObjectUnrelated -0.008623 0.005779 -1.4922 0.13564
## poly1:ObjectUnrelated -0.034711 0.028437 -1.2206 0.22223
## poly2:ObjectUnrelated 0.048769 0.027994 1.7421 0.08149 .
## poly3:ObjectUnrelated 0.049369 0.032035 1.5411 0.12329
## poly4:ObjectUnrelated -0.003222 0.014909 -0.2161 0.82889
# Plot observed and model fit data
ggplot(fortify(m.ft),
aes(Time, meanFix, color=Object)) +
stat_summary(fun.data=mean_se, geom="pointrange") +
stat_summary(aes(y=.fitted), fun.y=mean, geom="line") +
theme_bw() +
labs(y="Fixation Proportion", x="Time since word onset (ms)") +
legend_positioning(c(1,1))