-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset5_neutral.R
126 lines (92 loc) · 4.18 KB
/
dataset5_neutral.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# R code for fitting an IRT model for the 5GS-Neutral items
# load libraries (assuming they are already installed)
library(mirt)
library(psych)
# load data and key
answers_5 <- read.csv("data/irt_file_5_coded.csv",header=TRUE,sep=",")
Key<-unclass(read.table("data/group_5.txt",sep=" ",comment.char="")[,2]);
snli_5 <- answers_5[2:91]
c_s5 <- colnames(snli_5)
Data <- snli_5;
items<-which(Key==3); #key 3 is neutral
######################
### Dimensionality ###
######################
R<-tetrachoric(Data[,items])
plot(eigen(R$rho,symmetric=TRUE,only.values=TRUE)$values)
### One single high eigenvalue before trailing eigenvalues, suggesting a single factor
# fit 1 factor and 2 factor models
fitMIRT1<-mirt(Data[,items],1,itemtype="3PL",TOL=1e-4,technical=list(NCYCLES=1e4))
fitMIRT2<-mirt(Data[,items],2,itemtype="3PL",TOL=1e-4,technical=list(NCYCLES=1e4))
anova(fitMIRT1,fitMIRT2) ## results here support a 2F model.
summary(fitMIRT2,suppress=0.3) ## 2F model does show an interpretable loading pattern. Second factor has very small SS loadings
# new rotation to squeeze into single factor
target<-cbind(rep(1,30),rep(0,30));
summary(fitMIRT2,suppress=0.3,rotate="pstQ",Target=target,W=!target)
#######################
## Evaluate 1F model ##
#######################
fit.all.est<-coef(fitMIRT1,simplify=TRUE,IRTpar=TRUE)$items;
mtlnL.all<- -2*fitMIRT1@Fit$logLik;
pvalue3v2<-rep(NA,30);
type<-rep("3PL",30);
for (k in 1:30)
{
if (fit.all.est[k,"g"]<1e-2) {pvalue3v2[k]<-.5; next;}
itemtype<-type;
itemtype[k]<-"2PL";
fit.2PL <-mirt(Data[,items],1,itemtype,TOL=2e-4,technical=list(NCYCLES=1e3))
mtlnL.2PL<- -2*fit.2PL@Fit$logLik;
T3v2<-mtlnL.2PL-mtlnL.all;
pvalue3v2[k]<-p<-pchisq(T3v2,df=1,lower.tail=FALSE)/2;
cat(items[k],": 3PL vs 2PL ",T3v2," p = ", p, "\n");
}
s<-sort.int(pvalue3v2,index.return=TRUE)
print(cbind(items[s$ix],s$x,0.05*(1:30)/30));
## No lower asymptote is significant. Should change to 2PL since none of the
# guessing parameters are significant.
fit.2PL<-mirt(Data[,items],1,itemtype="2PL",TOL=2e-4,technical=list(NCYCLES=1e3))
anova(fit.2PL,fitMIRT1) ## Test confirm 2PL model is as good as 3PL.
summary(fit.2PL) ## There are no items with h2 (communality) smaller than 15%.
coef(fit.2PL,simplify=TRUE,IRTpar=TRUE)$items; ## all difficulties are negative, but none look too difficult.
#############################
### Don't Remove any Items###
#############################
fit.2<-mirt(Data[,items],1,itemtype="2PL",TOL=2e-4,technical=list(NCYCLES=1e3))
itf <- itemfit(fit.2)
itf ## 4 items with p < 0.05.
M2(fit.2) ## RMSEA_5 smaller than 0.5; TLI and CFI greater than 0.95. Good overall fit for 2-margins.
res.table<-residuals(fit.2)
pstar<-30*29/2
#first test p values of itemfit using Holm criteria
cbind(sort(itf$p.S_X2),0.05/(30:1))
#one significant item found. which one?
itf #item 7
#plot item 7
thetaF <- fscores(fit.2)
ig0 <- itemGAM(Data[,items[c(7)]],thetaF)
ig.good <- itemGAM(Data[,items[c(3)]],thetaF)
plot(ig0)
plot(ig.good)
itemplot(fit.2,7,main="Item Characteristic Curve",col.line="black")
#remove item 7 & refit
fit.2<-mirt(Data[,items[c(-7)]],1,itemtype="2PL",TOL=2e-4,technical=list(NCYCLES=1e3))
itf <- itemfit(fit.2) ## no significant p values
M2(fit.2) ## RMSEA_5 smaller than 0.5; TLI and CFI greater than 0.95. Good overall fit for 2-margins.
res.table<-residuals(fit.2)
cbind(sort(itf$p.S_X2),0.05/(29:1)) #no significant values here
pstar<-29*28/2
t <- 1-pchisq(df=1,abs(res.table[lower.tri(res.table)]))
cbind(sort(round(t,6)),0.05/(pstar:1)) ## Compare the p-values using Holm's method
## Refit without items 3 & 7
fit.2<-mirt(Data[,items[c(-3,-7)]],1,itemtype="2PL",TOL=2e-4,technical=list(NCYCLES=1e3))
itf <- itemfit(fit.2)
itf ## no significant p values
M2(fit.2) ## RMSEA_5 smaller than 0.5; TLI and CFI greater than 0.95. Good overall fit for 2-margins.
res.table<-residuals(fit.2)
cbind(sort(itf$p.S_X2),0.05/(28:1)) #no significant values here
itemplot(fit.2,10,main="Item Characteristic Curve",col.line="black")
pstar<-28*27/2
t <- 1-pchisq(df=1,abs(res.table[lower.tri(res.table)]))
cbind(sort(round(t,6)),0.05/(pstar:1)) ## Compare the p-values using Holm's method
# no significant residuals, so we are done. Retain all items except items 3 & 7