#remove(list=ls())#
#flush.console()#
library(em)#
#
setwd("/Users/felix/Dropbox/Workspace/ACT-R_EMMA/readsrc98")#
#
f <- read.table("output/fixations.txt")#
colnames(f) <- c("simulation","sentence","region","word","duration")#
# head(f) # summary(f)#
#
f$simulation <- as.factor(f$simulation)#
f$sentence <- as.factor(f$sentence)#
#
etm <- with(f,#
            em(region, duration,#
               trialId=list(simulation,sentence),#
               trialInfo=list(sim=simulation, sn=sentence)))#
                              #
# head(etm) # summary(etm)#
#save(etm, file=paste("data/psc-sim-etm.",Sys.Date(),".RData",sep=""))#
save(etm, file="data/src-sim-etm.RData")#
#
#
#################################################
### WORD INFO#
#################################################
wi <- read.table("src98.measures.txt", header=TRUE)#
colnames(wi)[1] <- "sn"#
# head(wi)#
#
## Add word count to word info#
wi$wordcount <- 0#
lastrois <- NULL#
for(i in 1:max(wi$sn)){#
	last_i <- max(wi$roi[wi$sn==i])#
	lastrois <- append(lastrois,last_i)#
	wi$wordcount[wi$sn==i] <- last_i#
	}#
# head(wi)#
#
#################################################
## Merge with measures#
#################################################
dim(etm)#
m <- merge(etm, wi, by=c("sn","roi"), all.x=TRUE)#
dim(m)#
# head(m) # summary(m)#
#
#
#################################################
## (1) Remove some words from data#
#################################################
## Remove first and last word from data#
m1 <- m#
m <- subset(m, (roi>1 & roi<wordcount))#
dim(m)#
#
##### Some data cleaning#
m1.1 <- m#
dim(m)#
## Remove words with FFD < 30ms#
(dim(subset(m,(FFD<30 & FFD>0)))[1]) / dim(m)[1]#
m <- subset(m,(FFD>=30 | FFD==0))#
dim(m)#
## Remove words with FFD > 1s#
(dim(subset(m,FFD>1000))[1]) / dim(m)[1]#
m <- subset(m,FFD<=1000)#
dim(m)#
## Remove words with TFT or FPRT > 1.5s#
(dim(subset(m,TFT>1500 | FPRT>1500))[1]) / dim(m)[1]#
m <- subset(m,(TFT<=1500 & FPRT<=1500))#
dim(m)#
## number of words removed#
dim(m1.1)[1]-dim(m)[1]#
#
#
#################################################
## (2) Assign skipping and one- and two-fixation values#
#################################################
m$skipped <- 0#
m$onefix <- 0#
m$twofix <- 0#
m$skipped[m$FFP==0] <- 1#
m$skipped[is.na(m$FFP)] <- 1#
m$onefix[m$SFD>0 & m$FFP==1] <- 1#
m$twofix[m$SFD==0 & m$TFT>0 & m$FFP==1] <- 1#
# head(m)#
#
#
#################################################
## (3) Assign Regression Origin and Regression Goal#
##     probability#
#################################################
m$RO <- 0#
m$RG <- 0#
m$RO[m$TRC>0] <- 1#
#m$RG[m$RRT>0] <- 1  # wrong!#
# head(m)#
#
#save(m, file=paste("data/m.",Sys.Date(),".RData",sep=""))#
save(m, file="data/m.RData")
#remove(list=ls())#
library(reshape)	# for aggregating#
library(plyr)#
#
setwd("/Users/felix/Dropbox/Workspace/ACT-R_EMMA/readsrc98")#
source("~/Dropbox/FelixE/PSC-Model/helpers.R")#
#
#source("1-analyze.R")#
#
#file <- paste("data/m.",Sys.Date(),".RData",sep="")#
file <- "data/m.RData"#
load(file)#
# head(m)#
#
#
##################################################
### (0) Exclude Trials with interword regressions#
##################################################
m0 <- m#
m$trial <- paste(m$sn, m$sim, sep=".")  ## add trial variable#
regtrials <- na.omit(m$trial[m$TRC>0])  ## identify trials with regressions#
round(length(unique(regtrials))/length(unique(m$trial))*100)  ## percentage of trials with regressions#
################### <---> ####################
m <- subset(m, !(trial%in%regtrials)) ### COMMENT OUT IF NOT WANTED ####
################### <---> ####################
dim(m0); dim(m)#
round(dim(m)[1]/dim(m0)[1] * 100) ## percentage#
# head(m)#
#
#
#################################################
## Frequency Summary Statictics#
#################################################
## Experimentel Data: ###
#load("~/Dropbox/FelixE/PSC-Model/PSC-CORPUS-DATA/psc.fstat.RData")#
fstat1 <- data.frame(freq.class=1:5, freq.pm=0,#
		FPRT=c(293,272,256,234,214),#
		FFD=c(248,233,230,223,208),#
		SFD=c(265, 249, 243, 235, 216))#
fstat1.sd <- data.frame(#
		FPRT=c(147, 123, 132, 122, 130),#
		FFD=c(100, 100, 100, 100, 100),#
		SFD=c(100, 100, 100, 100, 100))#
fstat2 <- data.frame(freq.class=1:5, freq.pm=0,#
		skipped=c(.10, .13, .22, .55, .67),#
		onefix=c(.68, .70, .68, .44, .32),#
		twofix=c(.20, .16, .10, .02, .01))#
fstat2.sd <- data.frame(#
		skipped=c(.39, .40, .45, .50, .47),#
		onefix=c(.48, .47, .47, .49, .46),#
		twofix=c(.43, .39, .30, .14, .1))#
#
#
#
## Model: ###
# head(m)#
m.m1 <- melt(m,#
	id=c("sn","roi","freq.class"),#
	measure=c("frequency","FPRT","FFD","SFD"),#
	variable="Measure", na.rm = T)#
# head(m.m1) # summary(m.m1) # dim(m.m1) # str(m.m1)#
fstat1.m <- data.frame(round(cast(subset(m.m1,value!=0), freq.class ~ Measure, mean)))#
fstat1.m#
#
m.m2 <- melt(m,#
	id=c("sn","roi","freq.class"),#
	measure=c("frequency","skipped","onefix","twofix"),#
	variable="Measure", na.rm = T)#
# head(m.m2) # summary(m.m2) # dim(m.m2) # str(m.m2)#
fstat2.m <- data.frame(round(cast(m.m2, freq.class ~ Measure, mean),digits=2))#
fstat2.m$frequency <- round(fstat2.m$frequency)#
fstat2.m#
#
## correlations: ###
fstat1.cor <- round(cor(fstat1[,3:5],fstat1.m[,3:5], method="pearson"), digits=2)#
fstat2.cor <- round(cor(fstat2[,3:5],fstat2.m[,3:5], method="pearson"), digits=2)#
#fstat1.cor[is.na(fstat1.cor)] <- 0#
#fstat2.cor[is.na(fstat2.cor)] <- 0#
#
i <- diag(nrow=length(1:3))==1#
(fstat1.cor <- fstat1.cor[,1:3][i])#
(fstat2.cor <- fstat2.cor[,1:3][i])#
fstat.cor.mean <- mean(c(fstat1.cor,fstat2.cor),na.rm=T)#
#
## rmsd: ###
fstat1.rmsd <- NULL#
fstat2.rmsd <- NULL#
for(i in 3:5){#
	fstat1.rmsd <-	cbind(fstat1.rmsd,nrmsd(fstat1[,i],fstat1.m[,i],fstat1.sd[,i-2]))#
	fstat2.rmsd <-	cbind(fstat2.rmsd,nrmsd(fstat2[,i],fstat2.m[,i],fstat2.sd[,i-2]))#
}#
(fstat1.rmsd <- as.vector(fstat1.rmsd))#
(fstat2.rmsd <- as.vector(fstat2.rmsd))#
fstat.rmsd.mean <- mean(c(fstat1.rmsd,fstat2.rmsd), na.rm=T)#
#
## range: ###
range <- colwise(max)(cbind(fstat1[3:5],100*fstat2[3:5])) - colwise(min)(cbind(fstat1[3:5],100*fstat2[3:5]))#
range.m <- colwise(max)(cbind(fstat1.m[3:5],100*fstat2.m[3:5])) - colwise(min)(cbind(fstat1.m[3:5],100*fstat2.m[3:5]))#
fstat.rangediff <- sqrt((range-range.m)^2)#
#
## SCORES ###
scores <- as.vector(1*c(fstat1.rmsd,fstat2.rmsd) + 1*(1 - c(fstat1.cor,fstat2.cor)) + 0.01*fstat.rangediff) #
pscore <- mean(scores, na.rm=T) #+ var(scores, na.rm=T)#
round(fstat.cor.mean, digits=2)#
round(fstat.rmsd.mean, digits=3)#
round(pscore,digits=3)#
#
#
#
#
save(fstat1, fstat2, fstat1.m, fstat2.m, fstat1.rmsd, fstat2.rmsd,fstat1.cor, fstat2.cor, fstat.rmsd.mean, fstat.cor.mean, fstat.rangediff, file="data/fstat.RData")
#remove(list=ls())#
#library(ggplot2)	# for plotting#
setwd("/Users/felix/Dropbox/Workspace/ACT-R_EMMA/readsrc98")#
source("~/Dropbox/FelixE/PSC-Model/helpers.R")#
#
load("data/fstat.RData")#
#
modelname = "model"#
#
##################################################
### Plots#
##################################################
#
## sumstat1 on frequency#
postscript(paste("plots/src",modelname,"fstat1.eps",sep="-")#
	,width=5, height=5#
	,encoding="TeXtext.enc"#
	#,family="ComputerModern"#
	,horizontal=FALSE#
	)#
#
plot(c(1,2,3,4,5),fstat1.m$FPRT,#
	,type="b" ,pch=19, ylim=c(200,320), lwd=2#
	#,main="PSC"#
	,xlab="Frequency Class"#
	,ylab="Duration (ms)"#
	#,pty=s#
	)#
lines(fstat1$FPRT, lty=3, type="b", pch=1, lwd=2)#
lines(fstat1.m$FFD, type="b", pch=15, lwd=2)#
lines(fstat1$FFD, lty=3, type="b", pch=22, lwd=2)#
lines(fstat1.m$SFD, type="b", pch=17, lwd=2)#
lines(fstat1$SFD, lty=3, type="b", pch=2, lwd=2)#
legend("topright",#
       legend = c("model","data","Gaze","FFD","SFD")#
       ,lty=c(1,3,0,0,0)#
       ,pch=c(NA,NA,19,15,17)#
       ,lwd=2#
       ,cex=0.8     #magnification of legend#
       ,xjust=1#
       ,yjust=1#
       #,merge=TRUE#
       ,horiz=FALSE)#, trace=TRUE)#
#
dev.off()#
#######################
#
#
#
postscript(paste("plots/src",modelname,"fstat2.eps",sep="-")#
	,width=5, height=5#
	,encoding="TeXtext.enc"#
	#,family="ComputerModern"#
	,horizontal=FALSE#
	)#
#
plot(1:5,fstat2.m$skipped,#
	,type="b" ,pch=19, ylim=c(0,1), lwd=2#
	#,main="PSC"#
	,xlab="Frequency Class"#
	,ylab="Probability"#
	#,pty=s#
	)#
lines(fstat2$skipped, lty=3, type="b", pch=1, lwd=2)#
lines(fstat2.m$onefix, type="b", pch=15, lwd=2)#
lines(fstat2$onefix, lty=3, type="b", pch=22, lwd=2)#
lines(fstat2.m$twofix, type="b", pch=17, lwd=2)#
lines(fstat2$twofix, lty=3, type="b", pch=2, lwd=2)#
#
legend("topright",#
       legend = c("model","data","skip","once","multiple")#
       ,lty=c(1,3,0,0,0)#
       ,pch=c(NA,NA,19,15,17)#
       ,lwd=2#
       ,cex=0.7     #magnification of legend#
       ,xjust=1#
       ,yjust=1#
       #,merge=TRUE#
       ,horiz=FALSE)#, trace=TRUE)#
#
dev.off()
#remove(list=ls())#
#library(ggplot2)	# for plotting#
setwd("/Users/felix/Dropbox/Workspace/ACT-R_EMMA/readsrc98")#
source("~/Dropbox/FelixE/PSC-Model/helpers.R")#
#
load("data/fstat.RData")#
#
modelname = "model"#
#
##################################################
### Plots#
##################################################
#
## sumstat1 on frequency#
# postscript(paste("plots/src",modelname,"fstat1.eps",sep="-")#
	# ,width=5, height=5#
	# ,encoding="TeXtext.enc"#
	# #,family="ComputerModern"#
	# ,horizontal=FALSE#
	# )#
#
pdf(file = paste("plots/src",modelname,"fstat1.pdf",sep="-"),#
    width=5, height=5)#
#
plot(c(1,2,3,4,5),fstat1.m$FPRT,#
	,type="b" ,pch=19, ylim=c(200,320), lwd=2#
	#,main="PSC"#
	,xlab="Frequency Class"#
	,ylab="Duration (ms)"#
	#,pty=s#
	)#
lines(fstat1$FPRT, lty=3, type="b", pch=1, lwd=2)#
lines(fstat1.m$FFD, type="b", pch=15, lwd=2)#
lines(fstat1$FFD, lty=3, type="b", pch=22, lwd=2)#
lines(fstat1.m$SFD, type="b", pch=17, lwd=2)#
lines(fstat1$SFD, lty=3, type="b", pch=2, lwd=2)#
legend("topright",#
       legend = c("model","data","Gaze","FFD","SFD")#
       ,lty=c(1,3,0,0,0)#
       ,pch=c(NA,NA,19,15,17)#
       ,lwd=2#
       ,cex=0.8     #magnification of legend#
       ,xjust=1#
       ,yjust=1#
       #,merge=TRUE#
       ,horiz=FALSE)#, trace=TRUE)#
#
dev.off()#
#######################
#
#
#
# postscript(paste("plots/src",modelname,"fstat2.eps",sep="-")#
	# ,width=5, height=5#
	# ,encoding="TeXtext.enc"#
	# #,family="ComputerModern"#
	# ,horizontal=FALSE#
	# )#
pdf(file = paste("plots/src",modelname,"fstat1.pdf",sep="-"),#
    width=5, height=5)#
#
#
plot(1:5,fstat2.m$skipped,#
	,type="b" ,pch=19, ylim=c(0,1), lwd=2#
	#,main="PSC"#
	,xlab="Frequency Class"#
	,ylab="Probability"#
	#,pty=s#
	)#
lines(fstat2$skipped, lty=3, type="b", pch=1, lwd=2)#
lines(fstat2.m$onefix, type="b", pch=15, lwd=2)#
lines(fstat2$onefix, lty=3, type="b", pch=22, lwd=2)#
lines(fstat2.m$twofix, type="b", pch=17, lwd=2)#
lines(fstat2$twofix, lty=3, type="b", pch=2, lwd=2)#
#
legend("topright",#
       legend = c("model","data","skip","once","multiple")#
       ,lty=c(1,3,0,0,0)#
       ,pch=c(NA,NA,19,15,17)#
       ,lwd=2#
       ,cex=0.7     #magnification of legend#
       ,xjust=1#
       ,yjust=1#
       #,merge=TRUE#
       ,horiz=FALSE)#, trace=TRUE)#
#
dev.off()
#remove(list=ls())#
#library(ggplot2)	# for plotting#
setwd("/Users/felix/Dropbox/Workspace/ACT-R_EMMA/readsrc98")#
source("~/Dropbox/FelixE/PSC-Model/helpers.R")#
#
load("data/fstat.RData")#
#
modelname = "model"#
#
##################################################
### Plots#
##################################################
#
## sumstat1 on frequency#
# postscript(paste("plots/src",modelname,"fstat1.eps",sep="-")#
	# ,width=5, height=5#
	# ,encoding="TeXtext.enc"#
	# #,family="ComputerModern"#
	# ,horizontal=FALSE#
	# )#
#
pdf(file = paste("plots/src",modelname,"fstat1.pdf",sep="-"),#
    width=5, height=5)#
#
plot(c(1,2,3,4,5),fstat1.m$FPRT,#
	,type="b" ,pch=19, ylim=c(200,320), lwd=2#
	#,main="PSC"#
	,xlab="Frequency Class"#
	,ylab="Duration (ms)"#
	#,pty=s#
	)#
lines(fstat1$FPRT, lty=3, type="b", pch=1, lwd=2)#
lines(fstat1.m$FFD, type="b", pch=15, lwd=2)#
lines(fstat1$FFD, lty=3, type="b", pch=22, lwd=2)#
lines(fstat1.m$SFD, type="b", pch=17, lwd=2)#
lines(fstat1$SFD, lty=3, type="b", pch=2, lwd=2)#
legend("topright",#
       legend = c("model","data","Gaze","FFD","SFD")#
       ,lty=c(1,3,0,0,0)#
       ,pch=c(NA,NA,19,15,17)#
       ,lwd=2#
       ,cex=0.8     #magnification of legend#
       ,xjust=1#
       ,yjust=1#
       #,merge=TRUE#
       ,horiz=FALSE)#, trace=TRUE)#
#
dev.off()#
#######################
#
#
#
# postscript(paste("plots/src",modelname,"fstat2.eps",sep="-")#
	# ,width=5, height=5#
	# ,encoding="TeXtext.enc"#
	# #,family="ComputerModern"#
	# ,horizontal=FALSE#
	# )#
pdf(file = paste("plots/src",modelname,"fstat2.pdf",sep="-"),#
    width=5, height=5)#
#
#
plot(1:5,fstat2.m$skipped,#
	,type="b" ,pch=19, ylim=c(0,1), lwd=2#
	#,main="PSC"#
	,xlab="Frequency Class"#
	,ylab="Probability"#
	#,pty=s#
	)#
lines(fstat2$skipped, lty=3, type="b", pch=1, lwd=2)#
lines(fstat2.m$onefix, type="b", pch=15, lwd=2)#
lines(fstat2$onefix, lty=3, type="b", pch=22, lwd=2)#
lines(fstat2.m$twofix, type="b", pch=17, lwd=2)#
lines(fstat2$twofix, lty=3, type="b", pch=2, lwd=2)#
#
legend("topright",#
       legend = c("model","data","skip","once","multiple")#
       ,lty=c(1,3,0,0,0)#
       ,pch=c(NA,NA,19,15,17)#
       ,lwd=2#
       ,cex=0.7     #magnification of legend#
       ,xjust=1#
       ,yjust=1#
       #,merge=TRUE#
       ,horiz=FALSE)#, trace=TRUE)#
#
dev.off()
0.01*fstat.rangediff
fstat.rangediff
scores <- as.vector(1*c(fstat1.rmsd,fstat2.rmsd) + 1*(1 - c(fstat1.cor,fstat2.cor)) + 0.01*fstat.rangediff)
1*c(fstat1.rmsd,fstat2.rmsd) + 1*(1 - c(fstat1.cor,fstat2.cor))
scores <- as.vector(1*c(fstat1.rmsd,fstat2.rmsd) + 1*(1 - c(fstat1.cor,fstat2.cor)) + 0.01*fstat.rangediff)
scores
pscore <- mean(scores, na.rm=T) #+ var(scores, na.rm=T)
pscore
scores <- as.vector(1*c(fstat1.rmsd,fstat2.rmsd) + 1*(1 - c(fstat1.cor,fstat2.cor))
)
pscore <- mean(scores, na.rm=T) #+ var(scores, na.rm=T)
pscore
scores <- as.vector(1*c(fstat1.rmsd,fstat2.rmsd) + 1*(1 - c(fstat1.cor,fstat2.cor)) + 0.1*fstat.rangediff)
pscore <- mean(scores, na.rm=T) #+ var(scores, na.rm=T)
pscore
fstat1.m
dim(subset(melt(fstat.m, variable="Measure", na.rm=T),value==0))[1]
dim(subset(melt(fstat1.m, variable="Measure", na.rm=T),value==0))[1]
fstat1.m
dim(subset(melt(fstat2.m, variable="Measure", na.rm=T),value==0))[1]
fstat2.m
melt(fstat2.m, variable="Measure", na.rm=T)
dim(subset(melt(rbind(fstat1.m,fstat2.m), variable="Measure", na.rm=T),value==0))[1]
list(fstat1.m,fstat2.m)
flat(list(fstat1.m,fstat2.m)
)
dim(subset(melt(fstat2.m, variable="Measure", na.rm=T),value==0))[1]
z1 <- dim(subset(melt(fstat2.m, variable="Measure", na.rm=T),value==0))[1]
z1 <- dim(subset(melt(fstat1.m, variable="Measure", na.rm=T),value==0))[1]
z2 <- dim(subset(melt(fstat2.m, variable="Measure", na.rm=T),value==0))[1]
z <- dim(subset(melt(fstat1.m, variable="Measure", na.rm=T),value==0))[1]
z <- z + dim(subset(melt(fstat2.m, variable="Measure", na.rm=T),value==0))[1]
z
zeros <- dim(subset(melt(fstat1.m, variable="Measure", na.rm=T),value==0))[1]
zeros <- zeros + dim(subset(melt(fstat2.m, variable="Measure", na.rm=T),value==0))[1]
0.1*zeros
scores <- as.vector(1*c(fstat1.rmsd,fstat2.rmsd) + 1*(1 - c(fstat1.cor,fstat2.cor)) + 0.01*fstat.rangediff + 0.1*zeros)
pscore <- mean(scores, na.rm=T) #+ var(scores, na.rm=T)
pscore
fstat.zeros <- dim(subset(melt(fstat1.m, variable="Measure", na.rm=T),value==0))[1] + dim(subset(melt(fstat2.m, variable="Measure", na.rm=T),value==0))[1]
fstat.zeros <- dim(subset(melt(fstat1.m, variable="Measure", na.rm=T),value==0))[1] + dim(subset(melt(fstat2.m, variable="Measure", na.rm=T),value==0))[1]
scores <- as.vector(1*c(fstat1.rmsd,fstat2.rmsd) + 1*(1 - c(fstat1.cor,fstat2.cor)) + 0.01*fstat.rangediff + 0.1*fstat.zeros)
pscore <- mean(scores, na.rm=T) #+ var(scores, na.rm=T)
pscore
pscore <- mean(scores, na.rm=T) + var(scores, na.rm=T)
pscore
var(scores, na.rm=T)
scores
