 em_n0[1:4, c("cond","f2bb","f2ab","n2bb","n2ab")]
keep <- c("id","sn","wn","nw","wid","cond","f2bb","f2ab","n2bb",       #
"n2ab","pvn2","lxn1","f",        #
"wl","wl1","f1","wl2","f2","wl3","f3",#
 "wl4","f4","ffd","gzd","sfd","tvt","prx",#
"psk","prg","ilp")  #
#
em_n0 <- em_n0[,keep]#
em_n1 <- em_n1[,keep]#
em_n2 <- em_n2[,keep]#
em_n3 <- em_n3[,keep]#
em_nm1 <- em_nm1[,keep]
save(em_nm1, em_n0, em_n1, em_n2, em_n3, file=ifile_em)
ifile_em   <- c("n2FQ_em_filtered.rda")
load(ifile_em)
ls()			# em_n0, em_n1, em_n2, em_n3, em_nm1
str(em_n0)
ifile_em   <- c("/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_RData/n2FQ_em_filtered.rda")
load(ifile_em)
em_n2$sn1 <- em_n1$p0 - 0.5#
em_n0$sn1 <- em_n1$p0 - 0.5#
#
em_n1$sn2 <- em_n2$p0 - 0.5
load(ifile_em)
str(em)
str(em_n0)
# Program xProbDur_n2FQ.R#
# Generates descriptive statistics about probabilities, durations, #
# letter positions of fixations for first-pass and second-pass reading#
# (was xProbDurCorpus.m)#
# this version for "BoundaryN2Fq" (preview frequency in boundary paradigm)#
##
# input = (1) output of xSetup1.R#
#         (2) output of xIndex.R#
##
# output = em.rda, em_w.rda, em_w_id.rda, em_w_sn.rda#
##
# R. Kliegl, 08/08/2006#
# some modifications for PSC2, J.Laubrock, 10/08/2006#
# not yet finished: #
# 	vectorized rewrite J.Laubrock, 12/2006 #
#	todo: rewrite (vectorized) produr1a.R#
#
rm(list=ls())#
#
# SET WORKING DIRECTORY:#
setwd("/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_RData")#
#
# FUNCTIONS:#
source("~/Documents/EM_Rfunctions/probdur1_bndN2.R")#
source("~/Documents/EM_Rfunctions/parseq_fast.R")#
#
# LOAD DATA:#
load("n2FQ_fix_R.rda")#
load("n2FQ_index_fix.rda")#
#
# OUTPUT:#
ofile_em    <- "/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_analysis/FQ_probDur/n2FQ_em.rda"#
#
#
# BAD FIXATION INFO - fixations to be ignored, not deleted, to keep index #
# add dur following a micro to previous duration prior to micro#
# Q: isn't this a bit dangerous, because it assumes that the fixation are from the same sentence etc? check mcoutR!#
# mcoutR contains within-letter saccades#
mcoutR <- which(a$ao==0)#
length(mcoutR)#
#
# check whether mcoutR+1 is on the same word as mcoutR:#
wid <- 1e5*a$id + 1e2*a$sn + a$wn#
wid[mcoutR]==wid[mcoutR+1]		# yes, okay...#
#
a$dur[mcoutR] <- a$dur[mcoutR]+a$dur[mcoutR+1]#
#
# flag as bad: fix following a micro and short fixations (i.e., < 51 ms)#
# Question should we remove first and last from firstpR, use vFirstpR?#
#
shrtR <- which(a$dur < 10)     # need new shrtR because of dur-ms-dur addition #
longR <- which(a$dur > 750)#
length(shrtR)#
length(longR)#
#mcinR <- which(a$ao1==0)#
mcinR <- mcoutR+1#
#
# VECTOR WITH INVALID FIXATIONS:#
# (1) within-word refixations (microsaccades)#
# (2) DO NOT EXCLUDE NOW: short durations + long durations #
# (3) first and last fixation in sentence (frstfix, lastfix)#
# (4) binocular (xSameWord) #
invalBC <- setdiff(c(1:nrow(a)), xSameWord)#
#
bad <- unique(c(mcinR, invalBC, frstfix, lastfix))#
badfix <- rep(0,nrow(a))#
badfix[bad] <- 1#
#
# FIRST-PASS INFO #
# (5) first-pass (firstpR)#
fpfix  <- rep(0,nrow(a))#
fpfix[firstpR] <- 1#
#
#
# INVALID TRIALS:#
# (6) invalid display change trials (vBndChangeTrials)#
invalDC <- setdiff(c(1:nrow(a)), vBndChangeTrials)#
xTrial <- rep(0,nrow(a))#
xTrial[invalDC] <- 1#
#
#
# SUMMARIES#
# (1) basic info for EACH WORD OF EACH SENTENCE read#
sid <- 1e2*a$sn + a$wn#
six <- parseq_fast(sid)#
#
nrow_em<-sum(a[six[,1],"nw"]); #
ncol_em <- 43 #this depends on the number of columns available; 43 for Boundrary-N2-Experiments#
em <- array(NA, c(nrow_em, ncol_em))#
ns <- nrow(six)        # n of sentences#
#
sntix <- a[,"id"]*1000+a[,"sn"]  # Recompute because records were deleted#
a_fpf_bad <- cbind(a[,1:6], fpfix, badfix, xTrial)#
sntix <- a_fpf_bad[,"id"]*1000+a_fpf_bad[,"sn"]  # Recompute because records were deleted#
#
#
pdlist <- by(a_fpf_bad, sntix, probdur1_bndN2)#
#
#
cnt=1#
for (i in 1:length(pdlist)) {#
	dcnt <- dim(pdlist[[i]])[1]#
	em[cnt:(cnt+dcnt-1), ] <- pdlist[[i]]#
	cnt <- cnt+dcnt#
}#
#
em <- data.frame(em)#
names(em) <- c("id", "sn", "wn", "p0", "p1", "p2", "p3",#
                                 "d0", "d1", "d2", "d3", #
                                 "l0", "l1", "l2", "l3", #
                                  "gz", "ngz",#
         "rop1",  "rop2", "rod0", "rod1", "rod2", "rol0", "rol1", "rol2", "rogz", "rongz",#
         "rgp1",  "rgp2", "rgd0", "rgd1", "rgd2", "rgl0", "rgl1", "rgl2", "rggz", "rgngz",#
         "rgc",  "roc", "trt", "nfx", "xfp","xmp")#
#
#
em$idsn <- 1e3*em$id + em$sn#
a$idsn  <- 1e3*a$id  + a$sn#
#
a.idsn <- parseq_fast(a$idsn)#
asn <- a[a.idsn[,1],]#
#
xSentRead <- match(em$idsn,asn$idsn)#
em[,c("prevw","cnpl1","cond","f2bb","f2ab","n2bb","n2ab")] <- asn[xSentRead,c("prevw","cnpl1","cond","f2bb","f2ab","n2bb","n2ab")]#
#
invalidSent <- which(is.na(em$id)==TRUE)#
em <- em[-invalidSent,]#
#
save(em, file=ofile_em)#
#
summary(em)
ofile_em
ls()
dim(em)
ofile_em
ofile_em_w
ofile_em    <- "/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_RData/n2FQ_em.rda"
save(em, file=ofile_em)
rm(list=ls())#
#
# SET WORKING DIRECTORY:#
setwd("/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_RData")#
#
# INPUT:#
ifile_em    <- "/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_RData/n2FQ_em.rda"#
ifile_corpus <- "/Users/Sarah/Documents/EM_Corpora/n2FQ/FQ.DWDS.SR.rda"#
#
#
# OUTPUT:#
ofile_em    <- "/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_RData/n2FQ_em.rda"#
ofile_em_w  <- "/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_RData/n2FQ_em_w.rda"#
ofile_em_s  <- "/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_RData/n2FQ_em_s.rda"#
ofile_em_id <- "/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_RData/n2FQ_em_id.rda"#
#
#
# LIBRARIES:#
#
# FUNCTIONS:#
source("/Users/Sarah/Documents/EM_Rfunctions/parseq_fast.R")#
#
source("~/Documents/EM_Rfunctions/probdur_id.R")#
source("~/Documents/EM_Rfunctions/probdur_s.R")#
source("~/Documents/EM_Rfunctions/probdur_w.R")#
#
#
# LOAD DATA:#
load(ifile_em)			# word-level matrix for each sentence of each participant#
#
str(em)#
#
#
# get corpus information about target word index (tp) and number of word (nw):#
load(ifile_corpus)#
str(FQ.dwds)#
#
corpus_wrdidx <- 1e2*FQ.dwds$sn + FQ.dwds$wn#
em_wrdidx     <- 1e2*em$sn + em$wn#
#
corp_to_em <- match(em_wrdidx, corpus_wrdidx)#
em[,c("nw","twx")] <- FQ.dwds[corp_to_em,c("nw","tp")]#
#
#
# index for first and last word in sentence:#
x1stLastWrd <- which(em$wn==1 | em$wn==em$nw)#
em <- em[-x1stLastWrd,]#
#
# index for invalid trials (incorrect display changes):#
xValidSentences <- which(em$id > 0)#
em <- em[xValidSentences,]#
#
# "xfp": 0: valid first pass, > 0. invalid in first pass#
# "xmp": 0: valid in second and more pass, >0: invalid#
# drop temporary columns:#
KeepColNames <-  c("id", "sn", "wn", "p0", "p1", "p2", "p3", "d0", "d1", "d2", "d3",    #
	"l0", "l1", "l2", "l3",  "gz", "ngz", "rop1", "rop2", "rod0", "rod1", "rod2",  #
	"rol0", "rol1", "rol2", "rogz", "rongz", "rgp1", "rgp2", "rgd0", "rgd1", "rgd2",#
	"rgl0", "rgl1", "rgl2", "rggz", "rgngz", "rgc",  "roc",  "trt",  "nfx", "xfp", "xmp",#
	"prevw", "cnpl1", "cond", "f2bb", "f2ab", "n2bb", "n2ab", "twx",  "nw")    #
em <- em[,KeepColNames]
str(em)
save(em, file=ofile_em)		# overwrites prior "em"
# AGGREGATE DATA ON DIFFERENT LEVELS (id, sn, wn):#
# (1) aggregate to 60 subjects (averaged across words + sentences)#
em_id <- data.frame(probdur_id(em))#
save(em_id, file=ofile_em_id)#
#
# (2) aggregate to 160 sentences (averaged across readers + words)#
em_s <- data.frame(probdur_s(em))#
save(em_s, file=ofile_em_s)#
#
# (2) aggregate to 1504 words (averaged across readers + sentences)#
em_w <- data.frame(probdur_w(em))#
save(em_w, file=ofile_em_w)
# xPrepMatrix_em
#

#
# DEPENDENT VARIABLES AND CENTER CONTINUOUS PREDICTORS:
#
# (1) INDEPENDENT VARIABLES:
#
# (1.1) EXPERIMENTAL FACTORS:				lxn1, pvn2, n2bb, n2ab
#
# (1.2) CONTINUOUS PREDICTORS:				f, wl, (wid)
#
# (2) DEPENDENT VARIABLES:
#
# (2.1) DURATIONS:							sfd, ffd, gzd, tvt
#
# (2.2) PROBABILITIES:						prx, psk, prg
#
# (2.3) SACCADE TARGETING:					ilp
#
# (3) CENTER CONTINUOUS PREDICTORS:			f, f1-f4, wl, wl1-wl4
#

#
# Sarah Risse, November 2008 
#

#

#
rm(list=ls())
#

#
# SET WORKING DIRECTORY:
#
#setwd("/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_analysis/FQ_probDur/")
#
setwd("c:/EM_BoundaryN2FQ/FQ_publication/FQ_analysis/FQ_probDur/")
#

#

#
# INPUT:
#
ifile_em     <- "n2FQ_em.rda"
#
#ifile_corpus <- "/Users/Sarah/Documents/EM_Corpora/n2FQ/FQ.DWDS.SR.rda"
#
ifile_corpus <- "c:/EM_Corpora/n2FQ/FQ.DWDS.wid.rda"
#

#

#

#
# OUTPUT:
#
ofile <- "n2FQ_em_matrix.rda"
#

#
# LIBRARIES:
#
library(lme4)
#

#
# FUNCTIONS:
#
#source("/Users/Sarah/Documents/EM_Rfunctions/parseq_fast.R")
#
source("c:/EM_Rfunctions/parseq_fast.R")
#

#

#
source("~/Documents/EM_Rfunctions/probdur_id.R")
#
source("~/Documents/EM_Rfunctions/probdur_s.R")
#
source("~/Documents/EM_Rfunctions/probdur_w.R")
#

#

#
# LOAD DATA:
#
load(ifile_em)			# word-level matrix across subjects
#
str(em)
#

#
load(ifile_corpus)		# corpus information 
#
corpus <- FQ.dwds
#

#
# order em data frame:
#
em <- em[order(em$id,em$sn,em$wn),]
#

#
# (1) INDEPENDENT VARIABLES:
#
# (1.1) EXPERIMENTAL FACTORS:
#
# effect coding of factors:
#
em[,c("n2ab","n2bb","pvn2")] <- em[,c("n2ab","n2bb","prevw")]-0.5
#
em[,c("lxn1")] <- em[,c("cnpl1")]-1.5
#

#
# (1.2) CONTINUOUS PREDICTORS:
#
# add frequency as continuous predictor (corpus analytic):
#
em_snwn <- 1e2*em$sn + em$wn
#
c_snwn  <- 1e2*corpus$sn + corpus$wn
#

#
# frequency of word n+2 must be either low or frequency:
#
# (dependent on subject's conditions)
#
c_to_em <- match(em_snwn,c_snwn)
#
em[,c("f")] <- corpus[c_to_em,c("fh")] # default: high frq. n+2
#
xlowfn2 <- which(em[,c("n2ab")]==-0.5 & em[,c("twx")]==2)
#
em[,c("fl")] <- corpus[c_to_em,c("fl")]
#
em[xlowfn2,c("f")] <- em[xlowfn2,c("fl")] # replace by low frq. n+2
#
em <- em[,c(1:(dim(em)[2]-1))]
#

#
# add wid (unique word id):
#
em[,c("wid")] <- corpus[c_to_em,c("widh")] # default: high frq. n+2
#
em[,c("widl")] <- corpus[c_to_em,c("widl")]
#
em[xlowfn2,c("wid")] <- em[xlowfn2,c("widl")] # replace by low frq. n+2
#
em <- em[,c(1:(dim(em)[2]-1))]
#

#
# add wl (1/word length):
#
em[,c("wl")] <- 1/corpus[c_to_em, c("l")]
#

#
# add length and frequency information for n-2 (3),n-1 (1),n+1 (2),n+2 (4):
#
var <- c("wl", "f")
#
var1 <- paste(var, "1", sep="") # lag1
#
var2 <- paste(var, "2", sep="") # suc1
#
var3 <- paste(var, "3", sep="") # lag2
#
var4 <- paste(var, "4", sep="") # suc2
#

#
 em[        , var1] <- rbind(NA, em[-nrow(em), var])
#
 em[em$wn==1, var1] <- NA
#

#
 em[            , var2] <- rbind(em[-1, var],NA)
#
 em[em$wn==em$nw, var2] <- NA
#

#
 em[        , var3] <- rbind(NA, NA, em[-(c(nrow(em)-1,nrow(em))), var])
#
 em[em$wn<=2, var3] <- NA
#

#
 em[                , var4] <- rbind(em[-c(1,2), var],NA,NA)
#
 em[em$wn>=(em$nw-1), var4] <- NA
#

#

#

#
 
#
s <- parseq_fast(em$id*1000+em$sn)
#
	em[s[,1], c(var1,var3)] <- NA	# for 1st word in sentence
#
	em[s[,1]+1, c(var3)]    <- NA	# for 2nd word in sentence
#
	em[s[,2], c(var2,var4)] <- NA	# for last word in sentence
#
	em[s[,2]-1, c(var4)]    <- NA	# for last-1 word in sentence
#

#

#
# (2) DEPENDENT VARIABLES:
#
# (2.1) DURATIONS:
#
# add new variable FFD (first fixation duration):
#
em[,c("ffd")] <- rowSums(em[,c("d0","d1")], na.rm=TRUE)
#
em[which(em[,c("ffd")]==0),c("ffd")] <- NA
#

#
# add new variable gzd (gaze duration):
#
em[,c("gzd")] <- em[,c("gz")]
#

#
# add new variable sfd (single fixation duration):
#
em[,c("sfd")] <- em[,c("d0")]
#

#
# add new variable tvt (total viewing time):
#
em[,c("tvt")] <- em[,c("trt")]
#

#

#
# (2.2) PROBABILITIES:
#
# add new variable prx (refixation probability):
#
em[,c("prx")] <- rowSums(em[,c("p2","p3")])  # 1: prob of 2 and more fixations
#
em[which(em[,c("prx")] < 1),c("prx")] <- NA	 # NA: for skippings or fixations in regression origin
#
em[which(em[,c("p1")]==1),c("prx")] <- 0	 # 0: prob of 1 fixation (no refixation)
#

#
# add new variable psk (skipping probability):
#
em[,c("psk")] <- NA
#
em[which(em[,c("p0")]==1),c("psk")] <- 1 	 	             # 1: is skipped
#
em[which(rowSums(em[,c("p1","p2","p3")])>0),c("psk")] <- 0   # 0: is fixated
#

#
# add new variable prg (regression (goal) probability):
#
em[,c("prg")] <- ifelse(em[,c("rgc")]>0, 1, 0)	# 1: regression, 0: no regression
#

#

#
# (2.3) SACCADE TARGETING:
#
# add new variable ilp (initial landing position):
#
em[,c("ilp")] <- rowSums(em[,c("l0","l1")], na.rm=TRUE)   # letter position / word length
#
em[,c("ilp")] <- em[,c("ilp")]/(1/em[,c("wl")])	
#

#

#
# (3) CENTER CONTINUOUS PREDICTORS:
#
# NOTE: center for each word n, n+1, n+2 seperately (analoguous to DV belonging to each word-based analysis)
#
em_n0 <- em[which(em[,c("twx")]==0),]  # word n
#
em_n1 <- em[which(em[,c("twx")]==1),]  # word n+1
#
em_n2 <- em[which(em[,c("twx")]==2),]  # word n+2
#

#
# additional matrices (word n-1, word n+3):
#
em_nm1 <- em[which(em[,c("twx")]==0)-1,]
#
em_n3  <- em[which(em[,c("twx")]==2)+1,]
#

#
# 
#
centerVar <- c("wl","wl1","wl2","wl3","wl4","f","f1","f2","f3","f4")
#
em_n0[ ,centerVar]  <- scale(em_n0[ ,centerVar], scale=F)
#
em_n1[ ,centerVar]  <- scale(em_n1[ ,centerVar], scale=F)
#
em_n2[ ,centerVar]  <- scale(em_n2[ ,centerVar], scale=F)
#

#
em_nm1[ ,centerVar] <- scale(em_nm1[ ,centerVar], scale=F)
#
em_n3[ ,centerVar]  <- scale(em_n3[ ,centerVar], scale=F)
#

#
# CHECK:
#
summary(em_n0)
#

#
# SAVE MATRICES:
#
save(em, em_n0, em_n1, em_n2, em_n3, em_nm1
#
	, file=ofile)
setwd("/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_analysis/FQ_probDur/")
ifile_em     <- "n2FQ_em.rda"
#
ifile_corpus <- "/Users/Sarah/Documents/EM_Corpora/n2FQ/FQ.DWDS.SR.rda"
setwd("/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_probDur/")
ifile_em     <- "n2FQ_em.rda"
#
ifile_corpus <- "/Users/Sarah/Documents/EM_Corpora/n2FQ/FQ.DWDS.SR.rda"
ofile <- "n2FQ_em_matrix.rda"
library(lme4)
#

#
# FUNCTIONS:
source("/Users/Sarah/Documents/EM_Rfunctions/parseq_fast.R")
source("~/Documents/EM_Rfunctions/probdur_id.R")
#
source("~/Documents/EM_Rfunctions/probdur_s.R")
#
source("~/Documents/EM_Rfunctions/probdur_w.R")
load(ifile_em)			# word-level matrix across subjects
#
str(em)
load(ifile_corpus)		# corpus information 
#
corpus <- FQ.dwds
# order em data frame:
#
em <- em[order(em$id,em$sn,em$wn),]

#
# (1) INDEPENDENT VARIABLES:
#
# (1.1) EXPERIMENTAL FACTORS:
#
# effect coding of factors:
#
em[,c("n2ab","n2bb","pvn2")] <- em[,c("n2ab","n2bb","prevw")]-0.5
#
em[,c("lxn1")] <- em[,c("cnpl1")]-1.5
em_snwn <- 1e2*em$sn + em$wn
#
c_snwn  <- 1e2*corpus$sn + corpus$wn
#

#
# frequency of word n+2 must be either low or frequency:
#
# (dependent on subject's conditions)
#
c_to_em <- match(em_snwn,c_snwn)
#
em[,c("f")] <- corpus[c_to_em,c("fh")] # default: high frq. n+2
#
xlowfn2 <- which(em[,c("n2ab")]==-0.5 & em[,c("twx")]==2)
#
em[,c("fl")] <- corpus[c_to_em,c("fl")]
#
em[xlowfn2,c("f")] <- em[xlowfn2,c("fl")] # replace by low frq. n+2
#
em <- em[,c(1:(dim(em)[2]-1))]
#

#
# add wid (unique word id):
#
em[,c("wid")] <- corpus[c_to_em,c("widh")] # default: high frq. n+2
#
em[,c("widl")] <- corpus[c_to_em,c("widl")]
#
em[xlowfn2,c("wid")] <- em[xlowfn2,c("widl")] # replace by low frq. n+2
#
em <- em[,c(1:(dim(em)[2]-1))]
#

#
# add wl (1/word length):
#
em[,c("wl")] <- 1/corpus[c_to_em, c("l")]
#

#
# add length and frequency information for n-2 (3),n-1 (1),n+1 (2),n+2 (4):
#
var <- c("wl", "f")
#
var1 <- paste(var, "1", sep="") # lag1
#
var2 <- paste(var, "2", sep="") # suc1
#
var3 <- paste(var, "3", sep="") # lag2
#
var4 <- paste(var, "4", sep="") # suc2
#

#
 em[        , var1] <- rbind(NA, em[-nrow(em), var])
#
 em[em$wn==1, var1] <- NA
#

#
 em[            , var2] <- rbind(em[-1, var],NA)
#
 em[em$wn==em$nw, var2] <- NA
#

#
 em[        , var3] <- rbind(NA, NA, em[-(c(nrow(em)-1,nrow(em))), var])
#
 em[em$wn<=2, var3] <- NA
#

#
 em[                , var4] <- rbind(em[-c(1,2), var],NA,NA)
#
 em[em$wn>=(em$nw-1), var4] <- NA
#

#

#

#
 
#
s <- parseq_fast(em$id*1000+em$sn)
#
	em[s[,1], c(var1,var3)] <- NA	# for 1st word in sentence
#
	em[s[,1]+1, c(var3)]    <- NA	# for 2nd word in sentence
#
	em[s[,2], c(var2,var4)] <- NA	# for last word in sentence
#
	em[s[,2]-1, c(var4)]    <- NA	# for last-1 word in sentence
#

#

#
# (2) DEPENDENT VARIABLES:
#
# (2.1) DURATIONS:
#
# add new variable FFD (first fixation duration):
#
em[,c("ffd")] <- rowSums(em[,c("d0","d1")], na.rm=TRUE)
#
em[which(em[,c("ffd")]==0),c("ffd")] <- NA
#

#
# add new variable gzd (gaze duration):
#
em[,c("gzd")] <- em[,c("gz")]
#

#
# add new variable sfd (single fixation duration):
#
em[,c("sfd")] <- em[,c("d0")]
#

#
# add new variable tvt (total viewing time):
#
em[,c("tvt")] <- em[,c("trt")]
#

#

#
# (2.2) PROBABILITIES:
#
# add new variable prx (refixation probability):
#
em[,c("prx")] <- rowSums(em[,c("p2","p3")])  # 1: prob of 2 and more fixations
#
em[which(em[,c("prx")] < 1),c("prx")] <- NA	 # NA: for skippings or fixations in regression origin
#
em[which(em[,c("p1")]==1),c("prx")] <- 0	 # 0: prob of 1 fixation (no refixation)
#

#
# add new variable psk (skipping probability):
#
em[,c("psk")] <- NA
#
em[which(em[,c("p0")]==1),c("psk")] <- 1 	 	             # 1: is skipped
#
em[which(rowSums(em[,c("p1","p2","p3")])>0),c("psk")] <- 0   # 0: is fixated
#

#
# add new variable prg (regression (goal) probability):
#
em[,c("prg")] <- ifelse(em[,c("rgc")]>0, 1, 0)	# 1: regression, 0: no regression
#

#

#
# (2.3) SACCADE TARGETING:
#
# add new variable ilp (initial landing position):
#
em[,c("ilp")] <- rowSums(em[,c("l0","l1")], na.rm=TRUE)   # letter position / word length
#
em[,c("ilp")] <- em[,c("ilp")]/(1/em[,c("wl")])	
#

#

#
# (3) CENTER CONTINUOUS PREDICTORS:
#
# NOTE: center for each word n, n+1, n+2 seperately (analoguous to DV belonging to each word-based analysis)
#
em_n0 <- em[which(em[,c("twx")]==0),]  # word n
#
em_n1 <- em[which(em[,c("twx")]==1),]  # word n+1
#
em_n2 <- em[which(em[,c("twx")]==2),]  # word n+2
#

#
# additional matrices (word n-1, word n+3):
#
em_nm1 <- em[which(em[,c("twx")]==0)-1,]
#
em_n3  <- em[which(em[,c("twx")]==2)+1,]
#

#
# 
#
centerVar <- c("wl","wl1","wl2","wl3","wl4","f","f1","f2","f3","f4")
#
em_n0[ ,centerVar]  <- scale(em_n0[ ,centerVar], scale=F)
#
em_n1[ ,centerVar]  <- scale(em_n1[ ,centerVar], scale=F)
#
em_n2[ ,centerVar]  <- scale(em_n2[ ,centerVar], scale=F)
#

#
em_nm1[ ,centerVar] <- scale(em_nm1[ ,centerVar], scale=F)
#
em_n3[ ,centerVar]  <- scale(em_n3[ ,centerVar], scale=F)
#

#
# CHECK:
#
summary(em_n0)
str(corpus)
ifile_corpus <- "/Users/Sarah/Documents/EM_Corpora/n2FQ/FQ.DWDS.wid.rda"
load(ifile_corpus)		# corpus information 
#
corpus <- FQ.dwds
str(corpus)
ifile_corpus <- "/Users/Sarah/Documents/EM_Corpora/n2FQ/FQ.DWDS.SR.rda"
load(ifile_em)			# word-level matrix across subjects
#
str(em)
load(ifile_corpus)		# corpus information 
#
corpus <- FQ.dwds
#

#
# order em data frame:
#
em <- em[order(em$id,em$sn,em$wn),]
#

#
# (1) INDEPENDENT VARIABLES:
#
# (1.1) EXPERIMENTAL FACTORS:
#
# effect coding of factors:
#
em[,c("n2ab","n2bb","pvn2")] <- em[,c("n2ab","n2bb","prevw")]-0.5
#
em[,c("lxn1")] <- em[,c("cnpl1")]-1.5
#

#
# (1.2) CONTINUOUS PREDICTORS:
#
# add frequency as continuous predictor (corpus analytic):
#
em_snwn <- 1e2*em$sn + em$wn
#
c_snwn  <- 1e2*corpus$sn + corpus$wn
#

#
# frequency of word n+2 must be either low or frequency:
#
# (dependent on subject's conditions)
#
c_to_em <- match(em_snwn,c_snwn)
#
em[,c("f")] <- corpus[c_to_em,c("fh")] # default: high frq. n+2
#
xlowfn2 <- which(em[,c("n2ab")]==-0.5 & em[,c("twx")]==2)
#
em[,c("fl")] <- corpus[c_to_em,c("fl")]
#
em[xlowfn2,c("f")] <- em[xlowfn2,c("fl")] # replace by low frq. n+2
#
em <- em[,c(1:(dim(em)[2]-1))]
# add wid (unique word id):
#
em[,c("wid")] <- corpus[c_to_em,c("widh")] # default: high frq. n+2
#
em[,c("widl")] <- corpus[c_to_em,c("widl")]
#
em[xlowfn2,c("wid")] <- em[xlowfn2,c("widl")] # replace by low frq. n+2
#
em <- em[,c(1:(dim(em)[2]-1))]
# add wl (1/word length):
#
em[,c("wl")] <- 1/corpus[c_to_em, c("l")]
# add length and frequency information for n-2 (3),n-1 (1),n+1 (2),n+2 (4):
#
var <- c("wl", "f")
#
var1 <- paste(var, "1", sep="") # lag1
#
var2 <- paste(var, "2", sep="") # suc1
#
var3 <- paste(var, "3", sep="") # lag2
#
var4 <- paste(var, "4", sep="") # suc2

#
 em[        , var1] <- rbind(NA, em[-nrow(em), var])
#
 em[em$wn==1, var1] <- NA
#

#
 em[            , var2] <- rbind(em[-1, var],NA)
#
 em[em$wn==em$nw, var2] <- NA
#

#
 em[        , var3] <- rbind(NA, NA, em[-(c(nrow(em)-1,nrow(em))), var])
#
 em[em$wn<=2, var3] <- NA
#

#
 em[                , var4] <- rbind(em[-c(1,2), var],NA,NA)
#
 em[em$wn>=(em$nw-1), var4] <- NA
s <- parseq_fast(em$id*1000+em$sn)
#
	em[s[,1], c(var1,var3)] <- NA	# for 1st word in sentence
#
	em[s[,1]+1, c(var3)]    <- NA	# for 2nd word in sentence
#
	em[s[,2], c(var2,var4)] <- NA	# for last word in sentence
#
	em[s[,2]-1, c(var4)]    <- NA	# for last-1 word in sentence
em[,c("ffd")] <- rowSums(em[,c("d0","d1")], na.rm=TRUE)
#
em[which(em[,c("ffd")]==0),c("ffd")] <- NA
#

#
# add new variable gzd (gaze duration):
#
em[,c("gzd")] <- em[,c("gz")]
#

#
# add new variable sfd (single fixation duration):
#
em[,c("sfd")] <- em[,c("d0")]
#

#
# add new variable tvt (total viewing time):
#
em[,c("tvt")] <- em[,c("trt")]
# (2.2) PROBABILITIES:
#
# add new variable prx (refixation probability):
#
em[,c("prx")] <- rowSums(em[,c("p2","p3")])  # 1: prob of 2 and more fixations
#
em[which(em[,c("prx")] < 1),c("prx")] <- NA	 # NA: for skippings or fixations in regression origin
#
em[which(em[,c("p1")]==1),c("prx")] <- 0	 # 0: prob of 1 fixation (no refixation)
#

#
# add new variable psk (skipping probability):
#
em[,c("psk")] <- NA
#
em[which(em[,c("p0")]==1),c("psk")] <- 1 	 	             # 1: is skipped
#
em[which(rowSums(em[,c("p1","p2","p3")])>0),c("psk")] <- 0   # 0: is fixated
#

#
# add new variable prg (regression (goal) probability):
#
em[,c("prg")] <- ifelse(em[,c("rgc")]>0, 1, 0)	# 1: regression, 0: no regression
# (2.3) SACCADE TARGETING:
#
# add new variable ilp (initial landing position):
#
em[,c("ilp")] <- rowSums(em[,c("l0","l1")], na.rm=TRUE)   # letter position / word length
#
em[,c("ilp")] <- em[,c("ilp")]/(1/em[,c("wl")])
em_n0 <- em[which(em[,c("twx")]==0),]  # word n
em_n0 <- em[which(em[,c("twx")]==0),]  # word n
em_n0 <- em[which(em[,c("twx")]==0),]  # word n
em_n1 <- em[which(em[,c("twx")]==1),]  # word n+1
#
em_n2 <- em[which(em[,c("twx")]==2),]  # word n+2
# additional matrices (word n-1, word n+3):
#
em_nm1 <- em[which(em[,c("twx")]==0)-1,]
#
em_n3  <- em[which(em[,c("twx")]==2)+1,]
colnames(em_n0)
centerVar <- c("wl","wl1","wl2","wl3","wl4","f","f1","f2","f3","f4")
#
em_n0[ ,centerVar]  <- scale(em_n0[ ,centerVar], scale=F)
em_n1[ ,centerVar]  <- scale(em_n1[ ,centerVar], scale=F)
em_n2[ ,centerVar]  <- scale(em_n2[ ,centerVar], scale=F)
em_nm1[ ,centerVar] <- scale(em_nm1[ ,centerVar], scale=F)
em_n3[ ,centerVar]  <- scale(em_n3[ ,centerVar], scale=F)
save(em, em_n0, em_n1, em_n2, em_n3, em_nm1
#
	, file=ofile)
rm(list=ls())
setwd("/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_analysis/FQ_probDur/")
setwd("/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_probDur/")
# INPUT:
#
ifile_em     <- "n2FQ_em_matrix.rda"
#

#
# OUTPUT:
#
ofile_em     <- "n2FQ_em_filtered.rda"
# LIBRARIES:
#
library(lme4)
#
library(reshape)
#
library(ggplot)
#

#
# FUNCTIONS:
#

#

#
# LOAD DATA:
#
load(ifile_em)
#
ls()			# em, em_n0, em_n1, em_n2, em_n3, em_nm1
tmp <- expression(em_n0,em_n1,em_n2,em_n3,em_nm1)
#

#
for (k in 1:length(tmp))  {
#
	
#
em <- eval(tmp[k]) 
#

#
	#(1) FILTER FOR DURATIONS:
#
	xDurCrit1 <- which(em$sfd < 50 | em$sfd > 750)
#
	em$sfd[xDurCrit1] <- NA
#
	xDurCrit2 <- which(em$ffd < 50 | em$ffd > 750)
#
	em$ffd[xDurCrit2] <- NA
#
	xDurCrit3 <- which(em$gzd < 50 | em$gzd > 750)
#
	em$gzd[xDurCrit3] <- NA
#
	xDurCrit4 <- which(em$tvt < 50 | em$tvt > 750)
#
	em$tvt[xDurCrit4] <- NA
#

#
	# (2)INVALID FIRST-PASS:
#
	xFPinvalid <- which(em$xfp > 0)
#
	em[xFPinvalid, c("sfd","ffd","gzd","prx","psk","ilp")] <- NA	
#

#
	# (3)INVALID MORE-PASS:
#
	xMPinvalid <- which(em$xfp > 0 & em$xmp > 0)
#
	if (length(xMPinvalid) > 0)
#
	em[xMPinvalid, c("tvt","prg")] <- NA	
#

#
	# (4)FACTORIZE RANDOM VARIABLES:
#
	em$id <- as.factor(em$id)
#
	em$wid <- as.factor(em$wid)
#
	
#
	if (k==1) em_n0  <- em
#
	if (k==2) em_n1  <- em
#
	if (k==3) em_n2  <- em
#
	if (k==4) em_n3  <- em
#
	if (k==5) em_nm1  <- em
#

#
}
# (4) PREPARE CONTRASTS:
#

#
# RENAME TARGET WORD N+2 FREQUENCY:
#
em_n0$fqn2  <- em_n0$n2ab
#
em_n1$fqn2  <- em_n1$n2ab
#
em_n2$fqn2  <- em_n2$n2ab
#
em_n3$fqn2  <- em_n3$n2ab
#
em_nm1$fqn2 <- em_nm1$n2ab
#

#

#
# FACTORIZE VARIBALES FOR NESTING:
#
em_n0$LexStatN1 <- as.factor(em_n0$lxn1)
#
levels(em_n0$LexStatN1) <- c("CW","FW")
#
em_n1$LexStatN1 <- as.factor(em_n1$lxn1)
#
levels(em_n1$LexStatN1) <- c("CW","FW")
#
em_n2$LexStatN1 <- as.factor(em_n2$lxn1)
#
levels(em_n2$LexStatN1) <- c("CW","FW")
#
em_n3$LexStatN1 <- as.factor(em_n3$lxn1)
#
levels(em_n3$LexStatN1) <- c("CW","FW")
#
em_nm1$LexStatN1 <- as.factor(em_nm1$lxn1)
#
levels(em_nm1$LexStatN1) <- c("CW","FW")
#

#
em_n0$FrequencyN2 <- as.factor(em_n0$fqn2)
#
levels(em_n0$FrequencyN2) <- c("low","high")
#
em_n1$FrequencyN2 <- as.factor(em_n1$fqn2)
#
levels(em_n1$FrequencyN2) <- c("low","high")
#
em_n2$FrequencyN2 <- as.factor(em_n2$fqn2)
#
levels(em_n2$FrequencyN2) <- c("low","high")
#
em_n3$FrequencyN2 <- as.factor(em_n3$fqn2)
#
levels(em_n3$FrequencyN2) <- c("low","high")
#
em_nm1$FrequencyN2 <- as.factor(em_nm1$fqn2)
#
levels(em_nm1$FrequencyN2) <- c("low","high")
#

#
em_n0$PreviewN2<- as.factor(em_n0$pvn2)
#
levels(em_n0$PreviewN2) <- c("identical","inverse")
#
em_n1$PreviewN2<- as.factor(em_n1$pvn2)
#
levels(em_n1$PreviewN2) <- c("identical","inverse")
#
em_n2$PreviewN2<- as.factor(em_n2$pvn2)
#
levels(em_n2$PreviewN2) <- c("identical","inverse")
#
em_n3$PreviewN2<- as.factor(em_n3$pvn2)
#
levels(em_n3$PreviewN2) <- c("identical","inverse")
#
em_nm1$PreviewN2<- as.factor(em_nm1$pvn2)
#
levels(em_nm1$PreviewN2) <- c("identical","inverse")
#

#
# (4.1) CONTRASTS 1: Frequency:Preview
#
em_n0$cfp <- as.factor(paste(em_n0$FrequencyN2, em_n0$PreviewN2))
#
em_n1$cfp <- as.factor(paste(em_n1$FrequencyN2, em_n1$PreviewN2))
#
em_n2$cfp <- as.factor(paste(em_n2$FrequencyN2, em_n2$PreviewN2))
#
em_n3$cfp <- as.factor(paste(em_n3$FrequencyN2, em_n3$PreviewN2))
#
em_nm1$cfp <- as.factor(paste(em_nm1$FrequencyN2, em_nm1$PreviewN2))
#

#
mat <- matrix(c(    1,  1, -1, -1,    # FrequencyN2 
#
			  0,  0, -1,  1,    # low:PreviewN2
#
			 -1,  1,  0,  0),4,3)/2   # high:PreviewN2
#
				
#
colnames(mat) <- c(".fqn2", ".LFn2:pvn2", ".HFn2:pvn2")
#

#
em_n0$cfp <- C(em_n0$cfp, mat, 3)
#
em_n1$cfp <- C(em_n1$cfp, mat, 3)
#
em_n2$cfp <- C(em_n2$cfp, mat, 3)
#
em_n3$cfp <- C(em_n3$cfp, mat, 3)
#
em_nm1$cfp <- C(em_nm1$cfp, mat, 3)
#

#

#
# (4.2) CONTRASTS 2: LexStat:Preview
#
em_n0$clp <- as.factor(paste(em_n0$LexStatN1, em_n0$PreviewN2))
#
em_n1$clp <- as.factor(paste(em_n1$LexStatN1, em_n1$PreviewN2))
#
em_n2$clp <- as.factor(paste(em_n2$LexStatN1, em_n2$PreviewN2))
#
em_n3$clp <- as.factor(paste(em_n3$LexStatN1, em_n3$PreviewN2))
#
em_nm1$clp <- as.factor(paste(em_nm1$LexStatN1, em_nm1$PreviewN2))
#

#
mat <- matrix(c(   -1, -1,  1,  1,    # LexStatN1
#
			  0,  0, -1,  1,    # FW:PreviewN2
#
			 -1,  1,  0,  0),4,3)/2   # CW:PreviewN2
#
				
#
colnames(mat) <- c(".lxn1", ".FWn1:pvn2", ".CWn1:pvn2")
#

#
em_n0$clp <- C(em_n0$clp, mat, 3)
#
em_n1$clp <- C(em_n1$clp, mat, 3)
#
em_n2$clp <- C(em_n2$clp, mat, 3)
#
em_n3$clp <- C(em_n3$clp, mat, 3)
#
em_nm1$clp <- C(em_nm1$clp, mat, 3)
#

#

#
# (4.3) CONTRASTS 3: LexStat:Frequency:Preview
#
em_n0$clfp <- as.factor(paste(em_n0$LexStatN1, em_n0$FrequencyN2, em_n0$PreviewN2))
#
em_n1$clfp <- as.factor(paste(em_n1$LexStatN1, em_n1$FrequencyN2, em_n1$PreviewN2))
#
em_n2$clfp <- as.factor(paste(em_n2$LexStatN1, em_n2$FrequencyN2, em_n2$PreviewN2))
#
em_n3$clfp <- as.factor(paste(em_n3$LexStatN1, em_n3$FrequencyN2, em_n3$PreviewN2))
#
em_nm1$clfp <- as.factor(paste(em_nm1$LexStatN1, em_nm1$FrequencyN2, em_nm1$PreviewN2))
#

#
mat <- matrix(c(   -1, -1, -1, -1,  1,  1,  1,  1,    # LexStatN1
#
			  1,  1, -1, -1,  0,  0,  0,  0,    # CW:FrequencyN2
#
			 -1,  1,  0,  0,  0,  0,  0,  0,	# CW:high:PreviewN2
#
			  0,  0, -1,  1,  0,  0,  0,  0,    # CW:low:PreviewN2
#
			  0,  0,  0,  0,  1,  1, -1, -1,    # FW:FrequencyN2
#
			  0,  0,  0,  0, -1,  1,  0,  0,    # FW:high:PreviewN2
#
			  0,  0,  0,  0,  0,  0, -1,  1),8,7)/2   # FW:low:PreviewN2
#
				
#
colnames(mat) <- c(".lxn1", ".CWn1:fqn2", ".CWn1:HFn2:pvn2", ".CWn1:LFn2:pvn2"
#
, ".FWn1:fqn2", ".FWn1:HFn2:pvn2", ".FWn1:LFn2:pvn2")
#

#
em_n0$clfp <- C(em_n0$clfp, mat, 7)
#
em_n1$clfp <- C(em_n1$clfp, mat, 7)
#
em_n2$clfp <- C(em_n2$clfp, mat, 7)
#
em_n3$clfp <- C(em_n3$clfp, mat, 7)
#
em_nm1$clfp <- C(em_nm1$clfp, mat, 7)
#

#

#
save(em_n0, em_n1, em_n2, em_n3, em_nm1, 
#
file=ofile_em)
rm(list=ls())
#

#
# SET WORKING DIRECTORY:
#
getwd()
#
setwd("/Users/Sarah/Documents/EM_Manuscripts/2011.Article.dPoF.N2/pmr2")#
dir()#

#
# INPUT:#
# (1) data frame containing eye-movement (em) measures for different words:#
# em_nm1: word n-1#
# em_n0: preboundary word n#
# em_n1: postboundary word n+1#
# em_n2: target word n+2#
# em_n3: posttarget word n+3
#
ifile_em   <- c("n2FQ_em_filtered.rda")#
#
# (2)
#
ifile_info <- c("n2FQ_sinfo.rda")
#
ifile_corpus <- c("FQ.DWDS.wid.rda")
#

#

#

#
# OUTPUT:
#

#

#
# LIBRARIES:
#
library(lme4)
#
library(reshape)
#
library(MASS)
#
library(Hmisc)
#
library(ggplot2)
#

#
# FUNCTIONS:
#
source("/Users/Sarah/Documents/EM_Rfunctions/remef.R")
#

#
# LOAD DATA:
#
load(ifile_em)
#
ls()			# em_n0, em_n1, em_n2, em_n3, em_nm1#
str(em_n0)
ifile_em   <- c("/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_RData/n2FQ_em_filtered.rda")
# LOAD DATA:
#
load(ifile_em)
#
ls()			# em_n0, em_n1, em_n2, em_n3, em_nm1#
str(em_n0)
em_n2$sn1 <- em_n1$p0 - 0.5#
em_n0$sn1 <- em_n1$p0 - 0.5#
#
em_n1$sn2 <- em_n2$p0 - 0.5
setwd("/Users/Sarah/Documents/EM_Manuscripts/2011.Article.dPoF.N2/pmr2")
ifile_em   <- c("n2FQ_em_filtered.rda")
# RECODE n2bb and n2ab:#
# needs to reflect (1) LPD,     (2) HPD#
# at the moment:   (1) low frq, (2) high frq#
em_nm1$n2bb <- ifelse(em_nm1$n2bb==-0.5, 0.5, -0.5)#
em_n0$n2bb <- ifelse(em_n0$n2bb==-0.5, 0.5, -0.5)#
em_n1$n2bb <- ifelse(em_n1$n2bb==-0.5, 0.5, -0.5)#
em_n2$n2bb <- ifelse(em_n2$n2bb==-0.5, 0.5, -0.5)#
em_n3$n2bb <- ifelse(em_n3$n2bb==-0.5, 0.5, -0.5)#
#
em_nm1$n2ab <- ifelse(em_nm1$n2ab==-0.5, 0.5, -0.5)#
em_n0$n2ab <- ifelse(em_n0$n2ab==-0.5, 0.5, -0.5)#
em_n1$n2ab <- ifelse(em_n1$n2ab==-0.5, 0.5, -0.5)#
em_n2$n2ab <- ifelse(em_n2$n2ab==-0.5, 0.5, -0.5)#
em_n3$n2ab <- ifelse(em_n3$n2ab==-0.5, 0.5, -0.5)
keep1 <- c("id","sn","wn","nw","wid","cond","f2bb","f2ab","n2bb",       #
"n2ab","pvn2","lxn1","f",        #
"wl","wl1","f1","wl2","f2","wl3","f3",#
 "wl4","f4","ffd","gzd","sfd","tvt","prx",#
"psk","prg","ilp")  #
#
keep2 <- c("id","sn","wn","nw","wid","cond","f2bb","f2ab","n2bb",       #
"n2ab","pvn2","lxn1","sn1","f",        #
"wl","wl1","f1","wl2","f2","wl3","f3",#
 "wl4","f4","ffd","gzd","sfd","tvt","prx",#
"psk","prg","ilp")  #
#
keep3 <- c("id","sn","wn","nw","wid","cond","f2bb","f2ab","n2bb",       #
"n2ab","pvn2","lxn1","sn2","f",        #
"wl","wl1","f1","wl2","f2","wl3","f3",#
 "wl4","f4","ffd","gzd","sfd","tvt","prx",#
"psk","prg","ilp")  #
#
#
em_n0 <- em_n0[,keep2]#
em_n1 <- em_n1[,keep3]#
em_n2 <- em_n2[,keep2]#
em_n3 <- em_n3[,keep1]#
em_nm1 <- em_nm1[,keep1]
colnames(em_n0)
colnames(em_n1)
colnames(em_n2)
colnames(em_n3)
save(em_nm1, em_n0, em_n1, em_n2, em_n3, file=ifile_em)
em_n1[1:4,c("cond","n2bb","n2ab")]
em_nm1$n2bb <- ifelse(em_nm1$n2bb==-0.5, 0.5, -0.5)#
em_n0$n2bb <- ifelse(em_n0$n2bb==-0.5, 0.5, -0.5)#
em_n1$n2bb <- ifelse(em_n1$n2bb==-0.5, 0.5, -0.5)#
em_n2$n2bb <- ifelse(em_n2$n2bb==-0.5, 0.5, -0.5)#
em_n3$n2bb <- ifelse(em_n3$n2bb==-0.5, 0.5, -0.5)#
#
em_nm1$n2ab <- ifelse(em_nm1$n2ab==-0.5, 0.5, -0.5)#
em_n0$n2ab <- ifelse(em_n0$n2ab==-0.5, 0.5, -0.5)#
em_n1$n2ab <- ifelse(em_n1$n2ab==-0.5, 0.5, -0.5)#
em_n2$n2ab <- ifelse(em_n2$n2ab==-0.5, 0.5, -0.5)#
em_n3$n2ab <- ifelse(em_n3$n2ab==-0.5, 0.5, -0.5)
ifile_em
getwd()
save(em_nm1, em_n0, em_n1, em_n2, em_n3, file=ifile_em)
em_n1[1:4,c("cond","n2bb","n2ab")]
rm(list=ls())
#

#
# SET WORKING DIRECTORY:
#
getwd()
#
setwd("/Users/Sarah/Documents/EM_Manuscripts/2011.Article.dPoF.N2/pmr2")#
dir()#

#
# INPUT:#
# (1) data frame containing eye-movement (em) measures for different words:#
# em_nm1: word n-1#
# em_n0: preboundary word n#
# em_n1: postboundary word n+1#
# em_n2: target word n+2#
# em_n3: posttarget word n+3
#
ifile_em   <- c("n2FQ_em_filtered.rda")#
#
# (2)
#
ifile_info <- c("n2FQ_sinfo.rda")
#
ifile_corpus <- c("FQ.DWDS.wid.rda")
#

#

#

#
# OUTPUT:
#

#

#
# LIBRARIES:
#
library(lme4)
#
library(reshape)
#
library(MASS)
#
library(Hmisc)
#
library(ggplot2)
#

#
# FUNCTIONS:
#
source("/Users/Sarah/Documents/EM_Rfunctions/remef.R")
#

#
# LOAD DATA:
#
load(ifile_em)
#
ls()			# em_n0, em_n1, em_n2, em_n3, em_nm1#
str(em_n0)
em_n3[1:4,c("cond","n2bb","n2ab")]
em_n3[1:4,c("cond","n2bb","n2ab","pvn2")]
ifile_info <- c("n2FQ_sinfo.rda")
load(ifile_info)
ls()
str(sinfo)
ifile_corpus <- c("FQ.DWDS.wid.rda")
idxCorpusToData <- match(em_n2$sn, sinfo$sn)
em_n2[,c("wtf_hf_abs", "wtf_lf_abs", "wtf_hf_pm",  "wtf_lf_pm", "wtp_hf", "wtp_lf", "f_hf_abs"  
#
		, "f_lf_abs",  "f_hf_pm",    "f_lf_pm",    "ibf_hf_pm", "ibf_lf_pm", "itf_hf_pm", "itf_lf_pm"
#
		, "wtp_hf", "wtp_lf")] <- sinfo[idxCorpusToData,c("wtf_hf_abs", "wtf_lf_abs", "wtf_hf_pm"
#
		, "wtf_lf_pm", "wtp_hf", "wtp_lf" , "f_hf_abs", "f_lf_abs", "f_hf_pm", "f_lf_pm", "ibf_hf_pm"
#
		, "ibf_lf_pm", "itf_hf_pm", "itf_lf_pm", "wtp_hf", "wtp_lf")]
# word trigram frequency (word n, n+1, n+2):
#
wtf_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$wtf_hf_pm, em_n2$wtf_lf_pm)
#
wtf_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$wtf_hf_pm, em_n2$wtf_lf_pm)
# conditional probability of word trigram (word n, n+1, n+2):
#
wtp_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$wtp_hf, em_n2$wtp_lf)
#
wtp_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$wtp_hf, em_n2$wtp_lf)
# initial bigram frequency of word n+2:
#
ibf_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$ibf_hf_pm, em_n2$ibf_lf_pm)
#
ibf_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$ibf_hf_pm, em_n2$ibf_lf_pm)
# initial trigram frequency of word n+2:
#
itf_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$itf_hf_pm, em_n2$itf_lf_pm)
#
itf_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$itf_hf_pm, em_n2$itf_lf_pm)
# (whole) word frequency of word n+2:
#
f_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$f_hf_pm, em_n2$f_lf_pm)
#
f_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$f_hf_pm, em_n2$f_lf_pm)
em_n2[,c("wtf_bb","wtf_ab","ibf_bb","ibf_ab","itf_bb","itf_ab","f_bb","f_ab","wtp_bb","wtp_ab")] <- c(wtf_bb,wtf_ab,log10(ibf_bb),log10(ibf_ab),log10(itf_bb),log10(itf_ab),log10(f_bb),log10(f_ab),wtp_bb,wtp_ab)
em_n1[,c("wtf_bb","wtf_ab","ibf_bb","ibf_ab","itf_bb","itf_ab","f_bb","f_ab","wtp_bb","wtp_ab")] <- c(wtf_bb,wtf_ab,log10(ibf_bb),log10(ibf_ab),log10(itf_bb),log10(itf_ab),log10(f_bb),log10(f_ab),wtp_bb,wtp_ab)
#

#
em_n0[,c("wtf_bb","wtf_ab","ibf_bb","ibf_ab","itf_bb","itf_ab","f_bb","f_ab","wtp_bb","wtp_ab")] <- c(wtf_bb,wtf_ab,log10(ibf_bb),log10(ibf_ab),log10(itf_bb),log10(itf_ab),log10(f_bb),log10(f_ab),wtp_bb,wtp_ab)
# TEST CORRELATION BETWEEN PREDICTORS:
#
# (1) n2bb vs. word trigram predictability:
#
cor(em_n0$n2bb,em_n0$wtp_bb)  # r= -0.06; NOTE: correlations negative because n2bb is coded positive
#
# [-0.5: easy; 0.5: difficult] and continuous predictors are always coded negative
#
# [small values (e.g., low pred): difficult; high values (e.g., high freq): easy]
# before entering into LMMs...
#
# ALL OTHER PREDICTORS MUST BE CENTERED:
#

#
centerVar <- c("wtf_bb","wtf_ab","ibf_bb","ibf_ab","itf_bb","itf_ab","f_bb","f_ab")
#
em_n0[ ,centerVar]  <- scale(em_n0[ ,centerVar], scale=F)
#
em_n1[ ,centerVar]  <- scale(em_n1[ ,centerVar], scale=F)
#
em_n2[ ,centerVar]  <- scale(em_n2[ ,centerVar], scale=F)
xdat <- em_n0
help(lmer)

#
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.0, cor=F)

#
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.0, cor=F)
str(em_n0)
t1 <- which(em_n0$sn>80)
t2 <- which(em_n0$sn <=80)
head(em_n0[t1,c("lxn1","f")])
head(em_n0[t2,c("lxn1","f")])
head(em_n0[t1,c("sn","lxn1","f2")])
head(em_n0[t2,c("sn","lxn1","f2")])
#
em_nm1$lxn1 <- ifelse(em_nm1$lxn1==-0.5, 0.5, -0.5)#
em_n0$lxn1 <- ifelse(em_n0$lxn1==-0.5, 0.5, -0.5)#
em_n1$lxn1 <- ifelse(em_n1$lxn1==-0.5, 0.5, -0.5)#
em_n2$lxn1 <- ifelse(em_n2$lxn1==-0.5, 0.5, -0.5)#
em_n3$lxn1 <- ifelse(em_n3$lxn1==-0.5, 0.5, -0.5)
save(em_nm1, em_n0, em_n1, em_n2, em_n3, file=ifile_em)

#
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.0, cor=F)
head(em_n0[t1,c("sn","lxn1","f2")])
xdat <- em_n0
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.0, cor=F)
lm.1 <- lmer(log(sfd) ~ (sn1+lxn1+n2bb)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
lm.2 <- lmer(log(ffd) ~ (sn1+lxn1+n2bb)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.2, cor=F)
xdat <- em_n1
lm.0 <- lmer(log(gzd) ~ (lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE)
#
#
lm.1 <- lmer(log(sfd) ~ (lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)#
#
lm.2 <- lmer(log(ffd) ~ (lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)
print(lm.0, cor=F)
print(lm.1, cor=F)
print(lm.2, cor=F)
xdat <- em_n2#
# LMMs REPORTED IN MAIN ANALYSIS:#
#
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)#
#
lm.1 <- lmer(log(ffd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)#
#
lm.2 <- lmer(log(sfd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)#
#
print(lm.0, cor=F)
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)
print(lm.0, cor=F)
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)
print(lm.0, cor=F)
xdat <- em_n2
lm.1 <- lmer(log(ffd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)
print(lm.1, cor=F)
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=FALSE)
print(lm.0, cor=F)
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=FALSE)
print(lm.0, cor=F)
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)
print(lm.0, cor=F)
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) +  (1|sn)#
, data=xdat, REML=TRUE)
print(lm.0, cor=F)
ifile_em   <- c("/Users/Sarah/Documents/EM_BoundaryN2FQ/FQ_publication/FQ_RData/n2FQ_em_filtered.rda")
load(ifile_em)
xdat <- em_n2#
# LMMs REPORTED IN MAIN ANALYSIS:#
#
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)
em_nm1$n2bb <- ifelse(em_nm1$n2bb==-0.5, 0.5, -0.5)#
em_n0$n2bb <- ifelse(em_n0$n2bb==-0.5, 0.5, -0.5)#
em_n1$n2bb <- ifelse(em_n1$n2bb==-0.5, 0.5, -0.5)#
em_n2$n2bb <- ifelse(em_n2$n2bb==-0.5, 0.5, -0.5)#
em_n3$n2bb <- ifelse(em_n3$n2bb==-0.5, 0.5, -0.5)#
#
em_nm1$n2ab <- ifelse(em_nm1$n2ab==-0.5, 0.5, -0.5)#
em_n0$n2ab <- ifelse(em_n0$n2ab==-0.5, 0.5, -0.5)#
em_n1$n2ab <- ifelse(em_n1$n2ab==-0.5, 0.5, -0.5)#
em_n2$n2ab <- ifelse(em_n2$n2ab==-0.5, 0.5, -0.5)#
em_n3$n2ab <- ifelse(em_n3$n2ab==-0.5, 0.5, -0.5)
em_n2$sn1 <- em_n1$p0 - 0.5#
em_n0$sn1 <- em_n1$p0 - 0.5#
#
em_n1$sn2 <- em_n2$p0 - 0.5
xdat <- em_n2#
# LMMs REPORTED IN MAIN ANALYSIS:#
#
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)
print(lm.0, cor=F)
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)
print(lm.0, cor=F)
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=FALSE)
print(lm.0, cor=F)
rm(list=ls())
#

#
# SET WORKING DIRECTORY:
#
getwd()
#
setwd("/Users/Sarah/Documents/EM_Manuscripts/2011.Article.dPoF.N2/pmr2")#
dir()#

#
# INPUT:#
# (1) data frame containing eye-movement (em) measures for different words:#
# em_nm1: word n-1#
# em_n0: preboundary word n#
# em_n1: postboundary word n+1#
# em_n2: target word n+2#
# em_n3: posttarget word n+3
#
ifile_em   <- c("n2FQ_em_filtered.rda")#
#
# (2) data frame containing additional word information#
# based on the dlex-database  http://dlexdb.de/:  
#
ifile_info <- c("n2FQ_sinfo.rda")
#
ifile_corpus <- c("FQ.DWDS.wid.rda")
#

#

#

#
# OUTPUT:
#

#

#
# LIBRARIES:
#
library(lme4)
#
library(reshape)
#
library(MASS)
#
library(Hmisc)
#
library(ggplot2)
#

#
# FUNCTIONS:
#
source("/Users/Sarah/Documents/EM_Rfunctions/remef.R")
#

#
# LOAD DATA:
#
load(ifile_em)
#
ls()			# em_n0, em_n1, em_n2, em_n3, em_nm1#
str(em_n0)#
#'data.frame':	7705 obs. of  30 variables:#
# $ id  : subject identification#
# $ sn  : sentence number#
# $ wn  : current word number in sentence#
# $ nw  : total number of words in sentence#
# $ wid : unique word identification number across all sentences (same word gets same number)#
# $ cond: n+2 processing demand conditions (1) HF-HF (2) LF-HF (3) HF-LF (4) LF-LF#
# $ f2bb: log-frequency of word n+2 preview before boundary#
# $ f2ab: log-frequency of word n+2 target after boundary#
# $ n2bb: n+2 PREVIEW difficulty (-0.5: easy (HF); 0.5: difficult (LF))#
# $ n2ab: n+2 TARGET  difficulty (-0.5: easy (HF); 0.5: difficult (LF))#
# $ pvn2: n+2 preview condition (-0.5: correct preview - no change; 0.5: incorrect preview - display change)#
# $ sn1:  n+1 skipping status (-0.5: fixated; 0.5: skipped)#
# $ sn2:  n+2 skipping status (-0.5: fixated; 0.5: skipped)#
# $ lxn1: n+1 lexical status (-0.5: function word; 0.5: content word)#
# $ f   : centered log-frequency of the fixated word#
# $ wl  : centered 1/word length of the fixated word#
# $ wl1 : centered wl for the word to the left (lag-effect)#
# $ f1  : centered f  for the word to the left (lag-effect)#
# $ wl2 : centered wl for the word to the right (successor-effect)#
# $ f2  : centered f  for the word to the right (successor-effect)#
# $ wl3 : centered wl for the word two words to the left#
# $ f3  : centered f  for the word two words to the left#
# $ wl4 : centered wl for the word two words to the right#
# $ f4  : centered f  for the word two words to the right#
# $ ffd : first fixation duration#
# $ gzd : gaze duration#
# $ sfd : single fixation duration#
# $ tvt : total viewing time#
# $ prx : probability of refixation#
# $ psk : probability of skipping#
# $ prg : probability of regression#
# $ ilp : intial landing position (letter position/word length)#

#
load(ifile_info)
#
str(sinfo)#
#'data.frame':	160 obs. of  15 variables:#
# $ sn        : sentence number#
# $ wtf_hf_abs: absolute word trigram frequency for HF n+2#
# $ wtf_lf_abs: absolute word trigram frequency for LF n+2#
# $ wtf_hf_pm : word trigram frequency for HF n+2 per million#
# $ wtf_lf_pm : word trigram frequency for LF n+2 per million#
# $ wtp_hf    : probability of word trigram for HF n+2#
# $ wtp_lf    : probability of word trigram for LF n+2#
# $ f_hf_abs  : absolute word frequency for HF n+2#
# $ f_lf_abs  : absolute word frequency for LF n+2#
# $ f_hf_pm   : word frequency for HF n+2 per million#
# $ f_lf_pm   : word frequency for LF n+2 per million#
# $ ibf_hf_pm : initial bigram frequency for HF n+2 (per million)#
# $ ibf_lf_pm : initial bigram frequency for LF n+2 (per million)#
# $ itf_hf_pm : initial trigram frequency for HF n+2 (per million)#
# $ itf_lf_pm : initial trigram frequency for LF n+2 (per million)#

#

#
#------------------------------------#
#
# EXPERIMENT 1:
#
# YOUNG ADULTS (BOUNDARY-N2-FQ):
#
#------------------------------------#
#

#
#----------------------------------------#
#
# add new corpus information 
#
# (e.g., initial bigram/trigram frequency)
#
#----------------------------------------#
#

#
idxCorpusToData <- match(em_n2$sn, sinfo$sn)
#

#

#
em_n2[,c("wtf_hf_abs", "wtf_lf_abs", "wtf_hf_pm",  "wtf_lf_pm", "wtp_hf", "wtp_lf", "f_hf_abs"  
#
		, "f_lf_abs",  "f_hf_pm",    "f_lf_pm",    "ibf_hf_pm", "ibf_lf_pm", "itf_hf_pm", "itf_lf_pm"
#
		, "wtp_hf", "wtp_lf")] <- sinfo[idxCorpusToData,c("wtf_hf_abs", "wtf_lf_abs", "wtf_hf_pm"
#
		, "wtf_lf_pm", "wtp_hf", "wtp_lf" , "f_hf_abs", "f_lf_abs", "f_hf_pm", "f_lf_pm", "ibf_hf_pm"
#
		, "ibf_lf_pm", "itf_hf_pm", "itf_lf_pm", "wtp_hf", "wtp_lf")]
#

#

#
# word trigram frequency (word n, n+1, n+2):
#
wtf_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$wtf_hf_pm, em_n2$wtf_lf_pm)
#
wtf_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$wtf_hf_pm, em_n2$wtf_lf_pm)
#

#
# conditional probability of word trigram (word n, n+1, n+2):
#
wtp_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$wtp_hf, em_n2$wtp_lf)
#
wtp_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$wtp_hf, em_n2$wtp_lf)
#

#
# initial bigram frequency of word n+2:
#
ibf_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$ibf_hf_pm, em_n2$ibf_lf_pm)
#
ibf_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$ibf_hf_pm, em_n2$ibf_lf_pm)
#

#
# initial trigram frequency of word n+2:
#
itf_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$itf_hf_pm, em_n2$itf_lf_pm)
#
itf_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$itf_hf_pm, em_n2$itf_lf_pm)
#

#
# (whole) word frequency of word n+2:
#
f_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$f_hf_pm, em_n2$f_lf_pm)
#
f_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$f_hf_pm, em_n2$f_lf_pm)
#

#

#

#

#
em_n2[,c("wtf_bb","wtf_ab","ibf_bb","ibf_ab","itf_bb","itf_ab","f_bb","f_ab","wtp_bb","wtp_ab")] <- c(wtf_bb,wtf_ab,log10(ibf_bb),log10(ibf_ab),log10(itf_bb),log10(itf_ab),log10(f_bb),log10(f_ab),wtp_bb,wtp_ab)
#

#
em_n1[,c("wtf_bb","wtf_ab","ibf_bb","ibf_ab","itf_bb","itf_ab","f_bb","f_ab","wtp_bb","wtp_ab")] <- c(wtf_bb,wtf_ab,log10(ibf_bb),log10(ibf_ab),log10(itf_bb),log10(itf_ab),log10(f_bb),log10(f_ab),wtp_bb,wtp_ab)
#

#
em_n0[,c("wtf_bb","wtf_ab","ibf_bb","ibf_ab","itf_bb","itf_ab","f_bb","f_ab","wtp_bb","wtp_ab")] <- c(wtf_bb,wtf_ab,log10(ibf_bb),log10(ibf_ab),log10(itf_bb),log10(itf_ab),log10(f_bb),log10(f_ab),wtp_bb,wtp_ab)
#

#

#

#

#
# TEST CORRELATION BETWEEN PREDICTORS:
#
# (1) n2bb vs. word trigram predictability:
#
cor(em_n0$n2bb,em_n0$wtp_bb)  # r= -0.06; NOTE: correlations negative because n2bb is coded positive
#
# [-0.5: easy; 0.5: difficult] and continuous predictors are always coded negative
#
# [small values (e.g., low pred): difficult; high values (e.g., high freq): easy]
#

#
# (2) n2bb vs. initial bigram frequency:
#
cor(em_n0$n2bb,em_n0$ibf_bb)  # r = -0.45
#

#
# (3) n2bb vs. initial trigram frequency:
#
cor(em_n0$n2bb,em_n0$itf_bb)  # r = -0.72
#

#
# (4) ibf vs. initial trigram frequency:
#
cor(em_n0$ibf_bb,em_n0$itf_bb)  # r = 0.65
#

#
# (5) f_bb vs. initial bigram frequency:
#
cor(em_n0$f_bb,em_n0$ibf_bb)  # r = 0.45
#

#
# (6) f_bb vs. initial trigram frequency:
#
cor(em_n0$f_bb,em_n0$itf_bb)  # r = 0.77
#

#

#
# before entering into LMMs...
#
# ALL OTHER PREDICTORS MUST BE CENTERED:
#

#
centerVar <- c("wtf_bb","wtf_ab","ibf_bb","ibf_ab","itf_bb","itf_ab","f_bb","f_ab")
#
em_n0[ ,centerVar]  <- scale(em_n0[ ,centerVar], scale=F)
#
em_n1[ ,centerVar]  <- scale(em_n1[ ,centerVar], scale=F)
#
em_n2[ ,centerVar]  <- scale(em_n2[ ,centerVar], scale=F)
rm(list=ls())
#

#
# SET WORKING DIRECTORY:
#
getwd()
#
setwd("/Users/Sarah/Documents/EM_Manuscripts/2011.Article.dPoF.N2/pmr2")#
dir()#

#
# INPUT:#
# (1) data frame containing eye-movement (em) measures for different words:#
# em_nm1: word n-1#
# em_n0: preboundary word n#
# em_n1: postboundary word n+1#
# em_n2: target word n+2#
# em_n3: posttarget word n+3
#
ifile_em   <- c("n2FQ_em_filtered.rda")#
#
# (2) data frame containing additional word information#
# based on the dlex-database  http://dlexdb.de/:  
#
ifile_info <- c("n2FQ_sinfo.rda")
#
ifile_corpus <- c("FQ.DWDS.wid.rda")
#

#

#

#
# OUTPUT:
#

#

#
# LIBRARIES:
#
library(lme4)
#
library(reshape)
#
library(MASS)
#
library(Hmisc)
#
library(ggplot2)
#

#
# FUNCTIONS:
#
source("/Users/Sarah/Documents/EM_Rfunctions/remef.R")
#

#
# LOAD DATA:
#
load(ifile_em)
#
ls()			# em_n0, em_n1, em_n2, em_n3, em_nm1#
str(em_n0)
load(ifile_info)
#
str(sinfo)#
#'data.frame':	160 obs. of  15 variables:#
# $ sn        : sentence number#
# $ wtf_hf_abs: absolute word trigram frequency for HF n+2#
# $ wtf_lf_abs: absolute word trigram frequency for LF n+2#
# $ wtf_hf_pm : word trigram frequency for HF n+2 per million#
# $ wtf_lf_pm : word trigram frequency for LF n+2 per million#
# $ wtp_hf    : probability of word trigram for HF n+2#
# $ wtp_lf    : probability of word trigram for LF n+2#
# $ f_hf_abs  : absolute word frequency for HF n+2#
# $ f_lf_abs  : absolute word frequency for LF n+2#
# $ f_hf_pm   : word frequency for HF n+2 per million#
# $ f_lf_pm   : word frequency for LF n+2 per million#
# $ ibf_hf_pm : initial bigram frequency for HF n+2 (per million)#
# $ ibf_lf_pm : initial bigram frequency for LF n+2 (per million)#
# $ itf_hf_pm : initial trigram frequency for HF n+2 (per million)#
# $ itf_lf_pm : initial trigram frequency for LF n+2 (per million)#

#

#
#------------------------------------#
#
# EXPERIMENT 1:
#
# YOUNG ADULTS (BOUNDARY-N2-FQ):
#
#------------------------------------#
#

#
#----------------------------------------#
#
# add new corpus information 
#
# (e.g., initial bigram/trigram frequency)
#
#----------------------------------------#
#

#
idxCorpusToData <- match(em_n2$sn, sinfo$sn)
em_n2[,c("wtf_hf_abs", "wtf_lf_abs", "wtf_hf_pm",  "wtf_lf_pm", "wtp_hf", "wtp_lf", "f_hf_abs"  
#
		, "f_lf_abs",  "f_hf_pm",    "f_lf_pm",    "ibf_hf_pm", "ibf_lf_pm", "itf_hf_pm", "itf_lf_pm"
#
		, "wtp_hf", "wtp_lf")] <- sinfo[idxCorpusToData,c("wtf_hf_abs", "wtf_lf_abs", "wtf_hf_pm"
#
		, "wtf_lf_pm", "wtp_hf", "wtp_lf" , "f_hf_abs", "f_lf_abs", "f_hf_pm", "f_lf_pm", "ibf_hf_pm"
#
		, "ibf_lf_pm", "itf_hf_pm", "itf_lf_pm", "wtp_hf", "wtp_lf")]
t<- c("wtf_hf_abs", "wtf_lf_abs", "wtf_hf_pm"
#
		, "wtf_lf_pm", "wtp_hf", "wtp_lf" , "f_hf_abs", "f_lf_abs", "f_hf_pm", "f_lf_pm", "ibf_hf_pm"
#
		, "ibf_lf_pm", "itf_hf_pm", "itf_lf_pm", "wtp_hf", "wtp_lf")
length(t)
t2<- c("wtf_hf_abs", "wtf_lf_abs", "wtf_hf_pm",  "wtf_lf_pm", "wtp_hf", "wtp_lf", "f_hf_abs"  
#
		, "f_lf_abs",  "f_hf_pm",    "f_lf_pm",    "ibf_hf_pm", "ibf_lf_pm", "itf_hf_pm", "itf_lf_pm"
#
		, "wtp_hf", "wtp_lf")
length(t2)
length(idxCorpusToData)
dim(em_n2)
t
t2
rm(list=ls())
#

#
# SET WORKING DIRECTORY:
#
getwd()
#
setwd("/Users/Sarah/Documents/EM_Manuscripts/2011.Article.dPoF.N2/pmr2")#
dir()#

#
# INPUT:#
# (1) data frame containing eye-movement (em) measures for different words:#
# em_nm1: word n-1#
# em_n0: preboundary word n#
# em_n1: postboundary word n+1#
# em_n2: target word n+2#
# em_n3: posttarget word n+3
#
ifile_em   <- c("n2FQ_em_filtered.rda")#
#
# (2) data frame containing additional word information#
# based on the dlex-database  http://dlexdb.de/:  
#
ifile_info <- c("n2FQ_sinfo.rda")
#
ifile_corpus <- c("FQ.DWDS.wid.rda")
#

#

#

#
# OUTPUT:
#

#

#
# LIBRARIES:
#
library(lme4)
#
library(reshape)
#
library(MASS)
#
library(Hmisc)
#
library(ggplot2)
#

#
# FUNCTIONS:
#
source("/Users/Sarah/Documents/EM_Rfunctions/remef.R")
#

#
# LOAD DATA:
#
load(ifile_em)
#
ls()			# em_n0, em_n1, em_n2, em_n3, em_nm1#
str(em_n0)#
#'data.frame':	7705 obs. of  30 variables:#
# $ id  : subject identification#
# $ sn  : sentence number#
# $ wn  : current word number in sentence#
# $ nw  : total number of words in sentence#
# $ wid : unique word identification number across all sentences (same word gets same number)#
# $ cond: n+2 processing demand conditions (1) HF-HF (2) LF-HF (3) HF-LF (4) LF-LF#
# $ f2bb: log-frequency of word n+2 preview before boundary#
# $ f2ab: log-frequency of word n+2 target after boundary#
# $ n2bb: n+2 PREVIEW difficulty (-0.5: easy (HF); 0.5: difficult (LF))#
# $ n2ab: n+2 TARGET  difficulty (-0.5: easy (HF); 0.5: difficult (LF))#
# $ pvn2: n+2 preview condition (-0.5: correct preview - no change; 0.5: incorrect preview - display change)#
# $ sn1:  n+1 skipping status (-0.5: fixated; 0.5: skipped)#
# $ sn2:  n+2 skipping status (-0.5: fixated; 0.5: skipped)#
# $ lxn1: n+1 lexical status (-0.5: function word; 0.5: content word)#
# $ f   : centered log-frequency of the fixated word#
# $ wl  : centered 1/word length of the fixated word#
# $ wl1 : centered wl for the word to the left (lag-effect)#
# $ f1  : centered f  for the word to the left (lag-effect)#
# $ wl2 : centered wl for the word to the right (successor-effect)#
# $ f2  : centered f  for the word to the right (successor-effect)#
# $ wl3 : centered wl for the word two words to the left#
# $ f3  : centered f  for the word two words to the left#
# $ wl4 : centered wl for the word two words to the right#
# $ f4  : centered f  for the word two words to the right#
# $ ffd : first fixation duration#
# $ gzd : gaze duration#
# $ sfd : single fixation duration#
# $ tvt : total viewing time#
# $ prx : probability of refixation#
# $ psk : probability of skipping#
# $ prg : probability of regression#
# $ ilp : intial landing position (letter position/word length)#

#
load(ifile_info)
#
str(sinfo)#
#'data.frame':	160 obs. of  15 variables:#
# $ sn        : sentence number#
# $ wtf_hf_abs: absolute word trigram frequency for HF n+2#
# $ wtf_lf_abs: absolute word trigram frequency for LF n+2#
# $ wtf_hf_pm : word trigram frequency for HF n+2 per million#
# $ wtf_lf_pm : word trigram frequency for LF n+2 per million#
# $ wtp_hf    : probability of word trigram for HF n+2#
# $ wtp_lf    : probability of word trigram for LF n+2#
# $ f_hf_abs  : absolute word frequency for HF n+2#
# $ f_lf_abs  : absolute word frequency for LF n+2#
# $ f_hf_pm   : word frequency for HF n+2 per million#
# $ f_lf_pm   : word frequency for LF n+2 per million#
# $ ibf_hf_pm : initial bigram frequency for HF n+2 (per million)#
# $ ibf_lf_pm : initial bigram frequency for LF n+2 (per million)#
# $ itf_hf_pm : initial trigram frequency for HF n+2 (per million)#
# $ itf_lf_pm : initial trigram frequency for LF n+2 (per million)#

#

#
#------------------------------------#
#
# EXPERIMENT 1:
#
# YOUNG ADULTS (BOUNDARY-N2-FQ):
#
#------------------------------------#
#

#
#----------------------------------------#
#
# add new corpus information 
#
# (e.g., initial bigram/trigram frequency)
#
#----------------------------------------#
#

#
idxCorpusToData <- match(em_n2$sn, sinfo$sn)
str(em_n2)
str(em_n1)
str(em_nm1)
ifile_em
getwd()
str(em_n0)
keep1 <- c("id","sn","wn","nw","wid","cond","f2bb","f2ab","n2bb",       #
"n2ab","pvn2","lxn1","f",        #
"wl","wl1","f1","wl2","f2","wl3","f3",#
 "wl4","f4","ffd","gzd","sfd","tvt","prx",#
"psk","prg","ilp")  #
#
keep2 <- c("id","sn","wn","nw","wid","cond","f2bb","f2ab","n2bb",       #
"n2ab","pvn2","lxn1","sn1","f",        #
"wl","wl1","f1","wl2","f2","wl3","f3",#
 "wl4","f4","ffd","gzd","sfd","tvt","prx",#
"psk","prg","ilp")  #
#
keep3 <- c("id","sn","wn","nw","wid","cond","f2bb","f2ab","n2bb",       #
"n2ab","pvn2","lxn1","sn2","f",        #
"wl","wl1","f1","wl2","f2","wl3","f3",#
 "wl4","f4","ffd","gzd","sfd","tvt","prx",#
"psk","prg","ilp")  #
#
#
em_n0 <- em_n0[,keep2]#
em_n1 <- em_n1[,keep3]#
em_n2 <- em_n2[,keep2]#
em_n3 <- em_n3[,keep1]#
em_nm1 <- em_nm1[,keep1]
str(em_n0)
str(em_n1)
save(em_nm1, em_n0, em_n1, em_n2, em_n3, file=ifile_em)
rm(list=ls())
#

#
# SET WORKING DIRECTORY:
#
getwd()
#
setwd("/Users/Sarah/Documents/EM_Manuscripts/2011.Article.dPoF.N2/pmr2")#
dir()#

#
# INPUT:#
# (1) data frame containing eye-movement (em) measures for different words:#
# em_nm1: word n-1#
# em_n0: preboundary word n#
# em_n1: postboundary word n+1#
# em_n2: target word n+2#
# em_n3: posttarget word n+3
#
ifile_em   <- c("n2FQ_em_filtered.rda")#
#
# (2) data frame containing additional word information#
# based on the dlex-database  http://dlexdb.de/:  
#
ifile_info <- c("n2FQ_sinfo.rda")
#
ifile_corpus <- c("FQ.DWDS.wid.rda")
#

#

#

#
# OUTPUT:
#

#

#
# LIBRARIES:
#
library(lme4)
#
library(reshape)
#
library(MASS)
#
library(Hmisc)
#
library(ggplot2)
#

#
# FUNCTIONS:
#
source("/Users/Sarah/Documents/EM_Rfunctions/remef.R")
#

#
# LOAD DATA:
#
load(ifile_em)
#
ls()			# em_n0, em_n1, em_n2, em_n3, em_nm1#
str(em_n0)#
#'data.frame':	7705 obs. of  30 variables:#
# $ id  : subject identification#
# $ sn  : sentence number#
# $ wn  : current word number in sentence#
# $ nw  : total number of words in sentence#
# $ wid : unique word identification number across all sentences (same word gets same number)#
# $ cond: n+2 processing demand conditions (1) HF-HF (2) LF-HF (3) HF-LF (4) LF-LF#
# $ f2bb: log-frequency of word n+2 preview before boundary#
# $ f2ab: log-frequency of word n+2 target after boundary#
# $ n2bb: n+2 PREVIEW difficulty (-0.5: easy (HF); 0.5: difficult (LF))#
# $ n2ab: n+2 TARGET  difficulty (-0.5: easy (HF); 0.5: difficult (LF))#
# $ pvn2: n+2 preview condition (-0.5: correct preview - no change; 0.5: incorrect preview - display change)#
# $ sn1:  n+1 skipping status (-0.5: fixated; 0.5: skipped)#
# $ sn2:  n+2 skipping status (-0.5: fixated; 0.5: skipped)#
# $ lxn1: n+1 lexical status (-0.5: function word; 0.5: content word)#
# $ f   : centered log-frequency of the fixated word#
# $ wl  : centered 1/word length of the fixated word#
# $ wl1 : centered wl for the word to the left (lag-effect)#
# $ f1  : centered f  for the word to the left (lag-effect)#
# $ wl2 : centered wl for the word to the right (successor-effect)#
# $ f2  : centered f  for the word to the right (successor-effect)#
# $ wl3 : centered wl for the word two words to the left#
# $ f3  : centered f  for the word two words to the left#
# $ wl4 : centered wl for the word two words to the right#
# $ f4  : centered f  for the word two words to the right#
# $ gzd : gaze duration#
# $ sfd : single fixation duration#
# $ tvt : total viewing time#
# $ prx : probability of refixation#
# $ psk : probability of skipping#
# $ prg : probability of regression#
# $ ilp : intial landing position (letter position/word length)#

#
load(ifile_info)
#
str(sinfo)#
#'data.frame':	160 obs. of  15 variables:#
# $ sn        : sentence number#
# $ wtf_hf_abs: absolute word trigram frequency for HF n+2#
# $ wtf_lf_abs: absolute word trigram frequency for LF n+2#
# $ wtf_hf_pm : word trigram frequency for HF n+2 per million#
# $ wtf_lf_pm : word trigram frequency for LF n+2 per million#
# $ wtp_hf    : probability of word trigram for HF n+2#
# $ wtp_lf    : probability of word trigram for LF n+2#
# $ f_hf_abs  : absolute word frequency for HF n+2#
# $ f_lf_abs  : absolute word frequency for LF n+2#
# $ f_hf_pm   : word frequency for HF n+2 per million#
# $ f_lf_pm   : word frequency for LF n+2 per million#
# $ ibf_hf_pm : initial bigram frequency for HF n+2 (per million)#
# $ ibf_lf_pm : initial bigram frequency for LF n+2 (per million)#
# $ itf_hf_pm : initial trigram frequency for HF n+2 (per million)#
# $ itf_lf_pm : initial trigram frequency for LF n+2 (per million)#

#

#
#------------------------------------#
#
# EXPERIMENT 1:
#
# YOUNG ADULTS (BOUNDARY-N2-FQ):
#
#------------------------------------#
#

#
#----------------------------------------#
#
# add new corpus information 
#
# (e.g., initial bigram/trigram frequency)
#
#----------------------------------------#
#

#
idxCorpusToData <- match(em_n2$sn, sinfo$sn)
#

#

#
em_n2[,c("wtf_hf_abs", "wtf_lf_abs", "wtf_hf_pm",  "wtf_lf_pm", "wtp_hf", "wtp_lf", "f_hf_abs"  
#
		, "f_lf_abs",  "f_hf_pm",    "f_lf_pm",    "ibf_hf_pm", "ibf_lf_pm", "itf_hf_pm", "itf_lf_pm"
#
		, "wtp_hf", "wtp_lf")] <- sinfo[idxCorpusToData,c("wtf_hf_abs", "wtf_lf_abs", "wtf_hf_pm"
#
		, "wtf_lf_pm", "wtp_hf", "wtp_lf" , "f_hf_abs", "f_lf_abs", "f_hf_pm", "f_lf_pm", "ibf_hf_pm"
#
		, "ibf_lf_pm", "itf_hf_pm", "itf_lf_pm", "wtp_hf", "wtp_lf")]
# word trigram frequency (word n, n+1, n+2):
#
wtf_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$wtf_hf_pm, em_n2$wtf_lf_pm)
#
wtf_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$wtf_hf_pm, em_n2$wtf_lf_pm)
#

#
# conditional probability of word trigram (word n, n+1, n+2):
#
wtp_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$wtp_hf, em_n2$wtp_lf)
#
wtp_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$wtp_hf, em_n2$wtp_lf)
#

#
# initial bigram frequency of word n+2:
#
ibf_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$ibf_hf_pm, em_n2$ibf_lf_pm)
#
ibf_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$ibf_hf_pm, em_n2$ibf_lf_pm)
#

#
# initial trigram frequency of word n+2:
#
itf_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$itf_hf_pm, em_n2$itf_lf_pm)
#
itf_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$itf_hf_pm, em_n2$itf_lf_pm)
#

#
# (whole) word frequency of word n+2:
#
f_bb <- ifelse(em_n2$n2bb==-0.5, em_n2$f_hf_pm, em_n2$f_lf_pm)
#
f_ab <- ifelse(em_n2$n2ab==-0.5, em_n2$f_hf_pm, em_n2$f_lf_pm)
em_n2[,c("wtf_bb","wtf_ab","ibf_bb","ibf_ab","itf_bb","itf_ab","f_bb","f_ab","wtp_bb","wtp_ab")] <- c(wtf_bb,wtf_ab,log10(ibf_bb),log10(ibf_ab),log10(itf_bb),log10(itf_ab),log10(f_bb),log10(f_ab),wtp_bb,wtp_ab)
#

#
em_n1[,c("wtf_bb","wtf_ab","ibf_bb","ibf_ab","itf_bb","itf_ab","f_bb","f_ab","wtp_bb","wtp_ab")] <- c(wtf_bb,wtf_ab,log10(ibf_bb),log10(ibf_ab),log10(itf_bb),log10(itf_ab),log10(f_bb),log10(f_ab),wtp_bb,wtp_ab)
#

#
em_n0[,c("wtf_bb","wtf_ab","ibf_bb","ibf_ab","itf_bb","itf_ab","f_bb","f_ab","wtp_bb","wtp_ab")] <- c(wtf_bb,wtf_ab,log10(ibf_bb),log10(ibf_ab),log10(itf_bb),log10(itf_ab),log10(f_bb),log10(f_ab),wtp_bb,wtp_ab)
# TEST CORRELATION BETWEEN PREDICTORS:
#
# (1) n2bb vs. word trigram predictability:
#
cor(em_n0$n2bb,em_n0$wtp_bb)  # r= -0.06; NOTE: correlations negative because n2bb is coded positive
#
# [-0.5: easy; 0.5: difficult] and continuous predictors are always coded negative
#
# [small values (e.g., low pred): difficult; high values (e.g., high freq): easy]
#

#
# (2) n2bb vs. initial bigram frequency:
#
cor(em_n0$n2bb,em_n0$ibf_bb)  # r = -0.45
#

#
# (3) n2bb vs. initial trigram frequency:
#
cor(em_n0$n2bb,em_n0$itf_bb)  # r = -0.72
#

#
# (4) ibf vs. initial trigram frequency:
#
cor(em_n0$ibf_bb,em_n0$itf_bb)  # r = 0.65
#

#
# (5) f_bb vs. initial bigram frequency:
#
cor(em_n0$f_bb,em_n0$ibf_bb)  # r = 0.45
#

#
# (6) f_bb vs. initial trigram frequency:
#
cor(em_n0$f_bb,em_n0$itf_bb)  # r = 0.77
# see manuscript p. 11#
# (1) n2bb vs. word trigram predictability:
#
cor(em_n0$n2bb,em_n0$wtp_bb)  # r= -0.06; NOTE: correlations negative because n2bb is coded positive
#
# [-0.5: easy; 0.5: difficult] and continuous predictors are always coded negative
#
# [small values (e.g., low pred): difficult; high values (e.g., high freq): easy]
#

#
# (2) n2bb vs. initial bigram frequency:
#
cor(em_n0$n2bb,em_n0$ibf_bb)  # r = -0.45
#

#
# (3) n2bb vs. initial trigram frequency:
#
cor(em_n0$n2bb,em_n0$itf_bb)  # r = -0.72
#

#
# (4) ibf vs. initial trigram frequency:
#
cor(em_n0$ibf_bb,em_n0$itf_bb)  # r = 0.65
#

#
# (5) f_bb vs. initial bigram frequency:
#
cor(em_n0$f_bb,em_n0$ibf_bb)  # r = 0.45
#

#
# (6) f_bb vs. initial trigram frequency:
#
cor(em_n0$f_bb,em_n0$itf_bb)  # r = 0.77
#

#

#
# before entering into LMMs...
#
# ALL OTHER PREDICTORS MUST BE CENTERED:
#

#
centerVar <- c("wtf_bb","wtf_ab","ibf_bb","ibf_ab","itf_bb","itf_ab","f_bb","f_ab")
#
em_n0[ ,centerVar]  <- scale(em_n0[ ,centerVar], scale=F)
#
em_n1[ ,centerVar]  <- scale(em_n1[ ,centerVar], scale=F)
#
em_n2[ ,centerVar]  <- scale(em_n2[ ,centerVar], scale=F)
xdat <- em_n2#
# LMMs REPORTED IN MAIN ANALYSIS:#
#
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=FALSE)#
#
lm.1 <- lmer(log(ffd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)#
#
lm.2 <- lmer(log(sfd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)#
#
print(lm.0, cor=F)
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=FALSE)
print(lm.0, cor=F)
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id)+ (1|sn)#
, data=xdat, REML=FALSE)
print(lm.0, cor=F)
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) #
, data=xdat, REML=FALSE)
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) #
, data=xdat, REML=FALSE)
print(lm.0, cor=F)
#
lm.1 <- lmer(log(ffd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE)
print(lm.1, cor=F)
print(lm.3, cor=F)
print(lm.2, cor=F)
lm.0 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.0, cor=F)

#
lm.0 <- lmer(log(ffd) ~ (sn1+lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.0, cor=F)

#
lm.0 <- lmer(log(sfd) ~ (sn1+lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.0, cor=F)
xdat <- em_n1

#
lm.0 <- lmer(log(gzd) ~ (lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.0, cor=F)
lm.1 <- lmer(log(gzd) ~ (lxn1+n2ab+ibf_bb)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.1, cor=F)
lm.2 <- lmer(log(gzd) ~ (lxn1+n2ab+itf_bb)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.2, cor=F)
lm.1 <- lmer(log(gzd) ~ (lxn1+ibf_bb+ibf_ab)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.1, cor=F)
cor(em_n0$n2bb,em_n0$wtp_bb)  # r= -0.06; NOTE: correlations negative because n2bb is coded positive
cor(em_n0$n2bb,em_n0$ibf_bb)  # r = -0.45
cor(em_n0$n2bb,em_n0$itf_bb)  # r = -0.72
cor(em_n0$ibf_bb,em_n0$itf_bb)  # r = 0.65
cor(em_n0$f_bb,em_n0$ibf_bb)  # r = 0.45
cor(em_n0$f_bb,em_n0$itf_bb)  # r = 0.77
lm.1 <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+ibf_bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
lm.1 <- lmer(log(gzd) ~ (lxn1+n2bb+ibf_bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
lm.3 <- lmer(log(gzd) ~ (lxn1+n2bb+n2ab+ibf_bb+itf_bb)^4 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.3, cor=F)
lm.1 <- lmer(log(gzd) ~ (lxn1+n2bb+n2ab+ibf_bb)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.1, cor=F)
xdat <- em_n1
#
# COMPLETE MODEL WITH ALL PREDICTORS:
#

#
lm.org <- lmer(log(gzd) ~ (lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.org, cor=F)
lm.bigram <- lmer(log(gzd) ~ (lxn1+ibf_bb+ibf_ab)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.bigram, cor=F)
bigram.res <- resid(lm.bigram)
summary(lm(bigram.res ~ lm.org@frame$n2bb))
summary(lm(bigram.res ~ lm.org@frame$n2ab))
summary(lm(bigram.res ~ lm.org@frame$n2bb*lm.org@frame$n2ab))
lm.trigram <- lmer(log(gzd) ~ (lxn1+itf_bb+itf_ab)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.trigram, cor=F)
trigram.res <- resid(lm.trigram)
summary(lm(trigram.res ~ lm.org@frame$n2bb))
summary(lm(trigram.res ~ lm.org@frame$n2ab))
summary(lm(trigram.res ~ lm.org@frame$n2bb+lm.org@frame$n2ab))
###############################
#
xdat <- em_n1
#
# COMPLETE MODEL WITH ALL PREDICTORS:
#

#
lm.0 <- lmer(log(gzd) ~ (lxn1+n2bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.0, cor=F)
lm.1 <- lmer(log(gzd) ~ (lxn1+n2bb+ibf_bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
lm.1 <- lmer(log(gzd) ~ (lxn1+ibf_bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
lm.1 <- lmer(log(gzd) ~ (lxn1+ibf_bb+f2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
lm.1 <- lmer(log(gzd) ~ (lxn1+f2ab+ibf_bb+f2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
lm.1 <- lmer(log(gzd) ~ (lxn1+f2ab+ibf_bb+f2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
lm.1 <- lmer(log(gzd) ~ (lxn1+f2bb+ibf_bb+f2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
lm.1 <- lmer(log(gzd) ~ (lxn1+f2bb+itf_bb+f2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
lm.1 <- lmer(log(gzd) ~ (lxn1+ibf_bb+f2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
lm.1 <- lmer(log(sfd) ~ (lxn1+f2bb+ibf_bb+f2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
lm.1 <- lmer(log(sfd) ~ (lxn1+ibf_bb+f2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
lm.1 <- lmer(log(sfd) ~ (lxn1+ibf_bb+n2ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.1, cor=F)
xdat <- em_n1
lm.bigram <- lmer(log(gzd) ~ (lxn1+ibf_bb+ibf_ab)^3 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.bigram, cor=F)
lm.bigram <- lmer(log(sfd) ~ (lxn1+ibf_bb+ibf_ab)^3 + (1|id) + (1|wid) + (1|sn)#
, data=xdat, REML=TRUE) #
print(lm.bigram, cor=F)
xdat <- em_n2
lm.org <- lmer(log(gzd) ~ (sn1+lxn1+n2bb+n2ab)^4 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.org, cor=F)
lm.bigram <- lmer(log(gzd) ~ (sn1+lxn1+ibf_bb+ibf_ab)^4 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.bigram, cor=F)
lm.trigram <- lmer(log(gzd) ~ (sn1+lxn1+itf_bb+itf_ab)^4 + (1|id) + (1|wid) + (1|sn)
#
, data=xdat, REML=TRUE) 
#
print(lm.trigram, cor=F)
