######################### #### Missing states ##### ######################### rm(list = ls()) # clears memory install.packages("TraMineR") # download of package "TraMineR", can be erased after download library(TraMineR) # opens TraMineR ### SET PARAMETERS start.number <- 200 # initial number of sequences start.length <- 5 # initial sequence length n.pat <- 9 # number of patterns to be simulated (must be specified below!) n.states <-9 # number of different states rep <-1 # number of repetitions rep.length<- 20 # number of length repetitions rep.number<- 9 # number of different sample sizes/ variance variation add.length<-5 # how much is added to length add.number <- 0 # how much is added to number start.var.length <-1 # variance in length ### SIMULATION START number<-start.number var.length <-start.var.length out.seq.num <- matrix(0, nrow= rep.length, ncol=4)#matrix for total result for (i in 1:rep.number){ # loop for different numbers of sequences length<-start.length out.seq <- vector()# vector for different seqnuence length outcomes for (i in 1:rep.length){ #loop for different sequence lengths vlength<- c((length-var.length):length) # variance in length wrong.tot <- vector() # vector for wrong classifications in each study repetition for(j in 1:rep){ # loop A start, number of study repetition loops vectype <- c(1:n.pat) states <- c(1:n.states) mat.start <- matrix(0, ncol = n.pat, nrow = n.states) # empty matrix t.mats <- array(0,c(n.states, n.states, n.pat)) # state x state matrices, as many matrices as patterns ### 9 PATTERNS mat.start[,1] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,1] <- matrix(c( 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8 ), 9, 9) mat.start[,2] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,2] <- matrix(c( 0.2, 0.8, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.2, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2 ), 9, 9) mat.start[,3] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,3] <- matrix(c( 0.0, 0.0, 0.5, 0.4, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.1, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.5, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.4, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.4, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.5, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.4, 0.5, 0.0, 0.0, 0.0, 0.0, 0.1 ), 9, 9) mat.start[,4] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,4] <- matrix(c( 0.05, 0.05, 0.0, 0.0, 0.0, 0.05, 0.75, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.0, 0.0, 0.0, 0.75, 0.05, 0.75, 0.0, 0.0, 0.0, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.75, 0.0, 0.0, 0.0, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.75, 0.0, 0.0, 0.0, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.0, 0.0, 0.0, 0.05, 0.05, 0.75, 0.05, 0.05, 0.05, 0.05, 0.05, 0.75, 0.0, 0.0, 0.0, 0.05, 0.05, 0.05, 0.05, 0.05, 0.0, 0.0, 0.0, 0.75, 0.05, 0.05, 0.0, 0.0, 0.0,0.05, 0.05, 0.05, 0.75 ), 9, 9) mat.start[,5] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,5] <- matrix(c( 0.2, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2 ), 9, 9) mat.start[,6] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,6] <- matrix(c( 0.9, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1 ), 9, 9) mat.start[,7] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,7] <- matrix(c( 0.0, 0.2, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.2 ), 9, 9) mat.start[,8] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,8] <- matrix(c( 0.0, 0.2, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.1, 0.0, 0.0 ), 9, 9) mat.start[,9] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,9] <- matrix(c( 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.2, 0.0, 0.0, 0.6, 0.1, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.3, 0.0, 0.0, 0.2, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.0, 0.0, 0.3, 0.0, 0.0, 0.1, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0 ), 9, 9) ### CREATE SEQUENCES seqmatrix <- matrix(0:(length),1, (length + 1)) # empty matrix for sequences for(i in 1:number) { # loop B.2 start, loop for number of sequences type <- sample(vectype,1) # the rest of the sequence patterns is random sample.state <- sample(states, 1, replace=TRUE, prob = mat.start[,type]) # first state is randomly chosen seqX <- c(type) sample.vlength<- sample(vlength, 1, replace = TRUE, prob = NULL) #take sample from various lengt for(i in 1:sample.vlength) { # loop C.2 start, appends states to sequence until it has the correct length sample.state <- sample(states, 1, prob = t.mats[,sample.state,type]) seqX <- append(seqX, sample.state) } # loop C.2 end if (sample.vlength != length){ # if there are missing elements in a sequence: NA for(i in (sample.vlength+1):length) { seqX <- append(seqX, NA)} } seqmatrix <- rbind(seqmatrix, seqX) } # loop B.2 end typesvector <- seqmatrix[2:(number + 1), 1] # shows pattern seqOM <- seqmatrix[2:(number + 1), 2:(length +1)] # changes matrix #seqOM2<- seqOM[!apply(seqOM[,4:5],1,function(x) {all(is.na(x))} ),] ### OMA mat.OM <- seqdef(seqOM) tr_rates <- seqsubm(mat.OM, method="CONSTANT", cval=2) # substitution costs=2 output <- seqdist(mat.OM, method="OM", norm=FALSE, indel=1.0, sm=tr_rates, with.miss=FALSE, full.matrix=FALSE) # CA OMcluster <- hclust(output, method = "ward") # CA groups_OMA <- cutree(OMcluster, k=n.pat) # cut tree into n.pat clusters ########### ###Results wr.tr <- table(typesvector, groups_OMA) # result of OMA, wrong or true OMA.wrong<- 0 for (i in 1:n.pat) { OMA.wrong<- OMA.wrong + sum(wr.tr[,i])-max(wr.tr[,i]) } #count.states <- t(as.matrix(apply(seqOM, 1, function(x) 100*summary(factor(x, #levels=1:n.states))/length))) wrong.tot<-append(wrong.tot, OMA.wrong) } #end study repetition loop wrong <- mean(wrong.tot)/number*100 # wrong classifications in percent #### RESULTS out.seq.length <- c(number, length, wrong, var.length) out.seq <- append(out.seq, out.seq.length) # append outcomes for different sequence lengths length <- length + add.length } # end loop J sequence length out.seq.tot <- t(matrix(out.seq, ncol= rep.length, nrow=4)) #matrix for outcomes colnames(out.seq.tot) <- c("number","length", "wrong in %", "var.length")# names for columns out.seq.tot #outcomes for different sequences out.seq.num<- rbind(out.seq.num, out.seq.tot) if (var.length==1){ var.length <- var.length + 4} else if (var.length==5){ var.length <- var.length + 5} else{ var.length <- var.length + 10} } # end loop for number out.seq.num<-out.seq.num[-1:-(rep.length),] out.seq.num<-data.frame(out.seq.num) out.seq.num<-out.seq.num[!(out.seq.num$length==5 & out.seq.num$var.length==10),] out.seq.num<-out.seq.num[!(out.seq.num$length<=15& out.seq.num$var.length==20),] out.seq.num<-out.seq.num[!(out.seq.num$length<=25& out.seq.num$var.length==30),] out.seq.num<-out.seq.num[!(out.seq.num$length<=35& out.seq.num$var.length==40),] out.seq.num<-out.seq.num[!(out.seq.num$length<=45& out.seq.num$var.length==50),] out.seq.num<-out.seq.num[!(out.seq.num$length<=55& out.seq.num$var.length==60),] out.seq.num<-out.seq.num[!(out.seq.num$length<=65& out.seq.num$var.length==70),] out.seq.num