######################################### #### Sample size and sequence length #### ######################################### rm(list = ls()) # clears memory install.packages("TraMineR") # download of package "TraMineR", can be erased after download library(TraMineR) # opens TraMineR ###SET PARAMETERS start.number <- 50 # initial number of sequences start.length <- 5 # initial sequence length n.pat <- 9 # number of patterns to be simulated (must be specified below!) n.states <-9 # number of different states rep <-1 # number of repetitions rep.length<- 2 # number of length repetitions rep.number<- 2 # number of different sample sizes add.length<- 5 # how much is added to length add.number <- 50 # how much is added to number ###SIMULATION START number<-start.number out.seq.num <- matrix(0, nrow= rep.length, ncol=3) #matrix for total result for (i in 1:rep.number){ # loop for different numbers of sequences length<-start.length out.seq <- vector()# vector for different seqnuence length outcomes for (i in 1:rep.length){ #loop for different sequence lengths wrong.tot <- vector() # vector for wrong classifications in each study repetition for(j in 1:rep){ # loop A start, number of study repetition loops vectype <- c(1:n.pat) states <- c(1:n.states) mat.start <- matrix(0, ncol = n.pat, nrow = n.states) # empty matrix t.mats <- array(0,c(n.states, n.states, n.pat)) # state x state matrices, as many matrices as patterns ### 9 PATTERNS mat.start[,1] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,1] <- matrix(c( 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8 ), 9, 9) mat.start[,2] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,2] <- matrix(c( 0.2, 0.8, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.2, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2 ), 9, 9) mat.start[,3] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,3] <- matrix(c( 0.0, 0.0, 0.5, 0.4, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.1, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.5, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.4, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.4, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.5, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.5, 0.4, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.4, 0.5, 0.0, 0.0, 0.0, 0.0, 0.1 ), 9, 9) mat.start[,4] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,4] <- matrix(c( 0.05, 0.05, 0.0, 0.0, 0.0, 0.05, 0.75, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.0, 0.0, 0.0, 0.75, 0.05, 0.75, 0.0, 0.0, 0.0, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.75, 0.0, 0.0, 0.0, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.75, 0.0, 0.0, 0.0, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.0, 0.0, 0.0, 0.05, 0.05, 0.75, 0.05, 0.05, 0.05, 0.05, 0.05, 0.75, 0.0, 0.0, 0.0, 0.05, 0.05, 0.05, 0.05, 0.05, 0.0, 0.0, 0.0, 0.75, 0.05, 0.05, 0.0, 0.0, 0.0,0.05, 0.05, 0.05, 0.75 ), 9, 9) mat.start[,5] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,5] <- matrix(c( 0.2, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2 ), 9, 9) mat.start[,6] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,6] <- matrix(c( 0.9, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1 ), 9, 9) mat.start[,7] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,7] <- matrix(c( 0.0, 0.2, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.2 ), 9, 9) mat.start[,8] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,8] <- matrix(c( 0.0, 0.2, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.8, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.8, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.1, 0.0, 0.0 ), 9, 9) mat.start[,9] <- c(1,0,0,0,0,0,0,0,0) t.mats[,,9] <- matrix(c( 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.2, 0.0, 0.0, 0.6, 0.1, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.3, 0.0, 0.0, 0.2, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.0, 0.0, 0.3, 0.0, 0.0, 0.1, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0 ), 9, 9) ### CREATE SEQUENCES seqmatrix <- matrix(0:(length),1, (length + 1)) # empty matrix for sequences for(i in 1:number) { # loop B.2 start, loop for number of sequences type <- sample(vectype,1) # the rest of the sequence patterns is random sample.state <- sample(states, 1, prob = mat.start[,type]) # first state is randomly chosen seqX <- c(type) for(i in 1:length) { # loop C.2 start, appends states to sequence until it has the correct length sample.state <- sample(states, 1, prob = t.mats[,sample.state,type]) seqX <- append(seqX, sample.state) } # loop C.2 end seqmatrix <- rbind(seqmatrix, seqX) } # loop B.2 end typesvector <- seqmatrix[2:(number + 1), 1] # shows pattern seqOM <- seqmatrix[2:(number + 1), 2:(length +1)] # changes matrix ############################ ###OMA mat.OM <- seqdef(seqOM) tr_rates <- seqsubm(mat.OM, method="CONSTANT", cval=2) # substitution costs=2 output <- seqdist(mat.OM, method="OM", norm=FALSE, indel=1.0, sm=tr_rates, with.miss=FALSE, full.matrix=FALSE) # CA OMcluster <- hclust(output, method = "ward") # CA groups_OMA <- cutree(OMcluster, k=n.pat) # cut tree into n.pat clusters ########### ###Results wr.tr <- table(typesvector, groups_OMA) # result of OMA, wrong or true OMA.wrong<- 0 for (i in 1:n.pat) { OMA.wrong<- OMA.wrong + sum(wr.tr[,i])-max(wr.tr[,i]) } #count.states <- t(as.matrix(apply(seqOM, 1, function(x) 100*summary(factor(x, #levels=1:n.states))/length))) wrong.tot<-append(wrong.tot, OMA.wrong) } #end study repetition loop wrong <- mean(wrong.tot)/number*100 # wrong classifications in percent ################################## #### Results for different lenghts out.seq.length <- c(number, length, wrong) out.seq <- append(out.seq, out.seq.length) # append outcomes for different sequence lengths length <- length + add.length } # end loop J sequence length out.seq.tot <- t(matrix(out.seq, ncol= rep.length, nrow=3)) # matrix for outcomes colnames(out.seq.tot) <- c("number","length", "wrong in %") # names for columns out.seq.tot #outcomes for different sequences ############################################# #### Results for different number/sample size out.seq.num<- rbind(out.seq.num, out.seq.tot) if (number==50){ number <- number + add.number} else if (number==100){ number <- number + 100} else if (number==200){ number <- number + 300} else if (number==500){ number <- number + 1500} } # end loop for number out.seq.num<-out.seq.num[-1:-(rep.length),] out.seq.num<-data.frame(out.seq.num) out.seq.num