############################################################ # Simtdup_plot.r R script to simulate the Ks of duplicated genes # and plot the distribution # paramters: size -- # of duplicate gene pairs # tdup -- time of genome duplication # lambda -- rate of gene death # also requires a imput file of Ks and S.E. estimated from duplicated genes # # example: # dat1<-readdata('1.yn00.txt'); # lambda<-0.7; # tdup<-0.5; # simulation <-simtdup(dat1,lambda,tdup); # ###To plot the simulated data ########### # # Set y-axis to 100 # maxy<-100; # plotdup(simulation,maxy); # #### to output the data to a file called 'Simdata.txt' ####### # file<-c('Simdata.txt'); # simdata(simulation, file); ###### to draw 3x3 plots with different rates and duplication time ## t=0.5, 1.0, 1.5 ## rate = base rate, 0.8*base, 0.5*base # base_rate<-0.7; # drawplot(simulation, base_rate); ############################################################ library(stats); low<-0.000001; high<-2.0; # read data file err<-function(ks){ s.lo<-loess(SE~Ks,ks, control = loess.control(surface = "direct")) s.lo } trunc<-function(ks,low,high){ kst<-c(); j<-0; for (i in 1:length(ks$Ks)) { if (ks$Ks[i]>low && ks$Ks[i]2 low<-0.0001; high<-2.0; kst<-trunc(ks,low,high); kst } # simulate Ks of gene pairs generated from constant gene duplication # (background) and genome duplication at time tdup simtdup<-function(kst,lambda,tdup) { size<-300; bsize<-round(size*0.2); ################################################### #simulate Ks and duplication at tdup # #the sample size from background is "size" # the duplication at time 0 will create N0 genes N0<-size-bsize; #exponential decay Nt<-round(N0*exp(-tdup/lambda)); # default Nt=N0*exp(-1/lambda) # assume a exponential with shifted mean Tdup<-rexp(Nt,rate=1/lambda)+tdup; ######################################################### #exclude those outside of range (low,high) t<-c(); for (i in 1:length(Tdup)) { if (Tdup[i]>low && Tdup[i] low) { v$b<-c(v$b,s.Ks[k]); v$bse<-c(v$bse,s.T[k]); j<-j+1; }; }; v } #plot them plotdup<-function(ks,maxy) { bin=seq(1,10)*0.2; bin[1]<-min(min(ks$b,ks$f)); h1<-hist(ks$b,breaks=c(bin,max(ks$b)),plot=F); h2<-hist(ks$f,breaks=c(bin,max(ks$f)),plot=F); xs=bin[1:10]; plot(x=xs,y=h1$counts,ann=FALSE,type='l',lty=3, xlim=c(0,2),ylim=c(0,maxy),frame.plot=F); lines(x=xs,y=h2$counts,lty=2); lines(x=xs,y=h1$counts+h2$counts,lty=1); }; #output simulation data simdata<-function(ks,filename) { if (missing(filename)) {filename="simdata"}; x<-cbind(c(ks$f,ks$b),c(ks$fse,ks$bse)); write.table(x,file=filename,row.names=F, col.names=c('Ks','SE'),sep="\t"); } # draw example plots with different death rates and duplication time t # here t = 0.5, 1, 1.5 # rate = base, 0.8*base, 0.5*base drawplot<-function(ks,l.ks){ rate<-c(l.ks*1, l.ks*0.8,l.ks*0.5); maxy<-c(40,40,40); par(mfrow=c(3,3)); for (row in seq(1,3)) { for (col in seq(1,3)) { dup<-c(); t<-col*0.5; dup<-simtdup(ks,rate[row],t); plotdup(dup,maxy[row]); file<-paste('simdata',row,col,sep=""); simdata(dup,file); } } }