
# Nucleotide composition of di and tri nucleotides

library(seqinr)

# Dinucleotide frequencies for the first sequence set, followed by a PCA.
# NOTE(review): setwd() changes the working directory for everything that
# follows; the later read.fasta() call on "list_genes_1_Exon.fa" relies on it.
setwd("frequencies")
seq <- read.fasta(file = "Neo1.cds.fasta")

# One vector of dinucleotide (word size 2) frequencies per sequence.
di <- lapply(seq, function(x) {
  count(x, 2, freq = TRUE)
})

# Stack the per-sequence vectors: one row per sequence, one column per word.
di_nuc <- do.call(rbind.data.frame, di)
colnames(di_nuc) <- names(di[[1]])

# PCA on the correlation matrix. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
pca <- princomp(di_nuc, cor = TRUE)

# Scatter plot of the 2nd against the 3rd principal component.
plot(pca$scores[, 2], pca$scores[, 3], pch = 20)

# Dinucleotide frequencies for the second sequence set, then a joint PCA of
# both sets with the first set drawn in red and the second in blue.
seq2 <- read.fasta(file = "list_genes_1_Exon.fa")
di2 <- lapply(seq2, function(x) {
  count(x, 2, freq = TRUE)
})
di_nuc2 <- do.call(rbind.data.frame, di2)
colnames(di_nuc2) <- names(di2[[1]])

# Rows of the first set come first, rows of the second set follow.
all <- rbind(di_nuc, di_nuc2)
dim(all)

# Fold the 16 columns into 8 by adding column k to column 17 - k.
# NOTE(review): with alphabetically ordered words this presumably pairs each
# dinucleotide with its base-wise complement (aa+tt, ac+tg, ...), not with its
# reverse complement -- confirm this is the intended folding.
A <- subset(all, select = 1:8)
a <- subset(all, select = 9:16)
b <- a[, rev(seq_len(ncol(a)))]
DI <- A + b
pca <- princomp(DI, cor = TRUE)

X <- 1
Y <- 2
plot(pca$scores[, X], pca$scores[, Y], pch = 20, xlim = c(-8, 8),
     ylim = c(-8, 8), xlab = paste("Componente ", X[1]),
     ylab = paste("Componente", Y[1]))

# Row ranges are derived from the data instead of the hard-coded total 8774.
# The second range starts one row after the first set, so the last row of
# the first set is no longer also drawn in blue.
n_first <- nrow(di_nuc)
n_total <- nrow(pca$scores)
rows_first <- seq_len(n_first)
rows_second <- n_first + seq_len(n_total - n_first)
points(pca$scores[rows_first, X], pca$scores[rows_first, Y], col = "red")
points(pca$scores[rows_second, X], pca$scores[rows_second, Y], col = "blue")


# Trinucleotide (word size 3) frequencies for both sequence sets, then a joint
# PCA with the first set drawn in red and the second in blue.
tri <- lapply(seq, function(x) {
  count(x, 3, freq = TRUE)
})
tri_nuc <- do.call(rbind.data.frame, tri)
colnames(tri_nuc) <- names(tri[[1]])

tri2 <- lapply(seq2, function(x) {
  count(x, 3, freq = TRUE)
})
tri_nuc2 <- do.call(rbind.data.frame, tri2)
colnames(tri_nuc2) <- names(tri2[[1]])

# Rows of the first set come first, rows of the second set follow.
all <- rbind(tri_nuc, tri_nuc2)
dim(all)

# Fold the table in half: column k is added to column (n_words + 1 - k).
# The column ranges 1:8 / 9:16 copied from the dinucleotide analysis covered
# only the first 16 trinucleotide columns, so the halves are now computed
# from the actual column count.
# NOTE(review): with alphabetically ordered words this presumably pairs each
# trinucleotide with its base-wise complement (aaa+ttt, aac+ttg, ...) --
# confirm this is the intended folding.
n_words <- ncol(all)
stopifnot(n_words %% 2 == 0)
half <- n_words / 2
A <- all[, seq_len(half)]
a <- all[, half + seq_len(half)]
b <- a[, rev(seq_len(ncol(a)))]
# The name DI is kept from the dinucleotide analysis; here it holds the folded
# trinucleotide table.
DI <- A + b
pca <- princomp(DI, cor = TRUE)

X <- 1
Y <- 2
plot(pca$scores[, X], pca$scores[, Y], pch = 20, xlim = c(-8, 8),
     ylim = c(-8, 8), xlab = paste("Componente ", X[1]),
     ylab = paste("Componente", Y[1]))

# Row ranges are derived from the data instead of the hard-coded total 8774.
# The second range starts one row after the first set, so the last row of
# the first set is no longer also drawn in blue.
n_first <- nrow(tri_nuc)
n_total <- nrow(pca$scores)
rows_first <- seq_len(n_first)
rows_second <- n_first + seq_len(n_total - n_first)
points(pca$scores[rows_first, X], pca$scores[rows_first, Y], col = "red")
points(pca$scores[rows_second, X], pca$scores[rows_second, Y], col = "blue")



# PCA of the trinucleotide frequencies of the first sequence set only,
# plotting component 1 against component 4 in semi-transparent blue.
pca <- princomp(tri_nuc, cor = TRUE)
plot(pca$scores[, 1], pca$scores[, 4],
     col = rgb(red = 0, green = 0, blue = 1, alpha = 0.5), pch = 20)

# NOTE(review): the line below was an orphaned tail of a call and a syntax
# error that prevented the whole script from being parsed. It is kept as a
# comment; it looks like an alternative colour argument -- confirm and either
# restore the full call or delete it.
# alpha(grey, 0.4), pch=20 )



# Reads every FASTA file named in `list` and computes the dinucleotide
# frequencies of each of its sequences. `seq` and `di_maxi` are reassigned on
# every pass, so only the values from the last file remain after the loop.
# NOTE(review): `list` is not assigned anywhere in the visible script --
# presumably a vector of FASTA file names; confirm.
for (maxi in list) {
  seq <- read.fasta(file = maxi)
  di_maxi <- lapply(seq, count, wordsize = 2, freq = TRUE)
}

# Dinucleotide frequencies for every sequence of one FASTA file.
#
# x: path of the FASTA file, passed on to read.fasta().
# Returns a list with one element per sequence read from the file, each
# holding the result of count() for words of size 2 with freq = TRUE.
dinuc <- function(x) {
  sequences <- read.fasta(file = x)
  lapply(sequences, function(one_seq) count(one_seq, 2, freq = TRUE))
}

# Dinucleotide frequencies for every file named in `list`, gathered into one
# table with a row per file, followed by a PCA.
# NOTE(review): `list` is not assigned anywhere in the visible script --
# presumably a vector of FASTA file names; confirm.
freq_di <- lapply(list, dinuc)
unlist(freq_di)
l <- freq_di
do.call(rbind.data.frame, l)

# matrix() has no `rownames` argument, so the former `rownames=names` made the
# call fail with an "unused argument" error; row names are assigned just
# below instead. The row count follows the number of files rather than being
# fixed at 6.
df <- data.frame(
  matrix(unlist(l), nrow = length(l), byrow = TRUE),
  stringsAsFactors = FALSE
)
rownames(df) <- list
# NOTE(review): naming the columns after the words of a single sequence
# assumes every file holds exactly one sequence -- confirm.
colnames(df) <- names(l[[1]][[1]])

# Three stacked copies of the table are passed to the PCA.
# NOTE(review): presumably done so princomp() gets more rows than variables;
# confirm this is intended.
p <- rbind(df, df, df)
pca <- princomp(p, cor = FALSE)
plot(pca$scores[, 1], pca$scores[, 2], pch = 20)

