Summary

This script is the 4th of 4 scripts meant to process data in R. This script deals only Transcription Factor Binding Site data output by the Finding Instances of Motif Occurances program (FIMO).

Script is broken down into 4 main sections as follows:

  1. Comparing FIMO P-value thresholds
  2. Total binding site comparison
  3. P-value testing
  4. Binding site score comparison

Required files:

  1. FIMO output for A4 regions(pvalue threshold of p=.001) : fimo_a4_try2p001.tsv
  2. FIMO output for B6 regions(pvalue threshold of p=.001) : fimo_b6_try2p001.tsv
  3. FIMO output for A4 regions(pvalue threshold of p=.0001) : fimo_a4_try2p001.tsv
  4. FIMO output for B6 regions(pvalue threshold of p=.0001) : fimo_b6_try2p001.tsv
  5. Binding sites Identified identified by Senger et al 2004 : senger_2004_bs.txt
  6. List of immune genes: immunelist_LB_2019_cat.txt

Section 1: Comparing FIMO P-value thresholds.

It is worth mentioning that we descided on the less stringent p-value threshold of p=.001 since it allowed us to detect the majority of previously described Binding sites.

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
library(data.table)
library(schoolmath)
library(rlist)
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.2
rm(list=ls())
#setwd("~/Desktop/Wunderlichlab/Cis_var_project/Vinay_collab")
#make a data frame for Binding site lengths 
bsa4l<-read.table("fimo_a4_try2p001.tsv", header = T, stringsAsFactors = F)
bsb6l<-read.table("fimo_b6_try2p001.tsv", header = T, stringsAsFactors = F)
bsa4s<-read.table("fimo_a4_try2p0001.tsv", header = T, stringsAsFactors = F)
bsb6s<-read.table("fimo_b6_try2p0001.tsv", header = T, stringsAsFactors = F)
sengerbs<-read.table("senger_2004_bs.txt", header = T, stringsAsFactors = F)
immune_gene<-read.table("~/Desktop/Wunderlichlab/Cis_var_project/immunelist_LB_2019_cat.txt", header = T, stringsAsFactors = F)

#First we are going to look at the 5 immune genes from the Senger 2004 paper
tempimm<-immune_gene[immune_gene$Symbol %in% c("Dpt","Mtk","PGRP-SB1","Def"),1]
templist<-list(bsa4l,bsb6l,bsa4s, bsb6s)
#pull out ONLY the 4 immune genes
templist<-lapply(templist, function(x) x<-x[x$sequence_name %in% tempimm,]) 
genos<-c("a4","b6","a4","b6")
pthresh<-c(0.001,0.001,0.0001,0.0001)
for (i in 1:length(genos)){
  templist[[i]]$genotype<-genos[i]
  templist[[i]]$pthresh<-pthresh[i]
}
templist<-lapply(templist, function(x) cbind(x,paste(x$motif_id, x$genotype, x$pthresh, sep="_")))
templist<-lapply(templist, function(x) x<-x[,c(2,1,12,3,4:7)])

temp_comp<-as.data.frame(matrix(ncol=5,nrow = 0))
colnames(temp_comp)<-c("sequence_name","motif_id","paste(x$motif_id, x$genotype, x$pthresh, sep = \"_\")","start","stop")
for (i in 1:4){
  tempsub<-templist[[i]]
  temp_comp<-rbind(temp_comp,tempsub)
  rm(tempsub)
}
colnames(temp_comp)<-c("Gene","TF","ID_tf","start","stop","strand","score","p.value")
temp_comp<-setcolorder(temp_comp,c("Gene","TF","ID_tf","start","stop","strand","score","p.value"))

sengerbs$genotype<-"iso1"
sengerbs$pthresh<-"none"
sengerbs$ID_tf<-paste(sengerbs$BS,sengerbs$genotype,sengerbs$pthresh,sep="_")
sengerbs$score<-18
sengerbs$p.value<-.0000008
tempsenger<-sengerbs[,c("Gene","BS","ID_tf","start_adj","stop_adj","strand","score","p.value")]
colnames(tempsenger)<-colnames(temp_comp)

temp_comp<-rbind(tempsenger,temp_comp)
temp_comp$ID_tf<-as.factor(temp_comp$ID_tf)
temp_comp$TF<-as.factor(temp_comp$TF)

#function 1 : 
multioverlap<-function(ref){
  ref$d1<-abs(ref$start-tempgenequery[k,c("stop")])
  ref$d2<-abs(ref$stop-tempgenequery[k,c("start")])
  #see if any values are worth keeping
  if ( min(ref$d1)>thresh & min(ref$d2)>thresh ){
    return("nonoverlapping")
  } else if(nrow(ref[ref$d1==ref$d2,])>0) {
    return(rownames(ref[ref$d1==ref$d2,]))
  } else if (min(ref$d1) < min(ref$d2) & min(ref$d1)<thresh ){
    if (nrow(ref[ref$d1== min(ref$d1),])>1){
      ref$d3<-abs(ref$d1-ref$d2)
      return(rownames(ref[ref$d3== min(ref$d3),]))
    } else {
      return(rownames(ref[ref$d1== min(ref$d1),]))
    }
    
  } else if (min(ref$d1) > min(ref$d2) & min(ref$d2)<thresh ){
    if (nrow(ref[ref$d2== min(ref$d2),])>1){
      ref$d3<-abs(ref$d1-ref$d2)
      return(rownames(ref[ref$d3== min(ref$d3),]))
    } else {
      return(rownames(ref[ref$d2== min(ref$d2),]))
    }
  } else{
    return("nonoverlapping")
  }
}
#function2
isdup <- function (x) duplicated (x) | duplicated (x, fromLast = TRUE)
#lets compare stuff now!

hitlist<-list()
thresh<-15
for (i in 1:length(templist)){
  tempdf<-templist[[i]]
  colnames(tempdf)<-colnames(tempsenger)
  tempgenelist<-unique(tempdf$Gene)
  temp_qm_merged<-as.data.frame(matrix(ncol=16,nrow=0))
  colnames(temp_qm_merged)<-c("Gene","TF","ID_tf","start","stop","strand","score","p.value","Gene","TF","ID_tf","start","stop","strand","score","p.value")
  for (j in 1:length(tempgenelist)){
    tempgenequery<-tempdf[tempdf$Gene==tempgenelist[j],] #query
    tempgenematch<-tempsenger[tempsenger$Gene==tempgenelist[j],] #subject
    # rm(tempgenequery,tempgenematch)
    rowsmatched<-as.character()
    for (k in 1:nrow(tempgenequery)){ 
      #tempTS_strand_match<-tempgenematch[tempgenematch$TF==tempgenequery[k,c("TF")] & tempgenematch$strand==tempgenequery[k,c("strand")],]
      tempTS_strand_match<-tempgenematch[tempgenematch$TF==tempgenequery[k,c("TF")] ,]
      #no matches in the "ref set
      if(nrow(tempTS_strand_match)==0){
        tempTS_strand_match<-tempgenequery[k,]
        tempTS_strand_match[,3:8]<-NA
        temp_qm_comb<-cbind(tempTS_strand_match,tempgenequery[k,])
      #one match in the ref set
      } else if (nrow(tempTS_strand_match)>=1){
        if(multioverlap(tempTS_strand_match)=="nonoverlapping"){
          tempTS_strand_match<-tempgenequery[k,]
          tempTS_strand_match[,3:8]<-NA
          temp_qm_comb<-cbind(tempTS_strand_match,tempgenequery[k,])
        }else {
          tempTS_strand_match_sub<-tempTS_strand_match[rownames(tempTS_strand_match) ==multioverlap(tempTS_strand_match),]
          temp_qm_comb<-cbind(tempTS_strand_match_sub,tempgenequery[k,])
          rowsmatched<-append(rowsmatched,multioverlap(tempTS_strand_match))
        }
      }
      temp_qm_merged<-rbind(temp_qm_merged,temp_qm_comb)
      if (k==nrow(tempgenequery)){
        temp_exclude_a<-tempgenematch[! rownames(tempgenematch) %in% rowsmatched,]
        temp_exclude_b<-as.data.frame(matrix(ncol=8,nrow=nrow(temp_exclude_a)))
    colnames(temp_exclude_b)<-c("Gene","TF","ID_tf","start","stop","strand","score","p.value")
        temp_qm_comb<-cbind(temp_exclude_a,temp_exclude_b)
        temp_qm_merged<-rbind(temp_qm_merged,temp_qm_comb)
      }
      #close of for loop3 TFBS iterating. 
    }
    #close of for loop2 Gene subsetting
  }
    hitlist<-list.append(hitlist, temp_qm_merged)
  #rm(tempsub)
  #close of for loop 1 list sep
}

tfbs_list<-templist
rm(list=ls(pattern = "temp"))
tempnames<-c("rgene","rTF","rID_tf","rstart","rstop","rstrand","rscore","rp.value","Gene","TF","ID_tf","start","stop","strand","score","p.value")

tempsharedbs<-numeric()
temptotalbs<-numeric()
tempbsmiss<-numeric()
temprel<-numeric()
temprelmiss<-numeric()
tempsrp<-numeric()
tempsrpmiss<-numeric()
for (i in 1:length(hitlist)){
  temphits<-hitlist[[i]]
  colnames(temphits)<-tempnames
  temptotalbs<-append(temptotalbs, sum(complete.cases(temphits$start)))
  tempsharedbs<-append(tempsharedbs, sum(complete.cases(temphits)))
  tempbsmiss<-append(tempbsmiss, 18-sum(complete.cases(temphits)))
  temprelbs<-temphits[temphits$rTF=="rel"| temphits$TF=="rel",]
  temprel<-append(temprel, sum(complete.cases(temprelbs)))
  temprelmiss<-append(temprelmiss,11-sum(complete.cases(temprelbs)))
  tempsrpbs<-temphits[temphits$rTF=="srp"| temphits$TF=="srp",]
  tempsrp<-append(tempsrp,sum(complete.cases(tempsrpbs)))
  tempsrpmiss<-append(tempsrpmiss,7-sum(complete.cases(tempsrpbs)))
}

bscomp<-as.data.frame(cbind(c("A4_p.001","B6_p.001","A4_p.0001","B6_p.0001"),temptotalbs, tempsharedbs, tempbsmiss,temprel,temprelmiss,tempsrp,tempsrpmiss))

bscomp1<-bscomp[,c("V1","temptotalbs", "tempsharedbs", "tempbsmiss")]
bscomp2<-bscomp[,c("V1","temprel", "temprelmiss", "tempsrp","tempsrpmiss")]

bscomp1<-gather(bscomp1,key = "gth", value = "n", temptotalbs:tempbsmiss)
## Warning: attributes are not identical across measure variables;
## they will be dropped
bscomp2<-gather(bscomp2,key = "gth", value = "n", temprel:tempsrpmiss)
## Warning: attributes are not identical across measure variables;
## they will be dropped
# function to tell of the a Binding sites are overlapping
bscomp1$n<-as.numeric(as.character(bscomp1$n))
bscomp2$n<-as.numeric(as.character(bscomp2$n))

BSCOMP2<-ggplot(bscomp2, aes(fill=`gth`, y=`n`, x=`V1`)) + 
    geom_bar(position="dodge", stat="identity")
BSCOMP2 +
  ggtitle("REl and SRP Identified Binding \nSites by threshhold ")+
  ylab("Number of Sites")+
  xlab("Genotype and Pvalue threshold")+
  scale_fill_manual(values=c("#01c74a", "#01792d","#03dafc", "#0284bc"),
                    name="Type of Site",
                    breaks=c("temprel", "temprelmiss", "tempsrp","tempsrpmiss"),
                    labels=c("Total matching REL sites\n Senger 2004", "Total missing REL sites\n Senger 2004", "Total matching SRP sites\n Senger 2004", "Total missing SRP sites\n Senger 2004"))+
  theme_bw()

Section 2: Total binding site comparison

#looking at total TF Binding sites 
##NOTE we descided to only consider genes showing signal in response to Efae and Smar Infection these genes have been filtered for Immune genes.
tempesig<-read.table("~/Desktop/Wunderlichlab/Cis_var_project/Cisvar7/DSPR_alig/Try9/efae_ct.txt", header = T, stringsAsFactors = F)
tempssig<-read.table("~/Desktop/Wunderlichlab/Cis_var_project/Cisvar7/DSPR_alig/Try9/smar_ct.txt", header = T, stringsAsFactors = F)
colnames(tempesig)[2]<-"EFAE"
colnames(tempssig)[2]<-"SMAR"
tempcombsig<-merge(tempesig,tempssig, by=1, all = T)
#looking at expression Bias
esig<-read.table("~/Desktop/Wunderlichlab/Cis_var_project/Cisvar7/DSPR_alig/Try9/efae_cistrans.txt", header = T)
ssig<-read.table("~/Desktop/Wunderlichlab/Cis_var_project/Cisvar7/DSPR_alig/Try9/smar_cistrans.txt", header = T)
esig$eA4high<-is.negative(esig$PLogFC)
esig<-esig[,c("Row.names","eA4high")]
ssig$sA4high<-is.negative(ssig$PLogFC)
ssig<-ssig[,c("Row.names","sA4high")]

sigsig<-merge(esig,ssig, by=1,all=T)
tempnewcat<-as.character()
for (i in 1:nrow(sigsig)){
  temp<-sigsig[i,]
  if (is.na(temp$eA4high)==TRUE){
    tempnewcat<-append(tempnewcat,temp[1,c("sA4high")])
  } else if (is.na(temp$sA4high)==TRUE){
    tempnewcat<-append(tempnewcat,temp[1,c("eA4high")])
  }else if (temp$eA4high == temp$sA4high){
    tempnewcat<-append(tempnewcat,temp[1,c("eA4high")])
  }else{
    tempnewcat<-append(tempnewcat,"UNCLEAR")
  }
    
}
sigsig<-cbind(sigsig,tempnewcat)

temp_class<-as.character()
for (i in 1:nrow(tempcombsig)){
  templine<-tempcombsig[i,]
  templine[is.na(templine)] = 0
  templine <- data.frame(lapply(templine, function(x) {gsub("cis", "1", x)}))
  templine <- data.frame(lapply(templine, function(x) {gsub("ct", "10", x)}))
  templine <- data.frame(lapply(templine, function(x) {gsub("comp", "10", x)}))
  templine <- data.frame(lapply(templine, function(x) {gsub("trans", "100", x)}))
  templine$EFAE<-as.numeric(as.character(templine$EFAE))
  templine$SMAR<-as.numeric(as.character(templine$SMAR))
  templine$SUM<-templine$EFAE+templine$SMAR
  if(templine$SUM<100){
    temp_class<-append(temp_class, "cis")
  }else if (templine$SUM==100 | templine$SUM==200){
    temp_class<-append(temp_class, "trans")
  }else if (templine$SUM%%100==10){
    temp_class<-append(temp_class, "cis")
  }else if(templine$SUM%%100==1){
    temp_class<-append(temp_class, "exclude")
  }
}


sum(temp_class=="exclude")
## [1] 3
sum(temp_class=="cis")
## [1] 219
sum(temp_class=="trans")
## [1] 199
tempcombsig<-cbind( tempcombsig, temp_class)

temp_a4cis<-bsa4l[bsa4l$sequence_name %in% tempcombsig[tempcombsig$temp_class=="cis",c("V1")],]
temp_a4trans<-bsa4l[bsa4l$sequence_name %in% tempcombsig[tempcombsig$temp_class=="trans",c("V1")],]
temp_b6cis<-bsb6l[bsb6l$sequence_name %in% tempcombsig[tempcombsig$temp_class=="cis",c("V1")],]
temp_b6trans<-bsb6l[bsb6l$sequence_name %in% tempcombsig[tempcombsig$temp_class=="trans",c("V1")],]
#templist<-list(temp_a4cis,temp_a4trans,temp_b6cis,temp_b6trans)

temp_data<-temp_a4cis
geno_cond<-"a4_cis"
gtf<-function(temp_data,geno_cond){
  tempgenelist<-unique(temp_data$sequence_name)
  relish<-as.numeric()
  dorsal<-as.numeric()
  serpent<-as.numeric()
  creba<-as.numeric()
  for(i in 1:length(tempgenelist)){
    tempgene<-temp_data[temp_data$sequence_name==tempgenelist[i],]
    relish<-append(relish, sum(tempgene$motif_id=="rel"))
    dorsal<-append(dorsal, sum(tempgene$motif_id=="dl"))
    serpent<-append(serpent, sum(tempgene$motif_id=="srp"))
    creba<-append(creba, sum(tempgene$motif_id=="crebA"))
  }
  tempgenelist<-as.data.frame(cbind(tempgenelist, relish,dorsal,serpent,creba))
  colnames(tempgenelist)<-c("Gene", paste(geno_cond,"rel", sep="_"),paste(geno_cond,"dl", sep="_"),paste(geno_cond,"srp", sep="_"),paste(geno_cond,"crebA", sep="_"))
  newgenes<-gather(tempgenelist, key = "cond", value = "tfbs", paste(geno_cond,"rel", sep="_"):paste(geno_cond,"crebA", sep="_"))
  newgenes$geno<-t(as.data.frame(strsplit(newgenes$cond,split = "_")))[,1]
  newgenes$eff<-t(as.data.frame(strsplit(newgenes$cond,split = "_")))[,2]
  newgenes$tf<-t(as.data.frame(strsplit(newgenes$cond,split = "_")))[,3]
  newgenes$gene_tf<-paste(newgenes$Gene,newgenes$tf,sep="_")
  return(newgenes)
}

allcis<-merge(gtf(temp_a4cis,"a4_cis"),gtf(temp_b6cis,"b6_cis"), by=7, all=T)
## Warning: attributes are not identical across measure variables;
## they will be dropped

## Warning: attributes are not identical across measure variables;
## they will be dropped
alltrans<-merge(gtf(temp_a4trans,"a4_trans"),gtf(temp_b6trans,"b6_trans"), by=7, all=T)
## Warning: attributes are not identical across measure variables;
## they will be dropped

## Warning: attributes are not identical across measure variables;
## they will be dropped
# a4 binding sites - b6 binding sites
allcis$diff<-as.numeric(allcis$tfbs.x) - as.numeric(allcis$tfbs.y)
alltrans$diff<-as.numeric(alltrans$tfbs.x) - as.numeric(alltrans$tfbs.y)
allisall<-rbind(allcis,alltrans)
allisall<-allisall[complete.cases(allisall),]
#function
#temp_data<-allisall
expression_direction<-function(tempdata){
  temp_cats<-as.character()
  for (i in 1:nrow(tempdata)){
    templine<-as.character(tempdata[i, c("Gene.x")])
    temp_cats<-append(temp_cats,as.character(sigsig[sigsig$Row.names==templine, c("tempnewcat")]))
  }
  return(temp_cats)
}
a4higher<-expression_direction(allisall)
allisall<-cbind(allisall,a4higher)
##gonna fill trhis upp with pvalues

temptfs<-c("all","rel","dl","srp","crebA")
tempcisdiff<-as.numeric()
temptransdiff<-as.numeric()


Figure4_A<-ggplot(allisall,aes(y=`eff.y`, x=`diff`,colour=`eff.y`))

tempmeans<-data.frame(eff.y=c("cis","trans"),diff= c(round(mean(allisall[allisall$eff.y=="cis" & allisall$diff!=0 ,c("diff")],na.rm=TRUE), digits=2),round(mean(allisall[allisall$eff.y=="trans" & allisall$diff!=0 ,c("diff")],na.rm=TRUE),digits = 2))) 
tempcisdiff<-append(tempcisdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis",])/nrow(allisall[allisall$eff.y=="cis",]))
temptransdiff<-append(temptransdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans",])/nrow(allisall[allisall$eff.y=="trans",]))

Figure4_A+
  geom_point()+
  geom_jitter()+
  theme_bw()+
  labs(title="Differences in Total Identified Binding sites", 
       x="Number of Binding Sites\n(A4-B6)",
       y="Gene Effect" )+
  scale_color_manual(name="Gene Group",values=c("cis"='blue',"trans"='red'),labels = c("Any Cis","Only Trans"))+
  stat_summary(fun.data="mean_sdl",geom="errorbar", color="black", width=0.2)+
  stat_summary(fun="mean", geom="point", color="black")

  #geom_point(data=tempmeans, color='black',shape=23)



Figure4_C<-ggplot(allisall[allisall$tf.x=='rel',] ,aes(y=`eff.y`, x=`diff`,colour=`eff.y`))
tempmeans<-data.frame(eff.y=c("cis","trans"),diff= c(round(mean(allisall[allisall$tf.x =="rel" & allisall$eff.y=="cis" & allisall$diff!=0 ,c("diff")],na.rm=TRUE), digits=2),round(mean(allisall[allisall$tf.x =="rel" & allisall$eff.y=="trans" & allisall$diff!=0 ,c("diff")],na.rm=TRUE),digits = 2))) 
tempcisdiff<-append(tempcisdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis"& allisall$tf.x=='rel',])/nrow(allisall[allisall$eff.y=="cis" & allisall$tf.x=='rel',]))
temptransdiff<-append(temptransdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans"& allisall$tf.x=='rel',])/nrow(allisall[allisall$eff.y=="trans" & allisall$tf.x=='rel',]))

Figure4_C+
  geom_point()+
  geom_jitter()+
  theme_bw()+
  labs(title="Relish Only\nDifferences in Total Identified Binding sites", 
       x="Number of Binding Sites\n(A4-B6)",
       y="Gene Effect")+
  xlim(-11,6)+
  scale_color_manual(name="Gene Group",values=c("cis"='blue',"trans"='red'),labels = c("Any Cis","Only Trans"))+
  stat_summary(fun.data="mean_sdl",geom="errorbar", color="black", width=0.2)+
  stat_summary(fun="mean", geom="point", color="black")

 # geom_point(data=tempmeans, color='black',shape=23)

Figure4_B<-ggplot(allisall[allisall$tf.x=='dl',] ,aes(y=`eff.y`, x=`diff`,colour=`eff.y`))
tempmeans<-data.frame(eff.y=c("cis","trans"),diff= c(round(mean(allisall[allisall$tf.x =="dl" & allisall$eff.y=="cis" & allisall$diff!=0 ,c("diff")],na.rm=TRUE), digits=2),round(mean(allisall[allisall$tf.x =="dl" & allisall$eff.y=="trans" & allisall$diff!=0 ,c("diff")],na.rm=TRUE),digits = 2))) 

tempcisdiff<-append(tempcisdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis"& allisall$tf.x=='dl',])/nrow(allisall[allisall$eff.y=="cis" & allisall$tf.x=='dl',]))
temptransdiff<-append(temptransdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans"& allisall$tf.x=='dl',])/nrow(allisall[allisall$eff.y=="trans" & allisall$tf.x=='dl',]))

Figure4_B+
  geom_point()+
  geom_jitter()+
  theme_bw()+
  labs(title="Dorsal Only\nDifferences in Total Identified Binding sites", 
       x="Number of Binding Sites\n(A4-B6)",
       y="Gene Effect")+
  xlim(-11,6)+
  scale_color_manual(name="Gene Group",values=c("cis"='blue',"trans"='red'),labels = c("Any Cis","Only Trans"))+
  stat_summary(fun.data="mean_sdl",geom="errorbar", color="black", width=0.2)+
  stat_summary(fun="mean", geom="point", color="black")
## Warning: Removed 1 rows containing missing values (geom_point).

  #geom_point(data=tempmeans, color='black',shape=23)

Figure4_D<-ggplot(allisall[allisall$tf.x=='srp',] ,aes(y=`eff.y`, x=`diff`,colour=`eff.y`))
tempmeans<-data.frame(eff.y=c("cis","trans"),diff= c(round(mean(allisall[allisall$tf.x =="srp" & allisall$eff.y=="cis" & allisall$diff!=0 ,c("diff")],na.rm=TRUE), digits=2),round(mean(allisall[allisall$tf.x =="srp" & allisall$eff.y=="trans" & allisall$diff!=0 ,c("diff")],na.rm=TRUE),digits = 2))) 

tempcisdiff<-append(tempcisdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis"& allisall$tf.x=='srp',])/nrow(allisall[allisall$eff.y=="cis" & allisall$tf.x=='srp',]))
temptransdiff<-append(temptransdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans"& allisall$tf.x=='srp',])/nrow(allisall[allisall$eff.y=="trans" & allisall$tf.x=='srp',]))


Figure4_D+
  geom_point()+
  geom_jitter()+
  theme_bw()+
  labs(title="Serpent Only\nDifferences in Total Identified Binding sites", 
       x="Number of Binding Sites\n(A4-B6)",
       y="Gene Effect",caption = "Ploted means values are 0 excluding")+
  scale_color_manual(name="Gene Group",values=c("cis"='blue',"trans"='red'),labels = c("Any Cis","Only Trans"))+
  xlim(-11,6)+
  stat_summary(fun.data="mean_sdl",geom="errorbar", color="black", width=0.2)+
  stat_summary(fun="mean", geom="point", color="black")
## Warning: Removed 1 rows containing missing values (geom_point).

  #geom_point(data=tempmeans, color='black',shape=23)

Figure4_E<-ggplot(allisall[allisall$tf.x=='crebA',] ,aes(y=`eff.y`, x=`diff`,colour=`eff.y`))
tempmeans<-data.frame(eff.y=c("cis","trans"),diff= c(round(mean(allisall[allisall$tf.x =="crebA" & allisall$eff.y=="cis" & allisall$diff!=0 ,c("diff")],na.rm=TRUE), digits=2),round(mean(allisall[allisall$tf.x =="crebA" & allisall$eff.y=="trans" & allisall$diff!=0 ,c("diff")],na.rm=TRUE),digits = 2))) 
tempcisdiff<-append(tempcisdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis"& allisall$tf.x=='crebA',])/nrow(allisall[allisall$eff.y=="cis" & allisall$tf.x=='crebA',]))
temptransdiff<-append(temptransdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans"& allisall$tf.x=='crebA',])/nrow(allisall[allisall$eff.y=="trans" & allisall$tf.x=='crebA',]))


Figure4_E+
  geom_point()+
  geom_jitter()+
  theme_bw()+
  labs(title="CrebA Only\nDifferences in Total Identified Binding sites", 
       x="Number of Binding Sites\n(A4-B6)",
       y="Gene Effect",caption = "Ploted means values are 0 excluding")+
  scale_color_manual(name="Gene Group",values=c("cis"='blue',"trans"='red'),labels = c("Any Cis","Only Trans"))+
  xlim(-11,6)+
  stat_summary(fun.data="mean_sdl",geom="errorbar", color="black", width=0.2)+
  stat_summary(fun="mean", geom="point", color="black")

  #geom_point(data=tempmeans, color='black',shape=23)

tempprop<-as.numeric()
tempprop<-append(tempprop,prop.test(as.matrix(data.frame(succ  = c(nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis",]), nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans",]) ),fail = c(nrow(allisall[allisall$diff==0 & allisall$eff.y=="cis",]), nrow(allisall[allisall$diff==0 & allisall$eff.y=="trans",])))))$p.value)
tempprop<-append(tempprop,prop.test(as.matrix(data.frame(succ  = c(nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis" & allisall$tf.x=='rel',]), nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans" & allisall$tf.x=='rel',]) ),fail = c(nrow(allisall[allisall$diff==0 & allisall$eff.y=="cis" & allisall$tf.x=='rel',]), nrow(allisall[allisall$diff==0 & allisall$eff.y=="trans" & allisall$tf.x=='rel',])))))$p.value)
tempprop<-append(tempprop,prop.test(as.matrix(data.frame(succ  = c(nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis" & allisall$tf.x=='dl',]), nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans" & allisall$tf.x=='dl',]) ),fail = c(nrow(allisall[allisall$diff==0 & allisall$eff.y=="cis" & allisall$tf.x=='dl',]), nrow(allisall[allisall$diff==0 & allisall$eff.y=="trans" & allisall$tf.x=='dl',])))))$p.value)
tempprop<-append(tempprop,prop.test(as.matrix(data.frame(succ  = c(nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis" & allisall$tf.x=='srp',]), nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans" & allisall$tf.x=='srp',]) ),fail = c(nrow(allisall[allisall$diff==0 & allisall$eff.y=="cis" & allisall$tf.x=='srp',]), nrow(allisall[allisall$diff==0 & allisall$eff.y=="trans" & allisall$tf.x=='srp',])))))$p.value)
tempprop<-append(tempprop,prop.test(as.matrix(data.frame(succ  = c(nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis" & allisall$tf.x=='crebA',]), nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans" & allisall$tf.x=='crebA',]) ),fail = c(nrow(allisall[allisall$diff==0 & allisall$eff.y=="cis" & allisall$tf.x=='crebA',]), nrow(allisall[allisall$diff==0 & allisall$eff.y=="trans" & allisall$tf.x=='crebA',])))))$p.value)
propsum<-as.data.frame(cbind(temptfs,tempcisdiff,temptransdiff))
colnames(propsum)<-c("tfs","cis","trans")
propsum<-gather(propsum, key = "test", value = "proportion_non0",cis:trans)
## Warning: attributes are not identical across measure variables;
## they will be dropped
propsum$proportion_non0<-as.numeric(propsum$proportion_non0)

Figure4_F<-ggplot(propsum,aes(x=`tfs`, y=`proportion_non0`, fill=`test`))
Figure4_F+
  geom_bar(position="dodge", stat="identity")+
  labs(title = "Proportion of genes with non-zero\ndifferences in TFbinding site",x="Transcription Factor", y="proportion of non-zero differences", caption = "No significance found for using test of proportions")+
  scale_fill_manual("legend", values = c("cis" = "blue","trans" = "red"))+
  theme_bw()

## Section 3 : P-Value testing

Test 1: Testing for differences in the Variances of TFBS differences between cis and trans-only gene groups.

Text: “For all transcription factors except Dl (Figure 4A-E), the genes with cis effects did indeed show a broader distribution of difference than those with trans effects (all TFs: p = 8.8e-13, Rel: p = 2.9e-2, Srp: p = 7.1e-10, CrebA: p =1.5e-7 , F-test to compare distribution variances, Bonferoni corrected).”

tempvar<-as.numeric()

##total tfs
tempvar<-append(tempvar,var.test(allisall[allisall$eff.y=='cis',c("diff")],allisall[allisall$eff.y=='trans',c("diff")] )$p.value)
#rel
tempvar<-append(tempvar,var.test(allisall[allisall$eff.y=='cis'& allisall$tf.x=='rel' ,c("diff")],allisall[allisall$eff.y=='trans'& allisall$tf.x=='rel',c("diff")])$p.value)
#dl
tempvar<-append(tempvar,var.test(allisall[allisall$eff.y=='cis'& allisall$tf.x=='dl' ,c("diff")],allisall[allisall$eff.y=='trans'& allisall$tf.x=='dl',c("diff")] )$p.value)
#srp
tempvar<-append(tempvar,var.test(allisall[allisall$eff.y=='cis'& allisall$tf.x=='srp' ,c("diff")],allisall[allisall$eff.y=='trans'& allisall$tf.x=='srp',c("diff")] )$p.value)
#creba
tempvar<-append(tempvar,var.test(allisall[allisall$eff.y=='cis'& allisall$tf.x=='crebA' ,c("diff")],allisall[allisall$eff.y=='trans'& allisall$tf.x=='crebA',c("diff")] )$p.value)
##
pvaluesum<-as.data.frame(cbind(temptfs, tempvar))
colnames(pvaluesum)<-c("tfs","variance_test")
pvaluesum$variance_test<-as.numeric(as.character(pvaluesum$variance_test))
pvaluesum$variance_test<-pvaluesum$variance_test*5 #Bonferoni correction (5 tests)

pvaluesum
##     tfs variance_test
## 1   all  3.019807e-13
## 2   rel  1.514177e-02
## 3    dl  4.922592e-01
## 4   srp  1.193268e-10
## 5 crebA  4.477930e-08

Section 4 : Binding site score comparison.

Its notable this the these results where not found to be significant but are included in the suplement.

###again but this time with scores
#function
stf<-function(temp_data,geno_cond){
  tempgenelist<-unique(temp_data$sequence_name)
  relish<-as.numeric()
  dorsal<-as.numeric()
  serpent<-as.numeric()
  creba<-as.numeric()
  for(i in 1:length(tempgenelist)){
    tempgene<-temp_data[temp_data$sequence_name==tempgenelist[i],]
    relish<-append(relish, sum(tempgene[tempgene$motif_id=="rel",c("score")]))
    dorsal<-append(dorsal, sum(tempgene[tempgene$motif_id=="dl",c("score")]))
    serpent<-append(serpent, sum(tempgene[tempgene$motif_id=="srp",c("score")]))
    creba<-append(creba, sum(tempgene[tempgene$motif_id=="crebA",c("score")]))
  }
  tempgenelist<-as.data.frame(cbind(tempgenelist, relish,dorsal,serpent,creba))
  colnames(tempgenelist)<-c("Gene", paste(geno_cond,"rel", sep="_"),paste(geno_cond,"dl", sep="_"),paste(geno_cond,"srp", sep="_"),paste(geno_cond,"crebA", sep="_"))
  newgenes<-gather(tempgenelist, key = "cond", value = "tfbs", paste(geno_cond,"rel", sep="_"):paste(geno_cond,"crebA", sep="_"))
  newgenes$geno<-t(as.data.frame(strsplit(newgenes$cond,split = "_")))[,1]
  newgenes$eff<-t(as.data.frame(strsplit(newgenes$cond,split = "_")))[,2]
  newgenes$tf<-t(as.data.frame(strsplit(newgenes$cond,split = "_")))[,3]
  newgenes$gene_tf<-paste(newgenes$Gene,newgenes$tf,sep="_")
  return(newgenes)
}
allcis<-merge(stf(temp_a4cis,"a4_cis"),stf(temp_b6cis,"b6_cis"), by=7, all=T)
## Warning: attributes are not identical across measure variables;
## they will be dropped

## Warning: attributes are not identical across measure variables;
## they will be dropped
alltrans<-merge(stf(temp_a4trans,"a4_trans"),stf(temp_b6trans,"b6_trans"), by=7, all=T)
## Warning: attributes are not identical across measure variables;
## they will be dropped

## Warning: attributes are not identical across measure variables;
## they will be dropped
# a4 binding sites - b6 binding sites
allcis$diff<-as.numeric(allcis$tfbs.x) - as.numeric(allcis$tfbs.y)
alltrans$diff<-as.numeric(alltrans$tfbs.x) - as.numeric(alltrans$tfbs.y)
allisall<-rbind(allcis,alltrans)
allisall<-allisall[complete.cases(allisall),]
a4higher<-expression_direction(allisall)
allisall<-cbind(allisall,a4higher)

tempvar<-as.numeric()
temptfs<-c("all","rel","dl","srp","crebA")
tempcisdiff<-as.numeric()
temptransdiff<-as.numeric()
tempprop<-as.numeric()

Sup_figure_4_A<-ggplot(allisall,aes(y=`eff.y`, x=`diff`,colour=`eff.y`))

tempmeans<-data.frame(eff.y=c("cis","trans"),diff= c(round(mean(allisall[allisall$eff.y=="cis" & allisall$diff!=0 ,c("diff")],na.rm=TRUE), digits=2),round(mean(allisall[allisall$eff.y=="trans" & allisall$diff!=0 ,c("diff")],na.rm=TRUE),digits = 2))) 
tempcisdiff<-append(tempcisdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis",])/nrow(allisall[allisall$eff.y=="cis",]))
temptransdiff<-append(temptransdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans",])/nrow(allisall[allisall$eff.y=="trans",]))

Sup_figure_4_A+
  geom_point()+
  geom_jitter()+
  theme_bw()+
  labs(title="Differences in Total TFBS Score", 
       x="Number of Binding Sites\n(A4-B6)",
       y="Gene Effect" )+
  scale_color_manual(name="Gene Group",values=c("cis"='blue',"trans"='red'),labels = c("Any Cis","Only Trans"))+
  stat_summary(fun.data="mean_sdl",geom="errorbar", color="black", width=0.2)+
  stat_summary(fun="mean", geom="point", color="black")

  #geom_point(data=tempmeans, color='black',shape=23)

Sup_figure_4_C<-ggplot(allisall[allisall$tf.x=='rel',] ,aes(y=`eff.y`, x=`diff`,colour=`eff.y`))
tempmeans<-data.frame(eff.y=c("cis","trans"),diff= c(round(mean(allisall[allisall$tf.x =="rel" & allisall$eff.y=="cis" & allisall$diff!=0 ,c("diff")],na.rm=TRUE), digits=2),round(mean(allisall[allisall$tf.x =="rel" & allisall$eff.y=="trans" & allisall$diff!=0 ,c("diff")],na.rm=TRUE),digits = 2))) 
tempcisdiff<-append(tempcisdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis"& allisall$tf.x=='rel',])/nrow(allisall[allisall$eff.y=="cis" & allisall$tf.x=='rel',]))
temptransdiff<-append(temptransdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans"& allisall$tf.x=='rel',])/nrow(allisall[allisall$eff.y=="trans" & allisall$tf.x=='rel',]))

Sup_figure_4_C+
  geom_point()+
  geom_jitter()+
  theme_bw()+
  labs(title="Relish Only\nDifferences in Total TFBS Score", 
       x="Number of Binding Sites\n(A4-B6)",
       y="Gene Effect")+
  xlim(-120,60)+
  scale_color_manual(name="Gene Group",values=c("cis"='blue',"trans"='red'),labels = c("Any Cis","Only Trans"))+
  stat_summary(fun.data="mean_sdl",geom="errorbar", color="black", width=0.2)+
  stat_summary(fun="mean", geom="point", color="black")

 # geom_point(data=tempmeans, color='black',shape=23)

Sup_figure_4_B<-ggplot(allisall[allisall$tf.x=='dl',] ,aes(y=`eff.y`, x=`diff`,colour=`eff.y`))
tempmeans<-data.frame(eff.y=c("cis","trans"),diff= c(round(mean(allisall[allisall$tf.x =="dl" & allisall$eff.y=="cis" & allisall$diff!=0 ,c("diff")],na.rm=TRUE), digits=2),round(mean(allisall[allisall$tf.x =="dl" & allisall$eff.y=="trans" & allisall$diff!=0 ,c("diff")],na.rm=TRUE),digits = 2))) 
tempcisdiff<-append(tempcisdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis"& allisall$tf.x=='dl',])/nrow(allisall[allisall$eff.y=="cis" & allisall$tf.x=='dl',]))
temptransdiff<-append(temptransdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans"& allisall$tf.x=='dl',])/nrow(allisall[allisall$eff.y=="trans" & allisall$tf.x=='dl',]))


Sup_figure_4_B+
  geom_point()+
  geom_jitter()+
  theme_bw()+
  labs(title="Dorsal Only\nDifferences in Total TFBS score", 
       x="Number of Binding Sites\n(A4-B6)",
       y="Gene Effect")+
  xlim(-120,60)+
  scale_color_manual(name="Gene Group",values=c("cis"='blue',"trans"='red'),labels = c("Any Cis","Only Trans"))+
  stat_summary(fun.data="mean_sdl",geom="errorbar", color="black", width=0.2)+
  stat_summary(fun="mean", geom="point", color="black")

  #geom_point(data=tempmeans, color='black',shape=23)

Sup_figure_4_D<-ggplot(allisall[allisall$tf.x=='srp',] ,aes(y=`eff.y`, x=`diff`,colour=`eff.y`))
tempmeans<-data.frame(eff.y=c("cis","trans"),diff= c(round(mean(allisall[allisall$tf.x =="srp" & allisall$eff.y=="cis" & allisall$diff!=0 ,c("diff")],na.rm=TRUE), digits=2),round(mean(allisall[allisall$tf.x =="srp" & allisall$eff.y=="trans" & allisall$diff!=0 ,c("diff")],na.rm=TRUE),digits = 2))) 
tempcisdiff<-append(tempcisdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis"& allisall$tf.x=='srp',])/nrow(allisall[allisall$eff.y=="cis" & allisall$tf.x=='srp',]))
temptransdiff<-append(temptransdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans"& allisall$tf.x=='srp',])/nrow(allisall[allisall$eff.y=="trans" & allisall$tf.x=='srp',]))

Sup_figure_4_D+
  geom_point()+
  geom_jitter()+
  theme_bw()+
  labs(title="Serpent Only\nDifferences in TFBS score", 
       x="Number of Binding Sites\n(A4-B6)",
       y="Gene Effect",caption = "Ploted means values are 0 excluding")+
  scale_color_manual(name="Gene Group",values=c("cis"='blue',"trans"='red'),labels = c("Any Cis","Only Trans"))+
  xlim(-120,60)+
  stat_summary(fun.data="mean_sdl",geom="errorbar", color="black", width=0.2)+
  stat_summary(fun="mean", geom="point", color="black")

  #geom_point(data=tempmeans, color='black',shape=23)

Sup_figure_4_E<-ggplot(allisall[allisall$tf.x=='crebA',] ,aes(y=`eff.y`, x=`diff`,colour=`eff.y`))
tempmeans<-data.frame(eff.y=c("cis","trans"),diff= c(round(mean(allisall[allisall$tf.x =="crebA" & allisall$eff.y=="cis" & allisall$diff!=0 ,c("diff")],na.rm=TRUE), digits=2),round(mean(allisall[allisall$tf.x =="crebA" & allisall$eff.y=="trans" & allisall$diff!=0 ,c("diff")],na.rm=TRUE),digits = 2))) 
tempcisdiff<-append(tempcisdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis"& allisall$tf.x=='crebA',])/nrow(allisall[allisall$eff.y=="cis" & allisall$tf.x=='crebA',]))
temptransdiff<-append(temptransdiff, nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans"& allisall$tf.x=='crebA',])/nrow(allisall[allisall$eff.y=="trans" & allisall$tf.x=='crebA',]))


Sup_figure_4_E+
  geom_point()+
  geom_jitter()+
  theme_bw()+
  labs(title="CrebA Only\nDifferences in Total TFBS score", 
       x="Number of Binding Sites\n(A4-B6)",
       y="Gene Effect",caption = "Ploted means values are 0 excluding")+
  scale_color_manual(name="Gene Group",values=c("cis"='blue',"trans"='red'),labels = c("Any Cis","Only Trans"))+
  xlim(-120,60)+
  stat_summary(fun.data="mean_sdl",geom="errorbar", color="black", width=0.2)+
  stat_summary(fun="mean", geom="point", color="black")

  #geom_point(data=tempmeans, color='black',shape=23)

tempprop<-append(tempprop,prop.test(as.matrix(data.frame(succ  = c(nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis",]), nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans",]) ),fail = c(nrow(allisall[allisall$diff==0 & allisall$eff.y=="cis",]), nrow(allisall[allisall$diff==0 & allisall$eff.y=="trans",])))))$p.value)
tempprop<-append(tempprop,prop.test(as.matrix(data.frame(succ  = c(nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis" & allisall$tf.x=='rel',]), nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans" & allisall$tf.x=='rel',]) ),fail = c(nrow(allisall[allisall$diff==0 & allisall$eff.y=="cis" & allisall$tf.x=='rel',]), nrow(allisall[allisall$diff==0 & allisall$eff.y=="trans" & allisall$tf.x=='rel',])))))$p.value)
tempprop<-append(tempprop,prop.test(as.matrix(data.frame(succ  = c(nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis" & allisall$tf.x=='dl',]), nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans" & allisall$tf.x=='dl',]) ),fail = c(nrow(allisall[allisall$diff==0 & allisall$eff.y=="cis" & allisall$tf.x=='dl',]), nrow(allisall[allisall$diff==0 & allisall$eff.y=="trans" & allisall$tf.x=='dl',])))))$p.value)
tempprop<-append(tempprop,prop.test(as.matrix(data.frame(succ  = c(nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis" & allisall$tf.x=='srp',]), nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans" & allisall$tf.x=='srp',]) ),fail = c(nrow(allisall[allisall$diff==0 & allisall$eff.y=="cis" & allisall$tf.x=='srp',]), nrow(allisall[allisall$diff==0 & allisall$eff.y=="trans" & allisall$tf.x=='srp',])))))$p.value)
tempprop<-append(tempprop,prop.test(as.matrix(data.frame(succ  = c(nrow(allisall[allisall$diff!=0 & allisall$eff.y=="cis" & allisall$tf.x=='crebA',]), nrow(allisall[allisall$diff!=0 & allisall$eff.y=="trans" & allisall$tf.x=='crebA',]) ),fail = c(nrow(allisall[allisall$diff==0 & allisall$eff.y=="cis" & allisall$tf.x=='crebA',]), nrow(allisall[allisall$diff==0 & allisall$eff.y=="trans" & allisall$tf.x=='crebA',])))))$p.value)


tempprop
## [1] 0.1244081 1.0000000 0.8514442 0.0950432 0.2042293
propsum<-as.data.frame(cbind(temptfs,tempcisdiff,temptransdiff))
colnames(propsum)<-c("tfs","cis","trans")
propsum<-gather(propsum, key = "test", value = "proportion_non0",cis:trans)
## Warning: attributes are not identical across measure variables;
## they will be dropped
propsum$proportion_non0<-as.numeric(propsum$proportion_non0)


tempvar<-append(tempvar,var.test(allisall[allisall$eff.y=='cis',c("diff")],allisall[allisall$eff.y=='trans',c("diff")] )$p.value)  
tempvar<-append(tempvar,var.test(allisall[allisall$eff.y=='cis'& allisall$tf.x=='rel' ,c("diff")],allisall[allisall$eff.y=='trans'& allisall$tf.x=='rel',c("diff")])$p.value)
tempvar<-append(tempvar,var.test(allisall[allisall$eff.y=='cis'& allisall$tf.x=='dl' ,c("diff")],allisall[allisall$eff.y=='trans'& allisall$tf.x=='dl',c("diff")] )$p.value)
tempvar<-append(tempvar,var.test(allisall[allisall$eff.y=='cis'& allisall$tf.x=='srp' ,c("diff")],allisall[allisall$eff.y=='trans'& allisall$tf.x=='srp',c("diff")] )$p.value)
tempvar<-append(tempvar,var.test(allisall[allisall$eff.y=='cis'& allisall$tf.x=='crebA' ,c("diff")],allisall[allisall$eff.y=='trans'& allisall$tf.x=='crebA',c("diff")] )$p.value)

Sup_figure_4_F<-ggplot(propsum,aes(x=`tfs`, y=`proportion_non0`, fill=`test`))
Sup_figure_4_F+
  geom_bar(position="dodge", stat="identity")+
  labs(title = "Proportion of genes with non-zero\ndifferences in TFBS score",x="Transcription Factor", y="proportion of non-zero differences", caption = "No significance found for using test of proportions")+
  scale_fill_manual("legend", values = c("cis" = "blue","trans" = "red"))+
  theme_bw()

##pvalues

pvaluesum<-as.data.frame(cbind(temptfs, tempvar))
colnames(pvaluesum)<-c("tfs","variance_test")
pvaluesum$variance_test<-as.numeric(as.character(pvaluesum$variance_test))
pvaluesum$variance_test<-pvaluesum$variance_test*5 #Bonferoni correction (5 tests)
pvaluesum
##     tfs variance_test
## 1   all  1.276756e-12
## 2   rel  2.639975e-01
## 3    dl  2.750666e+00
## 4   srp  5.140333e-13
## 5 crebA  2.220446e-14