This is the second of four scripts meant to process the data in R. It deals only with the PARSED reads aligned to either A4 or B6.
This script is broken up into 8 main sections
This script needs the following files to run :
Please note that the user can set the ‘cond’ variable to ‘CO2’, ‘EFAE’ or ‘SMAR’.
##Load up your environment
library(edgeR)
## Loading required package: limma
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
library(reshape2)
library(matrixStats)
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.2
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
library(ggfortify)
library(devtools)
## Loading required package: usethis
library(FactoMineR)
library(factoextra)
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(ggbiplot)
## Loading required package: plyr
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## Loading required package: scales
## Loading required package: grid
##
## Attaching package: 'ggbiplot'
## The following object is masked from 'package:ggfortify':
##
## ggbiplot
library(limma)
library(schoolmath)
# NOTE(review): this removes every object from the current environment before
# the analysis starts, so anything defined earlier in the session is lost.
rm(list=ls())
# Working-directory override, left disabled; input files are read relative to
# the current working directory.
#setwd("~/Desktop/Wunderlichlab/Cis_var_project/Cisvar7/DSPR_alig/Try9")
#### CHOOSE PARAMETERS ####
## Treatment to analyse. Pick exactly one of the three options below.
cond <- "CO2"
#cond <- "EFAE"
#cond <- "SMAR"
## Fail fast on a typo: every later section branches on this value, and an
## unrecognised condition would otherwise surface as an unrelated error much
## further down the script.
if (!(length(cond) == 1L && cond %in% c("CO2", "EFAE", "SMAR"))) {
  stop("cond must be one of \"CO2\", \"EFAE\" or \"SMAR\"", call. = FALSE)
}
#### LOAD DATA #####
# Sample sheet; the "Sample" column becomes the row names.
metadata <- read.table("cv_asap_meta.txt", header = TRUE, row.names = "Sample")
# Count matrix; the "GENEID" column becomes the row names.
cv7asapc <- read.table("CV_complete_countmatrix.txt", row.names = "GENEID",
                       header = TRUE)
# Drop the first 48 columns of the count matrix.
# NOTE(review): which samples those 48 columns hold is not visible here - confirm.
cv7asapc <- cv7asapc[, -(1:48)]
# Keep only genes that have a value in every remaining column.
cv7asapc <- cv7asapc[complete.cases(cv7asapc), ]
# Chromosome X gene list and the table used to convert it to primary FBgn IDs.
xgenes <- read.table("dm6_chrXrefGenes.txt", header = FALSE)
con_gene <- read.table("gene_symbol_conversions4.txt", header = TRUE)
con_gene$Gene_symbol <- NULL
goodgenes <- read.table("poi01_genes.txt", header = TRUE)
immunegenes <- read.table("immunelist_LB_2019_cat.txt", header = TRUE,
                          row.names = "FlybaseID")
tfs <- read.table("tf_Celniker2013_v2.txt", header = TRUE)
# control_signal.txt is produced by this script itself when cond == "CO2", so
# it cannot exist the first time the control analysis is run. Fall back to an
# empty gene list in that case instead of aborting before any work is done.
if (file.exists("control_signal.txt")) {
  cdegenes2toss <- read.table("control_signal.txt", header = TRUE)
} else {
  message("control_signal.txt not found; no control-signal genes will be removed")
  cdegenes2toss <- data.frame(x = character(0))
}
con_comp <- read.table("signal_comp.txt", header = TRUE)
# Per-treatment cis/trans tables (the same file names are written later in
# this script by the EFAE and SMAR runs).
esig <- read.table("efae_cistrans.txt", header = TRUE)
ssig <- read.table("smar_cistrans.txt", header = TRUE)
## Translate the chrX gene list to primary FBgn identifiers
xconverted <- merge(xgenes, con_gene, by.x = 1, by.y = 1, all.x = TRUE)
## Restrict the counts to the genes listed in goodgenes, then drop chrX genes
cv7goodc <- cv7asapc[rownames(cv7asapc) %in% goodgenes$goodgenes, ]
cv7nox <- cv7goodc[!(rownames(cv7goodc) %in% xconverted$Primary_FBgn), ]
## Per-sample total of chrX counts, taken from the unfiltered matrix
cv7_asapx <- cv7asapc[rownames(cv7asapc) %in% xconverted$Primary_FBgn, ]
xsums <- colSums(cv7_asapx, na.rm = TRUE)
xsums <- as.data.frame(xsums)
xsums$Samp <- rownames(xsums)
## Only the samples flagged as Parsed in the metadata are kept from here on
parsed <- (metadata$Parsed == TRUE)
sampleset <- rownames(metadata)[parsed]
metadata2 <- metadata[rownames(metadata) %in% sampleset, ]
xsums <- xsums[rownames(xsums) %in% sampleset, ]
## Sample name without the allele suffix: the text before the first "_g"
wholesamp <- vapply(
  strsplit(rownames(xsums), "_g"),
  function(parts) parts[1],
  character(1)
)
xsums <- cbind(xsums, wholesamp)
## Attach the metadata columns by row name
xsums <- merge(xsums, metadata2, by = 0)
## Display order of the samples on the x axis of the chrX plot
sampleord <- c("B18_209", "B18_216", "B18_222", "B19_087",
               "B18_231", "B18_233", "B19_086", "B19_093",
               "B18_200", "B18_230", "B18_232", "B19_092",
               "B18_198", "B18_217", "B18_224", "B19_085",
               "B19_075", "B19_082", "B19_091", "B19_095",
               "B19_078", "B19_081", "B19_090", "B19_097",
               "B19_077", "B19_079", "B19_089", "B19_094",
               "B19_076", "B19_080", "B19_088", "B19_096",
               "B18_203", "B18_220", "B18_226", "B19_073",
               "B18_205", "B18_234", "B18_235", "B18_236", "B19_084",
               "B18_204", "B18_210", "B18_219", "B19_083",
               "B18_202", "B18_228", "B19_074")
#### Calculate percentages
## Share of each sample's chrX counts carried by each row. Rows are grouped
## by their whole-sample name rather than by odd/even row position, so the
## result no longer depends on the two rows of a sample being adjacent and an
## empty table is handled without error. Column 2 of the merged table holds
## the chrX count sums.
sample_total <- ave(xsums[, 2], as.character(xsums$wholesamp), FUN = sum)
Percent <- (xsums[, 2] / sample_total) * 100
xsums <- cbind(xsums, Percent)
xsums$wholesamp <- factor(xsums$wholesamp, levels = sampleord)
###
## Stacked bars: percentage of chrX counts per alignment within each sample
ggplot(xsums, aes(x = wholesamp, y = Percent, fill = Alignment)) +
  geom_col() +
  labs(x = "Sample", title = "Xchromosome Misslabeling") +
  scale_fill_manual(breaks = c("a4", "b6"), values = c("#00bfc4", "#f8766d")) +
  theme(
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text.x = element_text(size = 16, angle = 90, vjust = 1, hjust = 1),
    axis.text.y = element_text(size = 16)
  )
In order to identify changes in cis we need to see differential expression in the parents that is maintained to some extent in the F1-hybrids. So for the first test we simply look for differential expression in the parental samples for the specified condition.
# select the samples we will be using
# All three conditions follow the same rules; they only differ in which F1
# replicate numbers feed the allele-specific ("Ci") and trans ("Tr") tests.
if (!(length(cond) == 1L && cond %in% c("SMAR", "CO2", "EFAE"))) {
  # Stop instead of printing and carrying on with undefined sample sets.
  stop("I'm afraid I cant do that. : ] (cond must be \"CO2\", \"EFAE\" or \"SMAR\")",
       call. = FALSE)
}
if (cond == "CO2") {
  ci_reps <- c("1", "3")
  tr_reps <- c("2", "4")
} else {
  ci_reps <- c("1", "2")
  tr_reps <- c("3", "4")
}
in_cond <- metadata$Treatment == cond
is_f1 <- in_cond & (metadata$Generation == "F1")
is_parent <- in_cond & (metadata$Generation == "F0") & (metadata$Parsed == FALSE)
# which() drops NA entries so incomplete metadata rows cannot produce NA
# sample names.
parents <- rownames(metadata)[which(is_parent)]
hybrids <- rownames(metadata)[which(is_f1)]
hybridsCi <- rownames(metadata)[which(is_f1 & metadata$Rep %in% ci_reps)]
hybridsTr <- rownames(metadata)[which(is_f1 & metadata$Rep %in% tr_reps)]
if (cond == "SMAR") {
  # Kept from the original SMAR branch; not used elsewhere in the visible script.
  sampset <- parents
}
test <- append(parents, hybrids)
# Subsetting with %in% keeps the data frame's own row order, so the metadata
# is indexed with match() to put its rows in the same order as `test`
# (parents first, then hybrids).
metadata <- metadata[match(test, rownames(metadata)), ]
## Expression filter: keep genes with CPM > 1 in at least half of the columns
## of cv7nox (all samples still present at this point).
thresh <- ncol(cv7nox) / 2
isexpr <- rowSums(cpm(cv7nox) > 1) >= thresh
cv7nox <- cv7nox[isexpr, ]
#### Differentially expressed genes - parents ####
## Counts for the parental samples only, columns arranged as in `parents`
cv7parent <- setcolorder(cv7nox[, names(cv7nox) %in% parents], parents)
y <- DGEList(counts = cv7parent, genes = rownames(cv7parent))
y <- calcNormFactors(y)
## Genotype and extraction batch of each parental sample, taken from the
## (already reordered) metadata. Original note on genotype codes: 1=a4, 3=b6
in_parents <- rownames(metadata) %in% parents
GT <- factor(c(metadata$Genotype[in_parents]))
BC <- factor(c(metadata$Batch[in_parents]))
data.frame(Sample = colnames(y), GT, BC)
## Sample GT BC
## 1 B18_209 1 1
## 2 B18_216 1 2
## 3 B18_222 1 3
## 4 B19_087 1 8
## 5 B18_198 3 0
## 6 B18_217 3 2
## 7 B18_224 3 3
## 8 B19_085 3 8
## Model with genotype and batch; tdesign is the genotype-only matrix
tdesign <- model.matrix(~ GT)
design <- model.matrix(~ GT + BC)
rownames(design) <- colnames(y)
design
## (Intercept) GT3 BC1 BC2 BC3 BC8
## B18_209 1 0 1 0 0 0
## B18_216 1 0 0 1 0 0
## B18_222 1 0 0 0 1 0
## B19_087 1 0 0 0 0 1
## B18_198 1 1 0 0 0 0
## B18_217 1 1 0 1 0 0
## B18_224 1 1 0 0 1 0
## B19_085 1 1 0 0 0 1
## attr(,"assign")
## [1] 0 1 2 2 2 2
## attr(,"contrasts")
## attr(,"contrasts")$GT
## [1] "contr.treatment"
##
## attr(,"contrasts")$BC
## [1] "contr.treatment"
## Dispersion estimation and GLM fit for the parental samples
y <- estimateDisp(y, design, robust = TRUE)
y$common.dispersion
## [1] 0.2090148
pcpm <- cpm(y)
fit <- glmFit(y, design)
#### Likelihood-ratio test on the second coefficient (GT3 in the design above)
Parent_lrt <- glmLRT(fit, coef = 2)
DE_parents <- Parent_lrt$table
FDR <- p.adjust(DE_parents$PValue, method = "fdr")
## Append the adjusted p-values and the gene identifiers as columns 5 and 6
DE_parents <- cbind(DE_parents, FDR = FDR, gene = y$genes$genes)
DE_parents$parentDE_sig <- DE_parents$FDR <= 0.05
Pa_de.genes <- DE_parents[DE_parents$parentDE_sig, ] ## 547:control, 386:treated
## Remove the intermediates that are not needed further down
rm(y, fit, design, Parent_lrt, FDR, GT, BC, isexpr)
In order to identify changes in cis we need evidence of allelic imbalance in the parents and F1-hybrids. So for the second test we look for allelic imbalance in the F1 hybrids.
#### ASE - hybrids ####
## Counts for the F1 samples selected for the allele-specific test, columns
## arranged as in `hybridsCi`
cv7_F1s <- setcolorder(cv7nox[, names(cv7nox) %in% hybridsCi], hybridsCi)
z <- DGEList(counts = cv7_F1s, genes = rownames(cv7_F1s))
z <- calcNormFactors(z)
## Covariates for those samples, read from the metadata.
## NOTE(review): the original notes gave the codes as AG 0=A4/1=B6,
## MG 1=B6 mom/0=A4 mom, PO 0=inherited from father/1=inherited from mother;
## the printed table below shows AG as 1/2 - confirm the coding.
in_ci <- rownames(metadata) %in% hybridsCi
AG <- factor(c(metadata$Alignment[in_ci])) ## allele the reads aligned to
MG <- factor(c(metadata$MG[in_ci]))        ## maternal genotype
PO <- factor(c(metadata$PO[in_ci]))        ## parent of origin
BC <- factor(c(metadata$Batch[in_ci]))     ## extraction batch
data.frame(Sample = colnames(z), AG, PO, MG, BC)
## Sample AG PO MG BC
## 1 B18_231_g1 1 0 1 4
## 2 B18_231_g2 2 1 1 4
## 3 B19_086_g1 1 0 1 8
## 4 B19_086_g2 2 1 1 8
## 5 B18_230_g1 1 1 0 4
## 6 B18_230_g2 2 0 0 4
## 7 B19_092_g1 1 1 0 9
## 8 B19_092_g2 2 0 0 9
## Additive model: allele + parent of origin + maternal genotype + batch
design <- model.matrix(~ AG + PO + MG + BC)
rownames(design) <- colnames(z)
design
## (Intercept) AG2 PO1 MG1 BC8 BC9
## B18_231_g1 1 0 0 1 0 0
## B18_231_g2 1 1 1 1 0 0
## B19_086_g1 1 0 0 1 1 0
## B19_086_g2 1 1 1 1 1 0
## B18_230_g1 1 0 1 0 0 0
## B18_230_g2 1 1 0 0 0 0
## B19_092_g1 1 0 1 0 0 1
## B19_092_g2 1 1 0 0 0 1
## attr(,"assign")
## [1] 0 1 2 3 4 4
## attr(,"contrasts")
## attr(,"contrasts")$AG
## [1] "contr.treatment"
##
## attr(,"contrasts")$PO
## [1] "contr.treatment"
##
## attr(,"contrasts")$MG
## [1] "contr.treatment"
##
## attr(,"contrasts")$BC
## [1] "contr.treatment"
## Dispersion estimation and GLM fit for the F1 hybrid samples
z <- estimateDisp(z, design, robust = TRUE)
z$common.dispersion
## [1] 0.005136854
hcpm <- cpm(z)
fit <- glmFit(z, design)
## Likelihood-ratio test on the second coefficient (AG2 in the design above)
F1_lrt_AG <- glmLRT(fit, coef = 2)
f1_AG <- F1_lrt_AG$table
FDR <- p.adjust(f1_AG$PValue, method = "fdr")
## Append the adjusted p-values and the gene identifiers as columns 5 and 6
f1_AG <- cbind(f1_AG, FDR = FDR, gene = z$genes$genes)
f1_AG$ASE <- f1_AG$FDR <= 0.05
f1_AG_de.genes <- f1_AG[f1_AG$ASE, ] ### 1056:control, 1082:treated
## Remove the intermediates that are not needed further down
rm(z, fit, design, F1_lrt_AG, FDR, AG, PO, MG, BC)
## Sample set for the trans test: the parents followed by the hybridsTr samples
sampl <- c(parents, hybridsTr)
## Counts for those samples, columns arranged as in `sampl`
transmat <- setcolorder(cv7nox[, names(cv7nox) %in% sampl], sampl)
zz <- DGEList(counts = transmat, genes = rownames(transmat))
## Covariates read from the metadata.
## NOTE(review): the original notes gave GN as 0=a4 reads/1=b6 reads and OG as
## 0=parents/1=hybrids; the printed table below shows 1/2 - confirm the coding.
in_sampl <- rownames(metadata) %in% sampl
GN <- factor(c(metadata$Alignment[in_sampl]))  # genotype of the reads
OG <- factor(c(metadata$Generation[in_sampl])) # origin (generation)
BT <- factor(c(metadata$Batch[in_sampl]))      # batch
data.frame(Sample = colnames(zz), GN, OG, BT)
## Sample GN OG BT
## 1 B18_209 1 1 1
## 2 B18_216 1 1 2
## 3 B18_222 1 1 3
## 4 B19_087 1 1 8
## 5 B18_198 2 1 0
## 6 B18_217 2 1 2
## 7 B18_224 2 1 3
## 8 B19_085 2 1 8
## 9 B18_233_g1 1 2 4
## 10 B18_233_g2 2 2 4
## 11 B19_093_g1 1 2 9
## 12 B19_093_g2 2 2 9
## 13 B18_200_g1 1 2 0
## 14 B18_200_g2 2 2 0
## 15 B18_232_g1 1 2 4
## 16 B18_232_g2 2 2 4
## No-intercept model: one column per batch, then genotype, origin and their
## interaction (GN * OG expands to GN + OG + GN:OG)
design <- model.matrix(~ 0 + BT + GN * OG)
rownames(design) <- colnames(zz)
design
## BT0 BT1 BT2 BT3 BT4 BT8 BT9 GN2 OG2 GN2:OG2
## B18_209 0 1 0 0 0 0 0 0 0 0
## B18_216 0 0 1 0 0 0 0 0 0 0
## B18_222 0 0 0 1 0 0 0 0 0 0
## B19_087 0 0 0 0 0 1 0 0 0 0
## B18_198 1 0 0 0 0 0 0 1 0 0
## B18_217 0 0 1 0 0 0 0 1 0 0
## B18_224 0 0 0 1 0 0 0 1 0 0
## B19_085 0 0 0 0 0 1 0 1 0 0
## B18_233_g1 0 0 0 0 1 0 0 0 1 0
## B18_233_g2 0 0 0 0 1 0 0 1 1 1
## B19_093_g1 0 0 0 0 0 0 1 0 1 0
## B19_093_g2 0 0 0 0 0 0 1 1 1 1
## B18_200_g1 1 0 0 0 0 0 0 0 1 0
## B18_200_g2 1 0 0 0 0 0 0 1 1 1
## B18_232_g1 0 0 0 0 1 0 0 0 1 0
## B18_232_g2 0 0 0 0 1 0 0 1 1 1
## attr(,"assign")
## [1] 1 1 1 1 1 1 1 2 3 4
## attr(,"contrasts")
## attr(,"contrasts")$BT
## [1] "contr.treatment"
##
## attr(,"contrasts")$GN
## [1] "contr.treatment"
##
## attr(,"contrasts")$OG
## [1] "contr.treatment"
## Dispersion estimation and GLM fit for the combined parent + hybrid samples
zz <- estimateDisp(zz, design, robust = TRUE)
zz$common.dispersion
## [1] 0.1229924
fit <- glmFit(zz, design)
## The trans test is the likelihood-ratio test on the genotype-by-origin
## interaction term. Its column position depends on how many batch columns
## the design has, so it is looked up by name (the only column whose name
## contains ":") instead of being hard-coded per condition.
int_coef <- grep(":", colnames(design), fixed = TRUE)
if (length(int_coef) != 1L) {
  stop("expected exactly one interaction column in the design, found ",
       length(int_coef), call. = FALSE)
}
teff_lrt_int <- glmLRT(fit, coef = int_coef)
t_eff <- teff_lrt_int$table
FDR <- p.adjust(t_eff$PValue, method = "fdr")
## Append the adjusted p-values and the gene identifiers as columns 5 and 6
t_eff <- cbind(t_eff, FDR, zz$genes$genes)
colnames(t_eff)[6] <- "gene"
t_eff$trans_sig <- t_eff$FDR <= 0.05
t_eff_de.genes <- t_eff[t_eff$FDR <= 0.05, ]
Now we can assign each gene a cis/trans category depending on the results from sections 2-4. The logic is explained here:
# Combine the parental and hybrid results, then add the trans test, keeping
# only the log fold changes and the three significance flags.
# Give the two log-fold-change columns distinct names before merging.
names(DE_parents)[1] <- "PLogFC"
names(f1_AG)[1] <- "FLogFC"
# Step 1: parents + hybrids, matched on row names (all genes kept)
genesDE <- merge(DE_parents, f1_AG, by = 0, all = TRUE)
rownames(genesDE) <- genesDE[, 1]
col2keep <- c("Row.names", "PLogFC", "FLogFC", "parentDE_sig", "ASE")
genesDE <- genesDE[, names(genesDE) %in% col2keep]
# Step 2: add the trans test, again matched on row names. The table already
# carries a Row.names column, hence the duplicate-name warning below.
genesDE <- merge(genesDE, t_eff, by = 0, all = TRUE)
## Warning in merge.data.frame(genesDE, t_eff, by = 0, all = TRUE): column
## name 'Row.names' is duplicated in the result
col2keep <- c(col2keep, "trans_sig")
genesDE <- genesDE[, names(genesDE) %in% col2keep]
rownames(genesDE) <- genesDE[, 1]
# Drop the second copy of the key column
genesDE[["Row.names.1"]] <- NULL
# Genes without a trans result are treated as not significant
genesDE$trans_sig <- ifelse(is.na(genesDE$trans_sig), "FALSE", genesDE$trans_sig)
### assign categories
## Each gene is classified from its three significance flags:
##   parent DE  ASE    trans   -> category
##   TRUE       TRUE   FALSE   -> "cis"
##   TRUE       FALSE  TRUE    -> "trans"
##   TRUE       TRUE   TRUE    -> "ct"
##   FALSE      TRUE   TRUE    -> "comp"
##   FALSE      FALSE  FALSE   -> "con"
##   anything else (including a missing flag) -> "und"
## Done on whole columns instead of row by row; a missing flag now yields
## "und" rather than stopping the script with an NA condition.
parent_flag <- as.logical(genesDE$parentDE_sig)
ase_flag <- as.logical(genesDE$ASE)
trans_flag <- as.logical(genesDE$trans_sig)
## x %in% TRUE / x %in% FALSE are never NA, unlike x == TRUE
p_yes <- parent_flag %in% TRUE
p_no <- parent_flag %in% FALSE
a_yes <- ase_flag %in% TRUE
a_no <- ase_flag %in% FALSE
t_yes <- trans_flag %in% TRUE
t_no <- trans_flag %in% FALSE
categories <- rep("und", nrow(genesDE))
categories[p_yes & a_yes & t_no] <- "cis"
categories[p_yes & a_no & t_yes] <- "trans"
categories[p_yes & a_yes & t_yes] <- "ct"
categories[p_no & a_yes & t_yes] <- "comp"
categories[p_no & a_no & t_no] <- "con"
## Counts per category, in the order cis, trans, ct, comp, und, con
test <- vapply(
  c("cis", "trans", "ct", "comp", "und", "con"),
  function(k) sum(categories == k),
  integer(1),
  USE.NAMES = FALSE
)
print(test)
## [1] 87 16 11 37 1000 3802
genesDE <- cbind(genesDE, categories)
genesDE <- as.data.frame(genesDE)
rownames(genesDE) <- genesDE[, 1]
## Plots using the edgeR calculation.
## Keep a full copy, then drop the "und" genes from the working table.
geneDEfull <- genesDE
genesDE <- genesDE[genesDE$categories != "und", ]
## Same table without the genes listed in the control-signal file
geneDEnoc <- genesDE[!(rownames(genesDE) %in% cdegenes2toss$x), ]
## Condition-specific outputs.
## CO2 writes the gene names of the working table to control_signal.txt;
## SMAR and EFAE read that file back, remove those genes and write their own
## cis/trans table.
## NOTE(review): the control list holds every gene left after dropping "und",
## "con" genes included - confirm that this is the intended control signal.
if (cond == "CO2") {
  #write.table(genesDE, "co2_cistrans.txt", quote = F, row.names = F )
  genesDE2 <- genesDE
  co2genes <- rownames(genesDE)
  write.table(co2genes, "control_signal.txt", quote = FALSE, row.names = FALSE)
  co2ct <- cbind(genesDE2$Row.names, as.character(genesDE2$categories))
  #write.table(co2ct, "co2_ct.txt", row.names = F, quote = F )
  ii <- "Control Samples"
} else if (cond == "SMAR") {
  controlsig <- read.table("control_signal.txt", header = TRUE)
  genesDE2 <- genesDE[!(rownames(genesDE) %in% controlsig$x), ]
  write.table(genesDE2, "smar_cistrans.txt", quote = FALSE, row.names = FALSE)
  smarct <- cbind(genesDE2$Row.names, as.character(genesDE2$categories))
  #write.table(smarct, "smar_ct.txt", row.names = F, quote = F )
  #write.table(genesDE, "smar_ct_unfilt.txt", row.names = F, quote = F )
  ii <- "Immune responsive cis and trans effects in S. marcescens"
} else if (cond == "EFAE") {
  controlsig <- read.table("control_signal.txt", header = TRUE)
  genesDE2 <- genesDE[!(rownames(genesDE) %in% controlsig$x), ]
  write.table(genesDE2, "efae_cistrans.txt", quote = FALSE, row.names = FALSE)
  efaect <- cbind(genesDE2$Row.names, as.character(genesDE2$categories))
  #write.table(efaect, "efae_ct.txt", row.names = F, quote = F )
  #write.table(genesDE, "efae_ct_unfilt.txt", row.names = F, quote = F )
  ii <- "Immune responsive cis and trans effects in E. faecalis"
}
## Title of the full-data plot for the chosen condition
if (cond %in% c("CO2", "SMAR", "EFAE")) {
  i <- c(
    CO2 = "Cis and Trans effects in control",
    SMAR = "Cis and Trans effects in S. marcescens",
    EFAE = "Cis and Trans effects in E. faecalis"
  )[[cond]]
}
print(test)
## [1] 87 16 11 37 1000 3802
### Full data
## Parental vs F1 log2 ratios for every classified gene.
## The colours are a *named* vector so each category keeps its colour even
## when a category is absent from the data or the ggplot2 version matches
## unnamed values against the scale limits instead of the breaks.
category_colours <- c(cis = "blue", comp = "green", con = "yellow",
                      ct = "purple", trans = "red", und = "grey")
ggplot(data = genesDE, aes(y = FLogFC, x = PLogFC, colour = categories)) +
  geom_point(size = 4, alpha = .7) +
  geom_abline(color = "blue") +
  geom_hline(yintercept = 0, color = "red") +
  geom_vline(xintercept = 0) +
  xlab("Log2 Ratio Parents(B6/A4)") +
  ylab("Log2 Ratio F1-hybrid (B6/A4)") +
  ggtitle(i) +
  #ggtitle("Control")+
  ylim(-12.5, 12.5) +
  xlim(-12.5, 12.5) +
  theme(axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        axis.text.x = element_text(size = 16),
        axis.text.y = element_text(size = 16)) +
  scale_color_manual(breaks = names(category_colours), values = category_colours)
## From here on the working table no longer contains the "con" genes
genesDE <- genesDE[!(genesDE$categories == "con"), ]
## ONLY genes that dont show cis or trans signal in fatbody control samples
## The plot is the same for all three conditions (only the title `ii`
## differs), so it is drawn once. The colours are a *named* vector: the break
## order below is not alphabetical, and unnamed values could be matched
## against the alphabetical scale limits and end up on the wrong category.
category_colours <- c(cis = "blue", trans = "red", ct = "purple",
                      und = "grey", comp = "green", con = "yellow")
if (cond %in% c("CO2", "SMAR", "EFAE")) {
  ggplot(data = genesDE2, aes(y = FLogFC, x = PLogFC, colour = categories)) +
    geom_point(size = 3, alpha = .7) +
    geom_abline(color = "blue") +
    geom_hline(yintercept = 0, color = "red") +
    geom_vline(xintercept = 0) +
    xlab("Log2 Ratio Parents(B6/A4)") +
    ylab("Log2 Ratio F1-hybrid (B6/A4)") +
    ggtitle(ii) +
    theme_bw() +
    ylim(-10, 10) +
    xlim(-10, 10) +
    scale_color_manual(breaks = names(category_colours), values = category_colours)
}
## Warning: Removed 7 rows containing missing values (geom_point).
## Number of genes per category in the plotted table
sum(genesDE2$categories == "cis")
## [1] 87
sum(genesDE2$categories == "trans")
## [1] 16
sum(genesDE2$categories == "ct")
## [1] 11
sum(genesDE2$categories == "comp")
## [1] 37
sum(genesDE2$categories == "und")
## [1] 0
sum(genesDE2$categories == "con")
## [1] 3802
## Looking at expression bias
## Each subset picks one category and one sign of the parental log fold
## change; the _A4 tables take negative PLogFC, the _B6 tables positive PLogFC.
# bias in cis genes
ec_A4 <- esig[with(esig, categories == "cis" & is.negative(PLogFC)), ]
ec_B6 <- esig[with(esig, categories == "cis" & is.positive(PLogFC)), ]
sc_A4 <- ssig[with(ssig, categories == "cis" & is.negative(PLogFC)), ]
sc_B6 <- ssig[with(ssig, categories == "cis" & is.positive(PLogFC)), ]
# bias in trans genes
et_A4 <- esig[with(esig, categories == "trans" & is.negative(PLogFC)), ]
et_B6 <- esig[with(esig, categories == "trans" & is.positive(PLogFC)), ]
st_A4 <- ssig[with(ssig, categories == "trans" & is.negative(PLogFC)), ]
st_B6 <- ssig[with(ssig, categories == "trans" & is.positive(PLogFC)), ]
## Looking at overlap.
## Join the two treatment tables on their first column and keep only the rows
## that are complete in both (.x columns come from esig, .y from ssig).
es_sig <- merge(esig, ssig, by = 1, all = TRUE)
es_sig$categories.x <- as.character(es_sig$categories.x)
es_sig$categories.y <- as.character(es_sig$categories.y)
es_sig <- es_sig[complete.cases(es_sig), ]
paste("Genes showing cis or signal in both EFAE and SMAR=", nrow(es_sig))
## [1] "Genes showing cis or signal in both EFAE and SMAR= 86"
same_category <- es_sig$categories.x == es_sig$categories.y
paste("Concordant Classifications ", sum(same_category))
## [1] "Concordant Classifications 71"
trans_in_efae <- es_sig$categories.x == "trans"
trans_in_smar <- es_sig$categories.y == "trans"
transig <- es_sig[trans_in_efae | trans_in_smar, ]
paste("Shared trans effects ", sum(trans_in_efae & trans_in_smar))
## [1] "Shared trans effects 17"
### Looking at category proportions
## Freeze the T_samp order as it appears in the file
con_comp$T_samp <- factor(con_comp$T_samp, levels = con_comp$T_samp)
## Per treatment: each Value as a fraction of that treatment's total
ccc <- within(con_comp[con_comp$Treatment == "CO2", ], Per <- Value / sum(Value))
cce <- within(con_comp[con_comp$Treatment == "EFAE", ], Per <- Value / sum(Value))
ccs <- within(con_comp[con_comp$Treatment == "SMAR", ], Per <- Value / sum(Value))
ces <- rbind(ccc, cce, ccs)
## Side-by-side bars of the per-treatment fractions for each signal type
ggplot(ces, aes(x = Signal, y = Per, fill = Treatment)) +
  geom_col(position = "dodge") +
  labs(y = "Percent", title = "Signal Comparison between Treatments") +
  scale_fill_manual(values = c("#4349D2", "#FFA373", "#CB66C2")) +
  theme_bw() +
  theme(
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    axis.text.x = element_text(size = 16, angle = 90, vjust = 1, hjust = 1),
    axis.text.y = element_text(size = 16),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
## Section 8: P Value Calculation
Test 1: Comparing proportions of trans-only genes’ genotype preference in expression. A) Text: “nearly twice as many [genes] were expressed more highly in the B6 genotype (48 genes) than in the A4 genotype (27 genes) [of the trans-only genes in Efae infection]”. So we are testing whether the proportion of genes that are more highly expressed in B6 (48) is greater than the proportion that are more highly expressed in A4 (27), i.e. against the null hypothesis that the two proportions are equal (p = 0.5). Despite the function name, this is actually a chi-square test.
## One-sided, one-sample proportion test: 48 of 75 genes against p = 0.5
tesvt2 <- prop.test(x = 48, n = 75, p = 0.5, alternative = "greater")
tesvt2[["p.value"]]
## [1] 0.01046067
Test 2: Comparing the proportions of each category between CO2, Efae and Smar
## Pairwise two-sample proportion tests between treatments. Each test
## compares the number of genes in one category (success) out of the number
## of classified genes (trails) for two treatments. The control counts match
## the category counts printed earlier (87 cis, 16 trans, 37 comp out of 151);
## NOTE(review): the Efae and Smar counts come from runs not shown here.
## Bonferroni correction: every p-value is multiplied by the number of tests
## (nine) and capped at 1, since a probability cannot exceed 1.
n_tests <- 9

## --- first category (control count 87 = cis) ---
#control vs Efae
success <- c(87, 174)
trails <- c(151, 263)
test3aCE <- prop.test(success, trails)
min(1, n_tests * test3aCE$p.value)
## [1] 0.9320157
#control vs Smar
success <- c(87, 79)
trails <- c(151, 251)
test3aCS <- prop.test(success, trails)
min(1, n_tests * test3aCS$p.value)
## [1] 3.959068e-06
#Efae vs Smar
success <- c(174, 79)
trails <- c(263, 251)
test3aES <- prop.test(success, trails)
min(1, n_tests * test3aES$p.value)
## [1] 6.82653e-14

## --- second category (control count 16 = trans) ---
#Control vs Efae
success <- c(16, 75)
trails <- c(151, 263)
test3bCE <- prop.test(success, trails)
min(1, n_tests * test3bCE$p.value)
## [1] 0.0003481742
#Control vs Smar
success <- c(16, 149)
trails <- c(151, 251)
test3bCS <- prop.test(success, trails)
min(1, n_tests * test3bCS$p.value)
## [1] 1.539558e-20
#Efae vs SMar
success <- c(75, 149)
trails <- c(263, 251)
test3bES <- prop.test(success, trails)
min(1, n_tests * test3bES$p.value)
## [1] 3.049017e-11

## --- third category (control count 37 = comp) ---
#Control vs Efae
success <- c(37, 6)
trails <- c(151, 263)
test3cCE <- prop.test(success, trails)
min(1, n_tests * test3cCE$p.value)
## [1] 2.922811e-11
#Control vs Smar
## The control count is 37, as in the Control vs Efae test above and in the
## printed category counts; it was previously entered as 38 here.
success <- c(37, 16)
trails <- c(151, 251)
test3cCS <- prop.test(success, trails)
min(1, n_tests * test3cCS$p.value)
## (previously recorded as 1.797449e-06 with 38 control genes; re-run to refresh)
#Efae vs Smar
success <- c(6, 16)
trails <- c(263, 251)
test3cES <- prop.test(success, trails)
min(1, n_tests * test3cES$p.value)
## [1] 0.3429412