#load packages and set theme
require(ggplot2)
## Loading required package: ggplot2
require(reshape2)
## Loading required package: reshape2
# Make the stock gray theme the active ggplot2 theme, then override selected
# elements so every later plot gets a white panel with a heavy black frame,
# faint grid lines and large bold axis text.
# NOTE(review): per the ggplot2 docs both theme_set() and theme_update() return
# the theme that was active *before* the call, so theme_dbc ends up holding the
# previous theme rather than the customised one -- confirm this is intended.
theme_dbc <- theme_set(theme_gray())
theme_dbc <- theme_update(
  # panel: white background, thick black border, very light grid
  panel.background = element_rect(fill = "white"),
  panel.border = element_rect(colour = "black", fill = NA, size = 2),
  panel.grid.major = element_line(colour = "gray93", size = 1),
  panel.grid.minor = element_line(colour = "gray98", size = 1),
  # facet strips: bold label inside a white box with a black outline
  strip.text.x = element_text(size = 12, face = "bold"),
  axis.title = element_text(size = 16),
  strip.background = element_rect(colour = "black", fill = "white", size = 1),
  # axes: bold black tick labels and thick black tick marks
  axis.text = element_text(colour = "black", face = "bold", size = 16),
  axis.ticks = element_line(color = "black", size = 2)
)
# Load the per-condition fitness tables (tab-separated, with a header row;
# character columns are kept as strings rather than converted to factors).
ypd24_all_dat <- read.table(
  file = "~/Desktop/SherlockLab2/manuscript_aug2017/code/final_fitness_GI_estimates/ypd24_data.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
ypd48_all_dat <- read.table(
  file = "~/Desktop/SherlockLab2/manuscript_aug2017/code/final_fitness_GI_estimates/ypd48_data.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
ypeg_all_dat <- read.table(
  file = "~/Desktop/SherlockLab2/manuscript_aug2017/code/final_fitness_GI_estimates/ypeg_data.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
ypd37_all_dat <- read.table(
  file = "~/Desktop/SherlockLab2/manuscript_aug2017/code/final_fitness_GI_estimates/ypd37_data.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
ura_all_dat <- read.table(
  file = "~/Desktop/SherlockLab2/manuscript_aug2017/code/final_fitness_GI_estimates/ura_data.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Calculate the mean single-mutant fitness across replicate strains, for each replicate of each condition.
# Array guides: every distinct `array` value found among the rows of the
# YPD 24 hr table whose category is "double". which() drops NA comparisons,
# the same way subset() does.
arrays <- unique(
  ypd24_all_dat[which(ypd24_all_dat$category == "double"), , drop = FALSE]$array
)

# Query guides, in the fixed order used by the rest of the script.
queries <- c(
  "PRE7g7", "PRE4g9", "PRE4g3", "RPN5g1",
  "COG3g1", "SED5g5", "SEC22g1", "SEC22g2",
  "COG8g2", "GET2g2", "IMP4g6", "DIP2g5",
  "PWP2g2_BC1", "PWP2g2_BC2", "TIF6g8", "RPF1g3",
  "MAK16g1", "YCR016Wg4", "YLR050Cg1", "SAP30g7"
)
# Attach mean single-mutant fitness values to a table of strains.
#
# For every guide in `array_guides`, the rows of `all_dat` with
# category == "single" and a matching `array` value are averaged per replicate
# column (r1, r2, r3; NAs dropped) and the means are written to
# r1_array_mean / r2_array_mean / r3_array_mean on the rows of `dat` that carry
# the same `array` value. The same is then done for `query_guides` using the
# `query` column, filling r1_query_mean / r2_query_mean / r3_query_mean.
#
# Args:
#   dat:          data frame with `array` and `query` columns (may have 0 rows).
#   all_dat:      data frame with columns category, array, query, r1, r2, r3.
#   array_guides: array guides to process; defaults to the global `arrays`.
#   query_guides: query guides to process; defaults to the global `queries`.
#
# Returns:
#   `dat` with six numeric columns appended in this order: r1_array_mean,
#   r2_array_mean, r3_array_mean, r1_query_mean, r2_query_mean, r3_query_mean.
#   Rows whose guide is not listed stay NA; a listed guide with no "single"
#   rows yields NaN (the mean of an empty vector).
get_sm_fit <- function(dat, all_dat,
                       array_guides = arrays, query_guides = queries) {
  replicate_cols <- c("r1", "r2", "r3")

  # which() drops rows where the comparison is NA, like subset() does.
  singles <- all_dat[which(all_dat$category == "single"), , drop = FALSE]

  # Append the three <replicate>_<key>_mean columns for one guide column
  # (`key` is "array" or "query") and fill them guide by guide.
  add_guide_means <- function(out, key, guides) {
    mean_cols <- paste0(replicate_cols, "_", key, "_mean")
    for (mean_col in mean_cols) {
      # rep() rather than a bare NA so a zero-row table is accepted too.
      out[[mean_col]] <- rep(NA_real_, nrow(out))
    }
    for (guide in guides) {
      target_rows <- which(out[[key]] == guide)
      if (length(target_rows) == 0) {
        next
      }
      reference <- singles[which(singles[[key]] == guide), , drop = FALSE]
      for (k in seq_along(replicate_cols)) {
        out[[mean_cols[k]]][target_rows] <-
          mean(reference[[replicate_cols[k]]], na.rm = TRUE)
      }
    }
    out
  }

  dat <- add_guide_means(dat, "array", array_guides)
  dat <- add_guide_means(dat, "query", query_guides)
  dat
}
# For each condition: keep the rows whose category is "double", keep columns
# 31-33 followed by 28-30 (in that order), then append the single-mutant mean
# columns computed from the full table of the same condition.
ypd24_doubles <- get_sm_fit(
  subset(ypd24_all_dat, category == "double")[, c(31:33, 28:30)],
  ypd24_all_dat
)
ypd48_doubles <- get_sm_fit(
  subset(ypd48_all_dat, category == "double")[, c(31:33, 28:30)],
  ypd48_all_dat
)
ypeg_doubles <- get_sm_fit(
  subset(ypeg_all_dat, category == "double")[, c(31:33, 28:30)],
  ypeg_all_dat
)
ypd37_doubles <- get_sm_fit(
  subset(ypd37_all_dat, category == "double")[, c(31:33, 28:30)],
  ypd37_all_dat
)
ura_doubles <- get_sm_fit(
  subset(ura_all_dat, category == "double")[, c(31:33, 28:30)],
  ura_all_dat
)
# Calculate genetic interaction scores empirically, using a linear model to estimate the expected fitness for each group of double mutants generated from the same query guide and measured in the same condition.
# Compute empirical genetic-interaction scores and print diagnostic plots.
#
# For every query guide and every replicate (r1, r2, r3), a linear model
# `observed fitness ~ mean array single-mutant fitness` is fitted to the rows
# of `dat` carrying that query guide. The fitted value is stored as
# corrected_expected_<rep> and the residual (observed - expected) as
# corrected_i_score_<rep>. gi_mean / gi_sd are the row-wise mean and standard
# deviation of the three scores (NA if any of the three is NA).
#
# Args:
#   dat:          data frame with columns query, r1, r2, r3, r1_array_mean,
#                 r2_array_mean, r3_array_mean, r1_query_mean, r2_query_mean
#                 and r3_query_mean.
#   cond_name:    title used on every plot.
#   query_guides: query guides to process; defaults to the global `queries`.
#
# Returns:
#   `dat` with corrected_i_score_r1..r3, corrected_expected_r1..r3, gi_mean
#   and gi_sd appended (in that order).
#
# Side effects: prints three pairwise score plots, then the medians of gi_mean
# and gi_sd, then three model plots (six plots in total).
get_corrected <- function(dat, cond_name, query_guides = queries) {
  replicate_ids <- c("r1", "r2", "r3")
  score_cols <- paste0("corrected_i_score_", replicate_ids)
  expected_cols <- paste0("corrected_expected_", replicate_ids)

  # Create the output columns up front: scores first, then expectations.
  for (new_col in c(score_cols, expected_cols)) {
    dat[[new_col]] <- rep(NA_real_, nrow(dat))
  }

  # Loop through each query guide (all of them, however many there are).
  for (guide in query_guides) {
    rows <- which(dat[["query"]] == guide)
    if (length(rows) == 0) {
      next # no strains carry this guide in this table
    }
    for (k in seq_along(replicate_ids)) {
      observed <- dat[[replicate_ids[k]]][rows]
      array_mean <- dat[[paste0(replicate_ids[k], "_array_mean")]][rows]
      # lm() stops with an error when no complete (x, y) pair is left; leave
      # such groups as NA instead of aborting the whole condition.
      if (!any(complete.cases(observed, array_mean))) {
        next
      }
      # Fit the linear model from the data for this query guide.
      coefs <- unname(coef(lm(observed ~ array_mean)))
      # Expectation and deviation from expectation for every strain at once.
      expected <- coefs[2] * array_mean + coefs[1]
      dat[[expected_cols[k]]][rows] <- expected
      dat[[score_cols[k]]][rows] <- observed - expected
    }
  }

  # Mean and sd of GI scores across replicate cultures. Columns are selected
  # by name so the result does not depend on how many columns `dat` had.
  score_matrix <- as.matrix(dat[, score_cols, drop = FALSE])
  dat$gi_mean <- apply(score_matrix, 1, mean)
  dat$gi_sd <- apply(score_matrix, 1, sd)

  # Plot pairwise comparisons of interaction scores across replicate cultures.
  plot_score_pair <- function(mapping) {
    p <- ggplot(dat) + ggtitle(cond_name) + geom_point(mapping, alpha = 0.7)
    print(p + facet_wrap(~query))
  }
  plot_score_pair(aes(x = corrected_i_score_r1, y = corrected_i_score_r2))
  plot_score_pair(aes(x = corrected_i_score_r1, y = corrected_i_score_r3))
  plot_score_pair(aes(x = corrected_i_score_r2, y = corrected_i_score_r3))

  # Print median and sd of GI scores.
  print(median(dat$gi_mean, na.rm = TRUE))
  print(median(dat$gi_sd, na.rm = TRUE))

  # Plot points and the models used for correction: the red line is the
  # multiplicative model (through the origin, slope = query single-mutant
  # mean), the blue line is the empirical linear fit.
  plot_model <- function(point_mapping, line_mapping) {
    p <- ggplot(dat, point_mapping) +
      geom_point(alpha = 0.3, size = 1) +
      geom_abline(line_mapping, color = "red", size = 1) +
      ggtitle(cond_name) +
      geom_smooth(method = "lm", size = 1)
    print(p + facet_wrap(~query))
  }
  # intercept is mapped explicitly: geom_abline() takes both slope and
  # intercept as aesthetics, and the multiplicative line passes through 0.
  plot_model(
    aes(x = r1_array_mean, y = r1),
    aes(slope = r1_query_mean, intercept = 0)
  )
  plot_model(
    aes(x = r2_array_mean, y = r2),
    aes(slope = r2_query_mean, intercept = 0)
  )
  plot_model(
    aes(x = r3_array_mean, y = r3),
    aes(slope = r3_query_mean, intercept = 0)
  )

  dat
}
# Score each condition in turn. The "## [1]" lines are the recorded console
# output of each call: median gi_mean first, then median gi_sd.
ypd24_doubles <- get_corrected(ypd24_doubles, "YPD24hr")
## [1] 0.001365467
## [1] 0.04131298
ypd48_doubles <- get_corrected(ypd48_doubles, "YPD48hr")
## [1] 0.002964696
## [1] 0.03402522
ypeg_doubles <- get_corrected(ypeg_doubles, "YPEG")