Load packages
require(ggplot2)
## Loading required package: ggplot2
require(reshape2)
## Loading required package: reshape2
require(RColorBrewer)
## Loading required package: RColorBrewer
require(mgcv)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-3. For overview type 'help("mgcv-package")'.
# Make theme_gray() the active ggplot2 theme, then override individual
# elements of the active theme: white panels with a heavy black border, very
# light grid lines, and large bold black axis text.
# NOTE(review): theme_set() and theme_update() return the theme that was
# active *before* the call, so theme_dbc presumably does not hold the
# customised theme -- confirm before reusing the variable.
theme_dbc <- theme_set(theme_gray())
theme_dbc <- theme_update(
  # Panel
  panel.background = element_rect(fill = "white"),
  panel.border = element_rect(colour = "black", fill = NA, size = 2),
  panel.grid.major = element_line(colour = "gray93", size = 1),
  panel.grid.minor = element_line(colour = "gray98", size = 1),
  # Facet strips
  strip.background = element_rect(colour = "black", fill = "white", size = 1),
  strip.text.x = element_text(size = 12, face = "bold"),
  # Axes
  axis.title = element_text(size = 16),
  axis.text = element_text(colour = "black", face = "bold", size = 16),
  axis.ticks = element_line(color = "black", size = 2)
)
Load and format FACS data from 5 days of experiments
#Function to read in data
# Read one experiment's per-sample event files into wide, NA-padded tables.
#
# Arguments:
#   path_name  Prefix pasted directly in front of each entry of samples[, 1]
#              to build the path of a comma-separated, header-less event file.
#   samples    Table with one row per sample: column 1 is the file identifier,
#              column 2 the sample name.
#   dat_names  Table whose second column gives the channel names assigned to
#              the columns of every event file.
#
# Returns:
#   A data frame with 2 * nrow(samples) columns and at least 10000 rows. The
#   first nrow(samples) columns hold file column 2 (used as FSC), the next
#   nrow(samples) columns hold file column 4 (used as BluFL1). Both halves
#   carry the sample names, so every name appears twice. Samples with fewer
#   retained events are padded with NA.
#
# Side effects:
#   Prints each sample name and the number of events removed from it.
load_data <- function(path_name, samples, dat_names) {
  n_events <- 10000  # rows pre-allocated per sample
  n_samples <- nrow(samples)

  fsc_dat <- data.frame(matrix(nrow = n_events, ncol = n_samples))
  names(fsc_dat) <- samples[, 2]
  blu1_dat <- data.frame(matrix(nrow = n_events, ncol = n_samples))
  names(blu1_dat) <- samples[, 2]

  # seq_len() keeps the loop from running over c(1, 0) when the key is empty.
  for (i in seq_len(n_samples)) {
    file_name <- paste0(path_name, samples[i, 1])
    sample_name <- samples[[i, 2]]
    print(sample_name)

    events <- read.table(file_name, sep = ",", header = FALSE)
    names(events) <- dat_names[, 2]

    # Events to discard: column 2 equal to 261621 (the maximum FSC value
    # according to the original comment), column 2 below 2000, or column 3
    # below 1000.
    rem <- unique(c(
      which(events[, 2] == 261621),
      which(events[, 2] < 2000),
      which(events[, 3] < 1000)
    ))
    print(length(rem))
    # Only index negatively when there is something to drop: x[-integer(0), ]
    # would select no rows at all.
    if (length(rem) >= 1) {
      events <- events[-rem, ]
    }

    # Store the surviving events; a sample with none stays all-NA instead of
    # failing on the 1:0 index.
    size <- nrow(events)
    if (size > 0) {
      fsc_dat[seq_len(size), i] <- events[, 2]
      blu1_dat[seq_len(size), i] <- events[, 4]
    }
  }

  cbind(fsc_dat, blu1_dat)
}
# Combined tables for every experiment: 61 sample columns each, NA-filled,
# 11500 rows.
all_fsc <- data.frame(matrix(NA, nrow = 11500, ncol = 61))
all_blu1 <- data.frame(matrix(NA, nrow = 11500, ncol = 61))

# Experiment of 12/7/16 (directory Data-12-7-16): sample key, channel names
# and event-file prefix.
samples_exp12_7 <- read.table(
  "~/Desktop/SherlockLab2/gfp/Data-12-7-16/name_key.txt",
  sep = "\t", header = FALSE, stringsAsFactors = FALSE
)
dat_names_exp12_7 <- read.table(
  "~/Desktop/SherlockLab2/gfp/Data-12-7-16/files/file=1-2003.info",
  sep = "\t", header = FALSE
)
path_name_exp12_7 <- "~/Desktop/SherlockLab2/gfp/Data-12-7-16/files/file="
temp <- load_data(path_name_exp12_7, samples_exp12_7, dat_names_exp12_7)
## [1] "GET2g1_4o-1"
## [1] 182
## [1] "GET2g2_4o-1"
## [1] 635
## [1] "NT_4o-1"
## [1] 208
## [1] "GET2g1_4+1"
## [1] 201
## [1] "GET2g1_4+2"
## [1] 170
## [1] "GET2g1_4+3"
## [1] 176
## [1] "GET2g1_4-1"
## [1] 168
## [1] "GET2g1_4-2"
## [1] 845
## [1] "GET2g1_4-3"
## [1] 399
## [1] "GET2g2_4+1"
## [1] 171
## [1] "GET2g2_4+2"
## [1] 142
## [1] "GET2g2_4+3"
## [1] 499
## [1] "GET2g2_4-1"
## [1] 275
## [1] "GET2g2_4-2"
## [1] 953
## [1] "GET2g2_4-3"
## [1] 178
## [1] "NT_4+1"
## [1] 214
## [1] "NT_4+2"
## [1] 191
## [1] "NT_4+3"
## [1] 166
## [1] "GET2g1_8+1"
## [1] 249
## [1] "GET2g1_8+2"
## [1] 283
## [1] "GET2g1_8+3"
## [1] 228
## [1] "GET2g1_8-1"
## [1] 200
## [1] "GET2g1_8-2"
## [1] 205
## [1] "GET2g1_8-3"
## [1] 217
## [1] "GET2g2_8+1"
## [1] 266
## [1] "GET2g2_8+2"
## [1] 293
## [1] "GET2g2_8+3"
## [1] 274
## [1] "GET2g2_8-1"
## [1] 268
## [1] "GET2g2_8-2"
## [1] 184
## [1] "GET2g2_8-3"
## [1] 232
## [1] "NT_8+1"
## [1] 198
## [1] "NT_8+2"
## [1] 196
## [1] "NT_8+3"
## [1] 179
## [1] "GET2g1_25_BY4741_75"
## [1] 135
## [1] "GET2g1_50_BY4741_50"
## [1] 176
## [1] "GET2g1_75_BY4741_25"
## [1] 182
## [1] "GET2g2_25_BY4741_75"
## [1] 100
## [1] "GET2g2_50_BY4741_50"
## [1] 168
## [1] "GET2g2_75_BY4741_25"
## [1] 197
# load_data() returns every FSC column followed by every BluFL1 column, so a
# sample's two measurements sit 39 columns apart here. Print the names of the
# chosen FSC columns, then of the matching BluFL1 columns.
names(temp)[c(3,10:18,38)]
## [1] "NT_4o-1" "GET2g2_4+1" "GET2g2_4+2"
## [4] "GET2g2_4+3" "GET2g2_4-1" "GET2g2_4-2"
## [7] "GET2g2_4-3" "NT_4+1" "NT_4+2"
## [10] "NT_4+3" "GET2g2_50_BY4741_50"
names(temp)[c(42,49:57,77)]
## [1] "NT_4o-1" "GET2g2_4+1" "GET2g2_4+2"
## [4] "GET2g2_4+3" "GET2g2_4-1" "GET2g2_4-2"
## [7] "GET2g2_4-3" "NT_4+1" "NT_4+2"
## [10] "NT_4+3" "GET2g2_50_BY4741_50"
# Element-wise check that both selections name the same samples (all TRUE
# expected).
names(temp)[c(3,10:18,38)]==names(temp)[c(42,49:57,77)]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Keep the chosen samples: FSC columns and their BluFL1 counterparts.
fsc_temp <- temp[, c(3, 10:18, 38)]
blu1_temp <- temp[, c(42, 49:57, 77)]
no_samples <- ncol(fsc_temp)

# First experiment: fill the leading columns of the combined tables and copy
# the sample names across.
all_fsc[seq_len(nrow(fsc_temp)), seq_len(no_samples)] <- fsc_temp
names(all_fsc)[seq_len(no_samples)] <- names(fsc_temp)
all_blu1[seq_len(nrow(blu1_temp)), seq_len(no_samples)] <- blu1_temp
names(all_blu1)[seq_len(no_samples)] <- names(blu1_temp)

# Index of the next free column, used by the following experiment.
col_count <- no_samples + 1
# Experiment of 1/11/17: sample key, channel names and event-file prefix.
samples_exp1_11 <- read.table(
  "~/Desktop/SherlockLab2/gfp/Data-1-11-17/name_key.txt",
  sep = "\t", header = FALSE, stringsAsFactors = FALSE
)
dat_names_exp1_11 <- read.table(
  "~/Desktop/SherlockLab2/gfp/Data-1-11-17/files/file=2-2004.info",
  sep = "\t", header = FALSE
)
path_name_exp1_11 <- "~/Desktop/SherlockLab2/gfp/Data-1-11-17/files/file="
temp <- load_data(path_name_exp1_11, samples_exp1_11, dat_names_exp1_11)
## [1] "MIA165_No"
## [1] 133
## [1] "MIA183_No"
## [1] 110
## [1] "MIA187_No"
## [1] 126
## [1] "MIA165 GFP_BY4741_2575_No"
## [1] 370
## [1] "MIA165 GFP_BY4741_5050_No"
## [1] 80
## [1] "MIA165 GFP_BY4741_7525_No"
## [1] 126
## [1] "MIA183 GFP_BY4741_2575_No"
## [1] 72
## [1] "MIA183 GFP_BY4741_5050_No"
## [1] 70
## [1] "MIA183 GFP_BY4741_7525_No"
## [1] 92
## [1] "MIA187 GFP_BY4741_2575_No"
## [1] 67
## [1] "MIA187 GFP_BY4741_5050_No"
## [1] 116
## [1] "MIA187 GFP_BY4741_7525_No"
## [1] 133
## [1] "MIA153_MIA151_COG8g1_Yes"
## [1] 166
## [1] "MIA153_MIA151_COG8g1_No"
## [1] 167
## [1] "MIA155_MIA151_COG8g2_Yes"
## [1] 102
## [1] "MIA155_MIA151_COG8g2_No"
## [1] 121
## [1] "MIA157_MIA151_COG8g3_Yes"
## [1] 128
## [1] "MIA157_MIA151_COG8g3_No"
## [1] 118
## [1] "MIA159_MIA151_COG8g4_Yes"
## [1] 113
## [1] "MIA159_MIA151_COG8g4_No"
## [1] 114
## [1] "MIA161_MIA151_COG8g5_Yes"
## [1] 114
## [1] "MIA161_MIA151_COG8g5_No"
## [1] 73
## [1] "MIA163_MIA151_COG8g6_Yes"
## [1] 95
## [1] "MIA163_MIA151_COG8g6_No"
## [1] 107
## [1] "MIA165_MIA151_CC16_Yes"
## [1] 142
## [1] "MIA167_MIA148_SAP30g1_Yes"
## [1] 157
## [1] "MIA167_MIA148_SAP30g1_No"
## [1] 127
## [1] "MIA169_MIA148_SAP30g2_Yes"
## [1] 133
## [1] "MIA169_MIA148_SAP30g2_No"
## [1] 148
## [1] "MIA171_MIA148_SAP30g3_Yes"
## [1] 102
## [1] "MIA171_MIA148_SAP30g3_No"
## [1] 105
## [1] "MIA173_MIA148_SAP30g4_Yes"
## [1] 96
## [1] "MIA173_MIA148_SAP30g4_No"
## [1] 105
## [1] "MIA175_MIA148_SAP30g5_Yes"
## [1] 140
## [1] "MIA175_MIA148_SAP30g5_No"
## [1] 91
## [1] "MIA177_MIA148_SAP30g6_Yes"
## [1] 126
## [1] "MIA177_MIA148_SAP30g6_No"
## [1] 102
## [1] "MIA179_MIA148_SAP30g7_Yes"
## [1] 101
## [1] "MIA179_MIA148_SAP30g7_No"
## [1] 137
## [1] "MIA181_MIA148_SAP30g8_Yes"
## [1] 77
## [1] "MIA181_MIA148_SAP30g8_No"
## [1] 127
## [1] "MIA183_MIA148_CC16_Yes"
## [1] 135
## [1] "MIA185_MIA141_RPN5g1_Yes"
## [1] 285
## [1] "MIA185_MIA141_RPN5g1_No"
## [1] 146
## [1] "MIA187_MIA141_CC16_Yes"
## [1] 175
# FSC and BluFL1 columns of the same sample sit 45 columns apart in this
# experiment. Print the names of the chosen FSC columns, then of the matching
# BluFL1 columns.
names(temp)[c(5,8,11,15,16,25,38,39,42:45)]
## [1] "MIA165 GFP_BY4741_5050_No" "MIA183 GFP_BY4741_5050_No"
## [3] "MIA187 GFP_BY4741_5050_No" "MIA155_MIA151_COG8g2_Yes"
## [5] "MIA155_MIA151_COG8g2_No" "MIA165_MIA151_CC16_Yes"
## [7] "MIA179_MIA148_SAP30g7_Yes" "MIA179_MIA148_SAP30g7_No"
## [9] "MIA183_MIA148_CC16_Yes" "MIA185_MIA141_RPN5g1_Yes"
## [11] "MIA185_MIA141_RPN5g1_No" "MIA187_MIA141_CC16_Yes"
names(temp)[c(50,53,56,60,61,70,83,84,87:90)]
## [1] "MIA165 GFP_BY4741_5050_No" "MIA183 GFP_BY4741_5050_No"
## [3] "MIA187 GFP_BY4741_5050_No" "MIA155_MIA151_COG8g2_Yes"
## [5] "MIA155_MIA151_COG8g2_No" "MIA165_MIA151_CC16_Yes"
## [7] "MIA179_MIA148_SAP30g7_Yes" "MIA179_MIA148_SAP30g7_No"
## [9] "MIA183_MIA148_CC16_Yes" "MIA185_MIA141_RPN5g1_Yes"
## [11] "MIA185_MIA141_RPN5g1_No" "MIA187_MIA141_CC16_Yes"
# Element-wise check that both selections name the same samples (all TRUE
# expected).
names(temp)[c(5,8,11,15,16,25,38,39,42:45)]==names(temp)[c(50,53,56,60,61,70,83,84,87:90)]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Keep the chosen samples: FSC columns and their BluFL1 counterparts.
fsc_temp <- temp[, c(5, 8, 11, 15, 16, 25, 38, 39, 42:45)]
blu1_temp <- temp[, c(50, 53, 56, 60, 61, 70, 83, 84, 87:90)]
no_samples <- ncol(fsc_temp)

# Append them to the combined tables, starting at the next free column.
col_range <- col_count:(col_count + no_samples - 1)
all_fsc[seq_len(nrow(fsc_temp)), col_range] <- fsc_temp
names(all_fsc)[col_range] <- names(fsc_temp)
all_blu1[seq_len(nrow(blu1_temp)), col_range] <- blu1_temp
names(all_blu1)[col_range] <- names(blu1_temp)

# Advance the free-column pointer past this experiment.
col_count <- no_samples + col_count
# Experiment of 1/25/17: sample key, channel names and event-file prefix.
samples_exp1_25 <- read.table(
  "~/Desktop/SherlockLab2/gfp/Data-1-25-17/name_key.txt",
  sep = "\t", header = FALSE, stringsAsFactors = FALSE
)
dat_names_exp1_25 <- read.table(
  "~/Desktop/SherlockLab2/gfp/Data-1-25-17/files/file=1-2003.info",
  sep = "\t", header = FALSE
)
path_name_exp1_25 <- "~/Desktop/SherlockLab2/gfp/Data-1-25-17/files/file="
temp <- load_data(path_name_exp1_25, samples_exp1_25, dat_names_exp1_25)
## [1] "COG8g2_YPD1"
## [1] 44
## [1] "SAP30g7_YPD1"
## [1] 69
## [1] "RPN5g1_YPD1"
## [1] 104
## [1] "RPN5g1_SC1"
## [1] 83
## [1] "COG8g2_BY"
## [1] 42
## [1] "SAP30g7_BY"
## [1] 65
## [1] "RPN5g1YPD_BY"
## [1] 38
## [1] "RPN5g1SC_BY"
## [1] 61
## [1] "RPNg1_YPDplus1"
## [1] 182
## [1] "RPNg1_YPDplus2"
## [1] 171
## [1] "RPNg1_YPDplus3"
## [1] 156
## [1] "RPNg1_YPDminus1"
## [1] 161
## [1] "RPNg1_YPDminus2"
## [1] 154
## [1] "RPNg1_YPDminus3"
## [1] 180
## [1] "RPNg1_SCplus1"
## [1] 81
## [1] "RPNg1_SCplus2"
## [1] 86
## [1] "RPNg1_SCplus3"
## [1] 75
## [1] "RPNg1_SCminus1"
## [1] 78
## [1] "RPNg1_SCminus2"
## [1] 73
## [1] "RPNg1_SCminus3"
## [1] 80
## [1] "COG8g2_YPDplus1"
## [1] 93
## [1] "COG8g2_YPDplus2"
## [1] 71
## [1] "COG8g2_YPDplus3"
## [1] 89
## [1] "COG8g2_YPDminus1"
## [1] 77
## [1] "COG8g2_YPDminus2"
## [1] 63
## [1] "COG8g2_YPDminus3"
## [1] 64
## [1] "SAP30g7_YPDplus1"
## [1] 86
## [1] "SAP30g7_YPDplus2"
## [1] 92
## [1] "SAP30g7_YPDplus3"
## [1] 102
## [1] "SAP30g7_YPDminus1"
## [1] 99
## [1] "SAP30g7_YPDminus2"
## [1] 87
## [1] "SAP30g7_YPDminus3"
## [1] 98
# FSC and BluFL1 columns of the same sample sit 32 columns apart in this
# experiment. Print the names of the chosen FSC columns, then of the matching
# BluFL1 columns.
names(temp)[c(5:7,9:14,21:32)]
## [1] "COG8g2_BY" "SAP30g7_BY" "RPN5g1YPD_BY"
## [4] "RPNg1_YPDplus1" "RPNg1_YPDplus2" "RPNg1_YPDplus3"
## [7] "RPNg1_YPDminus1" "RPNg1_YPDminus2" "RPNg1_YPDminus3"
## [10] "COG8g2_YPDplus1" "COG8g2_YPDplus2" "COG8g2_YPDplus3"
## [13] "COG8g2_YPDminus1" "COG8g2_YPDminus2" "COG8g2_YPDminus3"
## [16] "SAP30g7_YPDplus1" "SAP30g7_YPDplus2" "SAP30g7_YPDplus3"
## [19] "SAP30g7_YPDminus1" "SAP30g7_YPDminus2" "SAP30g7_YPDminus3"
names(temp)[c(37:39,41:46,53:64)]
## [1] "COG8g2_BY" "SAP30g7_BY" "RPN5g1YPD_BY"
## [4] "RPNg1_YPDplus1" "RPNg1_YPDplus2" "RPNg1_YPDplus3"
## [7] "RPNg1_YPDminus1" "RPNg1_YPDminus2" "RPNg1_YPDminus3"
## [10] "COG8g2_YPDplus1" "COG8g2_YPDplus2" "COG8g2_YPDplus3"
## [13] "COG8g2_YPDminus1" "COG8g2_YPDminus2" "COG8g2_YPDminus3"
## [16] "SAP30g7_YPDplus1" "SAP30g7_YPDplus2" "SAP30g7_YPDplus3"
## [19] "SAP30g7_YPDminus1" "SAP30g7_YPDminus2" "SAP30g7_YPDminus3"
# Element-wise check that both selections name the same samples (all TRUE
# expected).
names(temp)[c(5:7,9:14,21:32)]==names(temp)[c(37:39,41:46,53:64)]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [15] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Keep the chosen samples: FSC columns and their BluFL1 counterparts.
fsc_temp <- temp[, c(5:7, 9:14, 21:32)]
blu1_temp <- temp[, c(37:39, 41:46, 53:64)]
no_samples <- ncol(fsc_temp)

# Append them to the combined tables, starting at the next free column.
col_range <- col_count:(col_count + no_samples - 1)
all_fsc[seq_len(nrow(fsc_temp)), col_range] <- fsc_temp
names(all_fsc)[col_range] <- names(fsc_temp)
all_blu1[seq_len(nrow(blu1_temp)), col_range] <- blu1_temp
names(all_blu1)[col_range] <- names(blu1_temp)

# Advance the free-column pointer past this experiment.
col_count <- no_samples + col_count
# Experiment of 2/2/17: sample key, channel names and event-file prefix.
samples_exp2_2 <- read.table(
  "~/Desktop/SherlockLab2/gfp/Data-2-2-17/name_key.txt",
  sep = "\t", header = FALSE, stringsAsFactors = FALSE
)
dat_names_exp2_2 <- read.table(
  "~/Desktop/SherlockLab2/gfp/Data-2-2-17/files/file=1-2003.info",
  sep = "\t", header = FALSE
)
path_name_exp2_2 <- "~/Desktop/SherlockLab2/gfp/Data-2-2-17/files/file="
temp <- load_data(path_name_exp2_2, samples_exp2_2, dat_names_exp2_2)
## [1] "YLR050C_CC8_YPD1"
## [1] 69
## [1] "YCR016W_CC8_YPD1"
## [1] 56
## [1] "RPN5_g1_YPD1"
## [1] 100
## [1] "YLR_BY_25_75"
## [1] 35
## [1] "YLR_BY_50_50"
## [1] 55
## [1] "YLR_BY_75_25"
## [1] 50
## [1] "YCR016W_BY_25_75"
## [1] 27
## [1] "YCR016W_BY_50_50"
## [1] 40
## [1] "YCR016W_BY_75_25"
## [1] 38
## [1] "YLR050C_g1_treated"
## [1] 116
## [1] "YLR050C_g1_untreated"
## [1] 94
## [1] "YLR050C_g2_treated"
## [1] 86
## [1] "YLR050C_g2_untreated"
## [1] 68
## [1] "YLR050C_g3_treated"
## [1] 89
## [1] "YLR050C_g3_untreated"
## [1] 76
## [1] "YLR050C_g4_treated"
## [1] 62
## [1] "YLR050C_g4_untreated"
## [1] 71
## [1] "YLR050C_CC8_treated"
## [1] 115
## [1] "YLR050C_CC8_untreated"
## [1] 98
## [1] "YCR016C_g1_treated"
## [1] 95
## [1] "YCR016C_g1_untreated"
## [1] 85
## [1] "YCR016C_g2_treated"
## [1] 78
## [1] "YCR016C_g2_untreated"
## [1] 75
## [1] "YCR016C_g3_treated"
## [1] 61
## [1] "YCR016C_g3_untreated"
## [1] 77
## [1] "YCR016C_g4_treated"
## [1] 57
## [1] "YCR016C_g4_untreated"
## [1] 85
## [1] "YCR016C_CC8_treated"
## [1] 87
## [1] "YCR016C_CC8_untreated"
## [1] 73
# FSC and BluFL1 columns of the same sample sit 29 columns apart in this
# experiment. Print the names of the chosen FSC columns, then of the matching
# BluFL1 columns.
names(temp)[c(5,8,10:11,18:19,26:29)]
## [1] "YLR_BY_50_50" "YCR016W_BY_50_50"
## [3] "YLR050C_g1_treated" "YLR050C_g1_untreated"
## [5] "YLR050C_CC8_treated" "YLR050C_CC8_untreated"
## [7] "YCR016C_g4_treated" "YCR016C_g4_untreated"
## [9] "YCR016C_CC8_treated" "YCR016C_CC8_untreated"
names(temp)[c(34,37,39:40,47:48,55:58)]
## [1] "YLR_BY_50_50" "YCR016W_BY_50_50"
## [3] "YLR050C_g1_treated" "YLR050C_g1_untreated"
## [5] "YLR050C_CC8_treated" "YLR050C_CC8_untreated"
## [7] "YCR016C_g4_treated" "YCR016C_g4_untreated"
## [9] "YCR016C_CC8_treated" "YCR016C_CC8_untreated"
# Element-wise check that both selections name the same samples (all TRUE
# expected).
names(temp)[c(5,8,10:11,18:19,26:29)]==names(temp)[c(34,37,39:40,47:48,55:58)]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Keep the chosen samples: FSC columns and their BluFL1 counterparts.
fsc_temp <- temp[, c(5, 8, 10:11, 18:19, 26:29)]
blu1_temp <- temp[, c(34, 37, 39:40, 47:48, 55:58)]
no_samples <- ncol(fsc_temp)

# Append them to the combined tables, starting at the next free column.
col_range <- col_count:(col_count + no_samples - 1)
all_fsc[seq_len(nrow(fsc_temp)), col_range] <- fsc_temp
names(all_fsc)[col_range] <- names(fsc_temp)
all_blu1[seq_len(nrow(blu1_temp)), col_range] <- blu1_temp
names(all_blu1)[col_range] <- names(blu1_temp)

# Advance the free-column pointer past this experiment.
col_count <- no_samples + col_count
# Experiment of 2/14/17: sample key, channel names and event-file prefix.
samples_exp2_14 <- read.table(
  "~/Desktop/SherlockLab2/gfp/Data-2-14-17/name_key.txt",
  sep = "\t", header = FALSE, stringsAsFactors = FALSE
)
dat_names_exp2_14 <- read.table(
  "~/Desktop/SherlockLab2/gfp/Data-2-14-17/files/file=1-2004.info",
  sep = "\t", header = FALSE
)
path_name_exp2_14 <- "~/Desktop/SherlockLab2/gfp/Data-2-14-17/files/file="
temp <- load_data(path_name_exp2_14, samples_exp2_14, dat_names_exp2_14)
## [1] "BY4741"
## [1] 183
## [1] "MIA193_RPD3_NT"
## [1] 237
## [1] "MIA196_PRE4_NT"
## [1] 142
## [1] "MIA213_SIN3_NT"
## [1] 115
## [1] "MIA216_MRE11_NT"
## [1] 169
## [1] "MIA185_RPN5_g1"
## [1] 263
## [1] "RPD_BY_50_50"
## [1] 142
## [1] "PRE_BY_50_50"
## [1] 123
## [1] "SIN_BY_50_50"
## [1] 178
## [1] "MRE_BY_50_50"
## [1] 157
## [1] "RPD_g1_plus"
## [1] 237
## [1] "RPD_g1_minus"
## [1] 172
## [1] "RPD_g2_plus"
## [1] 249
## [1] "RPD_g2_minus"
## [1] 209
## [1] "RPD_nt_plus"
## [1] 150
## [1] "RPD_nt_minus"
## [1] 146
## [1] "PRE_g3_plus"
## [1] 92
## [1] "PRE_g3_minus"
## [1] 115
## [1] "PRE_g9_plus"
## [1] 145
## [1] "PRE_g9_minus"
## [1] 177
## [1] "PRE_nt_plus"
## [1] 116
## [1] "PRE_nt_minus"
## [1] 118
## [1] "SIN_g1_plus"
## [1] 159
## [1] "SIN_g1_minus"
## [1] 140
## [1] "SIN_g2_plus"
## [1] 162
## [1] "SIN_g2_minus"
## [1] 186
## [1] "SIN_nt_plus"
## [1] 176
## [1] "SIN_nt_minus"
## [1] 225
## [1] "MRE_g1_plus"
## [1] 193
## [1] "MRE_g1_minus"
## [1] 88
## [1] "MRE_g2_plus"
## [1] 179
## [1] "MRE_g2_minus"
## [1] 194
## [1] "MRE_nt_plus"
## [1] 156
## [1] "MRE_nt_minus"
## [1] 161
## [1] "MRE_nt_minus_b"
## [1] 165
## [1] "MRE_nt_minus_c"
## [1] 170
## [1] "MRE_nt_minus_d"
## [1] 166
## [1] "MRE_nt_minus_e"
## [1] 161
# FSC and BluFL1 columns of the same sample sit 38 columns apart in this
# experiment. Print the names of the chosen FSC columns, then of the matching
# BluFL1 columns.
names(temp)[c(8,17:22)]
## [1] "PRE_BY_50_50" "PRE_g3_plus" "PRE_g3_minus" "PRE_g9_plus"
## [5] "PRE_g9_minus" "PRE_nt_plus" "PRE_nt_minus"
names(temp)[c(46,55:60)]
## [1] "PRE_BY_50_50" "PRE_g3_plus" "PRE_g3_minus" "PRE_g9_plus"
## [5] "PRE_g9_minus" "PRE_nt_plus" "PRE_nt_minus"
# Element-wise check that both selections name the same samples (all TRUE
# expected).
names(temp)[c(8,17:22)]==names(temp)[c(46,55:60)]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Keep the chosen samples: FSC columns and their BluFL1 counterparts.
fsc_temp <- temp[, c(8, 17:22)]
blu1_temp <- temp[, c(46, 55:60)]
no_samples <- ncol(fsc_temp)

# Append them to the combined tables, starting at the next free column.
col_range <- col_count:(col_count + no_samples - 1)
all_fsc[seq_len(nrow(fsc_temp)), col_range] <- fsc_temp
names(all_fsc)[col_range] <- names(fsc_temp)
all_blu1[seq_len(nrow(blu1_temp)), col_range] <- blu1_temp
names(all_blu1)[col_range] <- names(blu1_temp)

# Advance the free-column pointer past this experiment.
col_count <- no_samples + col_count
Figure S1A and 1A: Make scatter plots comparing the 50/50 mix to the treated and untreated samples
#One example for RPN5 (Have 4 replicates)
# Confirm that the column indices used below refer to the same samples in the
# BluFL1 and FSC tables.
names(all_blu1)[14] #MIA187 strain is the GFP RPN5 strain carrying the control guide
## [1] "MIA187 GFP_BY4741_5050_No"
names(all_blu1)[21:22]
## [1] "MIA185_MIA141_RPN5g1_Yes" "MIA185_MIA141_RPN5g1_No"
names(all_fsc)[14]
## [1] "MIA187 GFP_BY4741_5050_No"
names(all_fsc)[21:22]
## [1] "MIA185_MIA141_RPN5g1_Yes" "MIA185_MIA141_RPN5g1_No"
# BluFL1 against FSC on log10 axes. Layers: default colour = column 14
# (50/50 mix), turquoise = column 22 (RPN5g1_No), indianred2 = column 21
# (RPN5g1_Yes). Points outside the axis limits are dropped, hence the
# warnings below.
ggplot()+geom_point(aes(y=all_blu1[,14],x=all_fsc[,14]),alpha=0.6)+
geom_point(aes(y=all_blu1[,22],x=all_fsc[,22]),alpha=0.2,color="turquoise")+
geom_point(aes(y=all_blu1[,21],x=all_fsc[,21]),alpha=0.2,color="indianred2")+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("RPN5g1")
## Warning in scale$trans$trans(x): NaNs produced
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 1479 rows containing missing values (geom_point).
## Warning: Removed 1451 rows containing missing values (geom_point).
## Warning: Removed 1153 rows containing missing values (geom_point).
# Same axes and title, 50/50 mix only.
ggplot()+geom_point(aes(y=all_blu1[,14],x=all_fsc[,14]),alpha=0.6)+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("RPN5g1")
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 1479 rows containing missing values (geom_point).
#One example for GET2g2 (have 3 total)
# Confirm that the column indices used below refer to the same samples in the
# BluFL1 and FSC tables.
names(all_blu1)[c(11,5,2)]
## [1] "GET2g2_50_BY4741_50" "GET2g2_4-1" "GET2g2_4+1"
names(all_fsc)[c(11,5,2)]
## [1] "GET2g2_50_BY4741_50" "GET2g2_4-1" "GET2g2_4+1"
# BluFL1 against FSC on log10 axes. Layers: default colour = column 11
# (50/50 mix), turquoise = column 5 (GET2g2_4-1), indianred2 = column 2
# (GET2g2_4+1). Points outside the axis limits are dropped.
ggplot()+geom_point(aes(y=all_blu1[,11],x=all_fsc[,11]),alpha=0.6)+
geom_point(aes(y=all_blu1[,5],x=all_fsc[,5]),alpha=0.2,color="turquoise")+
geom_point(aes(y=all_blu1[,2],x=all_fsc[,2]),alpha=0.2,color="indianred2")+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("GET2g2")
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 2000 rows containing missing values (geom_point).
## Warning: Removed 2103 rows containing missing values (geom_point).
## Warning: Removed 2016 rows containing missing values (geom_point).
# Same axes and title, 50/50 mix only.
ggplot()+geom_point(aes(y=all_blu1[,11],x=all_fsc[,11]),alpha=0.6)+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("GET2g2")
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 2000 rows containing missing values (geom_point).
#One example for COG8g2 (have 4 total)
# Confirm that the column indices used below refer to the same samples in the
# BluFL1 and FSC tables.
names(all_blu1)[c(12,16,15)] #MIA165 is COG8 gfp strain with non targeting control guide
## [1] "MIA165 GFP_BY4741_5050_No" "MIA155_MIA151_COG8g2_No"
## [3] "MIA155_MIA151_COG8g2_Yes"
names(all_fsc)[c(12,16,15)]
## [1] "MIA165 GFP_BY4741_5050_No" "MIA155_MIA151_COG8g2_No"
## [3] "MIA155_MIA151_COG8g2_Yes"
# BluFL1 against FSC on log10 axes. Layers: default colour = column 12
# (50/50 mix), turquoise = column 16 (COG8g2_No), indianred2 = column 15
# (COG8g2_Yes). Points outside the axis limits are dropped.
ggplot()+geom_point(aes(y=all_blu1[,12],x=all_fsc[,12]),alpha=0.6)+
geom_point(aes(y=all_blu1[,16],x=all_fsc[,16]),alpha=0.2,color="turquoise")+
geom_point(aes(y=all_blu1[,15],x=all_fsc[,15]),alpha=0.2,color="indianred2")+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("COG8g2")
## Warning in scale$trans$trans(x): NaNs produced
## Warning in scale$trans$trans(x): NaNs produced
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 1567 rows containing missing values (geom_point).
## Warning: Removed 1377 rows containing missing values (geom_point).
## Warning: Removed 1498 rows containing missing values (geom_point).
# Same axes and title, 50/50 mix only.
ggplot()+geom_point(aes(y=all_blu1[,12],x=all_fsc[,12]),alpha=0.6)+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("COG8g2")
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 1567 rows containing missing values (geom_point).
#One example for SAP30g7 (have 4 total)
# Confirm that the column indices used below refer to the same samples in the
# BluFL1 and FSC tables.
names(all_blu1)[c(13,19,18)] #MIA183 is SAP30 gfp strain with non targeting control guide
## [1] "MIA183 GFP_BY4741_5050_No" "MIA179_MIA148_SAP30g7_No"
## [3] "MIA179_MIA148_SAP30g7_Yes"
names(all_fsc)[c(13,19,18)]
## [1] "MIA183 GFP_BY4741_5050_No" "MIA179_MIA148_SAP30g7_No"
## [3] "MIA179_MIA148_SAP30g7_Yes"
# BluFL1 against FSC on log10 axes. Layers: default colour = column 13
# (50/50 mix), turquoise = column 19 (SAP30g7_No), indianred2 = column 18
# (SAP30g7_Yes). Points outside the axis limits are dropped.
ggplot()+geom_point(aes(y=all_blu1[,13],x=all_fsc[,13]),alpha=0.6)+
geom_point(aes(y=all_blu1[,19],x=all_fsc[,19]),alpha=0.2,color="turquoise")+
geom_point(aes(y=all_blu1[,18],x=all_fsc[,18]),alpha=0.2,color="indianred2")+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("SAP30g7")
## Warning in scale$trans$trans(x): NaNs produced
## Warning in scale$trans$trans(x): NaNs produced
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 1531 rows containing missing values (geom_point).
## Warning: Removed 1384 rows containing missing values (geom_point).
## Warning: Removed 1436 rows containing missing values (geom_point).
# Same axes and title, 50/50 mix only.
ggplot()+geom_point(aes(y=all_blu1[,13],x=all_fsc[,13]),alpha=0.6)+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("SAP30g7")
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 1531 rows containing missing values (geom_point).
#One example for YCR016W (have 1 total)
# NOTE(review): the sample names use both "YCR016W" and "YCR016C" -- confirm
# which ORF name is intended.
# Confirm that the column indices used below refer to the same samples in the
# BluFL1 and FSC tables.
names(all_blu1)[c(46,52,51)]
## [1] "YCR016W_BY_50_50" "YCR016C_g4_untreated" "YCR016C_g4_treated"
names(all_fsc)[c(46,52,51)]
## [1] "YCR016W_BY_50_50" "YCR016C_g4_untreated" "YCR016C_g4_treated"
# BluFL1 against FSC on log10 axes. Layers: default colour = column 46
# (50/50 mix), turquoise = column 52 (g4_untreated), indianred2 = column 51
# (g4_treated). Points outside the axis limits are dropped.
ggplot()+geom_point(aes(y=all_blu1[,46],x=all_fsc[,46]),alpha=0.6)+
geom_point(aes(y=all_blu1[,52],x=all_fsc[,52]),alpha=0.2,color="turquoise")+
geom_point(aes(y=all_blu1[,51],x=all_fsc[,51]),alpha=0.2,color="indianred2")+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("YCR016Wg4")
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 1286 rows containing missing values (geom_point).
## Warning: Removed 882 rows containing missing values (geom_point).
## Warning: Removed 877 rows containing missing values (geom_point).
# Same axes and title, 50/50 mix only.
ggplot()+geom_point(aes(y=all_blu1[,46],x=all_fsc[,46]),alpha=0.6)+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("YCR016Wg4")
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 1286 rows containing missing values (geom_point).
#one example for YLR050C (have 1 total)
# Confirm that the column indices used below refer to the same samples in the
# BluFL1 and FSC tables.
names(all_blu1)[c(45,48,47)]
## [1] "YLR_BY_50_50" "YLR050C_g1_untreated" "YLR050C_g1_treated"
names(all_fsc)[c(45,48,47)]
## [1] "YLR_BY_50_50" "YLR050C_g1_untreated" "YLR050C_g1_treated"
# BluFL1 against FSC on log10 axes. Layers: default colour = column 45
# (50/50 mix), turquoise = column 48 (g1_untreated), indianred2 = column 47
# (g1_treated). Points outside the axis limits are dropped.
ggplot()+geom_point(aes(y=all_blu1[,45],x=all_fsc[,45]),alpha=0.6)+
geom_point(aes(y=all_blu1[,48],x=all_fsc[,48]),alpha=0.2,color="turquoise")+
geom_point(aes(y=all_blu1[,47],x=all_fsc[,47]),alpha=0.2,color="indianred2")+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("YLR050Cg1")
## Warning in scale$trans$trans(x): NaNs produced
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 1271 rows containing missing values (geom_point).
## Warning: Removed 639 rows containing missing values (geom_point).
## Warning: Removed 537 rows containing missing values (geom_point).
# Same axes and title, 50/50 mix only.
ggplot()+geom_point(aes(y=all_blu1[,45],x=all_fsc[,45]),alpha=0.6)+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("YLR050Cg1")
## Warning: Removed 1271 rows containing missing values (geom_point).
#One example for PRE4g3 (have 1 total)
# Confirm that the column indices used below refer to the same samples in the
# BluFL1 and FSC tables.
names(all_blu1)[c(55,57,56)]
## [1] "PRE_BY_50_50" "PRE_g3_minus" "PRE_g3_plus"
names(all_fsc)[c(55,57,56)]
## [1] "PRE_BY_50_50" "PRE_g3_minus" "PRE_g3_plus"
# BluFL1 against FSC on log10 axes. Layers: default colour = column 55
# (50/50 mix), turquoise = column 57 (PRE_g3_minus), indianred2 = column 56
# (PRE_g3_plus). Points outside the axis limits are dropped.
ggplot()+geom_point(aes(y=all_blu1[,55],x=all_fsc[,55]),alpha=0.6)+
geom_point(aes(y=all_blu1[,57],x=all_fsc[,57]),alpha=0.2,color="turquoise")+
geom_point(aes(y=all_blu1[,56],x=all_fsc[,56]),alpha=0.2,color="indianred2")+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("PRE4g3")
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 1025 rows containing missing values (geom_point).
## Warning: Removed 1077 rows containing missing values (geom_point).
## Warning: Removed 1102 rows containing missing values (geom_point).
# Same axes and title, 50/50 mix only.
ggplot()+geom_point(aes(y=all_blu1[,55],x=all_fsc[,55]),alpha=0.6)+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("PRE4g3")
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 1025 rows containing missing values (geom_point).
#For PRE4 also show non-targeting plus and minus atc
# Confirm that the column indices used below refer to the same samples in the
# BluFL1 and FSC tables.
names(all_blu1)[c(55,61,60)]
## [1] "PRE_BY_50_50" "PRE_nt_minus" "PRE_nt_plus"
names(all_fsc)[c(55,61,60)]
## [1] "PRE_BY_50_50" "PRE_nt_minus" "PRE_nt_plus"
# BluFL1 against FSC on log10 axes. Layers: default colour = column 55
# (50/50 mix), turquoise = column 61 (PRE_nt_minus), indianred2 = column 60
# (PRE_nt_plus). Points outside the axis limits are dropped.
ggplot()+geom_point(aes(y=all_blu1[,55],x=all_fsc[,55]),alpha=0.6)+
geom_point(aes(y=all_blu1[,61],x=all_fsc[,61]),alpha=0.2,color="turquoise")+
geom_point(aes(y=all_blu1[,60],x=all_fsc[,60]),alpha=0.2,color="indianred2")+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("PRE4 non-targeting")
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 1025 rows containing missing values (geom_point).
## Warning: Removed 882 rows containing missing values (geom_point).
## Warning: Removed 1060 rows containing missing values (geom_point).
#One example for PRE4g9 (have 1 total)
# Confirm that the column indices used below refer to the same samples in the
# BluFL1 and FSC tables.
names(all_blu1)[c(55,59,58)]
## [1] "PRE_BY_50_50" "PRE_g9_minus" "PRE_g9_plus"
names(all_fsc)[c(55,59,58)]
## [1] "PRE_BY_50_50" "PRE_g9_minus" "PRE_g9_plus"
# BluFL1 against FSC on log10 axes. Layers: default colour = column 55
# (50/50 mix), turquoise = column 59 (PRE_g9_minus), indianred2 = column 58
# (PRE_g9_plus). Points outside the axis limits are dropped.
ggplot()+geom_point(aes(y=all_blu1[,55],x=all_fsc[,55]),alpha=0.6)+
geom_point(aes(y=all_blu1[,59],x=all_fsc[,59]),alpha=0.2,color="turquoise")+
geom_point(aes(y=all_blu1[,58],x=all_fsc[,58]),alpha=0.2,color="indianred2")+
scale_x_log10(limits=c(30000,3e5))+scale_y_log10(limits=c(20,10000))+
xlab("FSC")+ylab("BluFL1")+annotation_logticks(size=2)+ggtitle("PRE4g9")
## Warning in scale$trans$trans(x): NaNs produced
## Warning: Removed 1025 rows containing missing values (geom_point).
## Warning: Removed 856 rows containing missing values (geom_point).
## Warning: Removed 904 rows containing missing values (geom_point).
Figure S1B: Generate violin plots of residuals
# Fit BluFL1 as a smooth function of FSC on one sample and return the
# residuals of that sample and of a second sample under the same model.
#
# Reads the global tables all_fsc and all_blu1.
#
# Arguments:
#   training_i  Column index (in all_fsc / all_blu1) of the sample the GAM is
#               fitted on; its residuals go into "<guide>_resid_minus".
#   test_i      Column index of the treated sample evaluated against the
#               fitted model; its residuals go into "<guide>_resid_plus".
#   guide       Label used as the prefix of the two output column names.
#   min_fsc     Events with FSC below this value are discarded before fitting
#               and predicting (data are sparse for small cells).
#
# Returns:
#   A two-column data frame with at least 11820 rows, NA-padded. The fixed
#   height lets the results of several calls be combined with cbind().
#
# Side effects:
#   Prints the model summary and the first residuals of both samples.
return_resids <- function(training_i, test_i, guide, min_fsc = 40000) {
  # Drop the rows listed in idx. A zero-length idx must leave df untouched:
  # df[-integer(0), ] would select no rows at all.
  drop_rows <- function(df, idx) {
    if (length(idx) > 0) df[-idx, , drop = FALSE] else df
  }

  # Training sample: remove NA padding, then small cells.
  training <- data.frame(all_fsc[, training_i], all_blu1[, training_i])
  names(training) <- c("FSC", "BluFL1")
  training <- drop_rows(training, which(is.na(training$BluFL1)))
  training <- drop_rows(training, which(training$FSC < min_fsc))

  # Fit the model and compute the training residuals.
  mdl <- gam(BluFL1 ~ s(FSC, bs = "cs"), data = training)
  print(summary(mdl))
  pBluFL1 <- predict(mdl, data.frame(FSC = training$FSC))
  resid_train <- training$BluFL1 - pBluFL1
  print(head(resid_train))

  # Treated sample: same filtering, residuals against the training model.
  test <- data.frame(all_fsc[, test_i], all_blu1[, test_i])
  names(test) <- c("FSC", "BluFL1")
  test <- drop_rows(test, which(is.na(test$FSC)))
  test <- drop_rows(test, which(test$FSC < min_fsc))
  pBluFL1_test <- predict(mdl, data.frame(FSC = test$FSC))
  resid_test <- test$BluFL1 - pBluFL1_test
  print(head(resid_test))

  # Collect both residual vectors in a fixed-height, NA-padded frame.
  resids <- data.frame(matrix(ncol = 2, nrow = 11820))
  names(resids) <- c(
    paste(guide, "resid_minus", sep = "_"),
    paste(guide, "resid_plus", sep = "_")
  )
  if (length(resid_train) > 0) {
    resids[seq_along(resid_train), 1] <- resid_train
  }
  if (length(resid_test) > 0) {
    resids[seq_along(resid_test), 2] <- resid_test
  }
  resids
}
# Residual columns for every targeting guide, bound side by side.
# Each call contributes two columns: "<guide>_resid_minus", "<guide>_resid_plus".
all_resids <- do.call(cbind, list(
  return_resids(training_i = 16, test_i = 15, guide = "COG8g2"),
  return_resids(training_i = 5,  test_i = 2,  guide = "GET2g2"),
  return_resids(training_i = 57, test_i = 56, guide = "PRE4g3"),
  return_resids(training_i = 59, test_i = 58, guide = "PRE4g9"),
  return_resids(training_i = 22, test_i = 21, guide = "RPN5g1"),
  return_resids(training_i = 19, test_i = 18, guide = "SAP30g7"),
  return_resids(training_i = 52, test_i = 51, guide = "YCR016Wg4"),
  return_resids(training_i = 48, test_i = 47, guide = "YLR050Cg1")
))
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BluFL1 ~ s(FSC, bs = "cs")
## <environment: 0x11454fb88>
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 416.99 1.27 328.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(FSC) 5.72 6.648 4806 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.755 Deviance explained = 75.5%
## GCV = 16768 Scale est. = 16757 n = 10386
## 1 2 3 4 5 6
## 29.86883 -68.18100 -77.64325 150.10505 -40.88830 13.70448
## 1 2 3 4 5 6
## -63.64514 -161.03329 2.03349 33.82400 129.85406 -115.94656
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BluFL1 ~ s(FSC, bs = "cs")
## <environment: 0x10cb93120>
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 447.001 1.707 261.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(FSC) 6.799 7.752 1453 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.537 Deviance explained = 53.8%
## GCV = 28290 Scale est. = 28267 n = 9699
## 1 2 3 4 5 6
## 54.00150 -38.58858 216.16070 49.51831 -26.52656 -13.69264
## 1 2 3 4 5 6
## -76.97394 -123.05047 -98.87559 24.10754 -276.92346 -22.41213
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BluFL1 ~ s(FSC, bs = "cs")
## <environment: 0x111f76b78>
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2534.798 3.172 799.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(FSC) 6.254 7.224 16376 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.917 Deviance explained = 91.7%
## GCV = 1.0765e+05 Scale est. = 1.0757e+05 n = 10694
## 1 2 3 4 5 6
## -417.0336 -128.1839 -295.2510 152.1785 611.1883 213.4633
## 1 2 3 4 5 6
## -605.12472 -38.36933 -611.34118 -82.72733 -1701.75714 -1016.75894
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BluFL1 ~ s(FSC, bs = "cs")
## <environment: 0x10ca3f670>
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2494.089 3.266 763.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(FSC) 5.374 6.315 18095 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.913 Deviance explained = 91.3%
## GCV = 1.1644e+05 Scale est. = 1.1637e+05 n = 10909
## 1 2 3 4 5 6
## 40.842442 -60.784492 105.183707 -95.910874 -8.773327 56.245267
## 1 2 3 4 5 6
## -524.1695 -384.2912 -304.0200 -369.9072 -388.6042 -327.1675
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BluFL1 ~ s(FSC, bs = "cs")
## <environment: 0x10b776510>
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1377.165 3.045 452.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(FSC) 5.536 6.515 4930 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.756 Deviance explained = 75.7%
## GCV = 96028 Scale est. = 95967 n = 10351
## 1 2 3 4 5 6
## 25.987635 -765.467472 -84.044003 100.039688 272.688275 -2.852894
## 1 2 3 4 5 6
## -820.3839 121.2901 -1104.6904 -1019.1890 -1679.8391 -900.2575
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BluFL1 ~ s(FSC, bs = "cs")
## <environment: 0x10d4add40>
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 324.50 1.02 318.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(FSC) 5.421 6.311 4460 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.731 Deviance explained = 73.1%
## GCV = 10776 Scale est. = 10770 n = 10352
## 1 2 3 4 5 6
## 29.44655 45.12521 -56.39731 86.78551 38.11146 29.43659
## 1 2 3 4 5 6
## -42.54130 -77.03074 -13.54585 -20.98839 -58.07486 -53.88578
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BluFL1 ~ s(FSC, bs = "cs")
## <environment: 0x10fd78f10>
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 609.403 1.351 451.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(FSC) 4.96 5.854 7659 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.805 Deviance explained = 80.5%
## GCV = 19807 Scale est. = 19797 n = 10847
## 1 2 3 4 5 6
## -38.86078 -170.77904 -103.97367 30.33548 -12.01668 24.86364
## 1 2 3 4 5 6
## -191.1028 -366.4223 -447.0799 -236.1767 -150.8516 -107.2404
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BluFL1 ~ s(FSC, bs = "cs")
## <environment: 0x10fef4f88>
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 459.865 1.577 291.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(FSC) 5.792 6.715 3579 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.685 Deviance explained = 68.5%
## GCV = 27534 Scale est. = 27517 n = 11061
## 1 2 3 4 5 6
## -86.230288 14.994270 85.322728 206.149942 7.692613 -71.297696
## 1 2 3 4 5 6
## 4.249164 37.630798 -47.508118 -117.109274 -132.460730 -113.909139
# Median residual of every column of all_resids (one row per sample column)
med_resid <- data.frame(vapply(all_resids, median, numeric(1), na.rm = TRUE))
names(med_resid) <- c("median")
med_resid$sample <- rownames(med_resid)
# Long format: one row per (sample column, residual value)
temp <- melt(all_resids)
## No id variables; using all as measure variables
# Numeric x position of each sample column = its order of appearance
samples <- unique(temp$variable)
temp$xval <- match(as.character(temp$variable), as.character(samples))
med_resid$xval <- match(med_resid$sample, as.character(samples))
# Violin per sample column; the two columns of a guide sit next to each other
# and every other guide gets a shaded background band.
ggplot(temp, aes(x = xval, y = value)) +
  # yintercept is given explicitly: current ggplot2 treats it as a required
  # aesthetic and fails on a bare geom_hline()
  geom_hline(yintercept = 0) +
  ylim(-2000, 1000) +
  annotate("rect", xmin = 0.5, xmax = 2.5, ymin = -2000, ymax = 1000, alpha = 0.3) +
  annotate("rect", xmin = 4.5, xmax = 6.5, ymin = -2000, ymax = 1000, alpha = 0.3) +
  annotate("rect", xmin = 8.5, xmax = 10.5, ymin = -2000, ymax = 1000, alpha = 0.3) +
  annotate("rect", xmin = 12.5, xmax = 14.5, ymin = -2000, ymax = 1000, alpha = 0.3) +
  # fill key is the text after the last underscore of the column name
  geom_violin(aes(fill = gsub(".+_", "", variable), group = variable)) +
  # one axis label per guide, centred between its two violins
  scale_x_continuous(breaks = c(1.5, 3.5, 5.5, 7.5, 9.5, 11.5, 13.5, 15.5),
                     labels = c("COG8g2", "GET2g2", "PRE4g3", "PRE4g9",
                                "RPN5g1", "SAP30g7", "YCR016Wg4", "YLR050Cg1")) +
  theme(axis.text.x = element_text(angle = 70, hjust = 1)) +
  ylab("") + xlab("") +
  # shape 95 is the "_" character, drawn large as a median marker
  geom_point(data = med_resid, aes(x = xval, y = median), shape = 95, size = 20) +
  scale_fill_manual(values = c("#00BFC4", "#F8766D"))
## Warning: Removed 21501 rows containing non-finite values (stat_ydensity).
# Residual columns for the "nt" guides, bound side by side.
# Each call contributes two columns: "<guide>_resid_minus", "<guide>_resid_plus".
cont_resids <- do.call(cbind, list(
  return_resids(training_i = 1,  test_i = 8,  guide = "GET2nt"),
  return_resids(training_i = 61, test_i = 60, guide = "PRE4nt"),
  return_resids(training_i = 54, test_i = 53, guide = "YCR016Wnt"),
  return_resids(training_i = 50, test_i = 49, guide = "YLR050Cnt")
))
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BluFL1 ~ s(FSC, bs = "cs")
## <environment: 0x10b5c4980>
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 470.87 1.59 296.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(FSC) 5.513 6.467 2308 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.605 Deviance explained = 60.5%
## GCV = 24693 Scale est. = 24676 n = 9766
## 1 2 3 4 5 6
## -14.465075 -60.469310 43.722448 7.554213 -52.634042 16.603320
## 1 2 3 4 5
## -39.5298659 -107.3539226 67.3649508 -25.5018437 0.5292885
## 6
## 65.1571779
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BluFL1 ~ s(FSC, bs = "cs")
## <environment: 0x111028028>
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2605.162 3.274 795.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(FSC) 6.398 7.368 15784 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.914 Deviance explained = 91.4%
## GCV = 1.1685e+05 Scale est. = 1.1677e+05 n = 10891
## 1 2 3 4 5 6
## -36.43497 198.22369 505.26716 -237.75045 211.19260 -54.78146
## 1 2 3 4 5 6
## -86.45027 42.49931 315.35720 -273.88236 77.84866 138.94085
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BluFL1 ~ s(FSC, bs = "cs")
## <environment: 0x1113555f8>
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 600.028 1.432 419 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(FSC) 5.656 6.572 6303 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.792 Deviance explained = 79.3%
## GCV = 22267 Scale est. = 22253 n = 10853
## 1 2 3 4 5 6
## 49.91418 49.20313 -51.86718 33.68658 189.55770 174.55648
## 1 2 3 4 5 6
## 75.61202 -156.02561 -94.55037 -114.50908 -31.78370 16.42874
##
## Family: gaussian
## Link function: identity
##
## Formula:
## BluFL1 ~ s(FSC, bs = "cs")
## <environment: 0x112b220d0>
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 442.113 1.307 338.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(FSC) 5.65 6.565 4844 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.744 Deviance explained = 74.4%
## GCV = 18748 Scale est. = 18737 n = 10961
## 1 2 3 4 5 6
## -54.24026 22.91573 85.70131 178.43397 -96.85212 -112.79623
## 1 2 3 4 5 6
## -95.89807 -60.26999 -46.55477 391.03070 -24.18579 20.90696
# Median residual of every column of cont_resids (one row per sample column)
med_resid <- data.frame(vapply(cont_resids, median, numeric(1), na.rm = TRUE))
names(med_resid) <- c("median")
med_resid$sample <- rownames(med_resid)
# Long format: one row per (sample column, residual value)
temp <- melt(cont_resids)
## No id variables; using all as measure variables
# Numeric x position of each sample column = its order of appearance
samples <- unique(temp$variable)
temp$xval <- match(as.character(temp$variable), as.character(samples))
med_resid$xval <- match(med_resid$sample, as.character(samples))
# Violin per sample column; the two columns of a guide sit next to each other
# and every other guide gets a shaded background band.
ggplot(temp, aes(x = xval, y = value)) +
  # yintercept is given explicitly: current ggplot2 treats it as a required
  # aesthetic and fails on a bare geom_hline()
  geom_hline(yintercept = 0) +
  ylim(-2000, 1000) +
  annotate("rect", xmin = 0.5, xmax = 2.5, ymin = -2000, ymax = 1000, alpha = 0.3) +
  annotate("rect", xmin = 4.5, xmax = 6.5, ymin = -2000, ymax = 1000, alpha = 0.3) +
  # fill key is the text after the last underscore of the column name
  geom_violin(aes(fill = gsub(".+_", "", variable), group = variable)) +
  # one axis label per guide, centred between its two violins
  scale_x_continuous(breaks = c(1.5, 3.5, 5.5, 7.5),
                     labels = c("GET2nt", "PRE4nt", "YCR016Wnt", "YLR050Cnt")) +
  theme(axis.text.x = element_text(angle = 70, hjust = 1)) +
  ylab("") + xlab("") +
  # shape 95 is the "_" character, drawn large as a median marker
  geom_point(data = med_resid, aes(x = xval, y = median), shape = 95, size = 20) +
  scale_fill_manual(values = c("#00BFC4", "#F8766D"))
## Warning: Removed 10144 rows containing non-finite values (stat_ydensity).