require(ggplot2)
## Loading required package: ggplot2
require(reshape2)
## Loading required package: reshape2
# Make the stock gray theme the active default, then layer the plotting
# overrides used throughout this analysis on top of it.
theme_set(theme_gray())
# NOTE(review): theme_update() is documented to return the theme that was
# active *before* the update, so theme_dbc presumably does not hold the
# customised theme -- confirm before reusing theme_dbc anywhere.
theme_dbc <- theme_update(
  # Panel: white background, heavy black frame, very faint grid
  panel.background = element_rect(fill = "white"),
  panel.border = element_rect(colour = "black", fill = NA, size = 2),
  panel.grid.major = element_line(colour = "gray93", size = 1),
  panel.grid.minor = element_line(colour = "gray98", size = 1),
  # Facet strips: white box with a black outline and bold labels
  strip.background = element_rect(colour = "black", fill = "white", size = 1),
  strip.text.x = element_text(size = 12, face = "bold"),
  # Axes: large black text and thick black ticks
  axis.title = element_text(size = 16),
  axis.text = element_text(colour = "black", face = "bold", size = 16),
  axis.ticks = element_line(color = "black", size = 2)
)
# Read the two barcode count tables; the first column of each file supplies
# the row names.
dat <- read.table(
  file = "~/Desktop/SherlockLab2/System_test/screen2_hiseq/seqlib12_analysis/bc_counts/all_counts.txt",
  sep = "\t", header = TRUE, row.names = 1, stringsAsFactors = FALSE
)
dat2 <- read.table(
  file = "~/Desktop/SherlockLab2/System_test/screen2_hiseq/seqlib12_analysis/bc_counts/recovered_counts.txt",
  sep = "\t", header = TRUE, row.names = 1, stringsAsFactors = FALSE
)
# `+` on two data frames adds cell by cell by position and does not compare
# labels, so verify that both tables are laid out identically before summing.
stopifnot(
  identical(rownames(dat), rownames(dat2)),
  identical(colnames(dat), colnames(dat2))
)
# Combined counts: direct counts plus recovered counts
dat5 <- dat + dat2
rm(dat, dat2)
# Row key: one annotation row per row of dat5 (the columns present, category,
# query and array are used below)
key <- read.table(
  file = "~/Desktop/SherlockLab2/System_test/screen2_hiseq/data_table_row_key.txt",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
# key columns are used below to index rows of dat5 positionally, so the two
# tables must have the same number of rows.
stopifnot(nrow(key) == nrow(dat5))
# Number of leading dat5 columns (libraries) carried through the analysis
no_libs <- 18
Look at counts for barcodes that are present vs. barcodes that are not present
# For every column of dat5, overlay the count histogram of barcodes flagged
# present == "yes" (green) on that of barcodes flagged present == "no" (blue).
# Iterate over the actual column count instead of a hard-coded 25 so the loop
# keeps working if libraries are added or removed.
for (i in seq_len(ncol(dat5))) {
  p <- ggplot() +
    geom_histogram(
      aes(x = dat5[which(key$present == "yes"), i]),
      alpha = 0.5, binwidth = 5, fill = "green"
    ) +
    geom_histogram(
      aes(x = dat5[which(key$present == "no"), i]),
      alpha = 0.5, binwidth = 5, fill = "blue"
    ) +
    ylim(0, 500) +
    ggtitle(names(dat5)[i])
  print(p)
}
Look at coverage across libraries
# Spread of counts per library: melt dat5 to long format (columns `variable`
# and `value`) and draw one boxplot per library, coloured by library.
ggplot(
  melt(dat5),
  aes(x = variable, y = value, color = variable)
) +
  geom_boxplot() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    legend.position = "none"
  )
## No id variables; using all as measure variables
Look at representation of each query guide in starting pool
# Attach the key annotations to the counts and keep only the barcodes flagged
# as present in the pool.
sub <- dat5
sub$category <- key$category
sub$query <- key$query
sub <- sub[which(key$present == "yes"), ]
for (i in seq_len(no_libs)) {
  # Pick the annotation columns by name rather than by position (26/27), so
  # the selection stays correct if the number of count columns changes.
  temp <- sub[, c(names(dat5)[i], "category", "query")]
  names(temp)[1] <- "count"
  # Counts per query guide on a log10 axis, one facet per category.
  # Zero counts are non-finite on the log scale and get dropped with a warning.
  p <- ggplot(temp) +
    geom_boxplot(aes(x = query, y = count, color = query)) +
    ggtitle(names(dat5)[i]) +
    theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 60, hjust = 1)
    ) +
    scale_y_log10()
  print(p + facet_wrap(~category))
  # Report the library name, then how many and which present barcodes have
  # zero counts in it.
  zero_rows <- which(temp$count == 0)
  print(names(dat5)[i])
  print(length(zero_rows))
  print(rownames(sub)[zero_rows])
}
## Warning: Removed 5 rows containing non-finite values (stat_boxplot).
## [1] "YPEGR1T1"
## [1] 5
## [1] "PRE4g9-RFC5-NRg-2" "SED5g5-RFC5-NRg-2" "DIP2g5-RFC5-NRg-2"
## [4] "TIF6g8-RFC5-NRg-2" "SAP30g7-RFC5-NRg-2"
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
## [1] "YPEGR1T2"
## [1] 4
## [1] "COG3g1-RFC5-NRg-2" "SED5g5-RFC5-NRg-2" "DIP2g5-RFC5-NRg-2"
## [4] "TIF6g8-RFC5-NRg-2"
## Warning: Removed 24 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## [1] "YPEGR1T3"
## [1] 25
## [1] "CC16-CCT8-TRg-2" "PRE7g7-ATP15-NRg-5"
## [3] "PRE7g7-ATP3-TRg-4" "PRE7g7-ATP3-NRg-3"
## [5] "PRE7g7-ACO1-TRg-6" "PRE7g7-SUB2-NRg-4"
## [7] "PRE7g7-ATP16-TRg-9" "PRE4g9-MSN5-TRg-7"
## [9] "PRE4g9-RFC5-NRg-2" "RPN5g1-RFC5-NRg-2"
## [11] "COG3g1-RFC5-NRg-2" "SED5g5-CCT8-TRg-2"
## [13] "SED5g5-ATP3-TRg-4" "GET2g2-CCT8-TRg-2"
## [15] "GET2g2-MPS1-NRg-1" "GET2g2-RFC5-NRg-2"
## [17] "IMP4g6-MSN5-TRg-7" "DIP2g5-ARP2-NRg-3"
## [19] "DIP2g5-MOB2-NRg-6" "DIP2g5-YPT6-NRg-2"
## [21] "DIP2g5-RHO1-NRg-2" "DIP2g5-RFC5-NRg-2"
## [23] "DIP2g5-ATP16-TRg-2" "PWP2g2_BC1-ARC35-TRg-5"
## [25] "TIF6g8-DAD2-TRg-2"
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## [1] "YPEGR2T1"
## [1] 7
## [1] "PRE4g9-RFC5-NRg-2" "COG3g1-RFC5-NRg-2" "IMP4g6-RFC5-NRg-2"
## [4] "DIP2g5-RFC5-NRg-2" "TIF6g8-RFC5-NRg-2" "YLR050Cg1-RFC5-NRg-2"
## [7] "SAP30g7-RFC5-NRg-2"
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).
## [1] "YPEGR2T2"
## [1] 12
## [1] "PRE7g7-RFC5-NRg-2" "PRE4g9-RFC5-NRg-2" "RPN5g1-RFC5-NRg-2"
## [4] "COG3g1-RFC5-NRg-2" "SED5g5-RFC5-NRg-2" "SEC22g2-CCT8-TRg-2"
## [7] "IMP4g6-RPP1-NRg-1" "IMP4g6-CDC42-TRg-6" "DIP2g5-ARC35-TRg-5"
## [10] "DIP2g5-RFC5-NRg-2" "TIF6g8-RFC5-NRg-2" "SAP30g7-RFC5-NRg-2"
## Warning: Removed 47 rows containing non-finite values (stat_boxplot).
## [1] "YPEGR2T3"
## [1] 47
## [1] "PRE7g7-MSN5-TRg-7" "PRE7g7-ARP2-NRg-3"
## [3] "PRE7g7-ATP3-NRg-2" "PRE7g7-SEC31-TRg-2"
## [5] "PRE7g7-TAF12-TRg-3" "PRE7g7-PGS1-NRg-4"
## [7] "PRE7g7-RFC5-NRg-2" "PRE7g7-ATP16-TRg-2"
## [9] "PRE4g9-RFC5-NRg-2" "PRE4g3-RFC5-NRg-2"
## [11] "RPN5g1-RFC5-NRg-2" "COG3g1-MSN5-TRg-7"
## [13] "COG3g1-CDC42-TRg-6" "COG3g1-ARC35-TRg-5"
## [15] "COG3g1-GAL11-NRg-10" "COG3g1-RFC5-NRg-2"
## [17] "SED5g5-ATP15-NRg-5" "SED5g5-CCT8-TRg-2"
## [19] "SED5g5-ARC35-TRg-2" "SED5g5-ARC35-TRg-5"
## [21] "SED5g5-RFC5-NRg-2" "SEC22g1-ARP2-NRg-3"
## [23] "GET2g2-CCT8-TRg-2" "GET2g2-ARC35-TRg-5"
## [25] "GET2g2-ULP1-NRg-2" "IMP4g6-CCT8-TRg-2"
## [27] "IMP4g6-ATP3-TRg-4" "IMP4g6-RPP1-NRg-1"
## [29] "IMP4g6-PET117-NRg-8" "IMP4g6-CDC42-TRg-6"
## [31] "IMP4g6-ATP16-TRg-2" "DIP2g5-CCT8-TRg-2"
## [33] "DIP2g5-YPT1-TRg-2" "DIP2g5-YDR355C-TRg-2"
## [35] "DIP2g5-CDC42-TRg-6" "DIP2g5-TOA2-TRg-3"
## [37] "DIP2g5-SEC31-TRg-2" "DIP2g5-SNU13-NRg-4"
## [39] "DIP2g5-ACO1-TRg-6" "DIP2g5-RPL3-NRg-2"
## [41] "DIP2g5-RFC5-NRg-2" "DIP2g5-ATP16-TRg-2"
## [43] "PWP2g2_BC1-ARC35-TRg-5" "TIF6g8-RFC5-NRg-2"
## [45] "RPF1g3-CCT8-TRg-2" "YLR050Cg1-ATP16-TRg-9"
## [47] "SAP30g7-RFC5-NRg-2"
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## [1] "YPEGR3T1"
## [1] 7
## [1] "PRE7g7-RFC5-NRg-2" "PRE4g9-RFC5-NRg-2" "COG3g1-RFC5-NRg-2"
## [4] "SED5g5-RFC5-NRg-2" "DIP2g5-RFC5-NRg-2" "TIF6g8-RFC5-NRg-2"
## [7] "SAP30g7-RFC5-NRg-2"
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## [1] "YPEGR3T2"
## [1] 8
## [1] "PRE7g7-RFC5-NRg-2" "PRE4g9-RFC5-NRg-2" "COG3g1-RFC5-NRg-2"
## [4] "SED5g5-RFC5-NRg-2" "SEC22g1-RFC5-NRg-2" "DIP2g5-RFC5-NRg-2"
## [7] "TIF6g8-RFC5-NRg-2" "SAP30g7-RFC5-NRg-2"
## Warning: Removed 52 rows containing non-finite values (stat_boxplot).
## [1] "YPEGR3T3"
## [1] 52
## [1] "PRE7g7-CCT8-TRg-2" "PRE7g7-ATP3-TRg-4"
## [3] "PRE7g7-RAP1-NRg-5" "PRE7g7-GLC7-NRg-4"
## [5] "PRE7g7-SEC24-TRg-2" "PRE7g7-ARC35-TRg-5"
## [7] "PRE7g7-NSL1-NRg-1" "PRE7g7-CDC20-TRg-6"
## [9] "PRE7g7-FCF1-NRg-1" "PRE7g7-PGS1-NRg-4"
## [11] "PRE7g7-RFC5-NRg-2" "PRE7g7-CKS1-TRg-5"
## [13] "PRE4g9-RFC5-NRg-2" "PRE4g9-COG1-TRg-3"
## [15] "PRE4g3-MSN5-TRg-7" "PRE4g3-CCT8-TRg-2"
## [17] "COG3g1-ATP15-NRg-5" "COG3g1-GAL11-NRg-10"
## [19] "COG3g1-RFC5-NRg-2" "COG3g1-SEC16-TRg-7"
## [21] "SED5g5-GAL11-NRg-10" "SED5g5-RFC5-NRg-2"
## [23] "SEC22g1-ATP3-TRg-4" "SEC22g2-CCT8-TRg-2"
## [25] "GET2g2-MSN5-TRg-7" "GET2g2-CDC25-TRg-1"
## [27] "GET2g2-CCT8-TRg-2" "GET2g2-YOS1-TRg-2"
## [29] "GET2g2-DAD2-TRg-2" "GET2g2-MPS1-NRg-1"
## [31] "GET2g2-RFC5-NRg-2" "IMP4g6-MSN5-TRg-7"
## [33] "IMP4g6-CCT8-TRg-2" "IMP4g6-YEF3-NRg-9"
## [35] "IMP4g6-SNU13-NRg-4" "IMP4g6-COG1-TRg-3"
## [37] "DIP2g5-MSN5-TRg-7" "DIP2g5-ARP2-NRg-3"
## [39] "DIP2g5-CCT8-TRg-2" "DIP2g5-COG1-TRg-1"
## [41] "DIP2g5-CLF1-NRg-2" "DIP2g5-SEC10-NRg-1"
## [43] "DIP2g5-CDC42-TRg-6" "DIP2g5-NSL1-NRg-1"
## [45] "DIP2g5-TAF12-TRg-2" "DIP2g5-RFC5-NRg-2"
## [47] "PWP2g2_BC1-CCT8-TRg-2" "PWP2g2_BC1-ACT1-NRg-6"
## [49] "TIF6g8-RFC5-NRg-2" "RPF1g3-RFC5-NRg-2"
## [51] "YLR050Cg1-MSN5-TRg-7" "YLR050Cg1-ATP16-TRg-2"
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).
## [1] "SCURAR1T1"
## [1] 6
## [1] "PRE7g7-RFC5-NRg-2" "PRE4g9-RFC5-NRg-2" "COG3g1-RFC5-NRg-2"
## [4] "SED5g5-RFC5-NRg-2" "DIP2g5-RFC5-NRg-2" "TIF6g8-RFC5-NRg-2"
## Warning: Removed 9 rows containing non-finite values (stat_boxplot).
## [1] "SCURAR1T2"
## [1] 9
## [1] "PRE7g7-RFC5-NRg-2" "PRE4g9-RFC5-NRg-2" "COG3g1-RFC5-NRg-2"
## [4] "SED5g5-RFC5-NRg-2" "GET2g2-RFC5-NRg-2" "IMP4g6-RPP1-NRg-1"
## [7] "DIP2g5-RFC5-NRg-2" "TIF6g8-RFC5-NRg-2" "SAP30g7-RFC5-NRg-2"
## Warning: Removed 9 rows containing non-finite values (stat_boxplot).
## [1] "SCURAR1T3"
## [1] 9
## [1] "PRE7g7-RFC5-NRg-2" "PRE4g9-RFC5-NRg-2" "COG3g1-RFC5-NRg-2"
## [4] "SED5g5-RFC5-NRg-2" "SEC22g2-RFC5-NRg-2" "GET2g2-CCT8-TRg-2"
## [7] "DIP2g5-RFC5-NRg-2" "TIF6g8-RFC5-NRg-2" "SAP30g7-RFC5-NRg-2"
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## [1] "SCURAR2T1"
## [1] 7
## [1] "PRE7g7-RFC5-NRg-2" "PRE4g9-RFC5-NRg-2" "COG3g1-RFC5-NRg-2"
## [4] "SED5g5-RFC5-NRg-2" "GET2g2-RFC5-NRg-2" "DIP2g5-RFC5-NRg-2"
## [7] "SAP30g7-RFC5-NRg-2"
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## [1] "SCURAR2T2"
## [1] 7
## [1] "PRE7g7-RFC5-NRg-2" "PRE4g9-RFC5-NRg-2" "COG3g1-RFC5-NRg-2"
## [4] "SED5g5-RFC5-NRg-2" "TIF6g8-RFC5-NRg-2" "RPF1g3-RFC5-NRg-2"
## [7] "SAP30g7-RFC5-NRg-2"
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).
## [1] "SCURAR2T3"
## [1] 12
## [1] "PRE7g7-RFC5-NRg-2" "PRE4g9-RFC5-NRg-2" "COG3g1-MSN5-TRg-7"
## [4] "COG3g1-RFC5-NRg-2" "SED5g5-RFC5-NRg-2" "SEC22g2-MSN5-TRg-7"
## [7] "GET2g2-CCT8-TRg-2" "DIP2g5-CCT8-TRg-2" "DIP2g5-RFC5-NRg-2"
## [10] "DIP2g5-COG1-TRg-3" "TIF6g8-RFC5-NRg-2" "SAP30g7-RFC5-NRg-2"
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
## [1] "SCURAR3T1"
## [1] 4
## [1] "PRE7g7-RFC5-NRg-2" "PRE4g9-RFC5-NRg-2" "GET2g2-CCT8-TRg-2"
## [4] "TIF6g8-RFC5-NRg-2"
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## [1] "SCURAR3T2"
## [1] 7
## [1] "PRE7g7-RFC5-NRg-2" "PRE4g9-RFC5-NRg-2" "COG3g1-RFC5-NRg-2"
## [4] "SED5g5-RFC5-NRg-2" "DIP2g5-RFC5-NRg-2" "TIF6g8-RFC5-NRg-2"
## [7] "SAP30g7-RFC5-NRg-2"
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
## [1] "SCURAR3T3"
## [1] 4
## [1] "PRE7g7-MSN5-TRg-7" "PRE4g9-RFC5-NRg-2" "GET2g2-CCT8-TRg-2"
## [4] "RPF1g3-RFC5-NRg-2"
Calculate and plot chimera subtraction
# Record which BC1 (key$query), BC2 (key$array) and double barcode (row name)
# each row of dat5 represents.
all_BC1 <- key$query
all_BC2 <- key$array
all_DBC <- rownames(dat5)

# BC1 totals: one row per distinct BC1, one column per library, plus a
# trailing column holding the BC1 name.
BC1_freqs <- data.frame(matrix(nrow = length(unique(all_BC1)), ncol = no_libs))
names(BC1_freqs) <- names(dat5)[1:no_libs]
BC1_freqs$BC1_name <- unique(all_BC1)
# Fill each library column with the summed counts of every dat5 row that
# carries the given BC1 (the last column is the name, hence ncol - 1).
for (j in seq_len(ncol(BC1_freqs) - 1)) {
  BC1_freqs[[j]] <- sapply(
    BC1_freqs$BC1_name,
    function(bc) sum(dat5[which(all_BC1 == bc), j]),
    USE.NAMES = FALSE
  )
}

# BC2 totals: same layout, keyed on the distinct BC2 values.
BC2_freqs <- data.frame(matrix(nrow = length(unique(all_BC2)), ncol = no_libs))
names(BC2_freqs) <- names(dat5)[1:no_libs]
BC2_freqs$BC2_names <- unique(all_BC2)
for (j in seq_len(ncol(BC2_freqs) - 1)) {
  BC2_freqs[[j]] <- sapply(
    BC2_freqs$BC2_names,
    function(bc) sum(dat5[which(all_BC2 == bc), j]),
    USE.NAMES = FALSE
  )
}
# Expected (unnormalised) count of every double barcode in one library,
# computed as the product of the total counts of its BC1 and of its BC2 in
# that library.
#
# sample: column index of the library (shared by dat5, BC1_freqs, BC2_freqs).
# Returns a numeric vector with one value per row of the key, in row order.
# Reads the globals dat5, all_BC1, all_BC2, BC1_freqs and BC2_freqs, and
# prints the library name as a progress message.
get_expected <- function(sample) {
  print(names(dat5)[sample])
  # Row of the marginal tables that belongs to each DBC's BC1 / BC2.
  # The BC2 name column is `BC2_names`; spell it out in full instead of
  # relying on `$` partial matching of `BC2_name`.
  bc1_rows <- match(all_BC1, BC1_freqs$BC1_name)
  bc2_rows <- match(all_BC2, BC2_freqs$BC2_names)
  # Convert to double before multiplying so the product of two large totals
  # is not computed in integer arithmetic.
  bc1_counts <- as.numeric(BC1_freqs[bc1_rows, sample])
  bc2_counts <- as.numeric(BC2_freqs[bc2_rows, sample])
  bc1_counts * bc2_counts
}
# Expected-count table: same rows as dat5, one column per library, filled
# library by library from get_expected().
dat5_expected <- data.frame(matrix(nrow = nrow(dat5), ncol = no_libs))
rownames(dat5_expected) <- rownames(dat5)
names(dat5_expected) <- names(dat5)[1:no_libs]
for (i in seq_len(no_libs)) {
  dat5_expected[[i]] <- get_expected(i)
}
## [1] "YPEGR1T1"
## [1] "YPEGR1T2"
## [1] "YPEGR1T3"
## [1] "YPEGR2T1"
## [1] "YPEGR2T2"
## [1] "YPEGR2T3"
## [1] "YPEGR3T1"
## [1] "YPEGR3T2"
## [1] "YPEGR3T3"
## [1] "SCURAR1T1"
## [1] "SCURAR1T2"
## [1] "SCURAR1T3"
## [1] "SCURAR2T1"
## [1] "SCURAR2T2"
## [1] "SCURAR2T3"
## [1] "SCURAR3T1"
## [1] "SCURAR3T2"
## [1] "SCURAR3T3"
Plot data before and after subtracting chimeric reads for each time point in each pool and save new count tables
# Estimate the chimeric-read background of one library and subtract it.
#
# temp:   data frame with columns Observed, Expected, presence ("yes"/"no")
#         and category, one row per double barcode.
# column: index of the library in dat5; only used for the plot title.
#
# Returns the presence == "yes" rows of temp with an added column `norm`
# (Observed minus slope * Expected).
# Side effects: prints the fitted slope, the summed Observed and norm values,
# and a "Before" scatter plot of Observed against Expected.
subtract_BG <- function(temp, column) {
  # Fit a line through the origin to the DBCs flagged as not present; their
  # observed counts are what gets attributed to chimeras.
  fit_non <- coef(
    lm(Observed ~ Expected + 0, data = temp, subset = presence == "no")
  )
  chimera_slope <- as.numeric(fit_non[1])
  print("slope/total counts (before chimera removal)/total counts (after chimera removal)")
  print(unlist(fit_non[1]))
  # Keep only the strains that exist in the pool
  sub_temp <- temp[which(temp$presence == "yes"), ]
  # Plot before correction, with the fitted background line overlaid
  p <- ggplot(temp) +
    geom_point(
      aes(x = Expected, y = Observed, shape = presence, color = category),
      alpha = 0.2
    ) +
    theme(legend.position = "none") +
    ylim(0, 12000) +
    geom_abline(slope = fit_non[1]) # + xlim(0, 4e10)
  print(p + ggtitle(paste(names(dat5)[column], " Before")))
  # Subtract the background predicted at each strain's Expected value.
  # Vectorised so that an empty set of present strains yields an empty
  # column instead of failing inside a 1:0 loop.
  sub_temp$norm <- sub_temp$Observed - chimera_slope * sub_temp$Expected
  # Total counts before and after the subtraction
  print(sum(sub_temp$Observed))
  print(sum(sub_temp$norm))
  # Return the normalized data for the fitness calculation
  sub_temp
}
# Data frames holding the barcode counts before (dat5_unnorm) and after
# (dat5_norm) chimera removal, restricted to strains that exist in the pool.
# Rows are selected positively with present == "yes": `-which(present == "no")`
# would drop every row if nothing were flagged "no", and this filter matches
# the presence == "yes" filter in subtract_BG, whose output fills dat5_norm.
dat5_unnorm <- dat5[which(key$present == "yes"), 1:no_libs, drop = FALSE]
dat5_norm <- data.frame(matrix(nrow = nrow(dat5_unnorm), ncol = no_libs))
names(dat5_norm) <- names(dat5_unnorm)
rownames(dat5_norm) <- rownames(dat5_unnorm)
# For each library: assemble the observed/expected table, subtract the
# chimera background, plot raw points (coloured by presence) together with
# the corrected points (open circles), and store the corrected counts.
for (column in seq_len(no_libs)) {
  print(names(dat5)[column])
  temp <- data.frame(
    Observed = dat5[, column],
    Expected = dat5_expected[, column],
    presence = key$present,
    category = key$category
  )
  rownames(temp) <- rownames(dat5)
  temp2 <- subtract_BG(temp, column)
  p <- ggplot() +
    geom_point(
      aes(x = temp$Expected, y = temp$Observed, color = temp$presence),
      alpha = 0.2
    ) +
    geom_point(
      aes(x = temp2$Expected, y = temp2$norm),
      shape = 1, alpha = 0.1
    ) +
    labs(x = "Expected", y = "Observed") +
    xlim(0, max(temp2$Expected)) +
    ylim(0, 12000) +
    theme(legend.position = "none")
  print(p + ggtitle(paste(names(dat5)[column], "After")))
  dat5_norm[[column]] <- temp2$norm
}
## [1] "YPEGR1T1"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 4.665019e-09
## [1] 4555960
## [1] 4460630
## Warning: Removed 5 rows containing missing values (geom_point).
## [1] "YPEGR1T2"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 3.928694e-09
## [1] 5807617
## [1] 5678118
## Warning: Removed 4 rows containing missing values (geom_point).
## [1] "YPEGR1T3"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 8.37173e-09
## [1] 5830073
## [1] 5558160
## Warning: Removed 27 rows containing missing values (geom_point).
## [1] "YPEGR2T1"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 4.037868e-09
## [1] 5517762
## [1] 5396683
## Warning: Removed 7 rows containing missing values (geom_point).
## [1] "YPEGR2T2"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 1.2368e-09
## [1] 6002224
## [1] 5958735
## Warning: Removed 12 rows containing missing values (geom_point).
## [1] "YPEGR2T3"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 1.743539e-09
## [1] 5757913
## [1] 5702643
## Warning: Removed 47 rows containing missing values (geom_point).
## [1] "YPEGR3T1"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 2.931316e-09
## [1] 6318021
## [1] 6202802
## Warning: Removed 7 rows containing missing values (geom_point).
## [1] "YPEGR3T2"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 2.264984e-09
## [1] 5343605
## [1] 5280469
## Warning: Removed 8 rows containing missing values (geom_point).
## [1] "YPEGR3T3"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 3.872551e-09
## [1] 5988172
## [1] 5855488
## Warning: Removed 53 rows containing missing values (geom_point).
## [1] "SCURAR1T1"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 5.739071e-09
## [1] 5684251
## [1] 5501074
## Warning: Removed 6 rows containing missing values (geom_point).
## [1] "SCURAR1T2"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 3.209791e-09
## [1] 4914804
## [1] 4838322
## Warning: Removed 9 rows containing missing values (geom_point).
## [1] "SCURAR1T3"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 6.987924e-09
## [1] 4699511
## [1] 4547936
## Warning: Removed 9 rows containing missing values (geom_point).
## [1] "SCURAR2T1"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 4.651677e-09
## [1] 5400323
## [1] 5266352
## Warning: Removed 7 rows containing missing values (geom_point).
## [1] "SCURAR2T2"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 2.36521e-09
## [1] 6490506
## [1] 6392210
## Warning: Removed 7 rows containing missing values (geom_point).
## [1] "SCURAR2T3"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 2.181724e-09
## [1] 5028957
## [1] 4974821
## Warning: Removed 12 rows containing missing values (geom_point).
## [1] "SCURAR3T1"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 9.757492e-09
## [1] 5136137
## [1] 4881706
## Warning: Removed 5 rows containing missing values (geom_point).
## [1] "SCURAR3T2"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 3.828555e-09
## [1] 4795700
## [1] 4708816
## Warning: Removed 7 rows containing missing values (geom_point).
## [1] "SCURAR3T3"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
## Expected
## 1.43722e-08
## [1] 5237076
## [1] 4849492
## Warning: Removed 4 rows containing missing values (geom_point).
Save matrices of barcode counts before and after chimera removal
#save(dat5_unnorm,file="~/Desktop/SherlockLab2/System_test/screen2_hiseq/seqlib12_analysis/seqlib12_raw_counts.RData")
#save(dat5_norm,file="~/Desktop/SherlockLab2/System_test/screen2_hiseq/seqlib12_analysis/seqlib12_chimera_normalized_counts.RData")
Also look at differences in slope for the wt strain. Unlike the last experiment, some combos didn't exist, and the ones that did exist were at different expected values!
# Observed vs expected counts for the first 100 rows of the tables
# (presumably the wt-strain combinations -- confirm against the key), one
# scatter plot per library coloured by presence.
# Use no_libs rather than a second hard-coded 18 so this section stays in
# step with the rest of the analysis.
temp <- dat5[1:100, 1:no_libs, drop = FALSE]
temp2 <- dat5_expected[1:100, 1:no_libs, drop = FALSE]
for (i in seq_len(no_libs)) {
  temp3 <- data.frame(temp[, i], temp2[, i], key$present[1:100])
  names(temp3) <- c("obs", "exp", "pres")
  print(
    ggplot(temp3, aes(x = exp, y = obs, color = pres)) +
      geom_point() +
      ggtitle(names(dat5)[i])
  )
}
#non-existent strain with relatively high count is CC16-CC16
```