require(ggplot2)
## Loading required package: ggplot2
require(reshape2)
## Loading required package: reshape2
# Plot styling: start from the stock gray theme, then override panel, strip
# and axis settings for every plot drawn below.
# NOTE(review): theme_set() and theme_update() both return the theme that was
# active *before* the call, so theme_dbc ends up holding the plain gray theme
# rather than the customised one. The assignments are kept unchanged; call
# theme_get() if the customised theme object itself is needed.
theme_dbc <- theme_set(theme_gray())
theme_dbc <- theme_update(
  panel.background = element_rect(fill = "white"),
  panel.border = element_rect(colour = "black", fill = NA, size = 2),
  panel.grid.major = element_line(colour = "gray93", size = 1),
  panel.grid.minor = element_line(colour = "gray98", size = 1),
  strip.text.x = element_text(size = 12, face = "bold"),
  axis.title = element_text(size = 16),
  strip.background = element_rect(colour = "black", fill = "white", size = 1),
  axis.text = element_text(colour = "black", face = "bold", size = 16),
  axis.ticks = element_line(color = "black", size = 2)
)

# Two barcode count tables; the first column of each file supplies row names.
dat <- read.table(
  file = "~/Desktop/SherlockLab2/System_test/screen2_hiseq/seqlib12_analysis/bc_counts/all_counts.txt",
  sep = "\t", header = TRUE, row.names = 1, stringsAsFactors = FALSE
)
dat2 <- read.table(
  file = "~/Desktop/SherlockLab2/System_test/screen2_hiseq/seqlib12_analysis/bc_counts/recovered_counts.txt",
  sep = "\t", header = TRUE, row.names = 1, stringsAsFactors = FALSE
)

# `+` on two data frames adds cell by cell purely by position and only checks
# that the dimensions agree, so make sure both tables list the same rows and
# columns in the same order before summing them.
stopifnot(
  identical(dim(dat), dim(dat2)),
  identical(rownames(dat), rownames(dat2)),
  identical(names(dat), names(dat2))
)
dat5 <- dat + dat2

rm(dat, dat2)

# Row annotation table; the code below reads its present, category, query and
# array columns and uses them to index rows of dat5 by position.
key <- read.table(
  file = "~/Desktop/SherlockLab2/System_test/screen2_hiseq/data_table_row_key.txt",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
stopifnot(nrow(key) == nrow(dat5))

# Number of leading columns of dat5 that are treated as libraries.
no_libs <- 18

Look at counts for present barcodes vs not present barcodes

# For every column of dat5, overlay the count histogram of barcodes the key
# marks as present (green) on that of barcodes it marks as absent (blue).
# The row sets do not depend on the column, so compute them once.
present_rows <- which(key$present == "yes")
absent_rows <- which(key$present == "no")

# Iterate over however many columns dat5 has instead of a hard-coded 25.
for (i in seq_along(dat5)) {
  present_counts <- data.frame(count = dat5[present_rows, i])
  absent_counts <- data.frame(count = dat5[absent_rows, i])

  p <- ggplot(mapping = aes(x = count)) +
    geom_histogram(data = present_counts, alpha = 0.5, binwidth = 5, fill = "green") +
    geom_histogram(data = absent_counts, alpha = 0.5, binwidth = 5, fill = "blue") +
    ylim(0, 500) +
    xlab("count") +
    ggtitle(names(dat5)[i])
  print(p)
}

Look at coverage across libraries

# Reshape the count table to long format (melt() names the resulting columns
# `variable` and `value`) and draw one box of counts per original column.
coverage_long <- melt(dat5)
coverage_plot <- ggplot(
  coverage_long,
  aes(x = variable, y = value, color = variable)
) +
  geom_boxplot() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 60, hjust = 1)
  )
print(coverage_plot)
## No id variables; using all as measure variables

Look at representation of each query guide in starting pool

# Per-library box plots of counts for each query guide, faceted by category,
# restricted to barcodes the key marks as present. For each library the loop
# also prints how many of those barcodes have a count of zero and which ones.
sub <- dat5
sub$category <- key$category
sub$query <- key$query
sub <- sub[which(key$present == "yes"), ]

for (i in seq_len(no_libs)) {
  lib_name <- names(dat5)[i]

  # Pick the annotation columns by name rather than by position (26, 27) so
  # the selection stays correct if dat5 gains or loses columns.
  temp <- sub[, c(lib_name, "category", "query")]
  names(temp)[1] <- "count"

  p <- ggplot(temp) +
    geom_boxplot(aes(x = query, y = count, color = query)) +
    ggtitle(lib_name) +
    theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 60, hjust = 1)
    ) +
    scale_y_log10()
  print(p + facet_wrap(~category))

  # Zero counts are non-finite on the log scale, so ggplot drops them with a
  # warning; list them explicitly.
  zero_rows <- which(temp$count == 0)
  print(lib_name)
  print(length(zero_rows))
  print(rownames(sub)[zero_rows])
}
## Warning: Removed 5 rows containing non-finite values (stat_boxplot).

## [1] "YPEGR1T1"
## [1] 5
## [1] "PRE4g9-RFC5-NRg-2"  "SED5g5-RFC5-NRg-2"  "DIP2g5-RFC5-NRg-2" 
## [4] "TIF6g8-RFC5-NRg-2"  "SAP30g7-RFC5-NRg-2"
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).

## [1] "YPEGR1T2"
## [1] 4
## [1] "COG3g1-RFC5-NRg-2" "SED5g5-RFC5-NRg-2" "DIP2g5-RFC5-NRg-2"
## [4] "TIF6g8-RFC5-NRg-2"
## Warning: Removed 24 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## [1] "YPEGR1T3"
## [1] 25
##  [1] "CC16-CCT8-TRg-2"        "PRE7g7-ATP15-NRg-5"    
##  [3] "PRE7g7-ATP3-TRg-4"      "PRE7g7-ATP3-NRg-3"     
##  [5] "PRE7g7-ACO1-TRg-6"      "PRE7g7-SUB2-NRg-4"     
##  [7] "PRE7g7-ATP16-TRg-9"     "PRE4g9-MSN5-TRg-7"     
##  [9] "PRE4g9-RFC5-NRg-2"      "RPN5g1-RFC5-NRg-2"     
## [11] "COG3g1-RFC5-NRg-2"      "SED5g5-CCT8-TRg-2"     
## [13] "SED5g5-ATP3-TRg-4"      "GET2g2-CCT8-TRg-2"     
## [15] "GET2g2-MPS1-NRg-1"      "GET2g2-RFC5-NRg-2"     
## [17] "IMP4g6-MSN5-TRg-7"      "DIP2g5-ARP2-NRg-3"     
## [19] "DIP2g5-MOB2-NRg-6"      "DIP2g5-YPT6-NRg-2"     
## [21] "DIP2g5-RHO1-NRg-2"      "DIP2g5-RFC5-NRg-2"     
## [23] "DIP2g5-ATP16-TRg-2"     "PWP2g2_BC1-ARC35-TRg-5"
## [25] "TIF6g8-DAD2-TRg-2"
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).

## [1] "YPEGR2T1"
## [1] 7
## [1] "PRE4g9-RFC5-NRg-2"    "COG3g1-RFC5-NRg-2"    "IMP4g6-RFC5-NRg-2"   
## [4] "DIP2g5-RFC5-NRg-2"    "TIF6g8-RFC5-NRg-2"    "YLR050Cg1-RFC5-NRg-2"
## [7] "SAP30g7-RFC5-NRg-2"
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).

## [1] "YPEGR2T2"
## [1] 12
##  [1] "PRE7g7-RFC5-NRg-2"  "PRE4g9-RFC5-NRg-2"  "RPN5g1-RFC5-NRg-2" 
##  [4] "COG3g1-RFC5-NRg-2"  "SED5g5-RFC5-NRg-2"  "SEC22g2-CCT8-TRg-2"
##  [7] "IMP4g6-RPP1-NRg-1"  "IMP4g6-CDC42-TRg-6" "DIP2g5-ARC35-TRg-5"
## [10] "DIP2g5-RFC5-NRg-2"  "TIF6g8-RFC5-NRg-2"  "SAP30g7-RFC5-NRg-2"
## Warning: Removed 47 rows containing non-finite values (stat_boxplot).

## [1] "YPEGR2T3"
## [1] 47
##  [1] "PRE7g7-MSN5-TRg-7"      "PRE7g7-ARP2-NRg-3"     
##  [3] "PRE7g7-ATP3-NRg-2"      "PRE7g7-SEC31-TRg-2"    
##  [5] "PRE7g7-TAF12-TRg-3"     "PRE7g7-PGS1-NRg-4"     
##  [7] "PRE7g7-RFC5-NRg-2"      "PRE7g7-ATP16-TRg-2"    
##  [9] "PRE4g9-RFC5-NRg-2"      "PRE4g3-RFC5-NRg-2"     
## [11] "RPN5g1-RFC5-NRg-2"      "COG3g1-MSN5-TRg-7"     
## [13] "COG3g1-CDC42-TRg-6"     "COG3g1-ARC35-TRg-5"    
## [15] "COG3g1-GAL11-NRg-10"    "COG3g1-RFC5-NRg-2"     
## [17] "SED5g5-ATP15-NRg-5"     "SED5g5-CCT8-TRg-2"     
## [19] "SED5g5-ARC35-TRg-2"     "SED5g5-ARC35-TRg-5"    
## [21] "SED5g5-RFC5-NRg-2"      "SEC22g1-ARP2-NRg-3"    
## [23] "GET2g2-CCT8-TRg-2"      "GET2g2-ARC35-TRg-5"    
## [25] "GET2g2-ULP1-NRg-2"      "IMP4g6-CCT8-TRg-2"     
## [27] "IMP4g6-ATP3-TRg-4"      "IMP4g6-RPP1-NRg-1"     
## [29] "IMP4g6-PET117-NRg-8"    "IMP4g6-CDC42-TRg-6"    
## [31] "IMP4g6-ATP16-TRg-2"     "DIP2g5-CCT8-TRg-2"     
## [33] "DIP2g5-YPT1-TRg-2"      "DIP2g5-YDR355C-TRg-2"  
## [35] "DIP2g5-CDC42-TRg-6"     "DIP2g5-TOA2-TRg-3"     
## [37] "DIP2g5-SEC31-TRg-2"     "DIP2g5-SNU13-NRg-4"    
## [39] "DIP2g5-ACO1-TRg-6"      "DIP2g5-RPL3-NRg-2"     
## [41] "DIP2g5-RFC5-NRg-2"      "DIP2g5-ATP16-TRg-2"    
## [43] "PWP2g2_BC1-ARC35-TRg-5" "TIF6g8-RFC5-NRg-2"     
## [45] "RPF1g3-CCT8-TRg-2"      "YLR050Cg1-ATP16-TRg-9" 
## [47] "SAP30g7-RFC5-NRg-2"
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).

## [1] "YPEGR3T1"
## [1] 7
## [1] "PRE7g7-RFC5-NRg-2"  "PRE4g9-RFC5-NRg-2"  "COG3g1-RFC5-NRg-2" 
## [4] "SED5g5-RFC5-NRg-2"  "DIP2g5-RFC5-NRg-2"  "TIF6g8-RFC5-NRg-2" 
## [7] "SAP30g7-RFC5-NRg-2"
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).

## [1] "YPEGR3T2"
## [1] 8
## [1] "PRE7g7-RFC5-NRg-2"  "PRE4g9-RFC5-NRg-2"  "COG3g1-RFC5-NRg-2" 
## [4] "SED5g5-RFC5-NRg-2"  "SEC22g1-RFC5-NRg-2" "DIP2g5-RFC5-NRg-2" 
## [7] "TIF6g8-RFC5-NRg-2"  "SAP30g7-RFC5-NRg-2"
## Warning: Removed 52 rows containing non-finite values (stat_boxplot).

## [1] "YPEGR3T3"
## [1] 52
##  [1] "PRE7g7-CCT8-TRg-2"     "PRE7g7-ATP3-TRg-4"    
##  [3] "PRE7g7-RAP1-NRg-5"     "PRE7g7-GLC7-NRg-4"    
##  [5] "PRE7g7-SEC24-TRg-2"    "PRE7g7-ARC35-TRg-5"   
##  [7] "PRE7g7-NSL1-NRg-1"     "PRE7g7-CDC20-TRg-6"   
##  [9] "PRE7g7-FCF1-NRg-1"     "PRE7g7-PGS1-NRg-4"    
## [11] "PRE7g7-RFC5-NRg-2"     "PRE7g7-CKS1-TRg-5"    
## [13] "PRE4g9-RFC5-NRg-2"     "PRE4g9-COG1-TRg-3"    
## [15] "PRE4g3-MSN5-TRg-7"     "PRE4g3-CCT8-TRg-2"    
## [17] "COG3g1-ATP15-NRg-5"    "COG3g1-GAL11-NRg-10"  
## [19] "COG3g1-RFC5-NRg-2"     "COG3g1-SEC16-TRg-7"   
## [21] "SED5g5-GAL11-NRg-10"   "SED5g5-RFC5-NRg-2"    
## [23] "SEC22g1-ATP3-TRg-4"    "SEC22g2-CCT8-TRg-2"   
## [25] "GET2g2-MSN5-TRg-7"     "GET2g2-CDC25-TRg-1"   
## [27] "GET2g2-CCT8-TRg-2"     "GET2g2-YOS1-TRg-2"    
## [29] "GET2g2-DAD2-TRg-2"     "GET2g2-MPS1-NRg-1"    
## [31] "GET2g2-RFC5-NRg-2"     "IMP4g6-MSN5-TRg-7"    
## [33] "IMP4g6-CCT8-TRg-2"     "IMP4g6-YEF3-NRg-9"    
## [35] "IMP4g6-SNU13-NRg-4"    "IMP4g6-COG1-TRg-3"    
## [37] "DIP2g5-MSN5-TRg-7"     "DIP2g5-ARP2-NRg-3"    
## [39] "DIP2g5-CCT8-TRg-2"     "DIP2g5-COG1-TRg-1"    
## [41] "DIP2g5-CLF1-NRg-2"     "DIP2g5-SEC10-NRg-1"   
## [43] "DIP2g5-CDC42-TRg-6"    "DIP2g5-NSL1-NRg-1"    
## [45] "DIP2g5-TAF12-TRg-2"    "DIP2g5-RFC5-NRg-2"    
## [47] "PWP2g2_BC1-CCT8-TRg-2" "PWP2g2_BC1-ACT1-NRg-6"
## [49] "TIF6g8-RFC5-NRg-2"     "RPF1g3-RFC5-NRg-2"    
## [51] "YLR050Cg1-MSN5-TRg-7"  "YLR050Cg1-ATP16-TRg-2"
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).

## [1] "SCURAR1T1"
## [1] 6
## [1] "PRE7g7-RFC5-NRg-2" "PRE4g9-RFC5-NRg-2" "COG3g1-RFC5-NRg-2"
## [4] "SED5g5-RFC5-NRg-2" "DIP2g5-RFC5-NRg-2" "TIF6g8-RFC5-NRg-2"
## Warning: Removed 9 rows containing non-finite values (stat_boxplot).

## [1] "SCURAR1T2"
## [1] 9
## [1] "PRE7g7-RFC5-NRg-2"  "PRE4g9-RFC5-NRg-2"  "COG3g1-RFC5-NRg-2" 
## [4] "SED5g5-RFC5-NRg-2"  "GET2g2-RFC5-NRg-2"  "IMP4g6-RPP1-NRg-1" 
## [7] "DIP2g5-RFC5-NRg-2"  "TIF6g8-RFC5-NRg-2"  "SAP30g7-RFC5-NRg-2"
## Warning: Removed 9 rows containing non-finite values (stat_boxplot).

## [1] "SCURAR1T3"
## [1] 9
## [1] "PRE7g7-RFC5-NRg-2"  "PRE4g9-RFC5-NRg-2"  "COG3g1-RFC5-NRg-2" 
## [4] "SED5g5-RFC5-NRg-2"  "SEC22g2-RFC5-NRg-2" "GET2g2-CCT8-TRg-2" 
## [7] "DIP2g5-RFC5-NRg-2"  "TIF6g8-RFC5-NRg-2"  "SAP30g7-RFC5-NRg-2"
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).

## [1] "SCURAR2T1"
## [1] 7
## [1] "PRE7g7-RFC5-NRg-2"  "PRE4g9-RFC5-NRg-2"  "COG3g1-RFC5-NRg-2" 
## [4] "SED5g5-RFC5-NRg-2"  "GET2g2-RFC5-NRg-2"  "DIP2g5-RFC5-NRg-2" 
## [7] "SAP30g7-RFC5-NRg-2"
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).

## [1] "SCURAR2T2"
## [1] 7
## [1] "PRE7g7-RFC5-NRg-2"  "PRE4g9-RFC5-NRg-2"  "COG3g1-RFC5-NRg-2" 
## [4] "SED5g5-RFC5-NRg-2"  "TIF6g8-RFC5-NRg-2"  "RPF1g3-RFC5-NRg-2" 
## [7] "SAP30g7-RFC5-NRg-2"
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).

## [1] "SCURAR2T3"
## [1] 12
##  [1] "PRE7g7-RFC5-NRg-2"  "PRE4g9-RFC5-NRg-2"  "COG3g1-MSN5-TRg-7" 
##  [4] "COG3g1-RFC5-NRg-2"  "SED5g5-RFC5-NRg-2"  "SEC22g2-MSN5-TRg-7"
##  [7] "GET2g2-CCT8-TRg-2"  "DIP2g5-CCT8-TRg-2"  "DIP2g5-RFC5-NRg-2" 
## [10] "DIP2g5-COG1-TRg-3"  "TIF6g8-RFC5-NRg-2"  "SAP30g7-RFC5-NRg-2"
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).

## [1] "SCURAR3T1"
## [1] 4
## [1] "PRE7g7-RFC5-NRg-2" "PRE4g9-RFC5-NRg-2" "GET2g2-CCT8-TRg-2"
## [4] "TIF6g8-RFC5-NRg-2"
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).

## [1] "SCURAR3T2"
## [1] 7
## [1] "PRE7g7-RFC5-NRg-2"  "PRE4g9-RFC5-NRg-2"  "COG3g1-RFC5-NRg-2" 
## [4] "SED5g5-RFC5-NRg-2"  "DIP2g5-RFC5-NRg-2"  "TIF6g8-RFC5-NRg-2" 
## [7] "SAP30g7-RFC5-NRg-2"
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).

## [1] "SCURAR3T3"
## [1] 4
## [1] "PRE7g7-MSN5-TRg-7" "PRE4g9-RFC5-NRg-2" "GET2g2-CCT8-TRg-2"
## [4] "RPF1g3-RFC5-NRg-2"

Calculate and plot chimera subtraction

# Record which BC1 (query), BC2 (array) and double barcode (row name) each
# row of dat5 represents.
all_BC1 <- key$query
all_BC2 <- key$array
all_DBC <- rownames(dat5)

# Per-library totals for each BC1: one row per distinct BC1, one column per
# library, plus a trailing BC1_name column identifying the row.
BC1_freqs <- data.frame(matrix(nrow = length(unique(all_BC1)), ncol = no_libs))
names(BC1_freqs) <- names(dat5)[seq_len(no_libs)]
BC1_freqs$BC1_name <- unique(all_BC1)

# Sum all library columns at once for the rows carrying each BC1. seq_len()
# keeps the loop from running when there are no barcodes.
for (i in seq_len(nrow(BC1_freqs))) {
  members <- which(all_BC1 == BC1_freqs$BC1_name[i])
  BC1_freqs[i, seq_len(no_libs)] <-
    colSums(dat5[members, seq_len(no_libs), drop = FALSE])
}

# Same table for each BC2. Note the identifying column is called BC2_names
# (plural), unlike BC1_name.
BC2_freqs <- data.frame(matrix(nrow = length(unique(all_BC2)), ncol = no_libs))
names(BC2_freqs) <- names(dat5)[seq_len(no_libs)]
BC2_freqs$BC2_names <- unique(all_BC2)

for (i in seq_len(nrow(BC2_freqs))) {
  members <- which(all_BC2 == BC2_freqs$BC2_names[i])
  BC2_freqs[i, seq_len(no_libs)] <-
    colSums(dat5[members, seq_len(no_libs), drop = FALSE])
}
# Expected (un-normalised) count of every double barcode in one library,
# computed as the library total of its BC1 multiplied by the library total of
# its BC2.
#
# Args:
#   sample: column index of the library in dat5, BC1_freqs and BC2_freqs.
#
# Returns:
#   Numeric vector with one element per row of dat5, in dat5 row order.
#
# Reads the globals dat5, all_BC1, all_BC2, BC1_freqs and BC2_freqs, and
# prints the library name as a progress marker.
get_expected <- function(sample) {
  print(names(dat5)[sample])

  rows <- seq_len(nrow(dat5))

  # Locate each row's BC1 / BC2 in the totals tables. The BC2 column is named
  # BC2_names; spell it out in full instead of relying on `$` partial matching.
  bc1_row <- match(all_BC1[rows], BC1_freqs$BC1_name)
  bc2_row <- match(all_BC2[rows], BC2_freqs$BC2_names)

  bc1_count <- as.numeric(BC1_freqs[bc1_row, sample])
  bc2_count <- as.numeric(BC2_freqs[bc2_row, sample])

  bc1_count * bc2_count
}

# Expected counts for every double barcode in every library: same row names
# as dat5, one column per library, each column filled by get_expected().
expected_by_lib <- lapply(1:no_libs, get_expected)
names(expected_by_lib) <- names(dat5)[1:no_libs]
dat5_expected <- data.frame(
  expected_by_lib,
  row.names = rownames(dat5),
  check.names = FALSE
)
## [1] "YPEGR1T1"
## [1] "YPEGR1T2"
## [1] "YPEGR1T3"
## [1] "YPEGR2T1"
## [1] "YPEGR2T2"
## [1] "YPEGR2T3"
## [1] "YPEGR3T1"
## [1] "YPEGR3T2"
## [1] "YPEGR3T3"
## [1] "SCURAR1T1"
## [1] "SCURAR1T2"
## [1] "SCURAR1T3"
## [1] "SCURAR2T1"
## [1] "SCURAR2T2"
## [1] "SCURAR2T3"
## [1] "SCURAR3T1"
## [1] "SCURAR3T2"
## [1] "SCURAR3T3"

Plot data before and after subtracting chimeric reads for each time point in each pool and save new count tables

# Estimate the chimeric background for one library and subtract it from the
# barcodes that exist in the pool.
#
# A straight line through the origin (Observed ~ Expected) is fitted to the
# barcodes marked presence == "no"; its slope times each present barcode's
# Expected value is then subtracted from that barcode's Observed count.
#
# Args:
#   temp:   data frame with columns Observed, Expected, presence ("yes"/"no")
#           and category, one row per double barcode.
#   column: index of the library in the global dat5; only used for the plot
#           title.
#
# Returns:
#   The presence == "yes" rows of temp with an added numeric column `norm`
#   holding the background-subtracted counts.
#
# Side effects: prints the fitted slope, draws the "Before" scatter plot, and
# prints the total counts before and after subtraction.
subtract_BG <- function(temp, column) {
  # fit line to non-existent DBCs
  fit_non <- coef(
    lm(Observed ~ Expected + 0, data = temp, subset = presence == "no")
  )
  slope <- as.numeric(fit_non[1])
  print("slope/total counts (before chimera removal)/total counts (after chimera removal)")
  print(unlist(fit_non[1]))

  # plot before correction, with the fitted background line
  p <- ggplot(temp) +
    geom_point(
      aes(x = Expected, y = Observed, shape = presence, color = category),
      alpha = 0.2
    ) +
    theme(legend.position = "none") +
    ylim(0, 12000) +
    geom_abline(slope = slope)
  print(p + ggtitle(paste(names(dat5)[column], " Before")))

  # keep just the strains that exist in the pool and subtract the fitted
  # background at each strain's expected value (vectorised, so an empty set
  # of present strains yields an empty result instead of an error)
  sub_temp <- temp[which(temp$presence == "yes"), ]
  sub_temp$norm <- sub_temp$Observed - slope * sub_temp$Expected

  # print how many counts were subtracted
  print(sum(sub_temp$Observed))
  print(sum(sub_temp$norm))

  # return normalized data for fitness calculation
  return(sub_temp)
}

# Data frames holding barcode counts before (dat5_unnorm) and after
# (dat5_norm) chimera removal, restricted to strains that exist in the pool.
# Rows are selected with the same presence == "yes" test that subtract_BG()
# uses, so both tables always have matching rows. Negative indexing with
# -which(... == "no") would return zero rows if no barcode were absent.
present_rows <- which(key$present == "yes")
dat5_unnorm <- dat5[present_rows, seq_len(no_libs)]
dat5_norm <- data.frame(matrix(nrow = nrow(dat5_unnorm), ncol = no_libs))
names(dat5_norm) <- names(dat5_unnorm)
rownames(dat5_norm) <- rownames(dat5_unnorm)

# For each library: subtract the chimera background, plot the raw points with
# the corrected points overlaid as open circles, and store the corrected
# counts.
for (column in seq_len(no_libs)) {
  print(names(dat5)[column])

  temp <- data.frame(
    dat5[, column], dat5_expected[, column], key$present, key$category
  )
  names(temp) <- c("Observed", "Expected", "presence", "category")
  rownames(temp) <- rownames(dat5)

  temp2 <- subtract_BG(temp, column)

  p <- ggplot(mapping = aes(x = Expected)) +
    geom_point(data = temp, aes(y = Observed, color = presence), alpha = 0.2) +
    geom_point(data = temp2, aes(y = norm), shape = 1, alpha = 0.1) +
    theme(legend.position = "none") +
    ylim(0, 12000) +
    xlim(0, max(temp2$Expected)) +
    xlab("Expected") +
    ylab("Observed")
  print(p + ggtitle(paste(names(dat5)[column], "After")))

  # Guard against storing corrected counts against the wrong barcodes.
  stopifnot(identical(rownames(temp2), rownames(dat5_norm)))
  dat5_norm[, column] <- temp2$norm
}
## [1] "YPEGR1T1"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 4.665019e-09

## [1] 4555960
## [1] 4460630
## Warning: Removed 5 rows containing missing values (geom_point).

## [1] "YPEGR1T2"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 3.928694e-09

## [1] 5807617
## [1] 5678118
## Warning: Removed 4 rows containing missing values (geom_point).

## [1] "YPEGR1T3"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##    Expected 
## 8.37173e-09

## [1] 5830073
## [1] 5558160
## Warning: Removed 27 rows containing missing values (geom_point).

## [1] "YPEGR2T1"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 4.037868e-09

## [1] 5517762
## [1] 5396683
## Warning: Removed 7 rows containing missing values (geom_point).

## [1] "YPEGR2T2"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##   Expected 
## 1.2368e-09

## [1] 6002224
## [1] 5958735
## Warning: Removed 12 rows containing missing values (geom_point).

## [1] "YPEGR2T3"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 1.743539e-09

## [1] 5757913
## [1] 5702643
## Warning: Removed 47 rows containing missing values (geom_point).

## [1] "YPEGR3T1"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 2.931316e-09

## [1] 6318021
## [1] 6202802
## Warning: Removed 7 rows containing missing values (geom_point).

## [1] "YPEGR3T2"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 2.264984e-09

## [1] 5343605
## [1] 5280469
## Warning: Removed 8 rows containing missing values (geom_point).

## [1] "YPEGR3T3"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 3.872551e-09

## [1] 5988172
## [1] 5855488
## Warning: Removed 53 rows containing missing values (geom_point).

## [1] "SCURAR1T1"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 5.739071e-09

## [1] 5684251
## [1] 5501074
## Warning: Removed 6 rows containing missing values (geom_point).

## [1] "SCURAR1T2"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 3.209791e-09

## [1] 4914804
## [1] 4838322
## Warning: Removed 9 rows containing missing values (geom_point).

## [1] "SCURAR1T3"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 6.987924e-09

## [1] 4699511
## [1] 4547936
## Warning: Removed 9 rows containing missing values (geom_point).

## [1] "SCURAR2T1"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 4.651677e-09

## [1] 5400323
## [1] 5266352
## Warning: Removed 7 rows containing missing values (geom_point).

## [1] "SCURAR2T2"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##    Expected 
## 2.36521e-09

## [1] 6490506
## [1] 6392210
## Warning: Removed 7 rows containing missing values (geom_point).

## [1] "SCURAR2T3"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 2.181724e-09

## [1] 5028957
## [1] 4974821
## Warning: Removed 12 rows containing missing values (geom_point).

## [1] "SCURAR3T1"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 9.757492e-09

## [1] 5136137
## [1] 4881706
## Warning: Removed 5 rows containing missing values (geom_point).

## [1] "SCURAR3T2"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##     Expected 
## 3.828555e-09

## [1] 4795700
## [1] 4708816
## Warning: Removed 7 rows containing missing values (geom_point).

## [1] "SCURAR3T3"
## [1] "slope/total counts (before chimera removal)/total counts (after chimera removal)"
##    Expected 
## 1.43722e-08

## [1] 5237076
## [1] 4849492
## Warning: Removed 4 rows containing missing values (geom_point).

Save matrices of bc counts before and after chimera removal

#save(dat5_unnorm,file="~/Desktop/SherlockLab2/System_test/screen2_hiseq/seqlib12_analysis/seqlib12_raw_counts.RData")

#save(dat5_norm,file="~/Desktop/SherlockLab2/System_test/screen2_hiseq/seqlib12_analysis/seqlib12_chimera_normalized_counts.RData")

Also look at differences in slope for the wt strain. Unlike the last experiment, some combos didn't exist, and the ones that did exist were at different expected values!

# Observed vs expected counts for the first 100 rows of dat5, one scatter
# plot per library, coloured by whether the key marks the barcode as present.
# NOTE(review): the first 100 rows are presumably the wt-strain combinations
# referred to in the heading -- confirm against the key.
wt_rows <- 1:100

# Use no_libs rather than repeating the literal 18 so this stays in step with
# the rest of the analysis.
temp <- dat5[wt_rows, seq_len(no_libs)]
temp2 <- dat5_expected[wt_rows, seq_len(no_libs)]

for (i in seq_len(no_libs)) {
  temp3 <- data.frame(
    obs = temp[, i],
    exp = temp2[, i],
    pres = key$present[wt_rows]
  )
  print(
    ggplot(temp3, aes(x = exp, y = obs, color = pres)) +
      geom_point() +
      ggtitle(names(dat5)[i])
  )
}

#non-existent strain with relatively high count is CC16-CC16

```