# hamster_align.txt is from RH_pools_hamster_align_1.xlsx, which in turn is a simplified version of RH_mapped_unmapped_info_hg38.xlsx from Arshad

# Load the per-sample alignment counts and the lookup table that assigns a
# cell label to each pool / conc / week combination. Both files are
# tab-delimited with a header row; strings are kept as character.
hamster_align <- read.table(
  "hamster_align.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

cell <- read.table(
  "cell_label_info.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# RH_ID values look like "RH1_w1_d25". The pool, week and conc columns are
# filled in by matching fragments of the ID; each lookup maps a grep pattern
# to the numeric value written into the matching rows. Patterns are applied
# in the order listed, and rows matching no pattern are left as NA.
pool_by_pattern <- c(
  "^RH1" = 1,
  "^RH2" = 2,
  "^RH3" = 3,
  "^RH4" = 4,
  "^RH5" = 5,
  "^RH6" = 6
)
for (id_pattern in names(pool_by_pattern)) {
  matched_rows <- grep(id_pattern, hamster_align$RH_ID)
  hamster_align[matched_rows, "pool"] <- pool_by_pattern[[id_pattern]]
}

# Week labels present in the IDs: 0, 1, 2, 3, 4 and 6 (there is no week 5).
week_by_pattern <- c(
  "_w0_" = 0,
  "_w1_" = 1,
  "_w2_" = 2,
  "_w3_" = 3,
  "_w4_" = 4,
  "_w6_" = 6
)
for (id_pattern in names(week_by_pattern)) {
  matched_rows <- grep(id_pattern, hamster_align$RH_ID)
  hamster_align[matched_rows, "week"] <- week_by_pattern[[id_pattern]]
}

# Concentration labels present in the IDs: 0, 8, 25 and 75.
conc_by_pattern <- c(
  "_d0" = 0,
  "_d8" = 8,
  "_d25" = 25,
  "_d75" = 75
)
for (id_pattern in names(conc_by_pattern)) {
  matched_rows <- grep(id_pattern, hamster_align$RH_ID)
  hamster_align[matched_rows, "conc"] <- conc_by_pattern[[id_pattern]]
}

# Quick look at the annotated alignment table (recorded output kept below).
dim(hamster_align)
# [1] 115   9

head(hamster_align)
       # RH_ID Total_reads hamster_unaligned hamster_aligned hamster_aligned_and_human_unaligned
# 1  RH1_w0_d0    42000172           4304785        37695387                            37595178
# 2  RH1_w1_d0    47971567           4982337        42989230                            42886773
# 3  RH1_w1_d8    62213204           6335817        55877387                            55742797
# 4 RH1_w1_d25    55396431           5628828        49767603                            49651729
# 5 RH1_w1_d75    66004325           6591757        59412568                            59267597
# 6  RH1_w2_d0    38276132           3876901        34399231                            34210971
  # hamster_aligned_and_human_aligned pool week conc
# 1                            100209    1    0    0
# 2                            102457    1    1    0
# 3                            134590    1    1    8
# 4                            115874    1    1   25
# 5                            144971    1    1   75
# 6                            188260    1    2    0

# Quick look at the cell-label lookup table.
dim(cell)
# [1] 115   4

head(cell)
  # pool conc week cell
# 1    1    0    0    1
# 2    1    0    1    2
# 3    1    0    2    2
# 4    1    0    3    2
# 5    1    0    4    2
# 6    1    0    6    2

# Attach the cell label to every sample. The join keys are spelled out rather
# than left to merge()'s default (all column names shared by both tables), so
# an extra shared column in either input cannot silently change the join.
# They are listed in the order the default would use (the order in which they
# appear in hamster_align), which keeps row order and row names unchanged.
merge_keys <- c("pool", "week", "conc")
hamster_align_1 <- merge(hamster_align, cell, by = merge_keys)

# merge() keeps only rows with a partner in the other table, so a sample whose
# pool/week/conc could not be parsed or is missing from `cell` would vanish
# without warning. Fail loudly instead of carrying on with fewer samples.
stopifnot(nrow(hamster_align_1) == nrow(hamster_align))

dim(hamster_align_1)
# [1] 115  10

# Put the identifying columns first, followed by the read-count columns.
# Later code selects the count columns by position (column 6 onward), so this
# order matters.
column_order <- c(
  "RH_ID", "pool", "week", "conc", "cell",
  "Total_reads", "hamster_unaligned", "hamster_aligned",
  "hamster_aligned_and_human_unaligned", "hamster_aligned_and_human_aligned"
)
hamster_align_2 <- hamster_align_1[, column_order]

# Sort samples by cell label.
hamster_align_3 <- hamster_align_2[order(hamster_align_2$cell), ]



dim(hamster_align_3)
# [1] 115  10


head(hamster_align_3)
       # RH_ID pool week conc cell Total_reads hamster_unaligned hamster_aligned hamster_aligned_and_human_unaligned
# 1  RH1_w0_d0    1    0    0    1    42000172           4304785        37695387                            37595178
# 2  RH1_w1_d0    1    1    0    2    47971567           4982337        42989230                            42886773
# 6  RH1_w2_d0    1    2    0    2    38276132           3876901        34399231                            34210971
# 9  RH1_w3_d0    1    3    0    2    34816772           3585610        31231162                            31147459
# 13 RH1_w4_d0    1    4    0    2    36328023           3623841        32704182                            32520994
# 17 RH1_w6_d0    1    6    0    2    50327669           5325221        45002448                            44894243
   # hamster_aligned_and_human_aligned
# 1                             100209
# 2                             102457
# 6                             188260
# 9                              83703
# 13                            183188
# 17                            108205

tail(hamster_align_3)
         # RH_ID pool week conc cell Total_reads hamster_unaligned hamster_aligned hamster_aligned_and_human_unaligned
# 103 RH6_w2_d25    6    2   25   29    33860116           3592410        30267706                            30103817
# 106 RH6_w3_d25    6    3   25   29    34412488           3964043        30448445                            30279256
# 110 RH6_w4_d25    6    4   25   29    41335980           4811609        36524371                            36322280
# 113 RH6_w6_d25    6    6   25   29    41324384           4829222        36495162                            36292899
# 107 RH6_w3_d75    6    3   75   30    38966343           4298228        34668115                            34474981
# 114 RH6_w6_d75    6    6   75   30    38440264           4501293        33938971                            33748824
    # hamster_aligned_and_human_aligned
# 103                            163889
# 106                            169189
# 110                            202091
# 113                            202263
# 107                            193134
# 114                            190147

# write.table(hamster_align_3, "RH_pool_hamster_total_align.txt",quote=FALSE,sep="\t",row.names=FALSE)



# ~~~~~~~~~~~~~ Prepare table for latex: ~~~~~~~~~~~~~~~~~~~~~~~


# ~~~~~~~ RH pools only ~~~~~~~~~~~~~~~~~~~~~

# "RH pools only" = the week 0, conc 0 row of each pool (one row per pool).
# The subset is taken once and reused instead of repeating the filter.
rh_pool_rows <- hamster_align_3$week == 0 & hamster_align_3$conc == 0
rh_pools <- hamster_align_3[rh_pool_rows, ]

# Percentage of hamster-aligned reads that also align to human. Kept as a
# numeric vector so the summary row can average the unrounded values; only
# the copy placed in the table is rounded and formatted to 2 decimals.
pct_human_aligned <- 100 * (
  rh_pools[["hamster_aligned_and_human_aligned"]] /
    rh_pools[["hamster_aligned"]]
)

# Table body: one row per pool, read counts plus the formatted percentage.
Table_body <- cbind(
  rh_pools[, c(
    "pool", "Total_reads", "hamster_aligned",
    "hamster_aligned_and_human_unaligned", "hamster_aligned_and_human_aligned"
  )],
  pct_human_aligned = format(round(pct_human_aligned, 2), nsmall = 2)
)

# Summary row: label, column means of the four count columns rounded to whole
# reads, and the mean percentage formatted like the body column. The result
# is a character vector because it mixes text and numbers.
Table_end <- c(
  "Means",
  round(colMeans(Table_body[, 2:5])),
  format(round(mean(pct_human_aligned), 2), nsmall = 2)
)

Table_body
# (Output recorded from the original run, where the sixth column carried an
# auto-generated name; that column now prints as pct_human_aligned.)
   # pool Total_reads hamster_aligned hamster_aligned_and_human_unaligned hamster_aligned_and_human_aligned
# 1     1    42000172        37695387                            37595178                            100209
# 21    2    35358581        30838307                            30754890                             83417
# 40    3    52166656        45779128                            45668817                            110311
# 60    4    32756205        28166022                            28092611                             73411
# 80    5    42876176        38382035                            38167486                            214549
# 98    6    41449894        37403393                            37193349                            210044
   # format(round(100 * (hamster_align_3[hamster_align_3$week == 0 & 
# 1                                                              0.27
# 21                                                             0.27
# 40                                                             0.24
# 60                                                             0.26
# 80                                                             0.56
# 98                                                             0.56


Table_end
                                                            # Total_reads                       hamster_aligned 
                            # "Means"                          "41101281"                          "36377379" <<<<<<<<< use in paper
# hamster_aligned_and_human_unaligned   hamster_aligned_and_human_aligned                                     
                         # "36245388"                            "131990"                              "0.36" 
                         


# write.table(Table_body,"Table_body_ham_align.txt",quote=FALSE,sep="&\t",row.names=FALSE,col.names=FALSE,eol="\\\n")


# write.table(Table_end,"Table_end_ham_align.txt",quote=FALSE,sep="&\t",row.names=FALSE,col.names=FALSE,eol="\\\n")

# Standard errors for the "RH pools only" subset (week 0, conc 0 rows).
# The subset and the percentage are computed once, and the sample size comes
# from the data instead of a hard-coded 6, so the SEMs stay correct if the
# number of pools changes.
rh_pool_rows <- hamster_align_3$week == 0 & hamster_align_3$conc == 0
rh_pools <- hamster_align_3[rh_pool_rows, ]

count_cols <- c(
  "Total_reads", "hamster_aligned",
  "hamster_aligned_and_human_unaligned", "hamster_aligned_and_human_aligned"
)

# Percentage of hamster-aligned reads that also align to human.
pct_human_aligned <- 100 * (
  rh_pools[["hamster_aligned_and_human_aligned"]] /
    rh_pools[["hamster_aligned"]]
)

# Standard error of the mean: sd / sqrt(n), with n taken from the input.
sem <- function(x) {
  sd(x) / sqrt(length(x))
}

# sem of error rate as percent:
sem(pct_human_aligned)
# [1] 0.06354566 <<<<<<<<< use in paper

# sem of other cols:
# vapply() walks the columns directly; apply() would first coerce the data
# frame to a matrix.
vapply(rh_pools[count_cols], sem, numeric(1))
                        # Total_reads                     hamster_aligned hamster_aligned_and_human_unaligned 
                         # 2763700.47                          2538431.81                          2528697.81 <<<<<<<<< use in paper
  # hamster_aligned_and_human_aligned 
                           # 25935.34 



# sem of all cols:
c(
  vapply(rh_pools[count_cols], sem, numeric(1)),
  pct_human_aligned = sem(pct_human_aligned)
)
# (Output recorded from the original run, where the last entry was labelled
# with the deparsed expression; it is now labelled pct_human_aligned.)
                                                                # Total_reads 
                                                               # 2.763700e+06 
                                                            # hamster_aligned 
                                                               # 2.538432e+06 
                                        # hamster_aligned_and_human_unaligned 
                                                               # 2.528698e+06 
                                          # hamster_aligned_and_human_aligned 
                                                               # 2.593534e+04 
# 100 * (hamster_align_3[hamster_align_3$week == 0 & hamster_align_3$conc ==  
                                                               # 6.354566e-02 



# ~~~~~~~~~~~~~~~ All samples ~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Summary statistics over every sample in hamster_align_3 (115 in the
# recorded run). The sample size is always taken from the data; previously
# one SEM used nrow() while the other hard-coded 115.

# Standard error of the mean: sd / sqrt(n), with n taken from the input.
sem <- function(x) {
  sd(x) / sqrt(length(x))
}

# Columns 6 onward are the read-count columns (the column order is fixed
# where hamster_align_2 is built).
all_counts <- hamster_align_3[, 6:ncol(hamster_align_3)]

# Means for all 115 samples:
colMeans(all_counts)
                        # Total_reads                   hamster_unaligned                     hamster_aligned 
                           # 40453761                             4670861                            35782901 <<<<<<<<<<< use in paper
# hamster_aligned_and_human_unaligned   hamster_aligned_and_human_aligned 
                           # 35624274                              158627 

# sem for all 115 samples:
# vapply() walks the columns directly; apply() would first coerce the data
# frame to a matrix.
vapply(all_counts, sem, numeric(1))
                        # Total_reads                   hamster_unaligned                     hamster_aligned 
                         # 666836.199                           81333.972                          607987.366 <<<<<<<<<<< use in paper
# hamster_aligned_and_human_unaligned   hamster_aligned_and_human_aligned 
                         # 607969.096                            4515.189

# Per-sample share of hamster-aligned reads that also align to human.
# Note this is a fraction, not a percentage (no factor of 100 here).
human_aligned_fraction <-
  hamster_align_3[["hamster_aligned_and_human_aligned"]] /
  hamster_align_3[["hamster_aligned"]]

# Mean correction rate for all 115 samples:
mean(human_aligned_fraction)
# [1] 0.004536133 <<<<<<<<<<< use in paper

# sem correction rate for all 115 samples:
sem(human_aligned_fraction)
# [1] 0.0001372799 <<<<<<<<<<< use in paper



# ~~~~~~~~~~~~~ Seq coverage of total reads in RH pools compared to hamster genome: ~~~~~~~~~~~~~~~~

# reads = 64 bp long.
# Hamster genome is 2,368,923,191 bp + 16,283 bp mito = 2368939474 bp from https://www.ncbi.nlm.nih.gov/assembly/GCA_003668045.1/ (cf A23_HEK_mit_copy_num_1.R)
# Both numbers are named once here instead of being repeated in every formula.
read_length_bp <- 64
hamster_genome_bp <- 2368939474

# Mean coverage: read length * mean read count / genome size.
coverage_mean <- function(reads) {
  read_length_bp * mean(reads) / hamster_genome_bp
}

# SEM of coverage: the same scaling applied to sd(), divided by sqrt(n).
# n comes from the data rather than a hard-coded 6 or 115.
coverage_sem <- function(reads) {
  (read_length_bp * sd(reads) / hamster_genome_bp) / sqrt(length(reads))
}

# "RH pools" = the week 0, conc 0 row of each pool.
rh_pool_rows <- hamster_align_3$week == 0 & hamster_align_3$conc == 0


# mean:
coverage_mean(hamster_align_3[rh_pool_rows, "Total_reads"])
# [1] 1.110405



# sem:
coverage_sem(hamster_align_3[rh_pool_rows, "Total_reads"])
# [1] 0.07466498


# ~~~~~~~~~~~~~ Seq coverage of total reads in all samples compared to hamster genome: ~~~~~~~~~~~~~~~~

# mean:
coverage_mean(hamster_align_3[, "Total_reads"])
# [1] 1.092911



# sem:
coverage_sem(hamster_align_3[, "Total_reads"])
# [1] 0.01801545



# ~~~~~~~~~~~~~ Seq coverage of hamster-specific aligned reads in RH pools compared to hamster genome: ~~~~~~~~~~~~~~~~

# Same read length and genome size as above; "hamster-specific" reads are the
# hamster_aligned_and_human_unaligned counts.


# mean:
coverage_mean(hamster_align_3[rh_pool_rows, "hamster_aligned_and_human_unaligned"])
# [1] 0.9792166 <<<<<<<<<<<<< use in paper



# sem:
coverage_sem(hamster_align_3[rh_pool_rows, "hamster_aligned_and_human_unaligned"])
# [1] 0.06831608 <<<<<<<<<<<<< use in paper


# ~~~~~~~~~~~~~ Seq coverage of hamster-specific aligned reads in all samples compared to hamster genome: ~~~~~~~~~~~~~~~~


# mean:
coverage_mean(hamster_align_3[, "hamster_aligned_and_human_unaligned"])
# [1] 0.9624364 <<<<<<<<<<<<< use in paper



# sem:
coverage_sem(hamster_align_3[, "hamster_aligned_and_human_unaligned"])
# [1] 0.01642508 <<<<<<<<<<<<< use in paper





# ~~~~~~~~~~ Calculate ratio hamster total reads to human DNA ~~~~~~~~~~~~~~~~~~~~~

# Need hamster-specific and human-specific aligned data for accurate ratio estimate...

# ####### For accurate ratios see RH_human_total_align_1.R ############



























