# If desired, the data frames created here can be used to provide SEMs for the retention graphs.




#######################################################################################
# -------------- Lowest euchromatic retention freq using seq reads --------------------
#######################################################################################

#----------------- Prepare human retain RH pools ---------------------


# Build RH_human: the human window table with ramp windows removed, numeric
# chromosome ids (1-22, X = 23, Y = 24), a cumulative genome coordinate
# ("coord"), chrY dropped, and only the "_w0_d0" pool columns kept.
RH_human <- read.table(
  "RH_human_gseq.txt",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)

# Get rows at beginning of each chromosome (window from 0 to 1e6):
RH_human_start <- RH_human[RH_human$posS == 0 & RH_human$posE == 1e6, ]

# Get rid of ramp ups and ramp downs: keep only the rows whose pos is exactly
# 1e4 beyond the pos of the row before them.
RH_human <- RH_human[c(0, diff(RH_human$pos)) == 1e4, ]

# Combine RH_human without ramps and RH_human_start:
RH_human <- rbind(RH_human_start, RH_human)

# Transform chr1 ... chrY to the numbers 1 ... 24 by position in the label
# vector, instead of relying on the integer codes of a factor.
chrOrder <- paste0("chr", c(1:22, "X", "Y"))
RH_human$Chromosome <- as.numeric(match(RH_human$Chromosome, chrOrder))
# Any label outside chr1-22/X/Y would silently become NA and corrupt the
# coordinates computed below, so stop with a clear message instead.
stopifnot(!anyNA(RH_human$Chromosome))
chrOrder <- c(1:24)

# Sort:
RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]

# Compute chromosome size (largest pos seen on each chromosome):
gen_coord <- aggregate(pos ~ Chromosome, data = RH_human, FUN = max)
colnames(gen_coord)[2] <- "chr_size"
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]

# Use cumsum to make genome coordinates: each chromosome is offset by the
# summed sizes of the chromosomes before it. head(., -1) drops the last
# running total whatever the number of chromosomes present, and as.numeric()
# keeps the running total from overflowing integer range.
gen_coord$coord <- c(0, head(cumsum(as.numeric(gen_coord$chr_size)), -1))

# Merge genome coordinates with RH_human (joins on Chromosome) and re-sort,
# because merge() does not promise to keep the within-chromosome order.
RH_human <- merge(RH_human, gen_coord[, c("Chromosome", "coord")])
RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]

# Genome coordinate of a window = chromosome offset + position on chromosome.
RH_human$coord <- RH_human$pos + RH_human$coord

# Decided to get rid of chrY (cf below), because even though its reads should
# contribute to chrX, large segments of chrY are non-pseudoautosomal, so it
# artifactually decreases the genome median.
# Get rid of chrY, because no chrY seq in hamster genome.
RH_human <- RH_human[RH_human$Chromosome != 24, ]

# Keep the first four columns, coord (the last column) and the "_w0_d0" pools.
RH_human <- RH_human[, c(1:4, ncol(RH_human), grep("_w0_d0$", colnames(RH_human)))]







#----------------- Prepare hamster retain RH pools ---------------------

# Build RH_hamster: the hamster window table with ramp windows removed,
# ordered by chromosome and position, reduced to columns 2-5 plus the
# "_w0_d0" pool columns.
RH_hamster <- read.table(
  file = "RH_hamster_gseq.txt",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)

# Rows at the beginning of each chromosome (window from 0 to 1e6):
RH_hamster_start <- RH_hamster[RH_hamster$posS == 0 & RH_hamster$posE == 1e6, ]

# Remove ramp ups and ramp downs (though note that hamster has ramp downs,
# not ramp ups): keep rows whose pos is exactly 1e4 past the previous row.
RH_hamster <- RH_hamster[c(0, diff(RH_hamster$pos)) == 1e4, ]

# Put the chromosome-start rows back in front of the ramp-free rows:
RH_hamster <- rbind(RH_hamster_start, RH_hamster)


# # get rid of contigs with only one entry:
# RH_hamster <- RH_hamster[!(RH_hamster$Contig_ID %in% aggregate(pos ~ Contig_ID,
#           data = RH_hamster,
#           FUN = function(x){NROW(x)})[aggregate(pos ~ Contig_ID,
#           data = RH_hamster,
#           FUN = function(x){NROW(x)})$pos==1,"Contig_ID"]),]

# Sort by rank of the chromosome label in chrOrder, then by pos. Labels that
# are not in chrOrder rank as NA, so those rows go last and their Chromosome
# value is blanked to NA.
chrOrder <- paste0("chr", c(1:10, "X"))
RH_hamster <- RH_hamster[
  order(match(RH_hamster$Chromosome, chrOrder), RH_hamster$pos),
]
RH_hamster$Chromosome[!(RH_hamster$Chromosome %in% chrOrder)] <- NA



RH_hamster <- RH_hamster[, c(2:5, grep("_w0_d0$", colnames(RH_hamster)))]




# ------------- normalize human reads based on median hamster reads in each window ------------

# Factor of two, because the hamster genome is diploid whereas the human fragments are haploid.

# For every pool column (column 6 onwards) replace the human value by twice
# that value divided by the median of the same-named hamster column.
for (i in names(RH_human[, seq(6, ncol(RH_human))])) {
  RH_human[, i] <- (2 * RH_human[, i]) / median(RH_hamster[, i])
}




# use visual inspection of following:

# One scatter plot per chromosome: genome coordinate (x) against the row mean
# of the six "_w0_d0" pool columns (y), titled with the chromosome number.
# The chromosomes are covered by two loops (1-8, then 9-23).
for(i in c(1:8)) {plot(RH_human[RH_human$Chromosome==i,]$coord, rowMeans(RH_human[RH_human$Chromosome==i,c("RH1_w0_d0","RH2_w0_d0","RH3_w0_d0","RH4_w0_d0","RH5_w0_d0","RH6_w0_d0")]),ylim=c(0,0.05),cex=0.1,main=i)}
for(i in c(9:23)) {plot(RH_human[RH_human$Chromosome==i,]$coord, rowMeans(RH_human[RH_human$Chromosome==i,c("RH1_w0_d0","RH2_w0_d0","RH3_w0_d0","RH4_w0_d0","RH5_w0_d0","RH6_w0_d0")]),ylim=c(0,0.05),cex=0.1,main=i)}


# lowest retention on chr 19, exact same position as for TK1 retention below:

# Single-iteration loop: besides redrawing the chr19 plot it leaves i == 19 in
# the workspace, and the identify() call and the row lookup below depend on it.
for(i in c(19)) {plot(RH_human[RH_human$Chromosome==i,]$coord, rowMeans(RH_human[RH_human$Chromosome==i,c("RH1_w0_d0","RH2_w0_d0","RH3_w0_d0","RH4_w0_d0","RH5_w0_d0","RH6_w0_d0")]),ylim=c(0,0.05),cex=0.1,main=i)}

# identify() is interactive: it waits for points to be picked on the plot drawn
# just above and returns their indices within the chr19 subset. The index
# recorded below is the result of that manual selection.
identify(RH_human[RH_human$Chromosome==i,]$coord, rowMeans(RH_human[RH_human$Chromosome==i,c("RH1_w0_d0","RH2_w0_d0","RH3_w0_d0","RH4_w0_d0","RH5_w0_d0","RH6_w0_d0")]),ylim=c(0,0.2),cex=0.1,main=i)
# [1] 5404



# Show the row of the chr19 subset at the index picked with identify().
RH_human[RH_human$Chromosome==i,][5404,]
       # Chromosome     posS     posE      pos      coord   RH1_w0_d0   RH2_w0_d0  RH3_w0_d0   RH4_w0_d0  RH5_w0_d0   RH6_w0_d0
# 269053         19 54030000 55030000 54530000 2699840000 0.001585309 0.004854762 0.00624863 0.008362989 0.00331587 0.002283565


# Tag the pool columns (column 6 onwards) with a "_reads" suffix so they stay
# distinguishable from the TK1-based columns of the same pools.
names(RH_human)[seq(6, ncol(RH_human))] <- paste(
  names(RH_human)[seq(6, ncol(RH_human))], "reads",
  sep = "_"
)




#######################################################################################
# -------------- Lowest euchromatic retention freq using TK1 --------------------
#######################################################################################

#----------------- Prepare human retain RH pools ---------------------

# Build RH_human_TK1: a second copy of the human window table, prepared the
# same way as for the read-based retention (ramp windows removed, numeric
# chromosome ids with X = 23 and Y = 24, cumulative genome coordinate "coord",
# chrY dropped, only the "_w0_d0" pool columns kept).
RH_human_TK1 <- read.table(
  "RH_human_gseq.txt",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)

# Get rows at beginning of each chromosome (window from 0 to 1e6):
RH_human_TK1_start <- RH_human_TK1[
  RH_human_TK1$posS == 0 & RH_human_TK1$posE == 1e6,
]

# Get rid of ramp ups and ramp downs: keep only the rows whose pos is exactly
# 1e4 beyond the pos of the row before them.
RH_human_TK1 <- RH_human_TK1[c(0, diff(RH_human_TK1$pos)) == 1e4, ]

# Combine RH_human_TK1 without ramps and RH_human_TK1_start:
RH_human_TK1 <- rbind(RH_human_TK1_start, RH_human_TK1)

# Transform chr1 ... chrY to the numbers 1 ... 24 by position in the label
# vector, instead of relying on the integer codes of a factor.
chrOrder <- paste0("chr", c(1:22, "X", "Y"))
RH_human_TK1$Chromosome <- as.numeric(match(RH_human_TK1$Chromosome, chrOrder))
# Any label outside chr1-22/X/Y would silently become NA and corrupt the
# coordinates computed below, so stop with a clear message instead.
stopifnot(!anyNA(RH_human_TK1$Chromosome))
chrOrder <- c(1:24)

# Sort:
RH_human_TK1 <- RH_human_TK1[order(RH_human_TK1$Chromosome, RH_human_TK1$pos), ]

# Compute chromosome size (largest pos seen on each chromosome):
gen_coord <- aggregate(pos ~ Chromosome, data = RH_human_TK1, FUN = max)
colnames(gen_coord)[2] <- "chr_size"
gen_coord <- gen_coord[order(gen_coord$Chromosome), ]

# Use cumsum to make genome coordinates: each chromosome is offset by the
# summed sizes of the chromosomes before it. head(., -1) drops the last
# running total whatever the number of chromosomes present, and as.numeric()
# keeps the running total from overflowing integer range.
gen_coord$coord <- c(0, head(cumsum(as.numeric(gen_coord$chr_size)), -1))

# Merge genome coordinates with RH_human_TK1 (joins on Chromosome) and
# re-sort, because merge() does not promise to keep the within-chromosome order.
RH_human_TK1 <- merge(RH_human_TK1, gen_coord[, c("Chromosome", "coord")])
RH_human_TK1 <- RH_human_TK1[order(RH_human_TK1$Chromosome, RH_human_TK1$pos), ]

# Genome coordinate of a window = chromosome offset + position on chromosome.
RH_human_TK1$coord <- RH_human_TK1$pos + RH_human_TK1$coord

# Decided to get rid of chrY (cf below), because even though its reads should
# contribute to chrX, large segments of chrY are non-pseudoautosomal, so it
# artifactually decreases the genome median.
# Get rid of chrY, because no chrY seq in hamster genome.
RH_human_TK1 <- RH_human_TK1[RH_human_TK1$Chromosome != 24, ]

# Keep the first four columns, coord (the last column) and the "_w0_d0" pools.
RH_human_TK1 <- RH_human_TK1[
  , c(1:4, ncol(RH_human_TK1), grep("_w0_d0$", colnames(RH_human_TK1)))
]


# -------- find middle TK1 -------------

# Load the gene annotation table (tab-delimited with a header; column names
# are kept verbatim because some, e.g. "5utrS", are not syntactic R names).
gencode_gtf_ensembl_ucsc <- read.delim(
  "gencode_gtf_ensembl_ucsc_v31.txt",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE, check.names = FALSE
)


# Show the annotation row(s) for TK1:
gencode_gtf_ensembl_ucsc[gencode_gtf_ensembl_ucsc$geneSymbol=="TK1",]
     # Chromosome         gene_id           tx_id geneSymbol strand    geneS    geneE geneLength txLength cdsLength    5utrS    5utrE 5utrDiff    3utrS    3utrE 3utrDiff
# 49401      chr17 ENSG00000167900 ENST00000588734        TK1      - 78174091 78187233      13143     1681       804 78186995 78187233      239 78174121 78174758      638
      # exonCount      gene_type                                       gene_description
# 49401         6 protein_coding thymidine kinase 1 [Source:HGNC Symbol;Acc:HGNC:11830]


# Middle of TK1 = mean of gene start and gene end. Both values must be passed
# to mean() as ONE vector: in mean(geneS, geneE) the second argument is taken
# as `trim`, and a trim >= 0.5 returns the median of the first argument alone,
# i.e. geneS instead of the midpoint.
TK1_coord <- mean(unlist(
  gencode_gtf_ensembl_ucsc[
    gencode_gtf_ensembl_ucsc$geneSymbol == "TK1",
    c("geneS", "geneE")
  ]
))






# ----------- find peak TK1 reads ----------------

# For TK1 retention, the peak TK1 reads here are a vector with one value per sample, unlike seq retention, where the peak TK1 from the average across the six RH pools is used.


# For a more accurate search for the TK1 peak, restrict the search to within delta of TK1, because CEN and TEL become larger than TK1 in some samples
# Half-width of the search interval around TK1_coord.
delta_1 <- 1e6

# Windows on chromosome 17 whose pos lies within delta_1 of TK1_coord.
RH_human_TK1_subset <- RH_human_TK1[
  RH_human_TK1$Chromosome == 17 &
    RH_human_TK1$pos >= TK1_coord - delta_1 &
    RH_human_TK1$pos <= TK1_coord + delta_1,
]
# An empty subset would make max() return -Inf and silently turn every
# retention value computed below into garbage.
stopifnot(nrow(RH_human_TK1_subset) > 0)

# Peak value of every pool column (column 6 onwards) inside the interval.
TK1_max <- apply(RH_human_TK1_subset[, c(6:ncol(RH_human_TK1_subset))], 2, max)

# Genome coordinate at which each pool reaches that peak. The columns scanned
# must be the same pool columns as for TK1_max (6:ncol): starting at column 5
# would scan "coord" itself and stopping at ncol - 1 would skip the last pool.
TK1_max_coord <- apply(
  RH_human_TK1_subset[, c(6:ncol(RH_human_TK1_subset))], 2,
  FUN = function(x) {
    RH_human_TK1_subset[, "coord"][which.max(x)]
  }
)




# ------------ calculate retention assuming TK1 is 100% retention ----------------

# Divide every pool column by that pool's own TK1 peak.
for (i in names(TK1_max)) {
  RH_human_TK1[, i] <- RH_human_TK1[, i] / TK1_max[i]
}



# -------------------------------------------------------
# -------- Correct TK1 retention for revertants ---------
# -------- Decrease TK1 retention by revertant freq -----
# -------------------------------------------------------


# ~~~~~~~~~~~ Prepare mean_retent_TK1 ~~~~~~~~~~~~~~~~

# Per-clone revertant frequency = reverts / colonies, averaged per pool.
clone <- read.table(
  file = "clone.txt",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)

clone[["revert_freq"]] <- clone$reverts / clone$colonies

revert <- aggregate(revert_freq ~ Pool, FUN = mean, data = clone)

revert[["true_pos_freq"]] <- 1 - revert$revert_freq


# Scale the TK1-normalised columns RH1 ... RH6 by (1 - revertant frequency) of
# the matching pool. The six scaled vectors are bound column-wise in pool order
# and written back, position by position, into the columns named in
# names(TK1_max).
RH_human_TK1[, c(names(TK1_max))] <- do.call(
  cbind,
  lapply(1:6, function(pool) {
    RH_human_TK1[[paste0("RH", pool, "_w0_d0")]] *
      (1 - revert[revert$Pool == pool, "revert_freq"])
  })
)




# use visual inspection of following:

# One scatter plot per chromosome: genome coordinate (x) against the row mean
# of the six "_w0_d0" pool columns of RH_human_TK1 (y), titled with the
# chromosome number. The chromosomes are covered by two loops (1-8, then 9-23).
for(i in c(1:8)) {plot(RH_human_TK1[RH_human_TK1$Chromosome==i,]$coord, rowMeans(RH_human_TK1[RH_human_TK1$Chromosome==i,c("RH1_w0_d0","RH2_w0_d0","RH3_w0_d0","RH4_w0_d0","RH5_w0_d0","RH6_w0_d0")]),ylim=c(0,0.2),cex=0.1,main=i)}
for(i in c(9:23)) {plot(RH_human_TK1[RH_human_TK1$Chromosome==i,]$coord, rowMeans(RH_human_TK1[RH_human_TK1$Chromosome==i,c("RH1_w0_d0","RH2_w0_d0","RH3_w0_d0","RH4_w0_d0","RH5_w0_d0","RH6_w0_d0")]),ylim=c(0,0.2),cex=0.1,main=i)}


# lowest retention on chr 19, exact same position as for seq retention:

# Single-iteration loop: besides redrawing the chr19 plot it leaves i == 19 in
# the workspace, and the identify() call and the row lookup below depend on it.
for(i in c(19)) {plot(RH_human_TK1[RH_human_TK1$Chromosome==i,]$coord, rowMeans(RH_human_TK1[RH_human_TK1$Chromosome==i,c("RH1_w0_d0","RH2_w0_d0","RH3_w0_d0","RH4_w0_d0","RH5_w0_d0","RH6_w0_d0")]),ylim=c(0,0.2),cex=0.1,main=i)}

# identify() is interactive: it waits for points to be picked on the plot drawn
# just above and returns their indices within the chr19 subset. The index
# recorded below is the result of that manual selection.
identify(RH_human_TK1[RH_human_TK1$Chromosome==i,]$coord, rowMeans(RH_human_TK1[RH_human_TK1$Chromosome==i,c("RH1_w0_d0","RH2_w0_d0","RH3_w0_d0","RH4_w0_d0","RH5_w0_d0","RH6_w0_d0")]),ylim=c(0,0.2),cex=0.1,main=i)
# [1] 5404



# Show the row of the chr19 subset at the index picked with identify().
RH_human_TK1[RH_human_TK1$Chromosome==i,][5404,]
# Chromosome     posS     posE      pos      coord   RH1_w0_d0   RH2_w0_d0  RH3_w0_d0  RH4_w0_d0   RH5_w0_d0   RH6_w0_d0
# 269053         19 54030000 55030000 54530000 2699840000 0.009184781 0.009094917 0.01261818 0.01062062 0.008006541 0.004180627



# Tag the pool columns (column 6 onwards) with a "_TK1" suffix so they stay
# distinguishable from the read-based columns of the same pools.
names(RH_human_TK1)[seq(6, ncol(RH_human_TK1))] <- paste(
  names(RH_human_TK1)[seq(6, ncol(RH_human_TK1))], "TK1",
  sep = "_"
)







########################################################################################################
# --------- Combine read retent and TK1 retent dataframes, find lowest euchromatic retention -----------
########################################################################################################



# Shapes of the two tables before they are joined.
dim(RH_human)
# [1] 300814    11

dim(RH_human_TK1)
# [1] 300814    11


# merge() without `by` joins on every column name the two tables share.
RH_human_comb <- merge(x = RH_human, y = RH_human_TK1)

dim(RH_human_comb)
# [1] 300814      17


# Sort by chromosome (1-23) and then by pos. The chromosome goes through a
# factor for the ordering and is converted back to its numeric value (via the
# level labels, not the integer codes) afterwards.
chrOrder <- 1:23
RH_human_comb$Chromosome <- factor(RH_human_comb$Chromosome, levels = chrOrder)
RH_human_comb <- RH_human_comb[with(RH_human_comb, order(Chromosome, pos)), ]
RH_human_comb$Chromosome <-
  as.numeric(levels(RH_human_comb$Chromosome))[RH_human_comb$Chromosome]
# Renumber the rows 1..n in the sorted order.
row.names(RH_human_comb) <- NULL



head(RH_human_comb)
  # Chromosome  posS    posE    pos  coord RH1_w0_d0_reads RH2_w0_d0_reads RH3_w0_d0_reads RH4_w0_d0_reads RH5_w0_d0_reads RH6_w0_d0_reads RH1_w0_d0_TK1
# 1          1     0 1000000 500000 500000     0.010568730      0.03107047      0.03420303      0.05320285      0.01843624      0.01705957    0.06123187
# 2          1 10000 1010000 510000 510000     0.010568730      0.03107047      0.03431265      0.05355872      0.01856887      0.01705957    0.06123187
# 3          1 20000 1020000 520000 520000     0.009908184      0.03042317      0.03343565      0.05249110      0.01790570      0.01558197    0.05740488
# 4          1 30000 1030000 530000 530000     0.010040293      0.03042317      0.03332602      0.05231317      0.01777306      0.01558197    0.05817028
# 5          1 40000 1040000 540000 540000     0.010172402      0.03058500      0.03343565      0.05177936      0.01777306      0.01531332    0.05893567
# 6          1 50000 1050000 550000 550000     0.010304512      0.03042317      0.03343565      0.05106762      0.01777306      0.01464168    0.05970107
  # RH2_w0_d0_TK1 RH3_w0_d0_TK1 RH4_w0_d0_TK1 RH5_w0_d0_TK1 RH6_w0_d0_TK1
# 1    0.05820747    0.06906795    0.06756521    0.04451637    0.03123174
# 2    0.05820747    0.06928932    0.06801715    0.04483663    0.03123174
# 3    0.05699481    0.06751835    0.06666133    0.04323532    0.02852663
# 4    0.05699481    0.06729698    0.06643536    0.04291506    0.02852663
# 5    0.05729798    0.06751835    0.06575744    0.04291506    0.02803479
# 6    0.05699481    0.06751835    0.06485356    0.04291506    0.02680520





# Results very close to 'median_retent_seq'  and 'TK1_median_retent' cols in 'retent' dataframe in 'graph_Human_retent_2.R', reassuring. Very slight differences (~1%) probably because used overlapping windows here, but non-overlapping windows in 'graph_Human_retent_2.R'.

# Median of every retention column (column 6 onwards), one value per column.
vapply(RH_human_comb[seq(6, ncol(RH_human_comb))], median, numeric(1))
# RH1_w0_d0_reads RH2_w0_d0_reads RH3_w0_d0_reads RH4_w0_d0_reads RH5_w0_d0_reads RH6_w0_d0_reads   RH1_w0_d0_TK1   RH2_w0_d0_TK1   RH3_w0_d0_TK1   RH4_w0_d0_TK1 
     # 0.01004029      0.03139413      0.02685815      0.04644128      0.02042576      0.02082074      0.05817028      0.05881380      0.05423605      0.05897833 
  # RH5_w0_d0_TK1   RH6_w0_d0_TK1 
     # 0.04932029      0.03811748 


# For each of the six pools add a "<pool>_mean" column holding, per row, the
# mean of that pool's "_reads" and "_TK1" values.
for (pool_id in paste0("RH", 1:6, "_w0_d0")) {
  RH_human_comb[[paste0(pool_id, "_mean")]] <- rowMeans(
    RH_human_comb[, paste0(pool_id, c("_reads", "_TK1"))]
  )
}


# Standard error of the mean of x, ignoring NA values: the square root of the
# sample variance divided by the number of non-missing observations.
sem <- function(x) {
  n_obs <- sum(!is.na(x))
  sample_var <- var(x, na.rm = TRUE)
  sqrt(sample_var / n_obs)
}


# Row-wise mean and SEM across the six pools, for three families of columns:
# read-based ("_reads"), TK1-based ("_TK1") and their per-pool mean ("_mean").

# Read-based retention:
RH_human_comb$reads_mean_retent <- rowMeans(
  RH_human_comb[, paste0("RH", 1:6, "_w0_d0_reads")]
)
RH_human_comb$reads_sem_retent <- apply(
  RH_human_comb[, paste0("RH", 1:6, "_w0_d0_reads")], 1, sem
)


# TK1-based retention:
RH_human_comb$TK1_mean_retent <- rowMeans(
  RH_human_comb[, paste0("RH", 1:6, "_w0_d0_TK1")]
)
RH_human_comb$TK1_sem_retent <- apply(
  RH_human_comb[, paste0("RH", 1:6, "_w0_d0_TK1")], 1, sem
)


# Mean of read-based and TK1-based retention:
RH_human_comb$mean_retent <- rowMeans(
  RH_human_comb[, paste0("RH", 1:6, "_w0_d0_mean")]
)
RH_human_comb$sem_retent <- apply(
  RH_human_comb[, paste0("RH", 1:6, "_w0_d0_mean")], 1, sem
)


head(RH_human_comb)
  # Chromosome  posS    posE    pos  coord RH1_w0_d0_reads RH2_w0_d0_reads RH3_w0_d0_reads RH4_w0_d0_reads RH5_w0_d0_reads RH6_w0_d0_reads RH1_w0_d0_TK1 RH2_w0_d0_TK1
# 1          1     0 1000000 500000 500000     0.010568730      0.03107047      0.03420303      0.05320285      0.01843624      0.01705957    0.06123187    0.05820747
# 2          1 10000 1010000 510000 510000     0.010568730      0.03107047      0.03431265      0.05355872      0.01856887      0.01705957    0.06123187    0.05820747
# 3          1 20000 1020000 520000 520000     0.009908184      0.03042317      0.03343565      0.05249110      0.01790570      0.01558197    0.05740488    0.05699481
# 4          1 30000 1030000 530000 530000     0.010040293      0.03042317      0.03332602      0.05231317      0.01777306      0.01558197    0.05817028    0.05699481
# 5          1 40000 1040000 540000 540000     0.010172402      0.03058500      0.03343565      0.05177936      0.01777306      0.01531332    0.05893567    0.05729798
# 6          1 50000 1050000 550000 550000     0.010304512      0.03042317      0.03343565      0.05106762      0.01777306      0.01464168    0.05970107    0.05699481
  # RH3_w0_d0_TK1 RH4_w0_d0_TK1 RH5_w0_d0_TK1 RH6_w0_d0_TK1 RH1_w0_d0_mean RH2_w0_d0_mean RH3_w0_d0_mean RH4_w0_d0_mean RH5_w0_d0_mean RH6_w0_d0_mean reads_mean_retent
# 1    0.06906795    0.06756521    0.04451637    0.03123174     0.03590030     0.04463897     0.05163549     0.06038403     0.03147630     0.02414566        0.02742348
# 2    0.06928932    0.06801715    0.04483663    0.03123174     0.03590030     0.04463897     0.05180099     0.06078793     0.03170275     0.02414566        0.02752317
# 3    0.06751835    0.06666133    0.04323532    0.02852663     0.03365653     0.04370899     0.05047700     0.05957621     0.03057051     0.02205430        0.02662430
# 4    0.06729698    0.06643536    0.04291506    0.02852663     0.03410528     0.04370899     0.05031150     0.05937426     0.03034406     0.02205430        0.02657628
# 5    0.06751835    0.06575744    0.04291506    0.02803479     0.03455404     0.04394149     0.05047700     0.05876840     0.03034406     0.02167406        0.02650980
# 6    0.06751835    0.06485356    0.04291506    0.02680520     0.03500279     0.04370899     0.05047700     0.05796059     0.03034406     0.02072344        0.02627428
  # reads_sem_retent TK1_mean_retent TK1_sem_retent mean_retent  sem_retent
# 1      0.006314276      0.05530343    0.005997777  0.04136346 0.005483552
# 2      0.006360423      0.05546903    0.006026771  0.04149610 0.005527216
# 3      0.006343745      0.05339022    0.006126872  0.04000726 0.005650826
# 4      0.006310191      0.05338985    0.006130096  0.03998307 0.005613902
# 5      0.006249607      0.05340988    0.006194619  0.03995984 0.005586537
# 6      0.006180155      0.05313134    0.006324351  0.03970281 0.005583822





# -------- mean of reads and TK1 ---------------



# use visual inspection of following to find lowest euchromatic retention:

# One scatter plot per chromosome: genome coordinate (x) against mean_retent
# (y), titled with the chromosome number. The chromosomes are covered by two
# loops (1-8, then 9-23).
for(i in c(1:8)) {plot(RH_human_comb[RH_human_comb$Chromosome==i,]$coord, RH_human_comb[RH_human_comb$Chromosome==i,]$mean_retent,ylim=c(0,0.1),cex=0.1,main=i)}
for(i in c(9:23)) {plot(RH_human_comb[RH_human_comb$Chromosome==i,]$coord, RH_human_comb[RH_human_comb$Chromosome==i,]$mean_retent,ylim=c(0,0.1),cex=0.1,main=i)}


# lowest retention on chr 19, exact same position as for seq and TK1 retention:

# Single-iteration loop: besides redrawing the chr19 plot it leaves i == 19 in
# the workspace; identify(), the row lookup and the t-tests further down all
# depend on it.
for(i in c(19)) {plot(RH_human_comb[RH_human_comb$Chromosome==i,]$coord, RH_human_comb[RH_human_comb$Chromosome==i,]$mean_retent,ylim=c(0,0.1),cex=0.1,main=i)}

# identify() is interactive: it waits for points to be picked on the plot drawn
# just above and returns their indices within the chr19 subset. The index
# recorded below is the result of that manual selection.
identify(RH_human_comb[RH_human_comb$Chromosome==i,]$coord, RH_human_comb[RH_human_comb$Chromosome==i,]$mean_retent,ylim=c(0,0.1),cex=0.1,main=i)
# [1] 5404



# Show the row of the chr19 subset at the index picked with identify().
RH_human_comb[RH_human_comb$Chromosome==i,][5404,]
       # Chromosome     posS     posE      pos      coord RH1_w0_d0_reads RH2_w0_d0_reads RH3_w0_d0_reads RH4_w0_d0_reads RH5_w0_d0_reads RH6_w0_d0_reads RH1_w0_d0_TK1
# 269053         19 54030000 55030000 54530000 2699840000     0.001585309     0.004854762      0.00624863     0.008362989      0.00331587     0.002283565   0.009184781
       # RH2_w0_d0_TK1 RH3_w0_d0_TK1 RH4_w0_d0_TK1 RH5_w0_d0_TK1 RH6_w0_d0_TK1 RH1_w0_d0_mean RH2_w0_d0_mean RH3_w0_d0_mean RH4_w0_d0_mean RH5_w0_d0_mean RH6_w0_d0_mean
# 269053   0.009094917    0.01261818    0.01062062   0.008006541   0.004180627    0.005385045    0.006974839    0.009433406    0.009491804    0.005661206    0.003232096
       # reads_mean_retent reads_sem_retent TK1_mean_retent TK1_sem_retent mean_retent  sem_retent
# 269053       0.004441854      0.001047157     0.008950944    0.001154118 0.006696399 0.001002924 <<<<<<<<<<<<<<<< use in paper



# test if lowest euchromatic retention frequency is > 0.
# One sample t test apt here, even though less conservative, because testing difference from theoretical value of zero.

# One-sample t-test against mu = 0 on the six per-pool "_mean" values of row
# 5404 of the chromosome-i subset (i is still 19 from the loop above). The
# data argument is a one-row data frame holding the six values.
t.test(RH_human_comb[RH_human_comb$Chromosome==i,][5404,c("RH1_w0_d0_mean", "RH2_w0_d0_mean", "RH3_w0_d0_mean", "RH4_w0_d0_mean", "RH5_w0_d0_mean", "RH6_w0_d0_mean")],mu=0)

	# One Sample t-test <<<<<<<<<<<<< use in paper

# data:  RH_human_comb[RH_human_comb$Chromosome == i, ][5404, c("RH1_w0_d0_mean",     "RH2_w0_d0_mean", "RH3_w0_d0_mean", "RH4_w0_d0_mean", "RH5_w0_d0_mean",     "RH6_w0_d0_mean")]
# t = 6.6769, df = 5, p-value = 0.001139
# alternative hypothesis: true mean is not equal to 0
# 95 percent confidence interval:
 # 0.004118301 0.009274497
# sample estimates:
  # mean of x 
# 0.006696399 


# In addition to being inappropriate, pooled variance test has insufficient power to show difference from zero. For example:


# Same six values, but 0 is passed as the second positional argument, so it is
# taken as a second sample (y) rather than as mu; with var.equal=TRUE this is
# the pooled-variance two-sample test shown in the output below.
t.test(RH_human_comb[RH_human_comb$Chromosome==i,][5404,c("RH1_w0_d0_mean", "RH2_w0_d0_mean", "RH3_w0_d0_mean", "RH4_w0_d0_mean", "RH5_w0_d0_mean", "RH6_w0_d0_mean")],0,var.equal=TRUE)

	# Two Sample t-test

# data:  RH_human_comb[RH_human_comb$Chromosome == i, ][5404, c("RH1_w0_d0_mean",  and 0    "RH2_w0_d0_mean", "RH3_w0_d0_mean", "RH4_w0_d0_mean", "RH5_w0_d0_mean",  and 0    "RH6_w0_d0_mean")] and 0
# t = 2.5236, df = 5, p-value = 0.05294
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -0.0001246073  0.0135174059
# sample estimates:
  # mean of x   mean of y 
# 0.006696399 0.000000000 

# Same pooled-variance test after multiplying the six values by 1e6; the
# output recorded below has the same t, df and p-value as the unscaled test,
# only the confidence interval and the mean are rescaled.
t.test(1e6*RH_human_comb[RH_human_comb$Chromosome==i,][5404,c("RH1_w0_d0_mean", "RH2_w0_d0_mean", "RH3_w0_d0_mean", "RH4_w0_d0_mean", "RH5_w0_d0_mean", "RH6_w0_d0_mean")],0,var.equal=TRUE)

	# Two Sample t-test

# data:  1e+06 * RH_human_comb[RH_human_comb$Chromosome == i, ][5404,  and 0    c("RH1_w0_d0_mean", "RH2_w0_d0_mean", "RH3_w0_d0_mean", "RH4_w0_d0_mean",  and 0        "RH5_w0_d0_mean", "RH6_w0_d0_mean")] and 0
# t = 2.5236, df = 5, p-value = 0.05294
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
  # -124.6073 13517.4059
# sample estimates:
# mean of x mean of y 
 # 6696.399     0.000 





# ------------ mean of reads -------------------


# use visual inspection of following to find lowest euchromatic retention:

# One scatter plot per chromosome: genome coordinate (x) against
# reads_mean_retent (y), titled with the chromosome number. The chromosomes
# are covered by two loops (1-8, then 9-23).
for(i in c(1:8)) {plot(RH_human_comb[RH_human_comb$Chromosome==i,]$coord, RH_human_comb[RH_human_comb$Chromosome==i,]$reads_mean_retent,ylim=c(0,0.1),cex=0.1,main=i)}
for(i in c(9:23)) {plot(RH_human_comb[RH_human_comb$Chromosome==i,]$coord, RH_human_comb[RH_human_comb$Chromosome==i,]$reads_mean_retent,ylim=c(0,0.1),cex=0.1,main=i)}


# lowest retention on chr 19, exact same position as for seq and TK1 retention:

# Single-iteration loop: besides redrawing the chr19 plot it leaves i == 19 in
# the workspace; identify(), the row lookup and the t-test further down all
# depend on it.
for(i in c(19)) {plot(RH_human_comb[RH_human_comb$Chromosome==i,]$coord, RH_human_comb[RH_human_comb$Chromosome==i,]$reads_mean_retent,ylim=c(0,0.1),cex=0.1,main=i)}

# identify() is interactive: it waits for points to be picked on the plot drawn
# just above and returns their indices within the chr19 subset. The index
# recorded below is the result of that manual selection.
identify(RH_human_comb[RH_human_comb$Chromosome==i,]$coord, RH_human_comb[RH_human_comb$Chromosome==i,]$reads_mean_retent,ylim=c(0,0.1),cex=0.1,main=i)
# [1] 5404



# Show the row of the chr19 subset at the index picked with identify().
RH_human_comb[RH_human_comb$Chromosome==i,][5404,]
       # Chromosome     posS     posE      pos      coord RH1_w0_d0_reads RH2_w0_d0_reads RH3_w0_d0_reads RH4_w0_d0_reads RH5_w0_d0_reads RH6_w0_d0_reads RH1_w0_d0_TK1
# 269053         19 54030000 55030000 54530000 2699840000     0.001585309     0.004854762      0.00624863     0.008362989      0.00331587     0.002283565   0.009184781
       # RH2_w0_d0_TK1 RH3_w0_d0_TK1 RH4_w0_d0_TK1 RH5_w0_d0_TK1 RH6_w0_d0_TK1 RH1_w0_d0_mean RH2_w0_d0_mean RH3_w0_d0_mean RH4_w0_d0_mean RH5_w0_d0_mean RH6_w0_d0_mean
# 269053   0.009094917    0.01261818    0.01062062   0.008006541   0.004180627    0.005385045    0.006974839    0.009433406    0.009491804    0.005661206    0.003232096
       # reads_mean_retent reads_sem_retent TK1_mean_retent TK1_sem_retent mean_retent  sem_retent
# 269053       0.004441854      0.001047157     0.008950944    0.001154118 0.006696399 0.001002924 <<<<<<<<<<<<<<<< use in paper



# test if lowest euchromatic retention frequency is > 0.
# One sample t test apt here, even though less conservative, because testing difference from theoretical value of zero.

# One-sample t-test against mu = 0 on the six per-pool "_reads" values of row
# 5404 of the chromosome-i subset (i is still 19 from the loop above). The
# data argument is a one-row data frame holding the six values.
t.test(RH_human_comb[RH_human_comb$Chromosome==i,][5404,c("RH1_w0_d0_reads", "RH2_w0_d0_reads", "RH3_w0_d0_reads", "RH4_w0_d0_reads", "RH5_w0_d0_reads", "RH6_w0_d0_reads")],mu=0)

	# One Sample t-test 	 <<<<<<<<<<<<< use in paper

# data:  RH_human_comb[RH_human_comb$Chromosome == i, ][5404, c("RH1_w0_d0_reads",     "RH2_w0_d0_reads", "RH3_w0_d0_reads", "RH4_w0_d0_reads",     "RH5_w0_d0_reads", "RH6_w0_d0_reads")]
# t = 4.2418, df = 5, p-value = 0.008155
# alternative hypothesis: true mean is not equal to 0
# 95 percent confidence interval:
 # 0.001750051 0.007133657
# sample estimates:
  # mean of x 
# 0.004441854 

 


# ------------ mean of TK1 -------------------


# use visual inspection of following to find lowest euchromatic retention:

# One scatter plot per chromosome: genome coordinate (x) against
# TK1_mean_retent (y), titled with the chromosome number. The chromosomes are
# covered by two loops (1-8, then 9-23).
for(i in c(1:8)) {plot(RH_human_comb[RH_human_comb$Chromosome==i,]$coord, RH_human_comb[RH_human_comb$Chromosome==i,]$TK1_mean_retent,ylim=c(0,0.1),cex=0.1,main=i)}
for(i in c(9:23)) {plot(RH_human_comb[RH_human_comb$Chromosome==i,]$coord, RH_human_comb[RH_human_comb$Chromosome==i,]$TK1_mean_retent,ylim=c(0,0.1),cex=0.1,main=i)}


# lowest retention on chr 19, exact same position as for seq and TK1 retention:

# Single-iteration loop: besides redrawing the chr19 plot it leaves i == 19 in
# the workspace; identify(), the row lookup and the t-test further down all
# depend on it.
for(i in c(19)) {plot(RH_human_comb[RH_human_comb$Chromosome==i,]$coord, RH_human_comb[RH_human_comb$Chromosome==i,]$TK1_mean_retent,ylim=c(0,0.1),cex=0.1,main=i)}

# identify() is interactive: it waits for points to be picked on the plot drawn
# just above and returns their indices within the chr19 subset. The index
# recorded below is the result of that manual selection.
identify(RH_human_comb[RH_human_comb$Chromosome==i,]$coord, RH_human_comb[RH_human_comb$Chromosome==i,]$TK1_mean_retent,ylim=c(0,0.1),cex=0.1,main=i)
# [1] 5404



# Show the row of the chr19 subset at the index picked with identify().
RH_human_comb[RH_human_comb$Chromosome==i,][5404,]
       # Chromosome     posS     posE      pos      coord RH1_w0_d0_reads RH2_w0_d0_reads RH3_w0_d0_reads RH4_w0_d0_reads RH5_w0_d0_reads RH6_w0_d0_reads RH1_w0_d0_TK1
# 269053         19 54030000 55030000 54530000 2699840000     0.001585309     0.004854762      0.00624863     0.008362989      0.00331587     0.002283565   0.009184781
       # RH2_w0_d0_TK1 RH3_w0_d0_TK1 RH4_w0_d0_TK1 RH5_w0_d0_TK1 RH6_w0_d0_TK1 RH1_w0_d0_mean RH2_w0_d0_mean RH3_w0_d0_mean RH4_w0_d0_mean RH5_w0_d0_mean RH6_w0_d0_mean
# 269053   0.009094917    0.01261818    0.01062062   0.008006541   0.004180627    0.005385045    0.006974839    0.009433406    0.009491804    0.005661206    0.003232096
       # reads_mean_retent reads_sem_retent TK1_mean_retent TK1_sem_retent mean_retent  sem_retent
# 269053       0.004441854      0.001047157     0.008950944    0.001154118 0.006696399 0.001002924 <<<<<<<<<<<<<<<< use in paper



# test if lowest euchromatic retention frequency is > 0.
# One sample t test apt here, even though less conservative, because testing difference from theoretical value of zero.

# One-sample t-test against mu = 0 on the six per-pool "_TK1" values of row
# 5404 of the chromosome-i subset (i is still 19 from the loop above). The
# data argument is a one-row data frame holding the six values.
t.test(RH_human_comb[RH_human_comb$Chromosome==i,][5404,c("RH1_w0_d0_TK1", "RH2_w0_d0_TK1", "RH3_w0_d0_TK1", "RH4_w0_d0_TK1", "RH5_w0_d0_TK1", "RH6_w0_d0_TK1")],mu=0)

	# One Sample t-test <<<<<<<<<<<<< use in paper

# data:  RH_human_comb[RH_human_comb$Chromosome == i, ][5404, c("RH1_w0_d0_TK1",     "RH2_w0_d0_TK1", "RH3_w0_d0_TK1", "RH4_w0_d0_TK1", "RH5_w0_d0_TK1",     "RH6_w0_d0_TK1")]
# t = 7.7557, df = 5, p-value = 0.0005699
# alternative hypothesis: true mean is not equal to 0
# 95 percent confidence interval:
 # 0.005984189 0.011917700
# sample estimates:
  # mean of x 
# 0.008950944  




















