# RH_human_reads.txt saved from "RH_pool against_hg38_fixed-11_24_18.xlsx"


# Read the tab-delimited export (first row holds the column names); text
# columns stay as character vectors instead of being converted to factors.
RH_human <- read.table(
  file = "RH_human_reads.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Size of the table as loaded (rows, columns).
dim(RH_human)
# [1] 323560    118

# Preview the top-left 10 x 10 corner.
RH_human[seq_len(10), seq_len(10)]
   # Chromosome Start    End RH1_w0_0 RH1_w1_0 RH1_w1_8 RH1_w1_25 RH1_w1_75 RH1_w2_0 RH1_w2_8
# 1        chr1     0  10000        0        0        0         0         0        0        0
# 2        chr1     0  20000        5        2        2         6         5        4        2
# 3        chr1     0  30000        6        5        4         8         6        5        3
# 4        chr1     0  40000        7        6        6         9         8        6        5
# 5        chr1     0  50000        7        8        7        10         8        7        7
# 6        chr1     0  60000        8       10        8        11         9        8        8
# 7        chr1     0  70000       10       11       11        12        10        9        9
# 8        chr1     0  80000       10       14       13        13        10        9        9
# 9        chr1     0  90000       10       16       14        13        10        9        9
# 10       chr1     0 100000       11       17       15        14        10       12       11

# Rename columns: insert a "d" in front of the trailing number of every column
# whose name ends in _0, _8, _25 or _75 (e.g. RH1_w1_25 -> RH1_w1_d25).
# The pattern is anchored at the end of the name, so at most one suffix is
# rewritten per column and all other names are left untouched.
colnames(RH_human) <- sub("_(0|8|25|75)$", "_d\\1", colnames(RH_human))

# Preview the renamed columns.
RH_human[seq_len(10), seq_len(10)]
   # Chromosome Start    End RH1_w0_d0 RH1_w1_d0 RH1_w1_d8 RH1_w1_d25 RH1_w1_d75 RH1_w2_d0 RH1_w2_d8
# 1        chr1     0  10000         0         0         0          0          0         0         0
# 2        chr1     0  20000         5         2         2          6          5         4         2
# 3        chr1     0  30000         6         5         4          8          6         5         3
# 4        chr1     0  40000         7         6         6          9          8         6         5
# 5        chr1     0  50000         7         8         7         10          8         7         7
# 6        chr1     0  60000         8        10         8         11          9         8         8
# 7        chr1     0  70000        10        11        11         12         10         9         9
# 8        chr1     0  80000        10        14        13         13         10         9         9
# 9        chr1     0  90000        10        16        14         13         10         9         9
# 10       chr1     0 100000        11        17        15         14         10        12        11


# Get rid of unanchored contigs: compare the two candidate filters before
# choosing one. Nothing in this section modifies RH_human.

# Option 1 - exclude mitochondria: keep chr<number>, chrX and chrY only.
unique(grep("^chr([[:digit:]]+|X|Y)$", RH_human$Chromosome, value = TRUE))
 # [1] "chr1"  "chr2"  "chr3"  "chr4"  "chr5"  "chr6"  "chr7"  "chr8"  "chr9"  "chr10" "chr11" "chr12" "chr13" "chr14" "chr15"
# [16] "chr16" "chr17" "chr18" "chr19" "chr20" "chr21" "chr22" "chrX"  "chrY" 

dim(RH_human[grep("^chr([[:digit:]]+|X|Y)$", RH_human$Chromosome), ])
# [1] 311213      118

# Option 2 - include mitochondria (NB: chrM occupies 2 rows of the table;
# per the original note only one of them is valid).
unique(grep("^chr([[:digit:]]+|X|Y|M)$", RH_human$Chromosome, value = TRUE))
 # [1] "chr1"  "chr2"  "chr3"  "chr4"  "chr5"  "chr6"  "chr7"  "chr8"  "chr9"  "chr10" "chr11" "chr12" "chr13" "chr14" "chr15"
# [16] "chr16" "chr17" "chr18" "chr19" "chr20" "chr21" "chr22" "chrX"  "chrY"  "chrM" 

dim(RH_human[grep("^chr([[:digit:]]+|X|Y|M)$", RH_human$Chromosome), ])
# [1] 311215      118

# Show the chrM rows themselves (first 10 columns).
RH_human[RH_human$Chromosome == "chrM", seq_len(10)]
       # Chromosome Start   End RH1_w0_d0 RH1_w1_d0 RH1_w1_d8 RH1_w1_d25 RH1_w1_d75 RH1_w2_d0 RH1_w2_d8
# 323024       chrM     0 16569        16        70        31         24         49         1         0
# 323025       chrM 10000 16569         7         1         2          0          2         0         0

# Decision: exclude mitochondria. Only rows whose chromosome is chr<number>,
# chrX or chrY are kept; chrM and every other sequence name are dropped.
RH_human <- RH_human[grep("^chr([[:digit:]]+|X|Y)$", RH_human$Chromosome), ]

# The 2nd and 3rd columns (Start, End) become posS and posE.
names(RH_human)[c(2, 3)] <- c("posS", "posE")

# Preview the filtered table.
RH_human[seq_len(10), seq_len(10)]
   # Chromosome posS   posE RH1_w0_d0 RH1_w1_d0 RH1_w1_d8 RH1_w1_d25 RH1_w1_d75 RH1_w2_d0 RH1_w2_d8
# 1        chr1    0  10000         0         0         0          0          0         0         0
# 2        chr1    0  20000         5         2         2          6          5         4         2
# 3        chr1    0  30000         6         5         4          8          6         5         3
# 4        chr1    0  40000         7         6         6          9          8         6         5
# 5        chr1    0  50000         7         8         7         10          8         7         7
# 6        chr1    0  60000         8        10         8         11          9         8         8
# 7        chr1    0  70000        10        11        11         12         10         9         9
# 8        chr1    0  80000        10        14        13         13         10         9         9
# 9        chr1    0  90000        10        16        14         13         10         9         9
# 10       chr1    0 100000        11        17        15         14         10        12        11

# Midpoint of each window (mean of posS and posE), rounded to a whole position.
# Stored as integer on purpose: round() returns a double, and write.table()
# serialises doubles in scientific notation whenever that is shorter
# (e.g. 100000 is written as "1e+05"), which tools expecting plain integer
# coordinates may not parse. Integer columns are always written in full.
# Human chromosome coordinates fit comfortably in R's 32-bit integers.
RH_human$pos <- as.integer(round(rowMeans(RH_human[, c("posS", "posE")])))

# Move the new pos column (currently last) to sit right after posE:
# Chromosome, posS, posE, pos, then all remaining columns in their old order.
RH_human <- RH_human[, c(1:3, ncol(RH_human), 4:(ncol(RH_human) - 1))]

# Sort by chromosome in karyotype order (chr1..chr22, chrX, chrY) and then by
# midpoint. The temporary factor supplies that ordering; a plain character
# sort would put chr10 before chr2.
chrOrder <- paste0("chr", c(1:22, "X", "Y"))
RH_human$Chromosome <- factor(RH_human$Chromosome, levels = chrOrder)
RH_human <- RH_human[order(RH_human$Chromosome, RH_human$pos), ]
RH_human$Chromosome <- as.character(RH_human$Chromosome)

# One more column than before (pos); row count unchanged.
dim(RH_human)
# [1] 311213    119

# Preview the final layout.
RH_human[seq_len(10), seq_len(10)]
   # Chromosome posS   posE   pos RH1_w0_d0 RH1_w1_d0 RH1_w1_d8 RH1_w1_d25 RH1_w1_d75 RH1_w2_d0
# 1        chr1    0  10000  5000         0         0         0          0          0         0
# 2        chr1    0  20000 10000         5         2         2          6          5         4
# 3        chr1    0  30000 15000         6         5         4          8          6         5
# 4        chr1    0  40000 20000         7         6         6          9          8         6
# 5        chr1    0  50000 25000         7         8         7         10          8         7
# 6        chr1    0  60000 30000         8        10         8         11          9         8
# 7        chr1    0  70000 35000        10        11        11         12         10         9
# 8        chr1    0  80000 40000        10        14        13         13         10         9
# 9        chr1    0  90000 45000        10        16        14         13         10         9
# 10       chr1    0 100000 50000        11        17        15         14         10        12

# Export the filtered, sorted table as tab-separated text with a header row,
# without quoting strings and without a row-name column.
write.table(
  RH_human,
  file = "RH_human_gseq.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)































