# HEK293_reads.txt saved from "HEK_A23_to_hg38_bed.xlsx"

# Load the tab-delimited table of read counts exported from the spreadsheet;
# keep text columns as character vectors rather than factors.
HEK <- read.table(
  file = "HEK293_reads.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# Snapshot of the raw table as loaded (7 columns)
head(HEK)
  # Chromosome Start   End HEK293_vs_hg38 A23_vs_hg38    HEK293_vs_hg38_not_picr    A23_vs_hg38_not_picr
# 1       chr1     0 10000              0           2                          0                       3
# 2       chr1     0 20000            191         306                        138                     284
# 3       chr1     0 30000            342         306                        275                     284
# 4       chr1     0 40000            518         306                        425                     284
# 5       chr1     0 50000            737         306                        634                     284
# 6       chr1     0 60000            936         306                        833                     284

# Keep only the first four columns: Chromosome, Start, End and the
# HEK293_vs_hg38 counts. The A23 and *_not_picr columns are dropped.
HEK <- HEK[, seq_len(4)]

# Snapshot after dropping the unused columns
head(HEK)
  # Chromosome Start   End HEK293_vs_hg38
# 1       chr1     0 10000              0
# 2       chr1     0 20000            191
# 3       chr1     0 30000            342
# 4       chr1     0 40000            518
# 5       chr1     0 50000            737
# 6       chr1     0 60000            936

# Get rid of unanchored contigs.
# Two candidate filters on the chromosome name are compared below; both keep
# only names of the form chr<digits>, chrX, chrY (and optionally chrM).

# Candidate 1: exclude mitochondria
unique(HEK$Chromosome[grepl("^chr([[:digit:]]+|X|Y)$", HEK$Chromosome)])
 # [1] "chr1"  "chr2"  "chr3"  "chr4"  "chr5"  "chr6"  "chr7"  "chr8"  "chr9"  "chr10" "chr11" "chr12" "chr13" "chr14" "chr15"
# [16] "chr16" "chr17" "chr18" "chr19" "chr20" "chr21" "chr22" "chrX"  "chrY" 

# Number of rows (and columns) that survive candidate 1
dim(HEK[grep("^chr([[:digit:]]+|X|Y)$", HEK$Chromosome), ])
# [1] 311213      4

# Candidate 2: include mitochondria
# (NB: chrM occupies 2 rows of the table, only one of which is valid; see the
# chrM rows printed further down)
unique(HEK$Chromosome[grepl("^chr([[:digit:]]+|X|Y|M)$", HEK$Chromosome)])
 # [1] "chr1"  "chr2"  "chr3"  "chr4"  "chr5"  "chr6"  "chr7"  "chr8"  "chr9"  "chr10" "chr11" "chr12" "chr13" "chr14" "chr15"
# [16] "chr16" "chr17" "chr18" "chr19" "chr20" "chr21" "chr22" "chrX"  "chrY"  "chrM" 

# Number of rows (and columns) that survive candidate 2: two more than above
dim(HEK[grep("^chr([[:digit:]]+|X|Y|M)$", HEK$Chromosome), ])
# [1] 311215      4

# The two mitochondrial rows themselves.
# NOTE(review): the count column is printed below as HEK293_against_hg38, while
# the rest of this script shows it as HEK293_vs_hg38 -- presumably this output
# was pasted from an earlier run; confirm.
HEK[HEK[["Chromosome"]] == "chrM", ]
       # Chromosome Start   End HEK293_against_hg38
# 323024       chrM     0 16569              153367
# 323025       chrM 10000 16569               66727
# Chose to exclude mitochondria: keep only rows whose chromosome name is
# chr<digits>, chrX or chrY. The anchored pattern also drops the unanchored
# contigs.
HEK <- HEK[grep("^chr([[:digit:]]+|X|Y)$", HEK$Chromosome), ]

# Give columns 2-4 (start, end, read count) short names
names(HEK)[2:4] <- c("posS", "posE", "reads")

# Snapshot after filtering and renaming
head(HEK)
  # Chromosome posS  posE reads
# 1       chr1    0 10000     0
# 2       chr1    0 20000   191
# 3       chr1    0 30000   342
# 4       chr1    0 40000   518
# 5       chr1    0 50000   737
# 6       chr1    0 60000   936

# Midpoint of each interval: mean of posS and posE, rounded to a whole number
HEK[["pos"]] <- round(rowMeans(HEK[c("posS", "posE")]))

# Reorder the columns so the midpoint sits between the coordinates and the
# read counts
HEK <- HEK[c("Chromosome", "posS", "posE", "pos", "reads")]

# Sort the rows into karyotype order (chr1 ... chr22, chrX, chrY) and, within
# each chromosome, by interval midpoint.
chrOrder <- paste0("chr", c(1:22, "X", "Y"))

# Rank each row by the position of its chromosome in chrOrder using match(),
# instead of converting the column to a factor and back to character. The
# factor round-trip silently rewrites any label that is missing from chrOrder
# to NA; match() never touches the Chromosome column (a label not found in
# chrOrder gets rank NA and is sorted last, with its name left intact).
HEK <- HEK[order(match(HEK$Chromosome, chrOrder), HEK$pos), ]

# Row count is unchanged by sorting; the table now has 5 columns
dim(HEK)
# [1] 311213      5

# Snapshot of the sorted table
head(HEK)
  # Chromosome posS  posE   pos reads
# 1       chr1    0 10000  5000     0
# 2       chr1    0 20000 10000   191
# 3       chr1    0 30000 15000   342
# 4       chr1    0 40000 20000   518
# 5       chr1    0 50000 25000   737
# 6       chr1    0 60000 30000   936

# Write the processed table as tab-separated text, without quoting strings and
# without a row-name column
write.table(
  HEK,
  file = "HEK293_gseq.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)























