# Make table with left and right limits of centromeres
# Get hg38_centromere.txt from the UCSC Table Browser:
# select "All tracks", "Centromeres" to create hg38_centromere.txt

# Load the centromere table: tab-separated with a header row, several rows
# (segments) per chromosome.
# comment.char = "" stops read.table() from treating a header line that starts
# with "#" (e.g. "#bin") as a comment, which would silently promote the first
# data row to column names. With such a header the first column is read as
# "X.bin"; only chrom, chromStart and chromEnd are used downstream.
cen <- read.table(
  "hg38_centromere.txt",
  header = TRUE,
  sep = "\t",
  comment.char = "",
  stringsAsFactors = FALSE
)

# Fail early with a clear message if the columns used downstream are absent.
if (!all(c("chrom", "chromStart", "chromEnd") %in% colnames(cen))) {
  stop(
    "hg38_centromere.txt must contain columns chrom, chromStart and chromEnd",
    call. = FALSE
  )
}

# Recorded output from the original run, kept for reference.
dim(cen)
# [1] 109   5

head(cen)
   # bin chrom chromStart  chromEnd       name
# 1   23  chr1  122503247 124785432 GJ212202.1
# 2  189  chr1  122026459 122224535 GJ211836.1
# 3  189  chr1  122224635 122503147 GJ211837.1
# 4  192  chr1  124849229 124932724 GJ211857.1
# 5 1537  chr1  124785532 124849129 GJ211855.1
# 6    2  chr2   92188145  94090557 GJ211860.1

# Collapse the per-segment rows to one row per chromosome: the smallest start
# and the largest end give the outer limits of the centromere region.
cen_min <- aggregate(chromStart ~ chrom, data = cen, FUN = min)
cen_max <- aggregate(chromEnd ~ chrom, data = cen, FUN = max)

# Join the two summaries on their shared "chrom" column and rename the result.
cen_limit <- merge(cen_min, cen_max, by = "chrom")
names(cen_limit) <- c("Chromosome", "posS", "posE")

# Midpoint between the left and right limits, rounded to a whole position.
cen_limit$pos <- round(rowMeans(cen_limit[c("posS", "posE")]))



# Sort rows in karyotype order (chr1..chr22, chrX, chrY) instead of the
# alphabetical order of the chromosome names.
chrOrder <- c(paste0("chr", 1:22), "chrX", "chrY")

# Keep Chromosome as character and sort on each name's position in chrOrder.
# Converting through factor(levels = chrOrder) would silently turn any name
# missing from chrOrder into NA; match() leaves such rows intact and order()
# places them last.
cen_limit$Chromosome <- as.character(cen_limit$Chromosome)
cen_limit <- cen_limit[order(match(cen_limit$Chromosome, chrOrder)), ]


# Sanity check on the final table; the recorded run gave 24 rows and 4 columns.
dim(cen_limit)
# [1] 24  4

# Recorded output from the original run, kept for reference. The row names
# are the row indices from before the table was sorted.
cen_limit
   # Chromosome      posS      posE       pos
# 1        chr1 122026459 124932724 123479592
# 12       chr2  92188145  94090557  93139351
# 16       chr3  90772458  93655574  92214016
# 17       chr4  49712061  51743951  50728006
# 18       chr5  46485900  50059807  48272854
# 19       chr6  58553888  59829934  59191911
# 20       chr7  58169653  61528020  59848836
# 21       chr8  44033744  45877265  44955504
# 22       chr9  43389635  45518558  44454096
# 2       chr10  39686682  41593521  40640102
# 3       chr11  51078348  54425074  52751711
# 4       chr12  34769407  37185252  35977330
# 5       chr13  16000000  18051248  17025624
# 6       chr14  16000000  18173523  17086762
# 7       chr15  17083673  19725254  18404464
# 8       chr16  36311158  38265669  37288414
# 9       chr17  22813679  26616164  24714922
# 10      chr18  15460899  20861206  18161052
# 11      chr19  24498980  27190874  25844927
# 13      chr20  26436232  30038348  28237290
# 14      chr21  10864560  12915808  11890184
# 15      chr22  12954788  15054318  14004553
# 23       chrX  58605579  62412542  60509060
# 24       chrY  10316944  10544039  10430492


# Save the table as tab-separated text: header row kept, no row names, no
# quoting of strings.
write.table(
  cen_limit,
  file = "hg38_centromere_limits.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)



































































