#!/usr/bin/env Rscript

# Infer centromere positions from the reference genome,
# based on BLAST output of the core centromere repeat region against the assembly.

# Estimate one centromere position per linkage group from a tabular BLASTN
# alignment of the centromeric repeat against the genome assembly.
#
# Input : headerless, tab-separated BLAST table; lines starting with "#" are
#         skipped. Only columns V2, V9 and V10 are used.
#         NOTE(review): presumably V2 is the subject sequence id and V9/V10
#         are the subject start/end coordinates (standard tabular BLAST
#         layout) -- confirm against the command that produced the file.
# Output: "EstimatedCentromerPositions.txt" (tab-separated) in the data
#         directory, holding the median hit midpoint per "NC_" sequence.
#
# The data directory may be passed as the first command-line argument; it
# defaults to the location this script has always used. Paths are built
# explicitly instead of calling setwd(), so the caller's working directory is
# left untouched.
cli_args <- commandArgs(trailingOnly = TRUE)
input_dir <- if (length(cli_args) >= 1L) {
  cli_args[[1L]]
} else {
  "~/myproject/inputdata"
}
blast_file <- file.path(
  input_dir,
  "BlastOuptutRepeatOrenilfromMelters2013AgainstCurrentGenomeRelease_blastn-Alignment.txt"
)
output_file <- file.path(input_dir, "EstimatedCentromerPositions.txt")

BlastNInfereedCentroSeq <- read.delim(
  blast_file,
  header = FALSE,
  comment.char = "#"
)

# Keep only hits whose sequence id contains "NC_" (the assembled linkage
# groups); everything else is discarded.
is_linkage_group <- grepl("NC_", BlastNInfereedCentroSeq$V2, fixed = TRUE)
BlastN_LGonly <- BlastNInfereedCentroSeq[is_linkage_group, ]

if (nrow(BlastN_LGonly) == 0L) {
  stop(
    "No alignments against 'NC_' sequences found in ", blast_file,
    call. = FALSE
  )
}

# factor() works whether V2 was read as character (R >= 4.0) or as factor
# (older R), and drops the levels of the sequences filtered out above.
# droplevels() has no method for character vectors and fails on R >= 4.0.
BlastN_LGonly$V2 <- factor(BlastN_LGonly$V2)

# Diagnostic plot: distribution of V9 per linkage group.
boxplot(V9 ~ V2, data = BlastN_LGonly, las = 2)

# Midpoint of each hit. Written as start + half the span so that no integer
# sum of two large coordinates is ever formed.
BlastN_LGonly$midpoint <- BlastN_LGonly$V9 +
  ((BlastN_LGonly$V10 - BlastN_LGonly$V9) / 2)

# boxplot() returns its summary statistics invisibly; row 3 of `stats` is the
# per-group median and `names` holds the matching group labels.
blast <- boxplot(midpoint ~ V2, data = BlastN_LGonly, las = 2)

# One row per linkage group: median midpoint plus chromosome name.
median_blast <- data.frame(
  median_blast = blast$stats[3, ],
  chrom = blast$names,
  stringsAsFactors = FALSE
)

# Row names are written on purpose (write.table default) so the layout of the
# output file stays exactly as downstream consumers already expect it.
write.table(median_blast, file = output_file, quote = FALSE, sep = "\t")














