#functions of the package EasyBed

#' remove 'chr'
#'
#' 'removeChr' removes the prefix chr from the first column of a BED file.
#' removeChr(x, y)
#' @param x path of the input file (read with \code{read.table}, no header)
#' @param y path of the output file (tab separated, unquoted)
#' @param header logical; when TRUE (the default, which is the historical
#'   behaviour of this function) the automatic column names (V1, V2, ...) are
#'   written as the first line of the output. Use FALSE to obtain a
#'   header-less BED file.
#' @return called for its side effect: writes to \code{y} a copy of \code{x}
#'   whose first column no longer starts with chr
#' @author Maude Strobino
#' @examples
#' \dontrun{
#' removeChr("inputBed.bed", "Outputbed.bed")
#' }
removeChr <- function(x, y, header = TRUE) {
        bed <- read.table(x, header = FALSE)
        # Only a leading "chr" is a prefix: the pattern is anchored so that a
        # "chr" occurring inside a name (e.g. "scaffold_chr1") is kept.
        bed[, 1] <- sub("^chr", "", as.character(bed[, 1]))
        write.table(bed, y, sep = "\t", col.names = header, quote = FALSE,
                    row.names = FALSE)
}

#' add 'chr'
#'
#' 'addChr' adds the prefix chr to the first column of a BED file.
#' addChr(x, y)
#' @param x path of the input file (read with \code{read.delim}, no header)
#' @param y path of the output file (tab separated, unquoted, no header)
#' @return called for its side effect: writes to \code{y} the first four
#'   columns of \code{x} (fewer if the input has fewer), the first column
#'   being prefixed with chr; MtDNA is written as chrM
#' @author Maude Strobino
#' @examples
#' \dontrun{
#' addChr("inputBed.bed", "Outputbed.bed")
#' }
addChr <- function(x, y) {
        bed <- read.delim(x, header = FALSE)

        chrom <- as.character(bed[, 1])
        # Names that already carry the prefix are left alone, otherwise a
        # second run (or an already prefixed input) would produce "chrchr...".
        needs_prefix <- !is.na(chrom) & !grepl("^chr", chrom)
        chrom[needs_prefix] <- paste0("chr", chrom[needs_prefix])
        chrom <- sub("chrMtDNA", "chrM", chrom)
        bed[, 1] <- chrom

        # Keep at most the first four columns; a 3-column BED file is
        # written as is instead of failing with "undefined columns selected".
        keep <- seq_len(min(4L, ncol(bed)))
        write.table(bed[, keep, drop = FALSE], y, sep = "\t",
                    col.names = FALSE, quote = FALSE, row.names = FALSE)
}

#' JSON to BED 0.1kb
#'
#' 'JtoB01' transforms a JSON file into a BED file with 100bp bin size, only
#' for c.elegans
#' JtoB01(x,y,z)
#' @param x is the path (directory) that contains the JSON file; the output
#'   is written in the same directory
#' @param y is the name of the JSON file
#' @param z is the output name
#' @return called for its side effect: writes a bedGraph-style file with
#'   100bp bin size, preceded by a UCSC track line
#' @author Maude Strobino
#' @examples
#' \dontrun{
#' JtoB01(".../path", "MyJson.json", "Mybed.bed")
#' }
JtoB01 <- function(x, y, z) {
        bin_size <- 100

        # JSON key -> chromosome length used to lay out the bins
        chrom_length <- c(I = 15072423, II = 15279345, III = 13783700,
                          IV = 17493793, V = 20924149, X = 17718866,
                          MtDNA = 13794)
        # JSON key -> chromosome name written in the output file
        chrom_label <- c(I = "chrI", II = "chrII", III = "chrIII",
                         IV = "chrIV", V = "chrV", X = "chrX",
                         MtDNA = "chrM")

        signal <- fromJSON(file = paste0(x, "/", y))

        # One data frame per chromosome; any other entry of the JSON file
        # (e.g. "metadata") is simply not used.
        per_chrom <- lapply(names(chrom_length), function(key) {
                values <- unname(unlist(signal[[key]]))
                n_bins <- floor((chrom_length[[key]] - 1) / bin_size) + 1
                if (length(values) != n_bins) {
                        stop("JtoB01: expected ", n_bins, " values for ",
                             chrom_label[[key]], " but found ",
                             length(values), call. = FALSE)
                }
                # Values delivered as text are really converted to numbers
                if (is.character(values) || is.factor(values)) {
                        values <- as.numeric(as.character(values))
                }
                # The last bin of every chromosome is discarded
                keep <- seq_len(n_bins - 1)
                bin_start <- (keep - 1) * bin_size + 1
                data.frame(chr = chrom_label[[key]],
                           start = bin_start,
                           end = bin_start + bin_size - 1,
                           value = values[keep],
                           stringsAsFactors = FALSE)
        })
        bed <- do.call(rbind, per_chrom)

        # Avoid scientific numbers (unread by UCSC). Note that format() turns
        # every column into character strings padded to a common width.
        bed <- format(bed, scientific = FALSE)

        # Add criteria for UCSC
        track <- data.frame(chr = 'track type=bedGraph  visibility=full  yLineOnOff=on   autoScale=on    yLineMark="0.0"  alwaysZero=on  graphType=bar   maxHeightPixels=128:75:11       windowingFunction=maximum       smoothingWindow=off', start = '', end = '', value = '', stringsAsFactors = FALSE)

        # Save this file that can be loaded on UCSC
        write.table(rbind(track, bed), paste0(x, "/", z), sep = "\t",
                    col.names = FALSE, row.names = FALSE, quote = FALSE)
}


#' JSON to BED 1kb
#'
#' 'JtoB1kb' transforms a JSON file into a BED file with 1kb bin size, only
#' for c.elegans
#' JtoB1kb(x,y,z)
#' @param x is the path (directory) that contains the JSON file; the output
#'   is written in the same directory
#' @param y is the name of the JSON file
#' @param z is the output name
#' @return called for its side effect: writes a bedGraph-style file with
#'   1Kb bin size, preceded by a UCSC track line
#' @author Maude Strobino
#' @examples
#' \dontrun{
#' JtoB1kb(".../path", "MyJson.json", "Mybed.bed")
#' }
JtoB1kb <- function(x, y, z) {
        bin_size <- 1000

        # JSON key -> chromosome length used to lay out the bins
        chrom_length <- c(I = 15072423, II = 15279345, III = 13783700,
                          IV = 17493793, V = 20924149, X = 17718866,
                          MtDNA = 13794)
        # JSON key -> chromosome name written in the output file
        chrom_label <- c(I = "chrI", II = "chrII", III = "chrIII",
                         IV = "chrIV", V = "chrV", X = "chrX",
                         MtDNA = "chrM")

        signal <- fromJSON(file = paste0(x, "/", y))

        # One data frame per chromosome; any other entry of the JSON file
        # (e.g. "metadata") is simply not used.
        per_chrom <- lapply(names(chrom_length), function(key) {
                values <- unname(unlist(signal[[key]]))
                n_bins <- floor((chrom_length[[key]] - 1) / bin_size) + 1
                if (length(values) != n_bins) {
                        stop("JtoB1kb: expected ", n_bins, " values for ",
                             chrom_label[[key]], " but found ",
                             length(values), call. = FALSE)
                }
                # Values delivered as text are really converted to numbers
                if (is.character(values) || is.factor(values)) {
                        values <- as.numeric(as.character(values))
                }
                # The last bin of every chromosome is discarded
                keep <- seq_len(n_bins - 1)
                bin_start <- (keep - 1) * bin_size + 1
                data.frame(chr = chrom_label[[key]],
                           start = bin_start,
                           end = bin_start + bin_size - 1,
                           value = values[keep],
                           stringsAsFactors = FALSE)
        })
        bed <- do.call(rbind, per_chrom)

        # Avoid scientific numbers (unread by UCSC). Note that format() turns
        # every column into character strings padded to a common width.
        bed <- format(bed, scientific = FALSE)

        # Add criteria for UCSC
        track <- data.frame(chr = 'track type=bedGraph  visibility=full  yLineOnOff=on   autoScale=on    yLineMark="0.0"  alwaysZero=on  graphType=bar   maxHeightPixels=128:75:11       windowingFunction=maximum       smoothingWindow=off', start = '', end = '', value = '', stringsAsFactors = FALSE)

        # Save this file that can be loaded on UCSC
        write.table(rbind(track, bed), paste0(x, "/", z), sep = "\t",
                    col.names = FALSE, row.names = FALSE, quote = FALSE)
}

#' JSON to BED 10kb
#'
#' 'JtoB10' transforms a JSON file into a BED file with 10kb bin size, only
#' for c.elegans
#' JtoB10(x,y,z)
#' @param x is the path (directory) that contains the JSON file; the output
#'   is written in the same directory
#' @param y is the name of the JSON file
#' @param z is the output name
#' @return called for its side effect: writes a bedGraph-style file with
#'   10Kb bin size, preceded by a UCSC track line
#' @author Maude Strobino
#' @examples
#' \dontrun{
#' JtoB10(".../path", "MyJson.json", "Mybed.bed")
#' }
JtoB10 <- function(x, y, z) {
        bin_size <- 10000

        # JSON key -> chromosome length used to lay out the bins
        chrom_length <- c(I = 15072423, II = 15279345, III = 13783700,
                          IV = 17493793, V = 20924149, X = 17718866,
                          MtDNA = 13794)
        # JSON key -> chromosome name written in the output file
        chrom_label <- c(I = "chrI", II = "chrII", III = "chrIII",
                         IV = "chrIV", V = "chrV", X = "chrX",
                         MtDNA = "chrM")

        signal <- fromJSON(file = paste0(x, "/", y))

        # One data frame per chromosome; any other entry of the JSON file
        # (e.g. "metadata") is simply not used.
        per_chrom <- lapply(names(chrom_length), function(key) {
                values <- unname(unlist(signal[[key]]))
                n_bins <- floor((chrom_length[[key]] - 1) / bin_size) + 1
                if (length(values) != n_bins) {
                        stop("JtoB10: expected ", n_bins, " values for ",
                             chrom_label[[key]], " but found ",
                             length(values), call. = FALSE)
                }
                # Values delivered as text are really converted to numbers
                if (is.character(values) || is.factor(values)) {
                        values <- as.numeric(as.character(values))
                }
                # The last bin of every chromosome is discarded
                keep <- seq_len(n_bins - 1)
                bin_start <- (keep - 1) * bin_size + 1
                data.frame(chr = chrom_label[[key]],
                           start = bin_start,
                           end = bin_start + bin_size - 1,
                           value = values[keep],
                           stringsAsFactors = FALSE)
        })
        bed <- do.call(rbind, per_chrom)

        # Avoid scientific numbers (unread by UCSC). Note that format() turns
        # every column into character strings padded to a common width.
        bed <- format(bed, scientific = FALSE)

        # Add criteria for UCSC
        track <- data.frame(chr = 'track type=bedGraph  visibility=full  yLineOnOff=on   autoScale=on    yLineMark="0.0"  alwaysZero=on  graphType=bar   maxHeightPixels=128:75:11       windowingFunction=maximum       smoothingWindow=off', start = '', end = '', value = '', stringsAsFactors = FALSE)

        # Save this file that can be loaded on UCSC
        write.table(rbind(track, bed), paste0(x, "/", z), sep = "\t",
                    col.names = FALSE, row.names = FALSE, quote = FALSE)
}

#' Normalization by subtraction
#'
#' Normalization by subtracting input reads from beads reads. Both signals
#' are first scaled to 50 million mapped reads.
#' NormaDelta(beads, Input, Breads, Ireads, output)
#' @param beads Experiment file (read with \code{read.table}, no header);
#'   the signal is taken from its 4th column
#' @param Input Control file, same layout and same number of rows as
#'   \code{beads}
#' @param Breads mapped reads for experiment file
#' @param Ireads mapped reads for control file
#' @param output output name, without extension: the file written is
#'   \code{<output>_delta.bed}
#' @return called for its side effect: writes the normalized bed file,
#'   preceded by a UCSC track line
#' @author Maude Strobino
#' @examples
#' \dontrun{
#' NormaDelta("beads.bed", "input.bed", 1000, 200, "ChIP")
#' }
NormaDelta <- function(beads, Input, Breads, Ireads, output) {
        # Library size both samples are scaled to before being compared
        scale_to <- 50000000

        # Factor columns (read.table default before R 4.0) must go through
        # as.character, otherwise as.numeric returns the level codes.
        as_number <- function(v) {
                if (is.numeric(v)) v else as.numeric(as.character(v))
        }

        beads_tab <- read.table(beads, header = FALSE)
        input_tab <- read.table(Input, header = FALSE)

        # The first row of each file is discarded, as the function always
        # did. NOTE(review): presumably it is a header/track line - confirm.
        beads_tab <- beads_tab[-1, ]
        input_tab <- input_tab[-1, ]

        # A subtraction row by row only makes sense on identical bin sets;
        # without this check R would silently recycle the shorter table.
        if (nrow(beads_tab) != nrow(input_tab)) {
                stop("NormaDelta: beads and Input do not have the same ",
                     "number of rows (", nrow(beads_tab), " vs ",
                     nrow(input_tab), ")", call. = FALSE)
        }

        beads_norm <- (as_number(beads_tab$V4) / as.numeric(Breads)) * scale_to
        input_norm <- (as_number(input_tab$V4) / as.numeric(Ireads)) * scale_to

        Normalized <- as.data.frame(beads_tab)
        Normalized$V5 <- beads_norm - input_norm
        Normalized$V4 <- NULL
        names(Normalized)[1:4] <- c('chr', 'start', 'end', 'value')

        # Add criteria for UCSC
        track <- data.frame(chr = 'track type=bedGraph  visibility=full  yLineOnOff=on autoScale=on    yLineMark="0.0"  alwaysZero=on  graphType=bar maxHeightPixels=128:75:11       windowingFunction=maximum smoothingWindow=off', start = '', end = '', value = '', stringsAsFactors = FALSE)
        Normalized <- rbind(track, Normalized)

        # Save this file that can be loaded on UCSC
        write.table(Normalized, paste(output, 'delta.bed', sep = '_'),
                    sep = "\t", col.names = FALSE, row.names = FALSE,
                    quote = FALSE)
}

#' Normalization by division
#'
#' Normalization by dividing beads reads by input reads. Both signals are
#' first scaled to 50 million mapped reads and a pseudo-count of 1 is added
#' to each of them before the division.
#' NormaRatio(beads, Input, Breads, Ireads, output)
#' @param beads Experiment file (read with \code{read.table}, no header);
#'   the signal is taken from its 4th column
#' @param Input Control file, same layout and same number of rows as
#'   \code{beads}
#' @param Breads mapped reads for experiment file
#' @param Ireads mapped reads for control file
#' @param output output name, without extension: the files written are
#'   \code{<output>_ratio.bed} and \code{<output>_Logratio.bed}
#' @return called for its side effect: writes two bed files, one just
#'   divided and one with the log2 of the ratio, each preceded by a UCSC
#'   track line
#' @author Maude Strobino
#' @examples
#' \dontrun{
#' NormaRatio("beads.bed", "input.bed", 1000, 200, "ChIP")
#' }
NormaRatio <- function(beads, Input, Breads, Ireads, output) {
        # Library size both samples are scaled to before being compared
        scale_to <- 50000000

        # Factor columns (read.table default before R 4.0) must go through
        # as.character, otherwise as.numeric returns the level codes.
        as_number <- function(v) {
                if (is.numeric(v)) v else as.numeric(as.character(v))
        }

        beads_tab <- read.table(beads, header = FALSE)
        input_tab <- read.table(Input, header = FALSE)

        # The first row of each file is discarded, as the function always
        # did. NOTE(review): presumably it is a header/track line - confirm.
        beads_tab <- beads_tab[-1, ]
        input_tab <- input_tab[-1, ]

        # A division row by row only makes sense on identical bin sets;
        # without this check R would silently recycle the shorter table.
        if (nrow(beads_tab) != nrow(input_tab)) {
                stop("NormaRatio: beads and Input do not have the same ",
                     "number of rows (", nrow(beads_tab), " vs ",
                     nrow(input_tab), ")", call. = FALSE)
        }

        beads_norm <- (as_number(beads_tab$V4) / as.numeric(Breads)) * scale_to
        input_norm <- (as_number(input_tab$V4) / as.numeric(Ireads)) * scale_to

        # The pseudo-count of 1 keeps the ratio defined where the input is 0
        Normalized <- beads_tab
        Normalized$V7 <- (beads_norm + 1) / (input_norm + 1)
        Normalized$V4 <- NULL
        Normalized$V5 <- NULL
        Normalized$V6 <- NULL
        names(Normalized)[1:4] <- c('chr', 'start', 'end', 'value')

        Norma_log <- Normalized
        Norma_log$logValue <- log2(as.numeric(Norma_log$value))
        Norma_log$value <- NULL

        # Add criteria for UCSC
        track_line <- 'track type=bedGraph  visibility=full  yLineOnOff=on   autoScale=on    yLineMark="0.0"  alwaysZero=on  graphType=bar   maxHeightPixels=128:75:11       windowingFunction=maximum       smoothingWindow=off'
        Normalized <- rbind(data.frame(chr = track_line, start = '', end = '', value = '', stringsAsFactors = FALSE), Normalized)
        Norma_log <- rbind(data.frame(chr = track_line, start = '', end = '', logValue = '', stringsAsFactors = FALSE), Norma_log)

        # Save these files that can be loaded on UCSC
        write.table(Normalized, paste(output, 'ratio.bed', sep = '_'),
                    sep = "\t", col.names = FALSE, row.names = FALSE,
                    quote = FALSE)

        write.table(Norma_log, paste(output, 'Logratio.bed', sep = '_'),
                    sep = "\t", col.names = FALSE, row.names = FALSE,
                    quote = FALSE)
}




