# Maude Strobino (Steiner lab)

# Call domains (early or late) using the G1 and G2 datasets from Repli-seq
# Input = G1.bed and G2.bed
# Output = early.txt and late.txt

# Read one Repli-seq table and turn its score into a binary call.
#
# path  : tab-delimited file with a header line; the score is taken from
#         the 4th column.
# label : name given to the call column in the result ('G1' or 'G2').
#
# Returns a data frame with columns chr, start, end and <label>, where
# <label> is 100 when the score is > 0 and -100 otherwise.
read_binarised_bins <- function(path, label) {
        bins <- read.delim(path, header = TRUE)
        if (ncol(bins) < 4) {
                stop('Expected at least 4 columns in ', path, call. = FALSE)
        }

        score <- as.numeric(bins[[4]])
        if (anyNA(score)) {
                # The row-by-row version failed on these rows with an
                # uninformative "missing value where TRUE/FALSE needed".
                stop('Missing or non-numeric score in column 4 of ', path,
                     call. = FALSE)
        }

        #For each dataset put 100 if positive and -100 for negative
        # (one vectorised comparison for the whole column)
        bins$domain <- ifelse(score > 0, 100, -100)

        # Drop the imported column named 'X.2'.
        # NOTE(review): presumably this is the raw score column as named by
        # read.delim() - confirm against the header of the input files.
        bins$X.2 <- NULL

        if (ncol(bins) != 4) {
                # Renaming a table of any other width would silently
                # mislabel its columns.
                stop('Expected 4 columns (chr, start, end, call) for ', path,
                     ' but found ', ncol(bins), call. = FALSE)
        }
        names(bins) <- c('chr', 'start', 'end', label)
        bins
}

sites2 <- read_binarised_bins('Path/To/G1.bed', 'G1')
sites3 <- read_binarised_bins('Path/To/G2.bed', 'G2')

#Bind both datasets
# cbind() pairs the rows purely by position, so first make sure that both
# tables describe the same bins in the same order.
if (nrow(sites2) != nrow(sites3)) {
        stop('G1 and G2 tables have different numbers of bins (',
             nrow(sites2), ' vs ', nrow(sites3), ')', call. = FALSE)
}
same_bin <- as.character(sites2$chr) == as.character(sites3$chr) &
        sites2$start == sites3$start
# base::all is spelled out because this script also stores a data frame
# under the name 'all'.
if (!isTRUE(base::all(same_bin))) {
        stop('G1 and G2 tables do not list the same bins in the same order',
             call. = FALSE)
}

all <- cbind(sites2, sites3$G2)
names(all) <- c('chr', 'start', 'end', 'G1', 'G2')

#Assign domain. If positive in G1 and negative in G2 = 100, the opposite = -100,
#same call in both = 0
# The sign of the difference gives -1 / 0 / 1 for G1 < G2, G1 == G2 and
# G1 > G2, computed for the whole column at once.
all$domain <- 100 * sign(as.numeric(all$G1) - as.numeric(all$G2))
if (anyNA(all$domain)) {
        stop('Missing G1 or G2 call: cannot assign a domain to every bin',
             call. = FALSE)
}

# To plot, assign colors
# Work on a copy so the table used for domain calling is left untouched.
all_plot <- all
all_plot$color <- 'green'
# Drop the per-dataset calls (G1, G2); only the combined call is plotted.
all_plot[4:5] <- NULL

# green = 100 (default), red = -100, grey = 0 (calls agree in G1 and G2)
all_plot$color <- ifelse(all_plot$domain == -100, 'red',
                         ifelse(all_plot$domain == 0, 'grey', 'green'))

names(all_plot) <-c('Chrom', 'Start', 'End', 'domain', 'Colors')
library('chromPlot')
# Plot the table built above. 'mydata' is only created further down in the
# script and does not carry the Chrom/Start/End/Colors columns.
chromPlot(bands=all_plot, chr=c('I', 'II', 'III', 'IV', 'V', 'X'))


##########
# call domains
# Keep only the coordinates and the combined call (drops columns G1 and G2)
all[4:5] <- NULL

# Delete all the 0 because it means that this is data that are not well supported by both datasets
domain <- all[all$domain != 0,]
if (nrow(domain) == 0) {
        stop('No bin has opposite G1/G2 calls: there is no domain to call',
             call. = FALSE)
}

#Define the start of each domain
# A bin opens a new domain when it is the first remaining bin, when its call
# differs from the previous remaining bin, or when the chromosome changes.
# The first bin is always flagged: seeding the comparison with 100 would
# silently drop a leading run of 100 calls. Chromosome changes are flagged
# because a run of identical calls must not continue onto the next chromosome.
bin_call <- as.numeric(domain$domain)
bin_chr <- as.character(domain$chr)
previous <- seq_len(nrow(domain))[-nrow(domain)]
starts_domain <- c(TRUE,
                   bin_call[-1] != bin_call[previous] |
                           bin_chr[-1] != bin_chr[previous])

# 'domain' now holds the call (100 / -100) on the first bin of each domain
# and 0 on every other bin.
domain$domain <- ifelse(starts_domain, bin_call, 0)
names(domain) <- c('chr', 'start', 'end','domain')

#Keep each row with a switch of domain
# (a single subset instead of growing a data frame with rbind() in a loop)
mydata <- domain[starts_domain,]

#Give start and end of each domain
# Each row of mydata is the first bin of a domain. The domain starts at that
# bin's start. It ends one base before the next domain when that one is on
# the same chromosome, otherwise at the end of its own first bin.
dom_chr <- as.character(mydata$chr)
dom_start <- as.numeric(mydata$start)

# Index of the following domain; indexing past the last row yields NA, so
# the last domain is handled as "no following domain" instead of aborting
# the script on an NA comparison.
following <- seq_len(nrow(mydata)) + 1L
next_on_same_chr <- !is.na(dom_chr[following]) &
        dom_chr[following] == dom_chr

mydata$startDom <- dom_start
mydata$endDom <- ifelse(next_on_same_chr,
                        dom_start[following] - 1,
                        as.numeric(mydata$end))
# NOTE(review): the last domain of every chromosome spans only its first
# bin here, so it is likely to be removed by a later size filter - confirm
# that this is intended.


#Delete domains smaller than 10kb
# Drop the coordinates of the first bin (columns 2 and 3, start and end);
# startDom and endDom now describe the domain.
mydata[2:3]<- NULL

# Size of every domain, computed for the whole column at once
mydata$size <- as.numeric(mydata$endDom) - as.numeric(mydata$startDom)

# Keep domains whose size is strictly greater than 10000
mydomains <- mydata[mydata$size > 10000,]

# Give the list of early and late domains
# '-' marks domains called -100, '+' marks every other domain.
is_negative <- as.numeric(mydomains$domain) == -100
if (anyNA(is_negative)) {
        stop('Missing domain call: cannot split domains into early and late',
             call. = FALSE)
}
mydomains$domains <- ifelse(is_negative, '-', '+')
mydomains$domain <- NULL

# Fix the group order explicitly: element 1 is always '-' and element 2 is
# always '+', whatever the locale's sort order, and a group without any
# domain still exists as an empty table.
list_domain <- split(mydomains,
                     factor(mydomains$domains, levels = c('-', '+')))

# Write one domain table as tab-separated text with a header line.
# Numbers are formatted without scientific notation so that a coordinate
# such as 100000 is not written as 1e+05.
write_domain_table <- function(domains, path) {
        printable <- domains
        for (column in c('start', 'end', 'size')) {
                printable[[column]] <- format(printable[[column]],
                                              scientific = FALSE,
                                              trim = TRUE,
                                              drop0trailing = TRUE)
        }
        write.table(printable, path, sep = "\t", col.names = TRUE,
                    quote = FALSE, row.names = FALSE)
}

# '+' domains are written to early.txt
positif_domains <- list_domain[['+']]
positif_domains$domains <- NULL
names(positif_domains) <- c('chr', 'start', 'end', 'size')
write_domain_table(positif_domains, 'Path/To/output/early.txt')

# '-' domains are written to late.txt
negatif_domains <- list_domain[['-']]
negatif_domains$domains <- NULL
names(negatif_domains) <- c('chr', 'start', 'end', 'size')
write_domain_table(negatif_domains, 'Path/To/output/late.txt')


