############################################################################################
#### Commands to merge the three aligned BAM files of each genotype into a single BAM file and index it
#### dcl234: merge the three replicate alignments and index the merged BAM.
#### All three replicates (1, 2 and 3) must be listed as inputs; listing the same
#### replicate twice would double its reads and drop another replicate.
system("samtools merge dcl234_total.sorted.bam dcl234_1.sorted.bam dcl234_2.sorted.bam dcl234_3.sorted.bam")
system("samtools index dcl234_total.sorted.bam")
#### dcl234 nrpd1: merge the three replicate alignments and index the merged BAM.
system("samtools merge dcl234nrpd1_total.sorted.bam dcl234nrpd1_1.sorted.bam dcl234nrpd1_2.sorted.bam dcl234nrpd1_3.sorted.bam")
system("samtools index dcl234nrpd1_total.sorted.bam")


#############################################################################################
#### Commands to assemble Pol IV/RDR2-dependent transcripts using an in-house R function
#### Input files: merged BAMs (file1, file3) and the tables of detected regions (file2, file4)
file1 <- "dcl234_total.sorted.bam"
file2 <- "countDFedgeRds"
file3 <- "dcl234nrpd1_total.sorted.bam"
file4 <- "countDFedgeRsm"

#### Assembly thresholds: minimum transcript length, merge gap, RPM cutoff and fold change
len <- 60
gap <- 60
cov <- 1
fold <- 4

#### Load the assembly function, run it and write the resulting table to "P4RNA"
source("Transcripts.R")
PolIV <- Transcripts(
  file1 = file1, file2 = file2, file3 = file3, file4 = file4,
  len = len, gap = gap, cov = cov, fold = fold
)
write.table(PolIV, "P4RNA")


#############################################################################################
#### Function to assemble Pol IV/RDR2-dependent transcripts (P4RNAs)
#### file1  is the sorted BAM file of dcl234
#### file2  is a table (read with read.table) whose row names identify the bins where
####        P4RNAs were detected; row names are expected to look like
####        "<chromosome>-<bin number>", bin k covering (k-1)*binwidth+1 .. k*binwidth
#### file3  is the sorted BAM file of dcl234 nrpd1
#### file4  is a table in the same format as file2 identifying the bins where
####        P4siRNAs were detected
#### len    is the minimum length for the assembled transcripts
#### gap    is passed to reduce() as min.gapwidth: reads separated by a gap shorter
####        than this are merged into one transcript
#### cov    is the RPM value a transcript must exceed (strictly) in the dcl234 library
#### fold   is the minimum fold change required of the RPM value between dcl234 and
####        dcl234 nrpd1
#### chromosomes  are the sequence names on which transcripts are assembled
####        (default: Chr1..Chr5, the values previously hard-coded)
#### binwidth     is the width of the bins encoded in the row names of file2/file4
####        (default: 500, the value previously hard-coded)
#### Returns a data frame with columns Chr, start, end, countdcl (RPM in file1),
#### countdclsde (RPM in file3) and dclpositive (RPM in file1 counted against the
#### "+" strand), one row per retained transcript, with row names "Chr_start_end".
Transcripts <- function(file1, file2, file3, file4, len, gap, cov, fold,
                        chromosomes = paste0("Chr", 1:5), binwidth = 500)
{
  library(ShortRead)
  stopifnot(length(chromosomes) > 0)

  #### Turn the row names of a bin table into the genomic ranges of those bins
  bins_to_granges <- function(path) {
    region <- rownames(read.table(path))
    chr <- gsub("-\\d*", "", region)
    num <- as.numeric(gsub("\\w*-", "", region))
    GRanges(seqnames = chr,
            ranges = IRanges(start = binwidth * (num - 1) + 1,
                             end = binwidth * num))
  }

  #### Reads per million of `alignments` overlapping each range of `query`
  rpm <- function(query, alignments) {
    per_range <- split(query, seq_along(query))
    countOverlaps(per_range, alignments) / length(alignments) * 1000000
  }

  #### Get all of the reads in the regions where P4RNAs were detected
  #### (readGAlignments replaces the removed readGAlignmentsFromBam)
  reads <- readGAlignments(file1)
  rna_reads <- subsetByOverlaps(reads, bins_to_granges(file2))

  #### Assemble P4RNAs per chromosome with maximum gap, ignoring read strand
  per_chr <- lapply(chromosomes, function(chr) {
    on_chr <- rna_reads[seqnames(rna_reads) == chr]
    merged <- reduce(IRanges(start = start(on_chr), end = end(on_chr)),
                     min.gapwidth = gap)
    #### rep() keeps seqnames and ranges the same length when there are no reads
    GRanges(seqnames = rep(chr, length(merged)), ranges = merged)
  })
  #### Combining objects with different seqlevels warns; that is expected here
  transcripts <- suppressWarnings(do.call(c, per_chr))

  #### Keep transcripts of at least the minimum length
  transcripts <- transcripts[width(transcripts) >= len]

  #### Nothing assembled: return an empty table with the usual columns
  if (length(transcripts) == 0L) {
    return(data.frame(Chr = character(0), start = integer(0), end = integer(0),
                      countdcl = numeric(0), countdclsde = numeric(0),
                      dclpositive = numeric(0)))
  }

  #### Get the RPM value for every P4RNA in dcl234 and dcl234 nrpd1
  rpm_dcl <- rpm(transcripts, reads)
  rpm_sde <- rpm(transcripts, readGAlignments(file3))

  #### Get the positive-strand RPM value for every P4RNA in dcl234
  plus <- transcripts
  strand(plus) <- "+"
  rpm_plus <- rpm(plus, reads)

  #### Collect coordinates and RPM values; coordinates stay integer so that they
  #### are never rendered in scientific notation (e.g. 1e+05) in names or output
  result <- data.frame(Chr = as.character(seqnames(transcripts)),
                       start = start(transcripts),
                       end = end(transcripts),
                       countdcl = rpm_dcl,
                       countdclsde = rpm_sde,
                       dclpositive = rpm_plus)

  #### Apply the coverage and fold-change thresholds
  result <- result[result$countdcl > cov, ]
  result <- result[(result$countdcl / result$countdclsde) >= fold, ]
  rownames(result) <- paste(result$Chr, result$start, result$end, sep = "_")

  #### Keep only P4RNAs overlapping a bin where P4siRNAs were detected
  rg <- GRanges(seqnames = result$Chr,
                ranges = IRanges(start = result$start, end = result$end),
                names = rownames(result))
  with_sirna <- subsetByOverlaps(rg, bins_to_granges(file4))
  result[rownames(result) %in% elementMetadata(with_sirna)[, 1], ]
}
