library(pheatmap)

# Root directory of the input tables, anchored to the directory the script is
# launched from. The script calls setwd() several times and builds paths from
# `data_directory` after doing so; a bare relative path ('../data_tables/')
# would then be resolved against the *current* working directory rather than
# the launch directory. The trailing separator is kept because later paths are
# built by plain string concatenation.
data_directory <- file.path(getwd(), "..", "data_tables", "")

# Labels of the form <tissue>_<replicate>, three replicates per tissue.
all_reps <- paste(
  rep(c("flower", "sepal", "petal", "anther", "stigma", "ovule"), each = 3),
  1:3,
  sep = "_"
)

# Labels of the form <tissue>.mean, one per tissue.
all_means <- paste(
  c("flower", "sepal", "petal", "anther", "stigma", "ovule"),
  "mean",
  sep = "."
)

# Read every "anno.mir.allen" table in the detected_sites folder and keep, per
# file, the first three columns of the rows that pass all three thresholds.
# NOTE: list.files(folder) and the reads below run after setwd(folder), so
# `folder` is resolved from inside that directory unless it is absolute.
folder <- paste0(data_directory, "detected_sites")
setwd(folder)
sites <- list()
for (current_file in grep("anno.mir.allen", list.files(folder), value = TRUE)) {
  tbl <- read.table(
    file.path(folder, current_file),
    header = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE
  )
  sites[[current_file]] <- tbl[
    as.numeric(tbl[, "adj.p.val"]) <= 0.05 &
      tbl[, "slice.site.rpm"] >= 0.1 &
      tbl[, "fold.change"] > 1,
    1:3
  ]
}

# Count in how many `sites` tables each site occurs. A site key is one table
# row joined with "_"; apply() converts the row to character first, so numeric
# fields may carry padding spaces inside the key.
all_sites <- table(unlist(lapply(
  sites,
  function(x) apply(x, 1, function(y) paste(y, collapse = "_"))
)))
all_sites <- sort(all_sites, decreasing = TRUE)

# Gene identifier: the text before the first "." of each key.
all_targeted_genes <- unique(vapply(
  strsplit(names(all_sites), ".", fixed = TRUE),
  function(x) x[1],
  character(1)
))

# Split each key once into its "_"-separated fields and reuse the pieces
# (field 1 -> targeted_transcripts, field 2 -> targeted_mirs,
# field 3 -> targeted_sites).
site_fields <- strsplit(names(all_sites[all_sites >= 1]), "_", fixed = TRUE)
targeted_transcripts <- vapply(site_fields, function(x) x[1], character(1))
gene_iso <- strsplit(targeted_transcripts, ".", fixed = TRUE)
targeted_mirs <- vapply(site_fields, function(x) x[2], character(1))
targeted_sites <- as.numeric(vapply(site_fields, function(x) x[3], character(1)))

# Print "<count>\t<transcript>_<miRNA>" for the leading entries of the tally.
# seq_along() rather than 1:length(): the latter yields c(1, 0) for an empty
# vector and would then index past the end of the (empty) results.
# NOTE(review): the bound is the number of unique genes, not the number of
# sites, so only the first length(all_targeted_genes) sites are printed --
# confirm this is intended.
for (i in seq_along(all_targeted_genes)) {
  cat(
    all_sites[i], "\t", targeted_transcripts[[i]], "_", targeted_mirs[[i]], "\n",
    sep = ""
  )
}

# For each tissue, count in how many of its `sites` tables every site occurs.
highconfidence <- list()
for (samplename in c("flower", "sepal", "petal", "anther", "stigma", "ovule")) {
  # Tables belonging to a tissue are the entries named "<tissue>_...".
  samples <- grep(paste0("^", samplename, "_"), names(sites), value = TRUE)
  hc_table <- table(unlist(lapply(
    sites[samples],
    function(x) apply(x, 1, function(y) paste(y, collapse = "_"))
  )))
  highconfidence[[samplename]] <- hc_table
}

# A site is retained when at least one tissue reports it in two or more tables.
sample_subset <- c("flower", "sepal", "petal", "anther", "stigma", "ovule")
hc_sites <- unique(unlist(lapply(
  highconfidence[sample_subset],
  function(x) names(which(x >= 2))
)))

# Tissue order used for plotting.
whorl_order <- c("flower", "sepal", "petal", "anther", "stigma", "ovule")

# Sample names (alphabetical) that the first six colours are assigned to.
mean_names <- c("anther", "flower", "ovule", "petal", "sepal", "stigma")

# Named colour lookup: the six tissues followed by additional sample names.
colors <- c(
  setNames(
    c("#E38226", "#000000", "#00AEEF", "#C7D878", "#009444", "#5950A1"),
    mean_names
  ),
  X5ng_fb = "#1C75BC",
  fb1 = "#1C75BC",
  fb2 = "#1C75BC",
  fb3 = "#1C75BC",
  Col.10ug = "#1C75BC",
  wt_shoot = "#006600"
)

# Top colours for the heatmap palettes, keyed by feature class.
heat_choices <- c(
  capped = "#1C75BC",
  noncapped = "#F7941E",
  mir = "firebrick2"
)

# Switch to the data directory; the two tables below (and the PDFs written
# later) are addressed relative to it.
setwd(data_directory)

# Abundance table: first column becomes the row names.
feature_rpm <- read.table(
  "feature_rpm.tsv",
  header = TRUE, row.names = 1, quote = ""
)

# Feature annotation table.
features <- read.table(
  "features.tsv",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)

# Features whose `position` is one of the listed classes ...
sense_features <- features[
  features[, "position"] %in%
    c("distal", "proximal", "single-exon", "upstream", "intron"),
]

# ... and, among those, the ones whose name contains ".capped.".
capped_features <- sense_features[
  grep("\\.capped\\.", sense_features[, "feature.name"]),
]

# Abundance of the capped feature(s) annotated to the gene of a target
# interaction.
#
# target_interaction: a single row name such as "<gene>.<isoform>_<miRNA>";
#   the gene is the text before the first ".".
# Returns one value per entry of the global `mean_columns`: the column sums
#   over the gene's capped features when there are several, otherwise the
#   single matching `feature_rpm` row as a plain numeric vector.
# Reads the globals `capped_features`, `feature_rpm` and `mean_columns`.
tss_rpm <- function(target_interaction) {
  gene_id <- strsplit(target_interaction, ".", fixed = TRUE)[[1]][1]
  of_gene <- capped_features[, "associated.gene.ID..Araport11."] == gene_id
  capped_ids <- gsub("^", "Ath_", capped_features[of_gene, "feature.name"])

  # This interaction is mapped to a fixed row instead of the annotation lookup.
  if (target_interaction == "AT1G56010.2_miR164a") {
    capped_ids <- c("AT1G56010_capped")
  }

  capped_rpm <- feature_rpm[capped_ids, mean_columns]
  if (length(capped_ids) > 1) {
    return(colSums(as.matrix(capped_rpm)))
  }
  as.numeric(capped_rpm)
}

# Abundance of a target interaction itself.
#
# target_interaction: a single row name of the global `feature_rpm`.
# Returns the values of that row for the global `mean_columns` as a plain
#   numeric vector (NA for a row name that is not present).
#
# The capped-feature (full-length) abundance is not part of the result, so it
# is not computed here; tss_rpm() provides that quantity. Skipping it also
# means this lookup no longer depends on `capped_features`.
cleaved_rpm <- function(target_interaction) {
  as.numeric(feature_rpm[target_interaction, mean_columns])
}

# Fraction of signal attributed to the cleaved product of a target interaction:
# cleaved / (cleaved + full-length + 0.01), computed per column.
#
# target_interaction: a single row name such as "<gene>.<isoform>_<miRNA>".
# Returns one value per entry of the global `mean_columns`.
# Side effect: prints the interaction and the capped feature rows used.
# Reads the globals `capped_features`, `feature_rpm` and `mean_columns`.
cleavage_efficiency <- function(target_interaction) {
  gene_id <- strsplit(target_interaction, ".", fixed = TRUE)[[1]][1]
  of_gene <- capped_features[, "associated.gene.ID..Araport11."] == gene_id
  capped_ids <- gsub("^", "Ath_", capped_features[of_gene, "feature.name"])

  # This interaction is mapped to a fixed row instead of the annotation lookup.
  if (target_interaction == "AT1G56010.2_miR164a") {
    capped_ids <- c("AT1G56010_capped")
  }
  cat(target_interaction, "\t", capped_ids, "\n")

  # Full-length signal: summed over the capped features when there are several.
  capped_rpm <- feature_rpm[capped_ids, mean_columns]
  intact <- if (length(capped_ids) > 1) {
    colSums(as.matrix(capped_rpm))
  } else {
    as.numeric(capped_rpm)
  }
  sliced <- as.numeric(feature_rpm[target_interaction, mean_columns])

  # The 0.01 pseudocount keeps the ratio defined when both signals are zero.
  sliced / (sliced + intact + .01)
}

# Every column of the abundance table is used by the per-interaction helpers.
mean_columns <- colnames(feature_rpm)

# Rows whose name contains "miR" or "TAS" are treated as target interactions.
target_sites <- grep("(miR|TAS)", rownames(feature_rpm), value = TRUE)
efficiencies <- list()
full_length <- list()
cleaved <- list()

# Columns shown in the tissue heatmap.
sub_columns <- c(
  "flower.mean", "sepal.mean", "petal.mean",
  "anther.mean", "stigma.mean", "ovule.mean"
)

# One named vector per interaction in each of the three lists.
# seq_along() rather than 1:length(): with no matching rows the latter would
# iterate over c(1, 0) and call the helpers with a missing site name.
for (i in seq_along(target_sites)) {
  x <- cleavage_efficiency(target_sites[i])
  y <- tss_rpm(target_sites[i])
  z <- cleaved_rpm(target_sites[i])
  names(x) <- mean_columns
  names(y) <- mean_columns
  names(z) <- mean_columns
  efficiencies[[target_sites[i]]] <- x
  full_length[[target_sites[i]]] <- y
  cleaved[[target_sites[i]]] <- z
}



# Stack the per-interaction efficiency vectors into an
# interaction x column matrix.
efficiency_table <- matrix(
  unlist(efficiencies),
  ncol = length(mean_columns), byrow = TRUE
)
colnames(efficiency_table) <- mean_columns
rownames(efficiency_table) <- names(efficiencies)

# Keep the high-confidence sites (the trailing "_<digits/spaces>" field of each
# site key is stripped to obtain the row name) and the columns in sub_columns.
efficiency_table <- efficiency_table[
  gsub("_[ 0-9]+$", "", hc_sites),
  sub_columns
]

# Express every row relative to its own maximum.
efficiency_table <- t(apply(
  efficiency_table, 1,
  function(x) x / max(x, na.rm = TRUE)
))

# Three-point palette running from the 'capped' colour through grey to the
# 'noncapped' colour.
topcol <- heat_choices["noncapped"]
heat_pal <- colorRampPalette(c(heat_choices["capped"], "gray90", topcol))(20)
pdf("FIG5D.efficiency_heatmap.pdf", onefile = FALSE, useDingbats = FALSE)
pheatmap(
  efficiency_table,
  color = heat_pal, border_color = NA, na_col = "gray",
  scale = "none",
  cluster_rows = TRUE, cluster_cols = FALSE, clustering_method = "complete"
)
dev.off()


# Draw a tissue heatmap of every feature linked to a gene and return the
# underlying abundances.
#
# gene:   gene identifier; matched exactly against the annotation column and
#         used as a pattern against the row names of `feature_rpm`.
# topcol: colour of the top of the white-to-colour palette.
# norm:   'column' scales every feature by its own maximum; any other value
#         scales the whole matrix by its overall maximum.
# Returns the unscaled feature x tissue matrix.
# Reads the globals `features` and `feature_rpm`.
heatflower <- function(gene, topcol = heat_choices["capped"], norm = "column") {
  tissue_means <- c(
    "flower.mean", "sepal.mean", "petal.mean",
    "anther.mean", "stigma.mean", "ovule.mean"
  )

  # Annotated features (prefixed with "Ath_") followed by every abundance row
  # whose name matches the gene.
  annotated <- features[
    features[, "associated.gene.ID..Araport11."] == gene,
    "feature.name"
  ]
  row_ids <- c(
    gsub("^", "Ath_", annotated),
    grep(gene, rownames(feature_rpm), value = TRUE)
  )
  rpm <- as.matrix(feature_rpm[row_ids, tissue_means])

  # Both branches yield a tissue x feature matrix (apply() over rows returns
  # its results as columns).
  scaled <- if (norm == "column") {
    apply(rpm, 1, function(x) x / max(x))
  } else {
    t(rpm / max(rpm))
  }

  pheatmap(
    scaled,
    cluster_rows = FALSE, cluster_cols = FALSE,
    color = colorRampPalette(c("white", topcol))(11),
    breaks = seq(0, 1, length.out = 12)
  )
  return(rpm)
}



# Headerless table passed to tissue_mirs() below (column 1: sample label,
# column 3: value, as used there).
x <- read.table("miR396a.tsv", header = FALSE, stringsAsFactors = FALSE)
# Average the values of a sample table per tissue, draw them as a one-column
# heatmap scaled to the largest mean, and return the unscaled means.
#
# x: table whose first column holds sample labels and whose third column holds
#    the values to average.
# Returns a numeric vector named by the global `whorl_order`.
tissue_mirs <- function(x) {
  # Label pattern used for each tissue. Stigma and ovule both take the
  # 'carpels' samples.
  sample_patterns <- c(
    stigma = "carpels",
    ovule = "carpels",
    anther = "stamens",
    petal = "petals",
    sepal = "sepal",
    flower = "fb(_[12](_repro)?)?$"
  )

  meanvals <- vector(length = 6)
  names(meanvals) <- whorl_order
  # Each tissue is assigned exactly once; in particular the sepal mean comes
  # from the 'sepal' samples only and is not overwritten by the mean of the
  # 'fb' samples.
  for (tissue in names(sample_patterns)) {
    meanvals[tissue] <- mean(x[grep(sample_patterns[[tissue]], x[, 1]), 3])
  }

  heat_pal <- colorRampPalette(c("white", "firebrick2"))(11)
  pheatmap(
    as.matrix(meanvals / max(meanvals)),
    show_rownames = TRUE, show_colnames = TRUE,
    cluster_rows = FALSE, cluster_cols = FALSE,
    color = heat_pal, breaks = seq(0, 1, length.out = 12)
  )
  return(meanvals)
}

# Write the tissue heatmap for the table read above to mir396a.pdf.
pdf("mir396a.pdf", onefile = FALSE)
tissue_mirs(x)
dev.off()


# Ratio of one feature of a gene to the sum of that feature and a second one,
# per tissue; drawn as a heatmap scaled to the largest ratio.
#
# gene:  gene identifier passed to heatflower() (which draws its own heatmap).
# cut:   row of the heatflower() result used as numerator.
# uncut: row added to `cut` in the denominator.
# Returns the unscaled ratios.
efficiency <- function(gene, cut = 1, uncut = 2) {
  rpm <- heatflower(gene)
  cut_rpm <- rpm[cut, ]
  fraction_cut <- cut_rpm / (cut_rpm + rpm[uncut, ])

  pheatmap(
    fraction_cut / max(fraction_cut),
    cluster_rows = FALSE, cluster_cols = FALSE,
    color = colorRampPalette(c("white", "firebrick2"))(11),
    breaks = seq(0, 1, length.out = 12)
  )
  return(fraction_cut)
}


# Heatmap of the features of AT3G22886, drawn with the 'mir' colour.
pdf("FIG5E.MIR167.pdf", onefile = FALSE)
heatflower("AT3G22886", topcol = heat_choices["mir"])
dev.off()

# Abundances for AT5G37020. heatflower() also draws a heatmap, which lands on
# whatever device is current and is closed again right away.
x <- heatflower("AT5G37020")
y <- colSums(x[1:3, ]) # rows 1-3 summed: plotted as full-length transcripts
z <- x[5, ]            # row 5: plotted as cleaved transcripts
dev.off()

# Three side-by-side bar charts: full-length, cleaved, percent cleaved.
pdf(
  "FIG5E.ARF8_cleavage_efficiency.pdf",
  width = 5, height = 2, pointsize = 8, useDingbats = FALSE
)
par(mfrow = c(1, 3), lwd = 1, ljoin = "mitre", lend = "square")
barplot(
  y,
  main = "Full-length transcripts",
  ylab = "Reads per Million (RPM)", ylim = c(0, 30),
  col = colors[whorl_order],
  space = 0, border = NA, las = 2
)
abline(h = 0)
barplot(
  z,
  main = "Cleaved transcripts",
  ylab = "Reads per Million (RPM)", ylim = c(0, 3),
  col = colors[whorl_order],
  space = 0, border = NA, las = 2
)
abline(h = 0)
barplot(
  z / (y + z + .01) * 100,
  main = "Cleavage Efficiency",
  ylab = "Percent cleaved signal", ylim = c(0, 30),
  col = colors[whorl_order],
  space = 0, border = NA, las = 2
)
abline(h = 0)
dev.off()


# Heatmap of the features of AT5G35407, drawn with the 'mir' colour.
pdf("FIG5E.MIR396.pdf", onefile = FALSE)
heatflower("AT5G35407", topcol = heat_choices["mir"])
dev.off()

# Abundances for AT2G22840. heatflower() also draws a heatmap, which lands on
# whatever device is current and is closed again right away.
x <- heatflower("AT2G22840")
y <- x[1, ] # row 1: plotted as full-length transcripts
z <- x[3, ] # row 3: plotted as cleaved transcripts
dev.off()

# Three side-by-side bar charts: full-length, cleaved, percent cleaved.
pdf(
  "FIG5E.GRF1_cleavage_efficiency.pdf",
  width = 5, height = 2, pointsize = 8, useDingbats = FALSE
)
par(mfrow = c(1, 3), lwd = 1, ljoin = "mitre", lend = "square")
barplot(
  y,
  main = "Full-length transcripts",
  ylab = "Reads per Million (RPM)", ylim = c(0, 6),
  col = colors[whorl_order],
  space = 0, border = NA, las = 2
)
abline(h = 0)
barplot(
  z,
  main = "Cleaved transcripts",
  ylab = "Reads per Million (RPM)", ylim = c(0, 0.6),
  col = colors[whorl_order],
  space = 0, border = NA, las = 2
)
abline(h = 0)
barplot(
  z / (y + z + .01) * 100,
  main = "Cleavage Efficiency",
  ylab = "Percent cleaved signal", ylim = c(0, 30),
  col = colors[whorl_order],
  space = 0, border = NA, las = 2
)
abline(h = 0)
dev.off()


# Tissue heatmap of a single, explicitly named feature.
pdf("FIG5E.MIR172.pdf", onefile = FALSE)
allfeatures <- gsub("^", "Ath_", "chr5.capped.374")
meancols <- c(
  "flower.mean", "sepal.mean", "petal.mean",
  "anther.mean", "stigma.mean", "ovule.mean"
)
heat_pal <- colorRampPalette(c("white", heat_choices["mir"]))(11)
heatdata <- as.matrix(feature_rpm[allfeatures, meancols])
raw_heatdata <- heatdata
# The colour breaks are fixed to 0..1, so the values are scaled to the
# feature's maximum before plotting, as heatflower() does for its heatmaps.
# Unscaled abundances above 1 would fall outside the breaks.
pheatmap(
  t(heatdata / max(heatdata)),
  cluster_rows = FALSE, cluster_cols = FALSE,
  color = heat_pal, breaks = seq(0, 1, length.out = 12)
)
dev.off()


# Abundances for AT4G36920. heatflower() also draws a heatmap, which lands on
# whatever device is current and is closed again right away.
x <- heatflower("AT4G36920")
dev.off()
y <- x[1, ] # row 1: plotted as full-length transcripts
z <- x[2, ] # row 2: plotted as cleaved transcripts

# Three side-by-side bar charts: full-length, cleaved, percent cleaved.
pdf(
  "FIG5E.AP2_cleavage_efficiency.pdf",
  width = 5, height = 2, pointsize = 8, useDingbats = FALSE
)
par(mfrow = c(1, 3), lwd = 1, ljoin = "mitre", lend = "square")
barplot(
  y,
  main = "Full-length transcripts",
  ylab = "Reads per Million (RPM)", ylim = c(0, 40),
  col = colors[whorl_order],
  space = 0, border = NA, las = 2
)
abline(h = 0)
barplot(
  z,
  main = "Cleaved transcripts",
  ylab = "Reads per Million (RPM)", ylim = c(0, 0.4),
  col = colors[whorl_order],
  space = 0, border = NA, las = 2
)
abline(h = 0)
barplot(
  z / (y + z + .01) * 100,
  main = "Cleavage Efficiency",
  ylab = "Percent cleaved signal", ylim = c(0, 2),
  col = colors[whorl_order],
  space = 0, border = NA, las = 2
)
abline(h = 0)
dev.off()



### FIG S11 ###

# Labels of the form <sample>_<replicate>, three replicates per sample, for the
# extended sample set.
all_reps <- paste(
  rep(
    c(
      "d234_fb", "X5ng_fb", "xrn1_fb", "xrn4_M",
      "flower", "sepal", "petal", "anther", "stigma", "ovule"
    ),
    each = 3
  ),
  1:3,
  sep = "_"
)

# Labels of the form <sample>.mean for the same samples.
all_means <- paste(
  c(
    "d234_fb", "X5ng_fb", "xrn1_fb", "xrn4_M",
    "flower", "sepal", "petal", "anther", "stigma", "ovule"
  ),
  "mean",
  sep = "."
)

# Features whose `position` is one of the listed classes. The class list is the
# same one used when `sense_features` is first built ('proximal' spelled out in
# full; a misspelt class name would silently drop those features).
sense_features <- features[
  features[, "position"] %in%
    c("distal", "proximal", "single-exon", "upstream", "intron"),
]

# Genes that have more than one such feature, and the features of those genes.
has_multiple_features <- names(which(
  table(sense_features[, "associated.gene.ID..Araport11."]) > 1
))
multiple_sense_features <- sense_features[
  sense_features[, "associated.gene.ID..Araport11."] %in% has_multiple_features,
]

# Of those, the 'distal' features whose name has the form chr?.capped.<...>.
downstream_TSS <- multiple_sense_features[
  multiple_sense_features[, "position"] == "distal" &
    gsub("chr.\\.(.+)\\..*$", "\\1", multiple_sense_features[, "feature.name"]) == "capped",
]


# Rebuild `sites` from the detected_sites folder, this time from both the
# "anno.mir.allen" and the "anno.tas.allen" tables (in that order). For each
# file the first three columns of the rows passing all three thresholds are
# kept.
# NOTE: list.files(folder) and the reads below run after setwd(folder), so
# `folder` is resolved from inside that directory unless it is absolute.
folder <- paste0(data_directory, "detected_sites")
setwd(folder)
sites <- list()
for (current_file in c(
  grep("anno.mir.allen", list.files(folder), value = TRUE),
  grep("anno.tas.allen", list.files(folder), value = TRUE)
)) {
  tbl <- read.table(
    file.path(folder, current_file),
    header = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE
  )
  sites[[current_file]] <- tbl[
    as.numeric(tbl[, "adj.p.val"]) <= 0.05 &
      tbl[, "slice.site.rpm"] >= 0.1 &
      tbl[, "fold.change"] > 1,
    1:3
  ]
}


# Count in how many `sites` tables each site occurs. A site key is one table
# row joined with "_"; apply() converts the row to character first, so numeric
# fields may carry padding spaces inside the key.
all_sites <- table(unlist(lapply(
  sites,
  function(x) apply(x, 1, function(y) paste(y, collapse = "_"))
)))
all_sites <- sort(all_sites, decreasing = TRUE)

# Gene identifier: the text before the first "." of each key.
all_targeted_genes <- unique(vapply(
  strsplit(names(all_sites), ".", fixed = TRUE),
  function(x) x[1],
  character(1)
))

# Split each key once into its "_"-separated fields and reuse the pieces
# (field 1 -> targeted_transcripts, field 2 -> targeted_mirs,
# field 3 -> targeted_sites).
site_fields <- strsplit(names(all_sites[all_sites >= 1]), "_", fixed = TRUE)
targeted_transcripts <- vapply(site_fields, function(x) x[1], character(1))
gene_iso <- strsplit(targeted_transcripts, ".", fixed = TRUE)
targeted_mirs <- vapply(site_fields, function(x) x[2], character(1))
targeted_sites <- as.numeric(vapply(site_fields, function(x) x[3], character(1)))

# Print "<count>\t<transcript>_<miRNA>" for the leading entries of the tally.
# seq_along() rather than 1:length(): the latter yields c(1, 0) for an empty
# vector and would then index past the end of the (empty) results.
# NOTE(review): the bound is the number of unique genes, not the number of
# sites, so only the first length(all_targeted_genes) sites are printed --
# confirm this is intended.
for (i in seq_along(all_targeted_genes)) {
  cat(
    all_sites[i], "\t", targeted_transcripts[[i]], "_", targeted_mirs[[i]], "\n",
    sep = ""
  )
}

# For each sample group, count in how many of its `sites` tables every site
# occurs.
highconfidence <- list()
for (samplename in c(
  "flower", "sepal", "petal", "anther", "stigma", "ovule",
  "fb", "xrn1", "xrn4"
)) {
  # Tables belonging to a group are the entries named "<group>_...".
  samples <- grep(paste0("^", samplename, "_"), names(sites), value = TRUE)
  hc_table <- table(unlist(lapply(
    sites[samples],
    function(x) apply(x, 1, function(y) paste(y, collapse = "_"))
  )))
  highconfidence[[samplename]] <- hc_table
}

# A site is retained when at least one group reports it in two or more tables.
sample_subset <- c(
  "flower", "sepal", "petal", "anther", "stigma", "ovule",
  "fb", "xrn1", "xrn4"
)
hc_sites <- unique(unlist(lapply(
  highconfidence[sample_subset],
  function(x) names(which(x >= 2))
)))

# Site x sample matrix of replicate counts (0 where a sample lacks the site).
endcut_table <- matrix(
  nrow = length(hc_sites), ncol = length(sample_subset),
  dimnames = list(hc_sites, sample_subset)
)
for (s in sample_subset) {
  x <- highconfidence[[s]][hc_sites]
  x[is.na(x)] <- 0
  endcut_table[hc_sites, s] <- as.numeric(x)
}

# Order the sites by their total count, largest first.
endcut_table <- endcut_table[
  names(sort(rowSums(endcut_table), decreasing = TRUE)),
]

# Derive a family label per site from the middle "_"-separated field of the
# row name: strip a trailing -5p/-3p, then a trailing lower-case letter, then
# merge 157 into 156, 165 into 166, and 161.1/161.2 into 161.
targeting_mir <- gsub("^.*_(.+)_.*$", "\\1", rownames(endcut_table))
targeting_mir <- gsub("-[53]p$", "", targeting_mir)
targeting_mir <- gsub("[a-z]$", "", targeting_mir)
targeting_mir <- gsub("157", "156", targeting_mir)
targeting_mir <- gsub("165", "166", targeting_mir)
targeting_mir <- gsub("161\\.[12]", "161", targeting_mir)

# Total count per family.
mirtargets <- list()
for (i in unique(targeting_mir)) {
  rows <- targeting_mir == i
  mirtargets[[i]] <- sum(endcut_table[rows, ])
}

# Group the rows by family, families with the largest totals first; within a
# family the count-sorted order from above is preserved.
mir_order <- names(sort(unlist(mirtargets), decreasing = TRUE))
new_order <- vector()
for (i in seq_along(mir_order)) {
  new_order <- c(new_order, which(targeting_mir == mir_order[i]))
}
endcut_table <- endcut_table[new_order, ]


# White-to-black heatmap of the ordered count table.
pdf("endcut_table_all.pdf", useDingbats = FALSE, onefile = FALSE)
topcol <- "black"
heat_pal <- colorRampPalette(c("white", topcol))(50)
pheatmap(
  endcut_table,
  color = heat_pal, border_color = NA,
  cluster_rows = FALSE, cluster_cols = FALSE
)
dev.off()

# Row names of the count table with their last "_"-separated field removed,
# restricted to those present in the abundance table.
sorted_interactions <- gsub("(^.+)_.*?$", "\\1", rownames(endcut_table))
sorted_interactions <- sorted_interactions[
  sorted_interactions %in% rownames(feature_rpm)
]
sub_columns <- c(
  "X5ng_fb.mean", "xrn1_fb.mean",
  "d234_fb.mean", "xrn4_M.mean",
  "flower.mean", "sepal.mean", "petal.mean",
  "anther.mean", "stigma.mean", "ovule.mean"
)

# Full-length signal: rows labelled with their maximum, then scaled to it;
# undefined ratios are shown as 0.
full_length_table <- matrix(
  unlist(full_length),
  ncol = length(mean_columns), byrow = TRUE
)
colnames(full_length_table) <- mean_columns
rownames(full_length_table) <- names(efficiencies)
full_length_table <- full_length_table[sorted_interactions, sub_columns]
rownames(full_length_table) <- paste(
  rownames(full_length_table),
  round(apply(full_length_table, 1, max), 1),
  sep = " "
)
full_length_table <- t(apply(
  full_length_table, 1,
  function(x) x / max(x, na.rm = TRUE)
))
full_length_table[is.nan(full_length_table) | is.na(full_length_table)] <- 0

# Cleaved signal: same treatment, with the label showing ten times the maximum.
cleaved_table <- matrix(
  unlist(cleaved),
  ncol = length(mean_columns), byrow = TRUE
)
colnames(cleaved_table) <- mean_columns
rownames(cleaved_table) <- names(efficiencies)
cleaved_table <- cleaved_table[sorted_interactions, sub_columns]
rownames(cleaved_table) <- paste(
  rownames(cleaved_table),
  round(apply(cleaved_table, 1, max) * 10, 1),
  sep = " "
)
cleaved_table <- t(apply(
  cleaved_table, 1,
  function(x) x / max(x, na.rm = TRUE)
))
cleaved_table[is.nan(cleaved_table) | is.na(cleaved_table)] <- 0

# Efficiency: label shows the maximum as a percentage; undefined ratios are
# left as they are.
efficiency_table <- matrix(
  unlist(efficiencies),
  ncol = length(mean_columns), byrow = TRUE
)
colnames(efficiency_table) <- mean_columns
rownames(efficiency_table) <- names(efficiencies)
efficiency_table <- efficiency_table[sorted_interactions, sub_columns]
e <- round(apply(efficiency_table, 1, max) * 100, 1)
rownames(efficiency_table) <- paste(rownames(efficiency_table), e, sep = " ")
efficiency_table <- t(apply(
  efficiency_table, 1,
  function(x) x / max(x, na.rm = TRUE)
))


# Full-length signal: white to the 'capped' colour.
topcol <- heat_choices["capped"]
heat_pal <- colorRampPalette(c("white", topcol))(10)
pdf("mean_full_length_heatmap_all.pdf", onefile = FALSE, useDingbats = FALSE)
pheatmap(
  full_length_table,
  color = heat_pal, border_color = NA, na_col = "gray",
  scale = "none",
  cluster_rows = FALSE, cluster_cols = FALSE, clustering_method = "complete"
)
dev.off()


# Cleaved signal: white to the 'noncapped' colour.
topcol <- heat_choices["noncapped"]
heat_pal <- colorRampPalette(c("white", topcol))(10)
pdf("mean_cleaved_heatmap_all.pdf", onefile = FALSE, useDingbats = FALSE)
pheatmap(
  cleaved_table,
  color = heat_pal, border_color = NA, na_col = "gray",
  scale = "none",
  cluster_rows = FALSE, cluster_cols = FALSE, clustering_method = "complete"
)
dev.off()

# Efficiency: 'capped' colour through grey to the 'noncapped' colour.
heat_pal <- colorRampPalette(
  c(heat_choices["capped"], "gray90", heat_choices["noncapped"])
)(15)
pdf("mean_efficiency_heatmap_flowerparts_xrn1.pdf", onefile = FALSE, useDingbats = FALSE)
pheatmap(
  efficiency_table,
  color = heat_pal, border_color = NA, na_col = "gray",
  scale = "none",
  cluster_rows = FALSE, cluster_cols = FALSE,
  clustering_method = "complete"
)
dev.off()


