#!/bin/bash

# sRNA Target identification pipeline
# Copyright (C) Ma et al., 2021, Genome Research. If you use or modify this script, please cite the paper.
# usage: sh target_identification.sh miRNA.fas zmB73_cdna.fa zmB73_miRNA Gene_expression.txt zma_miRNAs_Targets.txt

# TargetFinder can be downloaded from https://github.com/carringtonlab/TargetFinder


# Fail fast: abort on the first failing command (-e) and on any reference to
# an unset variable (-u). Only POSIX constructs are used below so that running
# this script through plain `sh` keeps working.
set -eu

# Print an error message to stderr and terminate with a non-zero status.
die() {
	printf 'error: %s\n' "$*" >&2
	exit 1
}

# All five positional arguments are required; extra arguments are ignored.
if [ "$#" -lt 5 ]; then
	printf 'usage: %s miRNA.fas cdna.fa tmp_prefix Gene_expression.txt output.txt\n' "$0" >&2
	exit 2
fi

InputFile="$1"   # Mature miRNA fasta file
DB="$2"          # Reference: Maize B73 cDNA fasta file (no spaces around '=', otherwise the shell runs a command named "DB")
TMPFile="$3"     # Prefix used to name the temporary files
ExpFile="$4"     # Gene expression profile (Gene_expression.txt) from Knauer et al., 2019. Data structure: see examples in the help document of extract_merge_info.pl.
OutputFile="$5"  # Filename of the final result

# An empty prefix would make the cleanup glob below match unrelated "_target*" files.
[ -n "${TMPFile}" ] || die "temporary file prefix (argument 3) must not be empty"
[ -n "${OutputFile}" ] || die "output filename (argument 5) must not be empty"

# Check the inputs up front instead of failing halfway through the pipeline.
for required in "${InputFile}" "${DB}" "${ExpFile}"; do
	[ -r "${required}" ] || die "cannot read input file: ${required}"
done

# Remove every intermediate file sharing the "<prefix>_target" stem.
# NOTE: like the original script, this deletes ANY path matching
# "<prefix>_target*", so choose a prefix that does not collide with files
# you want to keep.
cleanup() {
	rm -rf -- "${TMPFile:?}"_target*
}
# Run the cleanup whenever the script exits, including after a failed step.
trap cleanup EXIT

# Step 1: predict targets; the raw predictions go to <prefix>_target.txt.
# The helper scripts are looked up relative to the current working directory.
# NOTE(review): -c 5 / -t 8 are presumably the score cutoff and thread count;
# confirm against the TargetFinder help text.
perl targetfinder_thread.pl -f "${InputFile}" -d "${DB}" -c 5 -t 8 -o "${TMPFile}_target.txt"

[ -f "${TMPFile}_target.txt" ] || die "targetfinder_thread.pl did not produce ${TMPFile}_target.txt"

# Step 2: keep only lines mentioning "miR", strip the "query=", "target=...parent_gene=",
# "score=" and "range=..." decorations, turn ", " separators into tabs, and
# drop duplicate lines.
grep "miR" "${TMPFile}_target.txt" \
	| sed 's/query=//g; s/target=.*parent_gene=//g; s/score=//g; s/range=.*//g' \
	| perl -0777pe "s/, /\t/g" \
	| sort \
	| uniq >"${TMPFile}_target1.txt"

# Step 3: filter the tab-separated predictions with a cutoff of 5.
perl target_info_filter.pl -i "${TMPFile}_target1.txt" -cutoff 5 -o "${TMPFile}_target2.txt"

# Step 4: merge the filtered targets with the gene expression profile
# (key column 1 of the expression file, key column 2 of the target file)
# and write the final result.
perl extract_merge_info.pl -k1 1 -k2 2 -i1 "${ExpFile}" -i2 "${TMPFile}_target2.txt" -o "${OutputFile}"

# Intermediate files are removed by the EXIT trap installed above.

# targetfinder_thread.pl comes from TargetFinder (https://github.com/carringtonlab/TargetFinder);
# target_info_filter.pl and extract_merge_info.pl are distributed together with this script.