#!/usr/bin/env python

# Copyright 2009, 2010 Yann Surget-Groba
# This file is part of the STM pipeline.
#
# The STM pipeline is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# The STM pipeline is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this file. If not, see <http://www.gnu.org/licenses/>.


import os
import sys, getopt

from Bio import SeqIO

def usage():
    """Print the command-line help for sortOrphans.py to standard output.

    Returns None; callers should call it for its side effect only.
    """
    # A parenthesised single string prints identically under the Python 2
    # print statement and the Python 3 print() function.
    print("""sortOrphans.py -i infile [-p prefix]
    -i infile: fasta file containing orphan contigs and reads
    -p prefix: prefix of the reads names (default HWI)""")


# Parse the command line: -i (input file) is mandatory, -p overrides the
# default read-name prefix.
try:
    opts, args = getopt.getopt(sys.argv[1:], "i:p:")
except getopt.GetoptError as err:
    # Tell the user which option was rejected before showing the help text.
    sys.stderr.write("%s\n" % err)
    usage()
    sys.exit(1)

prefix = 'HWI'
infile = False

for o, a in opts:
    if o == '-i':
        infile = a
    elif o == '-p':
        prefix = a

if not infile:
    # usage() prints the help itself and returns None, so it must be called
    # directly: printing its return value emitted a stray "None" line.
    usage()
    sys.exit(1)

# Open the input FASTA file; exit with a message if it cannot be read.
try:
    inHandle = open(infile)
except IOError as strerror:
    print("Could not open file %s: %s" % (infile, strerror))
    sys.exit(1)

# Build the output base name from the file name only, cut at its first dot.
# Splitting the whole path on '.' broke inputs such as './in.fa' or
# 'run.1/in.fa', where the first dot belongs to the directory part.
inDir, inName = os.path.split(infile)
outBase = os.path.join(inDir, inName.split('.')[0])

# Nested 'with' blocks close all three handles even if opening an output
# file or parsing the input raises.
with inHandle:
    with open(outBase + '_reads.fa', 'w') as readHandle:
        with open(outBase + '_contigs.fa', 'w') as nodeHandle:
            for seq in SeqIO.parse(inHandle, 'fasta'):
                # A record is treated as a read when the prefix occurs
                # anywhere in its id (substring match, not only at the
                # start); every other record is written as a contig.
                if prefix not in seq.id:
                    nodeHandle.write(seq.format('fasta'))
                else:
                    readHandle.write(seq.format('fasta'))
