/*
*
* Protein Grouper
* Copyright (C) 2014 Olivier Langella, Benoit Valot, Michel Zivy.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*
*/

#include "pg_utils.h"
#include "pg_protseqengine.h"
#include "gp_error.h"
#include <QXmlSimpleReader>
#include <QXmlInputSource>
#include <QList>
#include <QtConcurrentMap>
#include <QDebug>
#include <cstdio>
#include <iostream>

#include "sax/protein_grouping_result.h"
#include "grouping/groupng.h"


/**
 * Creates an engine with its default grouping option.
 */
PgProtSeqEngine::PgProtSeqEngine() {
    //checkInRemainingProteinList_f = std::bind(&PgProtSeqEngine::checkInRemainingProteinList, this, std::placeholders::_1);
    // By default the non informative subgroups are removed: performGrouping()
    // reads this flag to decide whether removeNonInformativeGroup() is called.
    _remove_non_informative_subgroups = true;
}

/**
 * Deletes every ProteinMatch and every Peptide still registered in the
 * accession map and in the sequence map, then empties both maps.
 */
PgProtSeqEngine::~PgProtSeqEngine() {
    qDebug() << "Deleting PgProtSeqEngine";

    // the maps only hold raw pointers: each protein match is freed by hand
    for (auto & accessionAndMatch : _proteinMatchList) {
        delete accessionAndMatch.second;
    }
    _proteinMatchList.clear();

    // same treatment for the peptides
    for (auto & sequenceAndPeptide : _peptideList) {
        delete sequenceAndPeptide.second;
    }
    _peptideList.clear();
}

void PgProtSeqEngine::triggerGrouping() {
    this->readInputStream();
    this->performGrouping();
    QTextStream* p_out = new QTextStream(stdout, QIODevice::WriteOnly);

    this->writeFastaGroupingOutputResult(p_out);
}


void PgProtSeqEngine::deindexProteinMatch(const ProteinMatch * p_protMatch) {
    for (std::set<Peptide*>::const_iterator it = p_protMatch->getPeptideSet().getList().begin();
            it != p_protMatch->getPeptideSet().getList().end(); it++) {
        _mapPeptide2proteinMatch.erase(*it);
    }
}

/**
 * Removes the entry of one peptide (and therefore its whole list of protein
 * matches) from the peptide -> protein match index.
 *
 * @param p_peptide peptide to remove; nothing happens if it is not indexed
 */
void PgProtSeqEngine::deindexPeptide(const Peptide* p_peptide) {
    _mapPeptide2proteinMatch.erase(p_peptide);
}

void PgProtSeqEngine::indexProteinMatch(const ProteinMatch * p_protMatch) {
    for (std::set<Peptide*>::const_iterator it = p_protMatch->getPeptideSet().getList().begin();
            it != p_protMatch->getPeptideSet().getList().end(); it++) {
        std::map<const Peptide*,std::list<const ProteinMatch*>>::iterator itMapProteinMatch = _mapPeptide2proteinMatch.find(*it);
        if (itMapProteinMatch == _mapPeptide2proteinMatch.end()) {
            std::list<const ProteinMatch*> listPmatch;
            listPmatch.push_back(p_protMatch);
            _mapPeptide2proteinMatch.insert(std::pair<const Peptide *,std::list<const ProteinMatch*>>(*it, listPmatch));
        } else {
            itMapProteinMatch->second.push_back(p_protMatch);
        }
    }
}

void PgProtSeqEngine::readInputStream() {

    qDebug() << "Begin reading fasta";
    QTextStream* p_in = new QTextStream(stdin, QIODevice::ReadOnly);

    QString accession = "";
    QString description = "";
    QString sequence = "";
    //Search accession conta
    //QTextStream in(p_in);
    ProteinMatch * pm = 0;
    QString line = p_in->readLine();
    while (!p_in->atEnd()) {
        if (line.startsWith(">")) {
            if (pm != 0) {
                indexProteinMatch(pm);
            }
            sequence = "";
            accession = line.remove(0, 1);
            description = accession;
            QStringList elements = accession.split(" ");
            if (elements.size() > 0) {
                accession = elements.at(0);
            }
        } else {
            sequence += line;

            //create proteinMatch :
            pm = this->getProteinMatchInstance(accession,
                                               description);
            QStringList pepSeqList = sequence.split(" ");
            for (int i = 0; i < pepSeqList.size(); i++) {
                pm->addPeptides(
                    this->getPeptideInstance(pepSeqList.at(i)));
            }
        }
        line = p_in->readLine();
    }
    if (pm != 0) {
        qDebug() << "PgProtSeqFilterPeptideOrphans::readInputStream ";
        indexProteinMatch(pm);
    }
    //p_in->close();
}
/**
 * Moves one batch of linked protein matches from _tmpProteinMatchList into
 * p_subgroups.
 *
 * The current protein match is added to p_subgroups, then
 * checkPeptideSetInRemainingProteinList() collects, through the
 * peptide -> protein match index, the protein matches reachable from its
 * peptides. Those are added to p_subgroups too, deindexed and removed from
 * _tmpProteinMatchList.
 *
 * NOTE(review): the function returns as soon as p_subgroups is not empty,
 * which presumably happens right after the first addProteinMatch() - so the
 * loop looks like it handles a single starting protein per call; confirm
 * against SubGroupSet::addProteinMatch / size.
 *
 * @param p_subgroups subgroup set receiving the protein matches
 */
void PgProtSeqEngine::fillTmpSubgroups(SubGroupSet* p_subgroups) {
    std::cerr << "building temporary subgroup list to process ..." << std::endl;
    //unsigned int i = 1;
    std::list<const ProteinMatch*>::iterator proteinIt;
    // no increment in the for header: proteinIt is advanced inside the body
    for (proteinIt = _tmpProteinMatchList.begin();
            proteinIt != _tmpProteinMatchList.end();) {
        //std::cerr << "subgrouping sequence " << i << " on " << p_proteinMatchList.size() << std::endl;

        p_subgroups->addProteinMatch(*proteinIt);
        //back search in the remaining protein list
        std::set<const ProteinMatch*> proteinMatchToGroup;
        // work on a copy of the peptides of the current protein match
        PeptideSet newPeptides;
        newPeptides.addAll((*proteinIt)->getPeptideSet());
        checkPeptideSetInRemainingProteinList(proteinMatchToGroup, newPeptides);

        // more than one element means that other protein matches than the
        // current one were collected
        if (proteinMatchToGroup.size() > 1) {
            qDebug() << "checkPeptideSetInRemainingProteinList done " << proteinMatchToGroup.size();
            std::set<const ProteinMatch*>::iterator subBackProteinIt;
            for (subBackProteinIt = proteinMatchToGroup.begin();
                    subBackProteinIt != proteinMatchToGroup.end(); subBackProteinIt++) {
                // the current protein match is already in p_subgroups and must
                // stay in the list so that proteinIt remains valid
                if (*subBackProteinIt != *proteinIt) {
                    p_subgroups->addProteinMatch(*subBackProteinIt);
                    deindexProteinMatch(*subBackProteinIt);
                    // definitively remove the collected protein match from
                    // the list of proteins left to process :
                    _tmpProteinMatchList.remove(*subBackProteinIt);
                }

            }
        }

        deindexProteinMatch(*proteinIt);


        // step over the current protein match so that the erase below,
        // whose upper bound is exclusive, removes it as well
        ++proteinIt;


        if (p_subgroups->size() > 0) {
            std::cerr << "protein left before removing "
                      << _tmpProteinMatchList.size() << std::endl;
            // drop everything from the head of the list up to (excluding) proteinIt
            _tmpProteinMatchList.erase(_tmpProteinMatchList.begin(), proteinIt);
            std::cerr << "tmp subgroup size " << p_subgroups->size() << " on "
                      << _tmpProteinMatchList.size() << std::endl;
            std::cerr << "protein left after removing "
                      << _tmpProteinMatchList.size() << std::endl;
            std::cerr << "building temporary subgroup list to process completed 1" << std::endl;
            return;
        }
    }
    // reached only when the loop ran to the end of the list without returning
    _tmpProteinMatchList.clear();
    std::cerr << "building temporary subgroup list to process completed 2" << std::endl;
}
/**
 * Removes a protein match from the list of proteins left to process and from
 * the peptide -> protein match index, while holding _mutex.
 *
 * @param proteinIt protein match to remove (a pointer, despite its name)
 */
void PgProtSeqEngine::differProteinMatchListErase(const ProteinMatch * proteinIt) {
    // both containers are modified between lock() and unlock()
    PgProtSeqEngine::_mutex.lock();
    //PgProtSeqEngine::_deleteLateProteinMatchIterator.push_back(proteinIt);
    _tmpProteinMatchList.remove(proteinIt);
    deindexProteinMatch(proteinIt);
    PgProtSeqEngine::_mutex.unlock();
}

/**
 * Collects in proteinMatchSet every indexed protein match that carries at
 * least one of the given peptides, then calls itself on the peptides of the
 * protein matches that were newly collected, until a pass adds nothing.
 *
 * Each peptide found in the index is removed from it once its protein
 * matches have been collected.
 *
 * @param proteinMatchSet in/out set of collected protein matches
 * @param peptides        peptides to look up in the index
 */
void PgProtSeqEngine::checkPeptideSetInRemainingProteinList(std::set<const ProteinMatch*> & proteinMatchSet, PeptideSet & peptides) {
    const auto matchCountBefore = proteinMatchSet.size();

    // peptides brought by the protein matches collected during this pass
    PeptideSet peptidesOfNewMatches;

    for (Peptide * p_peptide : peptides.getList()) {
        const auto indexEntry = _mapPeptide2proteinMatch.find(p_peptide);
        if (indexEntry == _mapPeptide2proteinMatch.end()) {
            // peptide not (or no longer) indexed: nothing to collect
            continue;
        }

        // grab all the protein matches containing the current peptide
        for (const ProteinMatch * p_match : indexEntry->second) {
            const bool isNewMatch = proteinMatchSet.insert(p_match).second;
            if (isNewMatch) {
                peptidesOfNewMatches.addAll(p_match->getPeptideSet());
            }
        }

        // every protein match of this peptide is collected: dereference it
        deindexPeptide(p_peptide);
    }

    // go on only if this pass found protein matches not seen before
    if (proteinMatchSet.size() != matchCountBefore) {
        checkPeptideSetInRemainingProteinList(proteinMatchSet, peptidesOfNewMatches);
    }
}


void PgProtSeqEngine::performGrouping() {
    std::cerr << "Begin Grouping" << std::endl;
    std::cerr << "There is " << _proteinMatchList.size() << " proteins to group"
              << std::endl;
    std::map<QString, ProteinMatch*>::iterator proteinIt;
    //ProteinMatch * p_protein;
    SubGroupSet tmpSubgroups;
    GroupNg * p_tmpGroup;

    for (proteinIt = _proteinMatchList.begin();
            proteinIt != _proteinMatchList.end(); ++proteinIt) {
        _tmpProteinMatchList.push_back(proteinIt->second);
    }
    _proteinMatchListSize = _proteinMatchList.size();
    _peptideList.clear();
    _proteinMatchList.clear();
    while (_tmpProteinMatchList.size() > 0) {
        std::cerr << "subgrouping..." << std::endl << "protein left : "
                  << _tmpProteinMatchList.size() << " on a total of "
                  << _proteinMatchListSize << std::endl
                  << "There is currently " << groups.size() << " groups"
                  << std::endl;
        tmpSubgroups.clear();
        this->fillTmpSubgroups(&tmpSubgroups);
        //this->commitSubgroupList(&tmpSubgroups);
        //sg is checked, we can add it to the main group list
        std::cerr << "Begin grouping of the temporary subgroup list" << std::endl;
        std::list<SubGroup *>::const_iterator sgIt;

        p_tmpGroup = new GroupNg();
        //QList<SubGroup *> qlSubgroups;
        for (sgIt = tmpSubgroups.getSubgroups().begin(); sgIt != tmpSubgroups.getSubgroups().end();
                ++sgIt) {
            // qlSubgroups.append(*sgIt);
            p_tmpGroup->push_back_subgroup(*sgIt);
        }
        std::cerr << "End grouping of the temporary subgroup list" << std::endl;

        //there is nothing in common between tmpGroups and groups, so we can just add tmpGroups to groups :
        std::set<GroupNg *> groupList;
        if (_remove_non_informative_subgroups) {
            std::cerr << "removeNonInformative" << std::endl;
            p_tmpGroup->removeNonInformativeGroup(&groupList);
        }
        else {
            groupList.insert(p_tmpGroup);
        }

        std::set< GroupNg* >::const_iterator itG;
        for (itG = groupList.begin(); itG != groupList.end(); itG++) {
            groups.push_back(*itG);
        }
    }
    tmpSubgroups.clear();
    //std::cerr << "End subgroup creation : " << subgroups.size() << std::endl;

    //Numbering
    std::cerr << "Index and Order group result" << std::endl;
    groups.indexAndOrderGroup();
    //groups.groupingSubGroupSet(subgroups);
    std::cerr << "End group creation : " << groups.size() << std::endl;
}

/**
 * Writes the grouping result on the given stream.
 *
 * Groups, subgroups and protein matches are visited in their stored order.
 * Three lines are written for each protein match:
 *   - ">" followed by the accession,
 *   - the description,
 *   - "<group>.<subgroup>.<protein>", each part being the lexical ordered
 *     string of a 1-based rank.
 * An empty line is written after the last entry and the stream is flushed.
 *
 * @param p_out stream receiving the result
 */
void PgProtSeqEngine::writeFastaGroupingOutputResult(QTextStream* p_out) {
    std::cerr << "Writing fasta grouping results of " << groups.size() << " groups"<< std::endl;
    qDebug() << "writeXmlOutputResult begin " <<  this->getGroupSet().size() << " groups";

    const std::vector<const Group *> & orderedGroups = this->getGroupSet().getOrderedGroups();

    int groupRank = 1;
    for (const Group * p_group : orderedGroups) {
        // the group part of the identifier is shared by all its proteins
        const QString groupId = PgUtils::getLexicalOrderedString(groupRank);

        const std::vector< SubGroup* > & orderedSubgroups = p_group->getSubGroupSet().getOrderedSubgroups();

        // warn the user: this group will take a while to write
        if (orderedSubgroups.size() > 10000) {
            std::cerr << "Writing fasta grouping results of a huge group " << PgUtils::getLexicalOrderedString(groupRank).toStdString() << " (containing " << orderedSubgroups.size() << " subgroups)"<< std::endl;
        }

        int subgroupRank = 1;
        for (SubGroup * p_subgroup : orderedSubgroups) {
            const std::vector< const ProteinMatch* > & orderedProteins = p_subgroup->getOrderedProteinMatchs();

            int proteinRank = 1;
            for (const ProteinMatch * p_protein : orderedProteins) {
                *p_out << ">" << p_protein->getAccession() << endl;
                *p_out << p_protein->getDescription() << endl;

                // group.subgroup.protein identifier
                *p_out << groupId << ".";
                PgUtils::writeLexicalOrderedString(p_out, subgroupRank);
                *p_out  << ".";
                PgUtils::writeLexicalOrderedString(p_out, proteinRank);
                *p_out << endl;

                ++proteinRank;
            }
            ++subgroupRank;
        }
        ++groupRank;
    }
    qDebug() << "writeXmlOutputResult end";

    //end of prot_list
    *p_out << endl << flush;

}

/**
 * Returns the protein match registered under the given accession, creating
 * and registering it first when the accession is not known yet.
 *
 * @param access accession, used as lookup key
 * @param desc   description, only used when a new protein match is created
 * @return the protein match stored for this accession
 */
ProteinMatch* PgProtSeqEngine::getProteinMatchInstance(QString access,
        QString desc) {
    const auto known = this->_proteinMatchList.find(access);
    if (known != _proteinMatchList.end()) {
        // already registered: the description given here is ignored
        return known->second;
    }

    ProteinMatch * const p_created = new ProteinMatch(access, desc);
    _proteinMatchList[access] = p_created;
    return p_created;
}

/**
 * Returns the peptide registered for the given sequence, creating and
 * registering it first when needed.
 *
 * The lookup key is the value returned by Peptide::getSequenceLi(), not the
 * raw sequence (presumably a normalised form of the sequence - TODO confirm).
 *
 * @param seq peptide sequence
 * @return the peptide stored for this key
 */
Peptide * PgProtSeqEngine::getPeptideInstance(QString seq) {
    // temporary peptide, only built to compute the lookup key
    // NOTE(review): the meaning of the constant 100 is not visible here
    Peptide keyProvider(seq, 100);
    const auto known = this->_peptideList.find(keyProvider.getSequenceLi());
    if (known != _peptideList.end()) {
        return known->second;
    }

    Peptide * p_created = new Peptide(seq, 100);
    _peptideList.insert(std::pair<const QString,Peptide *>(p_created->getSequenceLi(), p_created));
    return p_created;
}
