#!/usr/bin/env python
"""
Called by the PENPP web server to calculate entropy values and email them to users.
By HUI GUO
Mar. 7 2012
"""
import MySQLdb
import os, sys, re
import math
import smtplib
from email.mime.text import MIMEText
import mimetypes
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
from email.MIMEText import MIMEText
from email.MIMEAudio import MIMEAudio
from email.MIMEImage import MIMEImage
from email import Encoders
from email.Encoders import encode_base64



def SentEmail(emailadd, attachment):
    """Email the file at *attachment* to *emailadd* through Gmail's SMTP server.

    The message is a multipart/mixed container holding a short plain-text
    greeting followed by the file, base64-encoded, as an
    application/octet-stream attachment named after the file's base name.

    emailadd   -- recipient address; used for the To header and the envelope
    attachment -- path of the file to attach

    Raises IOError/OSError when the attachment cannot be read, and
    smtplib.SMTPException (or a socket error) when delivery fails.
    """
    mymessage = "Dear user,\n Your PENPP results are attached. Thank you for your support!\n\nPENPP group"
    # NOTE(review): the SMTP account and its password are hard-coded in the
    # source; they should be moved to configuration outside version control.
    sender = 'penppgroup@gmail.com'
    password = 'deadrabbit'

    # 'mixed' is the container type for a body plus attachments.  With
    # 'alternative' the parts are treated as interchangeable renderings of
    # the same content, so a mail client may show only one of them.
    msg = MIMEMultipart('mixed')
    msg['Subject'] = 'PENPP result'
    msg['From'] = 'no-reply@pgml.uga.edu'
    msg['To'] = emailadd

    # Body first, so it is displayed ahead of the attachment.
    msg.attach(MIMEText(mymessage, 'plain'))

    # Read the file through a context manager so the handle is always closed.
    with open(attachment, "rb") as handle:
        payload = handle.read()
    part = MIMEBase('application', "octet-stream")
    part.set_payload(payload)
    Encoders.encode_base64(part)
    part.add_header('Content-Disposition', 'attachment; filename="%s"' % os.path.basename(attachment))
    msg.attach(part)

    s = smtplib.SMTP('smtp.gmail.com', 587)
    try:
        s.ehlo()
        s.starttls()
        # The session must be re-identified after switching to TLS.
        s.ehlo()
        s.login(sender, password)
        s.sendmail(sender, emailadd, msg.as_string())
    finally:
        # Release the socket even when login or delivery fails.
        s.close()


def Connectdb():
    """Connect to the local 'nsSNP' MySQL database and return a cursor on it.

    The connection is made to 'localhost' as user 'www-data' with an empty
    password.  If the connection cannot be established, the reason is written
    to stderr and the process exits with status 1.
    """
    host = 'localhost'
    user = 'www-data'
    passwd = ''
    mydb = 'nsSNP'
    try:
        conn = MySQLdb.connect(host, user, passwd, mydb)
    except MySQLdb.Error as err:
        # Report the failure and exit non-zero; a bare sys.exit() would end
        # the process silently with a success status.
        sys.stderr.write("Cannot connect to database '%s': %s\n" % (mydb, err))
        sys.exit(1)
    curs = conn.cursor()
    # Only the cursor is returned, so pin the connection to it.  MySQLdb 1.2.x
    # cursors reference their connection through a weak proxy, which would let
    # the connection be garbage-collected as soon as this function returns.
    # NOTE(review): redundant but harmless on releases whose cursors already
    # hold a strong reference.
    curs._owning_connection = conn
    return curs


def IndposInAln(targetseq, pos):
    """Map a 1-based residue position onto a 0-based alignment column.

    Walks *targetseq* left to right, counting every symbol that is not the
    gap character '-', and returns the index of the symbol at which that
    count reaches int(pos).  Returns None when the sequence holds fewer
    than int(pos) non-gap symbols (or when pos is not positive).
    """
    residues_seen = 0
    for column, symbol in enumerate(targetseq):
        if symbol != '-':
            residues_seen += 1
            if residues_seen == int(pos):
                return column
    return None


def CalEntropy(gid, oriaa, pos, mutaa, alignment):
    """Score the substitution oriaa -> mutaa at residue *pos* of gene *gid*.

    alignment is a sequence of rows whose first item is a gene id and whose
    second item is that gene's aligned sequence ('-' marks a gap).  The
    residue position is mapped onto an alignment column through the row
    belonging to *gid*, and the symbols of every row in that column (gaps
    included) are counted.

    With
        alpha = number of rows showing oriaa in the column (1 if none),
        beta  = number of rows showing mutaa in the column, plus 1,
        pc    = share of rows showing the column's most frequent symbol,
    the score is
        -pc * ln(beta / alpha)         when beta <  alpha
        (pc - 1) * ln(beta / alpha)    otherwise.

    Returns [gid, pos, oriaa, mutaa, str(score)].

    Raises ValueError when *gid* has no row in the alignment, when *pos* is
    not an integer, or when it lies beyond the gene's residues; IndexError
    when some row is shorter than the selected column.
    """
    # Locate the aligned sequence of the gene being scored.
    targetseq = ''
    for row in alignment:
        if row[0] == gid:
            targetseq = row[1]
            break

    indpos_aln = IndposInAln(targetseq, pos)
    if indpos_aln is None:
        # Fail with a clear message instead of indexing the rows with None
        # (or scoring an empty alignment).
        raise ValueError("position %s of gene %s is not present in the alignment" % (pos, gid))

    # Tally every symbol found in the selected column.
    aa2num = {}
    for row in alignment:
        aa = row[1][indpos_aln]
        aa2num[aa] = aa2num.get(aa, 0) + 1

    alpha = aa2num.get(oriaa, 1)       # pseudocount of 1 when oriaa is absent
    beta = aa2num.get(mutaa, 0) + 1    # observed count plus a pseudocount
    pc = max(aa2num.values()) * 1.0 / len(alignment)

    log_ratio = math.log(beta * 1.0 / alpha)
    if beta < alpha:
        score = log_ratio * pc * (-1)
    else:
        score = log_ratio * (pc - 1)

    return [gid, pos, oriaa, mutaa, str(score)]


def AnalyzensSNP(data):
    """Score every mutation listed in *data* against its gene's alignment.

    data is an iterable of text lines of the form '<gene_id>,<mutation>',
    where the mutation's first character is the reference amino acid, its
    last character the mutated amino acid and everything in between the
    residue position.

    For each line the gene's cluster is looked up in the 'alignments' table,
    every alignment row of that cluster is fetched, and CalEntropy computes
    the score.

    Returns (rslt, norslt):
        rslt   -- rows [gene_id, position, reference_aa, mutated_aa, score]
                  as returned by CalEntropy;
        norslt -- rows in the same column order with '-' as the score, one
                  for every line that could not be scored (unknown gene,
                  database error, position outside the sequence, ...).  A
                  line that cannot be parsed at all is reported with the raw
                  line in the first column and '-' in the others.
    Blank lines are ignored.
    """
    curs = Connectdb()
    rslt = []
    norslt = []
    for line in data:
        entry = line.strip()
        if not entry:
            continue

        gid, sep, mut = entry.partition(',')
        gid = gid.strip()
        mut = mut.strip()
        if not sep or not gid or not mut or ',' in mut:
            # Report a malformed line instead of aborting the whole run.
            norslt.append([entry, '-', '-', '-', '-'])
            continue

        oriaa = mut[0]
        mutaa = mut[-1]
        pos = mut[1:-1]

        scored = None
        try:
            # Let the driver quote the values: the gene id comes straight
            # from user input and must never be pasted into the SQL text.
            curs.execute("select cluster_id from alignments where gene_id=%s", (gid,))
            found = curs.fetchone()
            if found is not None:
                curs.execute("select gene_id, alignment from alignments where cluster_id=%s", (found[0],))
                scored = CalEntropy(gid, oriaa, pos, mutaa, curs.fetchall())
        except Exception:
            # Best effort: one bad entry must not prevent the others from
            # being scored.  SystemExit/KeyboardInterrupt still propagate.
            scored = None

        if scored is None:
            # Same column order as the scored rows (position before the
            # reference amino acid).
            norslt.append([gid, pos, oriaa, mutaa, '-'])
        else:
            rslt.append(scored)
    return rslt, norslt

def main():
    """Score the mutations in the file named on the command line and email them.

    sys.argv[1] is a file path of the form '<prefix>,<email>': the file holds
    one '<gene_id>,<mutation>' entry per line, and the text after the last
    comma of the path is the address the results are sent to.

    The results are written tab-separated to '<prefix>.txt' -- scored rows
    first, highest score first, followed by the entries that could not be
    scored -- and that file is then emailed through SentEmail.

    Exits with a usage message when the argument is missing or contains no
    comma.
    """
    usage = "usage: %s <prefix>,<email>" % sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit(usage)
    myfile = sys.argv[1]
    if ',' not in myfile:
        sys.exit(usage)
    # Validate the argument before doing any work, and split on the LAST
    # comma so a comma inside the directory part does not break the run.
    p1, email_add = myfile.rsplit(',', 1)

    with open(myfile) as fp:
        lines = fp.read().splitlines()

    # Drop duplicate entries while keeping their first-seen order, so the
    # output is reproducible from run to run.
    seen = set()
    usr_data = []
    for line in lines:
        if line not in seen:
            seen.add(line)
            usr_data.append(line)

    result, noresult = AnalyzensSNP(usr_data)

    outpath = p1 + '.txt'
    header = ['Gene_id', 'Position', 'Reference_AA', 'Mutated_AA', 'FIS']
    with open(outpath, 'w') as fw:
        fw.write('\t'.join(header) + '\n')
        for row in sorted(result, key=lambda x: float(x[4]), reverse=True):
            fw.write('\t'.join(row) + '\n')
        for row in noresult:
            fw.write('\t'.join(row) + '\n')

    SentEmail(email_add, outpath)

# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
