/*************************************************************************************************/
//            "Detecting Genetic Variation in Microarray Expression Data"               
//  Greenhall, JA, Zapala, MA, Caceres, M, Libiger, O, Schork, NJ, Barlow, C and Lockhart DJ.    
//                                       Journal Citation						 
//												 
//                   C++ program written by MA Zapala and O Libiger	 			 
/*************************************************************************************************/

//Assumption: all cell files have an equal number of probes for each probe set.

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

#define N			1100
#define NO_PROBES	100
#define PERC_POS		0.65

#define LIBFILE	"./lib.txt"

// Reads every cell file named in argv[2] .. argv[cellFiles+1] and folds its
// "x y mean" records into matrix[x][y][c], where c is the zero-based index of
// the cell file.
//
// For each record:
//   * odd y      : the value is added to matrix[x][y][c];
//   * even y > 0 : the value is subtracted from matrix[x][y-1][c] and also
//                  stored in matrix[x][y][c];
//   * y == 0     : the record is ignored.
//
// Parameters:
//   cellFiles - number of cell files to read.
//   argv      - program argument vector; file names start at argv[2].
//   matrix    - N x N x cellFiles array, already initialised by the caller.
//
// Returns the number of cell files that could be opened.
//
// Files that cannot be opened and records whose coordinates fall outside
// [0, N) are reported on stderr and skipped instead of being dereferenced.
int ReadCellFile(int cellFiles, char **argv, double ***matrix)
{
	int filesRead = 0;

	for (int c = 0; c < cellFiles; c++)
	{
		std::ifstream inFile(argv[c+2]);
		if (!inFile)
		{
			std::cerr << "Warning: cannot open cell file " << argv[c+2] << std::endl;
			continue;
		}

		int x = 0, y = 0;
		double mean = 0;
		long skipped = 0;

		// Drive the loop with the extraction itself: testing eof() before
		// reading would re-apply the last record once more after the final
		// (failed) read, and would never terminate on a malformed token.
		while (inFile >> x >> y >> mean)
		{
			if (x < 0 || x >= N || y < 0 || y >= N)
			{
				skipped++;
				continue;
			}

			if (y%2 != 0) matrix[x][y][c] += mean;
			else if (y > 0)
			{
				matrix[x][y-1][c] -= mean;
				matrix[x][y][c] = mean;
			}
		}

		if (skipped > 0)
			std::cerr << "Warning: skipped " << skipped << " out-of-range record(s) in "
			          << argv[c+2] << std::endl;

		filesRead++;
	}

	return filesRead;
}

// Debug helper: dumps the top-left corner of the matrix to stdout.
//
// Walks the even y values 0, 2, ..., 8 and, for each, x = 0 .. 9. Every
// (x, y) pair produces one line that starts with "x  y   " and then holds one
// field per cell file: "NaN " when matrix[x][y][c] is still -1, otherwise
// "x  y  <matrix[x][y+1][c]> ".
void PrintMatrix(int cellFiles, double  ***matrix)
{
	const int kLimit = 10;

	for (int yi = 0; yi < kLimit; yi += 2)
	{
		for (int xi = 0; xi < kLimit; ++xi)
		{
			std::cout << xi << "  " << yi << "   ";

			int file = 0;
			while (file < cellFiles)
			{
				if (matrix[xi][yi][file] == -1)
				{
					std::cout << "NaN" << " ";
				}
				else
				{
					// The even slot only acts as a flag; the value shown is the odd slot after it.
					std::cout << xi << "  " << yi << "  " << matrix[xi][yi + 1][file] << " ";
				}
				++file;
			}

			std::cout << std::endl;
		}
	}
}

// Reads the next "x y probeSet probe" record from the library file.
// Returns false once no further complete record can be extracted.
static bool ReadLibRecord(std::istream &in, int &x, int &y, std::string &probeSet, int &probe)
{
	return !(in >> x >> y >> probeSet >> probe).fail();
}

// Rescales the first n values of one cell file in place by 200 / sd, where sd
// is computed from the values that remain after leaving out the (first)
// largest and the (first) smallest one.
// NOTE(review): the divisors are n-2 and n-3, so for n <= 3 the factor is not
// a finite number; the arithmetic is deliberately kept as in the original.
static void ScaleProbeValues(std::vector<double> &values, int n)
{
	int maxIdx = 0;
	int minIdx = 0;
	double largest = -9999999;
	double smallest = 9999999;

	for (int i = 0; i < n; i++)
	{
		if (values[i] > largest)  { largest = values[i];  maxIdx = i; }
		if (values[i] < smallest) { smallest = values[i]; minIdx = i; }
	}

	double mean = 0;
	for (int i = 0; i < n; i++)
		if (i != maxIdx && i != minIdx) mean += values[i];
	mean /= (double)(n - 2);

	double sd = 0;
	for (int i = 0; i < n; i++)
		if (i != maxIdx && i != minIdx) sd += (values[i] - mean) * (values[i] - mean);
	sd /= (double)(n - 3);
	sd = sqrt(sd);

	const double factor = (double)200 / sd;
	for (int i = 0; i < n; i++) values[i] = values[i] * factor;
}

// Computes, for probe index i, the number of cell files in [begin, end) that
// still have more than i usable probes, together with the mean and the sample
// variance (divisor count-1 when count > 1) of their values.
static void GroupStats(const std::vector<std::vector<double> > &values,
                       const std::vector<int> &probeCount,
                       int begin, int end, int i,
                       int &count, double &mean, double &var)
{
	count = 0;
	mean = 0.0;
	var = 0.0;

	for (int c = begin; c < end; c++)
		if (probeCount[c] > i)
		{
			mean += values[c][i];
			count++;
		}
	if (count > 0) mean = mean / count;

	for (int c = begin; c < end; c++)
		if (probeCount[c] > i)
			var += (values[c][i] - mean) * (values[c][i] - mean);
	if (count > 1) var = var / (count - 1);
}

// Usage: <program> <firstGroupSize> <cellFile1> <cellFile2> [...]
//
// The first <firstGroupSize> cell files form group 1, the remaining ones form
// group 2. The probe layout is read from LIBFILE. For every probe set the
// values of each cell file are filtered (at least PERC_POS of them must be
// positive), rescaled, and then compared between the two groups probe by
// probe; one tab-separated line per probe is written to stdout.
//
// Returns EXIT_FAILURE on bad arguments or when LIBFILE cannot be opened,
// 0 otherwise.
int main(int argc, char **argv)
{
	if (argc < 4)
	{
		std::cerr << "Error: too few parameters!" << std::endl;
		return EXIT_FAILURE;
	}

	const int cellFiles = argc - 2;

	// Validate the group size instead of trusting atoi(): a negative or
	// oversized value would make one of the group sizes negative.
	char *endOfNumber = NULL;
	const long requestedFirst = strtol(argv[1], &endOfNumber, 10);
	if (endOfNumber == argv[1] || *endOfNumber != '\0' ||
	    requestedFirst < 0 || requestedFirst > cellFiles)
	{
		std::cerr << "Error: first group size must be an integer between 0 and "
		          << cellFiles << "!" << std::endl;
		return EXIT_FAILURE;
	}
	const int firstGroup = static_cast<int>(requestedFirst);
	const int secondGroup = cellFiles - firstGroup;

	// Matrix that holds the means, laid out as matrix[x][y][cellFile].
	// One contiguous buffer plus two pointer tables gives ReadCellFile the
	// double*** it expects without N*N separate allocations, and everything
	// is released automatically when main returns.
	const size_t cells = static_cast<size_t>(N) * N;
	std::vector<double> storage(cells * cellFiles);
	std::vector<double*> columns(cells);
	std::vector<double**> rows(N);
	for (int i = 0; i < N; i++)
	{
		rows[i] = &columns[static_cast<size_t>(i) * N];
		for (int j = 0; j < N; j++)
		{
			const size_t cell = static_cast<size_t>(i) * N + j;
			double *slot = &storage[cell * cellFiles];
			columns[cell] = slot;
			// Odd y slots accumulate values and start at 0; even slots start at -1.
			const double initial = (j%2 != 0) ? 0 : -1;
			for (int c = 0; c < cellFiles; c++) slot[c] = initial;
		}
	}
	double ***matrix = &rows[0];

	// Filling the matrix that holds the means
	ReadCellFile(cellFiles, argv, matrix);

	std::ifstream libFile(LIBFILE);
	if (!libFile)
	{
		std::cerr << "Error: cannot open " << LIBFILE << "!" << std::endl;
		return EXIT_FAILURE;
	}

	int x = 0, y = 0, probe = 0;
	std::string probeSet;
	bool haveRecord = ReadLibRecord(libFile, x, y, probeSet, probe);

	// Print the header to the output
	std::cout << "Probeset\t" << "Probepair\t" << "pspp\t" << "N1\t" << "Mean1\t" << "Var1\t"
	          << "N2\t" << "Mean2\t" << "Var2\t" << "T-value" << std::endl;

	// means[c][i]   : value of probe i of the current probe set in cell file c
	// probeCount[c] : number of usable probes of cell file c (0 = excluded)
	std::vector<std::vector<double> > means(cellFiles, std::vector<double>(NO_PROBES, 0.0));
	std::vector<int> probeCount(cellFiles, 0);
	long skipped = 0;

	while (haveRecord)
	{
		const std::string currentSet = probeSet;
		int count = 0;
		for (int c = 0; c < cellFiles; c++) means[c].assign(NO_PROBES, 0.0);

		// Collect all consecutive records that belong to the same probe set.
		// Only records with an odd y carry a value.
		do
		{
			if (y%2 != 0)
			{
				if (x >= 0 && x < N && y > 0 && y < N && probe >= 0 && probe < NO_PROBES)
				{
					for (int c = 0; c < cellFiles; c++) means[c][probe] = matrix[x][y][c];
					count++;
				}
				else skipped++;
			}
			haveRecord = ReadLibRecord(libFile, x, y, probeSet, probe);
		} while (haveRecord && probeSet == currentSet);

		// Repeated probe numbers could push the count past the buffer size.
		if (count > NO_PROBES) count = NO_PROBES;
		for (int c = 0; c < cellFiles; c++) probeCount[c] = count;

		// Check for negatives: a cell file with too few positive values is
		// excluded from this probe set by setting its probe count to 0.
		for (int c = 0; c < cellFiles; c++)
		{
			if (probeCount[c] <= 0) continue;
			int positives = 0;
			for (int i = 0; i < probeCount[c]; i++) if (means[c][i] > 0) positives++;
			if (positives / (double)probeCount[c] < PERC_POS) probeCount[c] = 0;
		}

		// Scaling the raw values
		for (int c = 0; c < cellFiles; c++)
			if (probeCount[c] > 0) ScaleProbeValues(means[c], probeCount[c]);

		// Calculate t-tests
		int maxNoProbes = probeCount[0];
		for (int c = 1; c < cellFiles; c++)
			if (probeCount[c] > maxNoProbes) maxNoProbes = probeCount[c];

		for (int i = 0; i < maxNoProbes; i++)
		{
			int count1 = 0, count2 = 0;
			double mean1 = 0.0, mean2 = 0.0;
			double var1 = 0.0, var2 = 0.0;

			GroupStats(means, probeCount, 0, firstGroup, i, count1, mean1, var1);
			GroupStats(means, probeCount, firstGroup, firstGroup + secondGroup, i, count2, mean2, var2);

			// Send data to output
			std::cout << currentSet << "\t" << i + 1 << "\t" << currentSet << i + 1 << "\t"
			          << count1 << "\t" << mean1 << "\t" << var1 << "\t"
			          << count2 << "\t" << mean2 << "\t" << var2 << "\t";

			if (count1 > 1 && count2 > 1)
			{
				double t = (mean1 - mean2) / sqrt(var1/count1 + var2/count2);
				if (t < 0) t *= -1;
				printf("%6.5f \n", t);
			}
			else std::cout << "NaN" << std::endl;
		}
	}

	if (skipped > 0)
		std::cerr << "Warning: skipped " << skipped << " out-of-range record(s) in "
		          << LIBFILE << std::endl;

	return 0;
}
