#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>

#include "include/dbtypes.h"


//Capacity, in bytes, of the stack buffers used for text input
#define SMALL_BUFF_SIZE 4000
//Number of index records requested per fread() call.
//Parenthesized so the product keeps its value when the macro is embedded in a
//larger expression (e.g. x / BUFF_SIZE or x % BUFF_SIZE)
#define BUFF_SIZE (14*5000000)


//Number of consecutive bases grouped into one window (overridable by argv[5])
int window_size=35;
//Number of GC-content bins (overridable by argv[6])
int num_bins=35;

/*
 * Print the one-line command synopsis to standard output.
 *
 * program_name: name to show as the command, normally argv[0].
 */
void usage(char* program_name) {
	fprintf(stdout,
		"%s fasta_filename repeats_filename index_filename contig_name window_size[35] num_bins[35]\n",
		program_name);
}

/*
 * Grow a heap buffer to twice its current capacity.
 *
 * sequence: heap buffer of *size bytes. Ownership passes to this function;
 *           the old pointer must not be used after the call (use the
 *           returned pointer instead).
 * size:     in/out capacity in bytes. Must be positive on entry and holds
 *           the doubled capacity on return.
 *
 * Returns the enlarged buffer; its first (old) *size bytes are preserved.
 * Prints a message and exits with status 1 when sequence is NULL, when the
 * doubled capacity would not fit in an int, or when the allocation fails.
 */
char* double_size(char * sequence, int * size) {
	assert(*size>0);
	//reject sizes whose double would overflow int before doing the arithmetic
	if (sequence==NULL || *size<=0 || *size>INT_MAX/2) {
		fprintf(stderr,"Failed to double size of sequence string");
		exit(1);
	}
	int doubled=*size*2;
	//realloc keeps the old contents and releases the old block, so nothing leaks
	char * grown=(char *)realloc(sequence,(size_t)doubled);
	if (grown==NULL) {
		fprintf(stderr,"Failed to double size of sequence string");
		exit(1);
	}
	*size=doubled;
	return grown;
}




/*
 * Read all sequence lines of a FASTA file into one NUL-terminated string.
 *
 * Lines that start with '>' are skipped; every other line is appended in file
 * order with its trailing newline removed. Nothing separates the records, so
 * a file with several records yields one concatenated string.
 *
 * Lines longer than the read buffer arrive in several chunks; the header
 * decision is made once per line, so the tail of a long header is skipped too.
 * A last line without a trailing newline is kept in full.
 *
 * filename: path of the FASTA file.
 *
 * Returns a heap-allocated string that the caller owns. Prints a message and
 * exits with status 1 if the file cannot be opened or read, or if memory
 * cannot be allocated.
 */
char* read_in_fasta(char* filename) {
	FILE *f = fopen(filename,"r");
	if (f==NULL) {
		fprintf(stderr,"Error opening file!\n");
		exit(1);
	}

	int current_size=100000;
	char* sequence=(char*)malloc(sizeof(char)*current_size);
	if (sequence==NULL) {
		fprintf(stderr,"Error to init sequence malloc\n");
		exit(1);
	}
	sequence[0]='\0';
	int current_used=1; //bytes in use, counting the terminating NUL

	char buffer[SMALL_BUFF_SIZE];
	int at_line_start=1; //the next chunk is the beginning of a line
	int in_header=0;     //the line being read started with '>'
	while (fgets(buffer,SMALL_BUFF_SIZE,f)!=NULL) {
		int len=(int)strlen(buffer);
		//only strip (and shorten by) a newline that is really there
		int line_ended=(len>0 && buffer[len-1]=='\n');
		if (line_ended) {
			len--;
			buffer[len]='\0';
		}
		if (at_line_start) {
			in_header=(buffer[0]=='>');
		}
		if (!in_header && len>0) {
			while (current_used+len>=current_size) {
				sequence=double_size(sequence,&current_size);
			}
			//overwrite the previous NUL and copy the chunk with its own NUL
			memcpy(sequence+current_used-1,buffer,len+1);
			current_used+=len;
		}
		at_line_start=line_ended;
	}
	if (ferror(f)) {
		fprintf(stderr,"Error reading file!\n");
		exit(1);
	}

	fclose(f);
	return sequence;
}


/*
 * Map the base counts of one window to a GC-content bin.
 *
 * gc_count: number of G/C bases counted in the window.
 * at_count: number of A/T bases counted in the window.
 *
 * Returns -1 when both counts are zero. Otherwise returns
 * floor(gc_count*num_bins/(gc_count+at_count)), with the 100% GC case folded
 * into the last bin, so the result is in 0..num_bins-1.
 *
 * The scale follows num_bins rather than a fixed 50 bins: the former
 * (percent/2) formula is identical to this one when num_bins is 50 but
 * produced out-of-range bins for GC-rich windows with fewer bins.
 *
 * Prints "Window error!" and exits with status 1 if the bin is out of range
 * (only possible with negative counts).
 */
int whats_my_bin(int gc_count,int at_count) {
	assert(num_bins>0 && window_size%num_bins==0);
	if (gc_count==0 && at_count==0) {
		return -1;
	}
	//widen before multiplying so large counts cannot overflow int
	long long total=(long long)at_count+gc_count;
	int bin=(int)(((long long)gc_count*num_bins)/total);
	if (bin==num_bins) {
		bin--; //a window that is 100% GC belongs to the top bin
	}
	if (bin>=num_bins || bin<0) {
		fprintf(stderr,"Window error!");
		exit(1);
	}
	return bin;
}

/*
 * Sum the normodds of every index record of one contig, per start position.
 *
 * index_filename:  path of a binary file of mapping_t_small records, or "-"
 *                  to read the records from stdin.
 * sequence_length: number of positions in the contig; length of the result.
 * contig_name:     only records whose contigname equals this are used.
 *
 * Each matching record adds its normodds to slot contigstart-1, i.e.
 * contigstart is used as a 1-based position.
 *
 * Returns a heap-allocated array of sequence_length doubles owned by the
 * caller. Prints a message and exits with status 1 on allocation failure,
 * when the index cannot be opened or read, or when a matching record has a
 * contigstart outside 1..sequence_length.
 */
double* compute_normodds(char* index_filename,int sequence_length, char* contig_name) {
	printf("STARTING TO READ INDEX\n");
	//Allocate the doubles array
	double *contig_normodds=(double*)malloc(sizeof(double)*sequence_length);
	if (contig_normodds==NULL) {
		fprintf(stderr,"Trouble allcoating contig_normodds\n");
		exit(1);
	}
	int j;
	//zero all the entries
	for (j=0; j<sequence_length; j++) {
		contig_normodds[j]=0.0;
	}

	FILE *f=NULL;
	if (strcmp(index_filename,"-")==0) {
		f=stdin;
	} else {
		f=fopen(index_filename,"rb");
		if (f==NULL) {
			fprintf(stderr,"Error opening index file\n");
			exit(1);
		}
	}
	//too large for the stack, so the record buffer lives on the heap
	mapping_t_small* buffer=(mapping_t_small*)malloc(sizeof(mapping_t_small)*BUFF_SIZE);
	if (buffer==NULL) {
		printf("OUT OF MEMORY!\n");
		exit(1);
	}

	size_t read_in;
	while((read_in=fread(buffer,sizeof(mapping_t_small), BUFF_SIZE,f))>0) {
		size_t i;
		for (i=0; i<read_in; i++) {
			if (strcmp(buffer[i].contigname,contig_name)==0) {
				uint64_t start=buffer[i].contigstart;
				//checked at run time: start==0 would wrap start-1 around and
				//an assert would vanish in NDEBUG builds
				if (start<1 || sequence_length<1 || start>(uint64_t)sequence_length) {
					fprintf(stderr,"Index position %llu is outside the contig (length %d)\n",
						(unsigned long long)start,sequence_length);
					exit(1);
				}
				contig_normodds[start-1]+=buffer[i].normodds;
			}
		}
	}
	if (ferror(f)) {
		fprintf(stderr,"Error reading index file\n");
		exit(1);
	}

	free(buffer);
	//stdin is not ours to close
	if (f!=stdin) {
		fclose(f);
	}

	return contig_normodds;
}

/*
 * Mark the positions listed in a repeats file in a mask array.
 *
 * The file is read as whitespace-separated records "name from to". The name
 * is skipped; from and to are inclusive 1-based positions, and
 * repeats_mask[p-1] is set to 1 for every position p in from..to. Position 0
 * has no slot in the mask and is ignored, so "name 0 3" marks positions 1..3.
 *
 * repeat_filename: path of the repeats file.
 * repeats_mask:    mask to update. Its length is not known to this function,
 *                  so the caller must make sure it covers the largest "to"
 *                  in the file.
 *
 * Prints a message and exits with status 1 if the file cannot be opened, a
 * record is incomplete or not numeric, from is negative, or to < from.
 */
void read_in_repeats(char* repeat_filename,short* repeats_mask) {
	FILE *f=fopen(repeat_filename,"r");
	if (f==NULL) {
		fprintf(stderr,"Failed to open repeats file");
		exit(1);
	}

	int to,from;
	int fields;
	//%*s skips the unused name without storing it, so a long token cannot
	//overflow anything; a complete record therefore assigns exactly 2 items
	while ((fields=fscanf(f,"%*s %d %d",&from,&to))==2) {
		if (from<0 || to<from) {
			fprintf(stderr,"Bad interval %d-%d in repeats file\n",from,to);
			exit(1);
		}
		//make the repeats 0 based; start at slot 0 when from is 0
		int slot;
		for (slot=(from<1 ? 0 : from-1); slot<to; slot++) {
			repeats_mask[slot]=1;
		}
	}
	if (fields!=EOF) {
		fprintf(stderr,"Malformed record in repeats file\n");
		exit(1);
	}
	fclose(f);
	return;
}

/*
 * Parse a strictly positive decimal integer from a command-line argument.
 * Prints a message naming the argument and exits with status 1 when the text
 * is not a number, has trailing characters, or is outside 1..INT_MAX.
 */
static int parse_positive_int(const char *text, const char *what) {
	char *end=NULL;
	errno=0;
	long value=strtol(text,&end,10);
	if (end==text || *end!='\0' || errno==ERANGE || value<=0 || value>INT_MAX) {
		fprintf(stderr,"Invalid %s '%s': expected a positive integer\n",what,text);
		exit(1);
	}
	return (int)value;
}

/*
 * Find the bin whose lambda stands in for `bin`: starting at `bin`, step
 * down (bins above the middle) or up (all others) past every bin whose
 * lambda is 0.0 or the -2.0 "empty" marker. Returns the first usable bin,
 * or -1 if the walk leaves the array without finding one.
 */
static int nearest_usable_bin(const double *lambdas, int bins, int bin) {
	int step=(bin>(bins/2)) ? -1 : 1;
	while (bin>=0 && bin<bins && (lambdas[bin]==0.0 || lambdas[bin]==-2.0)) {
		bin+=step;
	}
	if (bin<0 || bin>=bins) {
		return -1;
	}
	return bin;
}

/*
 * Entry point.
 *
 * Arguments: fasta_filename repeats_filename index_filename contig_name
 *            [window_size [num_bins]]
 *
 * Steps:
 *  1. read the sequence, the per-position normodds of the contig, and the
 *     repeat mask;
 *  2. cut the sequence into windows of window_size bases, assign each window
 *     a GC bin, and accumulate hits and length per bin from windows that
 *     contain no repeat or N position;
 *  3. compute lambda = hits/length per bin;
 *  4. write one double per position to "<contig_name>.gc": the lambda of the
 *     position's bin (or of the nearest usable bin), or -1.0 for positions
 *     that are masked.
 *
 * Returns 0 on success; exits with status 1 on any error.
 */
int main (int argc, char** argv) {
	if (argc!=5 && argc!=6 && argc!=7) {
		usage(argv[0]);
		exit(1);
	}
	if (argc>=6) {
		window_size=parse_positive_int(argv[5],"window_size");
		if (argc==7) {
			num_bins=parse_positive_int(argv[6],"num_bins");
		}
	}
	//checked after the arguments are applied, and at run time, so that bad
	//values are reported even when asserts are compiled out
	if (window_size<=0 || num_bins<=0 || window_size%num_bins!=0) {
		fprintf(stderr,"window_size (%d) must be a positive multiple of num_bins (%d)\n",window_size,num_bins);
		exit(1);
	}

	fprintf(stderr,"Using window_size %d, num_bins %d\n",window_size,num_bins);


	char* fasta_filename=argv[1];
	char* repeats_filename=argv[2];
	char* index_filename=argv[3];
	char* contig_name=argv[4];

	//READ IN THE FASTA FILE
	char* sequence=read_in_fasta(fasta_filename);
	size_t raw_length=strlen(sequence);
	if (raw_length>(size_t)INT_MAX) {
		fprintf(stderr,"Sequence is too long (%zu chars)\n",raw_length);
		exit(1);
	}
	int sequence_length=(int)raw_length;
	printf("Read in %d chars from %s\n",sequence_length,fasta_filename);

	//compute normodds for contig
	double* contig_normodds=compute_normodds(index_filename,sequence_length,contig_name);

	//READ IN THE REPEATS
	short * repeat_mask=(short*)malloc(sizeof(short)*sequence_length);
	if (repeat_mask==NULL) {
		fprintf(stderr,"Problem with allocating memory!\n");
		exit(1);
	}
	memset(repeat_mask,0, sizeof(short)*sequence_length);

	read_in_repeats(repeats_filename,repeat_mask);


	int i;

	//Allocate the bins (on the heap: num_bins comes from the command line)
	double *bins_hits=(double*)malloc(sizeof(double)*num_bins);
	double *bins_length=(double*)malloc(sizeof(double)*num_bins);
	double *bins_lambdas=(double*)malloc(sizeof(double)*num_bins);
	int *usable_bin=(int*)malloc(sizeof(int)*num_bins);
	if (bins_hits==NULL || bins_length==NULL || bins_lambdas==NULL || usable_bin==NULL) {
		fprintf(stderr,"Trouble allocating bins\n");
		exit(1);
	}
	for (i=0; i<num_bins; i++) {
		bins_hits[i]=0.0;
		bins_length[i]=0.0;
		bins_lambdas[i]=0.0;
		usable_bin[i]=-1;
	}

	//CALCULATE THE BINS
	int * positions_bin=(int*)malloc(sizeof(int)*sequence_length);
	if (positions_bin==NULL) {
		fprintf(stderr,"Trouble allocating positions_bin\n");
		exit(1);
	}
	for (i=0; i<sequence_length; i++) {
		positions_bin[i]=-1;
	}

	i=0;
	while(i<sequence_length) {
		//the last window may be shorter than window_size
		int left=sequence_length-i;
		if (left>window_size) {
			left=window_size;
		}
		int gc_content=0;
		int at_content=0;
		int repeats_inside=0;
		double window_arrivals=0.0;

		int j;
		for (j=0; j<left; j++) {
			char c=sequence[i+j];
			if (c=='c' || c=='C' || c=='g' || c=='G') {
				gc_content+=1;
			}
			if (c=='a' || c=='A' || c=='t' || c=='T') {
				at_content+=1;
			}
			if (repeat_mask[i+j] || c=='N' || c=='n') {
				repeats_inside++;
			}
		}

		int bin=whats_my_bin(gc_content,at_content);
		if (bin>=num_bins) {
			fprintf(stderr,"Bin %d is out of range for %d bins\n",bin,num_bins);
			exit(1);
		}
		for (j=0; j<left; j++) {
			char c=sequence[i+j];
#ifndef UNSAFE
			if (repeat_mask[i+j] || c=='N' || c=='n') {
				positions_bin[i+j]=-1;
			} else {
				positions_bin[i+j]=bin;
			}
#else
				positions_bin[i+j]=bin;
#endif
		}

		//bin is -1 for a window without any A/C/G/T; such a window has no
		//bin to contribute to, even when it holds no repeat or N
		if (repeats_inside==0 && bin>=0) {
			for (j=0; j<left; j++) {
				window_arrivals+=contig_normodds[i+j];
			}
			//add the information
			bins_hits[bin]+=window_arrivals;
			//NOTE(review): a short last window still counts as window_size
			//positions here; confirm whether `left` was intended
			bins_length[bin]+=window_size;
		}

		//same end state as adding window_size, without overflowing int
		i+=left;

	}

	//compute lambdas
	for (i=0; i<num_bins; i++) {
		if (bins_length[i]==0) {
			bins_lambdas[i]=-2.0;
		} else {
			bins_lambdas[i]=bins_hits[i]/bins_length[i];
		}
		printf("B:%d, %f, H: %f , L: %f\n",i,bins_lambdas[i],bins_hits[i],bins_length[i]);
	}

	//resolve the stand-in bin once per bin instead of once per position
	for (i=0; i<num_bins; i++) {
		usable_bin[i]=nearest_usable_bin(bins_lambdas,num_bins,i);
	}

	//calculate expected arrivals per bp
	double* position_lambda=(double*)malloc(sizeof(double)*(sequence_length));
	if (position_lambda==NULL) {
		fprintf(stderr,"Trouble allocating position_lambda\n");
		exit(1);
	}
	unsigned int repeats=0;
	unsigned int un_repeats=0;
	unsigned int shifted_positions=0;
	for(i=0; i<sequence_length; i++) {
		int bin=positions_bin[i];
		if (bin>=0) {
			un_repeats++;
			if (bins_lambdas[bin]==0.0 || bins_lambdas[bin]==-2.0) {
				shifted_positions++;
			}
			int source=usable_bin[bin];
			if (source<0) {
				fprintf(stderr,"No bin with a usable lambda can stand in for bin %d\n",bin);
				exit(1);
			}
			position_lambda[i]=bins_lambdas[source];
			assert(position_lambda[i]>0.0);
		} else {
			assert(bin==-1);
			repeats++;
			position_lambda[i]=-1.0;
		}
	}

	//write it out; the name buffer is sized from contig_name so it cannot overflow
	size_t name_size=strlen(contig_name)+sizeof(".gc");
	char *filename_out=(char*)malloc(name_size);
	if (filename_out==NULL) {
		fprintf(stderr,"Trouble allocating output filename\n");
		exit(1);
	}
	snprintf(filename_out,name_size,"%s.gc",contig_name);
	FILE *f = fopen(filename_out,"wb");
	if (f==NULL) {
		fprintf(stderr,"Trouble opening lambda_per_bp");
		exit(1);
	}
	size_t written=fwrite(position_lambda,sizeof(double),(size_t)sequence_length,f);
	if (written!=(size_t)sequence_length) {
		fprintf(stderr,"Short write to %s\n",filename_out);
		exit(1);
	}
	//fclose flushes, so a failure here means the file is incomplete
	if (fclose(f)!=0) {
		fprintf(stderr,"Trouble closing %s\n",filename_out);
		exit(1);
	}

	printf("Repeats: %u, Un-Repeats: %u\n",repeats, un_repeats);
	printf("Shifted positions: %u\n",shifted_positions);

	free(filename_out);
	free(position_lambda);
	free(positions_bin);
	free(usable_bin);
	free(bins_lambdas);
	free(bins_length);
	free(bins_hits);
	free(repeat_mask);
	free(contig_normodds);
	free(sequence);

	return 0;

}
