#include <algorithm>
#include <fstream>
#include <iostream>
#include <new>
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "bedfile.hh"
#include "exception.hh"

// Sentinel index marking the end of a chain: it is stored in empty hash
// buckets and in the next1/next2 field of an entry that has no successor
#define ENDOFLIST -1

// The id on top of every binary bed file (exactly 8 bytes; it is written
// and compared without a terminating NUL)
static const char* BED_ID = "nway.bed";

// The width in bytes of one cell, i.e. of every integer field of an entry
// and of every hash bucket
static const int BED_CW = sizeof(int);

using namespace std;

/**
 * Release a block of memory with free().
 *
 * @param p start of the block, or NULL, in which case nothing happens
 */
void BedFile::freemem(char* p) {
	// free() is defined to do nothing when handed a null pointer
	free(p);
}

/**
 * Allocate a chromosome hash table with chrhashsize buckets and set every
 * bucket to ENDOFLIST.
 *
 * The table is obtained from malloc() because all tables are released
 * through freemem(), which calls free(); memory that comes from new[] must
 * not be handed to free().
 *
 * @return the new table; release it with freemem()
 * @throws std::bad_alloc if chrhashsize is negative or the memory cannot
 *         be obtained
 */
int* BedFile::createChrHash() {
	if (chrhashsize < 0) {
		throw std::bad_alloc();
	}

	// Always request at least one cell, so that an empty table still yields
	// a valid pointer
	const size_t buckets = (size_t) chrhashsize;
	int* h = (int*) malloc((buckets > 0 ? buckets : 1) * sizeof(int));
	if (h == NULL) {
		throw std::bad_alloc();
	}

	for (size_t i = 0; i < buckets; i++) {
		h[i] = ENDOFLIST;
	}
	return h;
}

/**
 * Initialize values and create memory arrays based on chrhashsize,
 * chr1size, chr2size and entrycount.
 *
 * Any buffers held so far are released first. Afterwards both hash tables
 * are filled with ENDOFLIST, while the content of the entry buffer is
 * uninitialized.
 *
 * @throws std::bad_alloc if a size is negative, the entry buffer size does
 *         not fit into size_t, or memory cannot be obtained
 */
void BedFile::init() {
	// Forget every buffer as soon as it is released, so that an exception
	// thrown further down cannot leave a dangling pointer behind
	freemem((char*) chr1hash);
	chr1hash = NULL;
	freemem((char*) chr2hash);
	chr2hash = NULL;
	freemem((char*) entries);
	entries = NULL;

	// The entrysize is the sum of the sizes of: chr1 start1 end1 next1
	// chr2 start2 end2 next2 strand - it must be a multiple of 4!
	entrysize = (chr1size + BED_CW * 3) + (chr2size + BED_CW * 3) + BED_CW;

	// Calculate the field offsets
	chr1offset = 0;
	start1offset = chr1offset + chr1size;
	end1offset = start1offset + BED_CW;
	next1offset = end1offset + BED_CW;
	chr2offset = next1offset + BED_CW;
	start2offset = chr2offset + chr2size;
	end2offset = start2offset + BED_CW;
	next2offset = end2offset + BED_CW;
	strandoffset = next2offset + BED_CW;

	if (entrycount < 0 || entrysize < 0) {
		throw std::bad_alloc();
	}

	// Get the necessary memory
	chr1hash = createChrHash();
	chr2hash = createChrHash();

	// Compute the buffer size in size_t: the product of two ints overflows
	// for large files
	const size_t width = (size_t) entrysize;
	const size_t count = (size_t) entrycount;
	if (count > 0 && width > ((size_t) -1) / count) {
		throw std::bad_alloc();
	}
	const size_t bytes = width * count;

	entries = (char*) malloc(bytes);
	// malloc(0) may legitimately return NULL, so only a non-empty request
	// counts as a failure
	if (entries == NULL && bytes > 0) {
		throw std::bad_alloc();
	}
}

/**
 * Round a chromosome name length up to the next multiple of the cell
 * width BED_CW that is strictly greater than it.
 *
 * With a 4 byte cell, lengths 0..3 give 4, lengths 4..7 give 8 and so on;
 * for a non-negative length the result is therefore always at least one
 * byte larger than the length itself.
 *
 * @param chrsize the length to align
 * @return the aligned size in bytes
 */
int BedFile::alignchrsize(int chrsize) {
	const int fullcells = chrsize / BED_CW;
	return (fullcells + 1) * BED_CW;
}

void BedFile::createHashChains() {
	// The hashes to the last added elements
	int* chr1h = createChrHash();
	int* chr2h = createChrHash();

	for (int i = 0; i < entrycount; i++) {
		char* entry = getEntry(i);

		int h1 = chrhash(getChr1(entry));
		int h2 = chrhash(getChr2(entry));

		if (chr1hash[h1] == ENDOFLIST) {
			chr1hash[h1] = i;
		}
		if (chr2hash[h2] == ENDOFLIST) {
			chr2hash[h2] = i;
		}

		// Checking i1l<i and i2l<i should not be neccesary, but
		// without, it fails and there is an endless loop (chain)
		int i1l = chr1h[h1];
		if (i1l != ENDOFLIST && i1l < i) {
			setNext1(getEntry(i1l), i);
		}
		int i2l = chr2h[h2];
		if (i2l != ENDOFLIST && i2l < i) {
			setNext2(getEntry(i2l), i);
		}

		chr1h[h1] = i;
		chr2h[h2] = i;
	}

	freemem((char*) chr1h);
	freemem((char*) chr2h);
}

/**
 * Create a bed file object from a binary bed file.
 *
 * @param binfile path of the binary file to load
 * @throws NwayException if load() rejects the file
 */
BedFile::BedFile(char* binfile) {
	// Start from a well-defined empty state: init() releases the current
	// buffers before allocating new ones, so the pointers must not be
	// indeterminate when load() runs
	chr1hash = NULL;
	chr2hash = NULL;
	entries = NULL;
	chrhashsize = 0;
	chr1size = 0;
	chr2size = 0;
	entrycount = 0;
	entrysize = 0;

	try {
		load(binfile);
	} catch (...) {
		// The destructor does not run for an object whose constructor
		// throws, so release whatever has been allocated so far
		freemem((char*) chr1hash);
		freemem((char*) chr2hash);
		freemem((char*) entries);
		throw;
	}
}

/**
 * Create a bed file object from a textual bed file.
 *
 * @param bedfile path of the text file to load
 * @param chrhashsize number of buckets of the chromosome hash tables
 * @throws NwayException if loadBed() rejects the file
 */
BedFile::BedFile(char* bedfile, int chrhashsize) {
	// Start from a well-defined empty state: init() releases the current
	// buffers before allocating new ones, and loadBed() accumulates into
	// the size and count members
	chr1hash = NULL;
	chr2hash = NULL;
	entries = NULL;
	chr1size = 0;
	chr2size = 0;
	entrycount = 0;
	entrysize = 0;
	this->chrhashsize = chrhashsize;

	try {
		loadBed(bedfile);
	} catch (...) {
		// The destructor does not run for an object whose constructor
		// throws, so release whatever has been allocated so far
		freemem((char*) chr1hash);
		freemem((char*) chr2hash);
		freemem((char*) entries);
		throw;
	}
}

/**
 * Release both hash tables and the entry buffer.
 *
 * NOTE(review): copy construction/assignment is not visible in this file;
 * if the class is copyable, two objects would release the same buffers -
 * confirm against the header.
 */
BedFile::~BedFile() {
	char* const owned[] = { (char*) chr1hash, (char*) chr2hash, (char*) entries };
	const int count = (int) (sizeof(owned) / sizeof(owned[0]));

	for (int k = 0; k < count; k++) {
		freemem(owned[k]);
	}
}

/**
 * Load a binary bed file into memory.
 *
 * The file consists of the 8 byte id, the four header values chrhashsize,
 * chr1size, chr2size and entrycount, both hash tables and finally all
 * entries - the same layout that write() produces.
 *
 * If an exception is thrown after the header has been read, the members
 * of this object have already been overwritten and its content must not
 * be used.
 *
 * @param binfile path of the binary file
 * @throws NwayException if the file cannot be opened, does not start with
 *         the id, carries negative header values or is truncated
 */
void BedFile::load(char* binfile) {
	char message[128];

	std::ifstream in(binfile, std::ifstream::binary);

	if (!in.good()) {
		// snprintf truncates instead of overrunning the buffer on long paths
		snprintf(message, sizeof(message), "binfile %s cannot be read!", binfile);
		throw NwayException(message);
	}

	// Check the id; a file too short to contain it leaves the buffer
	// unfilled and is rejected before the comparison
	char id[8];
	in.read(id, sizeof(id));
	if (in.fail() || strncmp(id, BED_ID, sizeof(id)) != 0) {
		snprintf(message, sizeof(message), "invalid binary bedfile %s!", binfile);
		throw NwayException(message);
	}

	in.read((char*) &chrhashsize, (int) sizeof(chrhashsize));
	in.read((char*) &chr1size, (int) sizeof(chr1size));
	in.read((char*) &chr2size, (int) sizeof(chr2size));
	in.read((char*) &entrycount, (int) sizeof(entrycount));

	// Never size buffers from a header that is incomplete or nonsensical
	if (in.fail() || chrhashsize < 0 || chr1size < 0 || chr2size < 0
			|| entrycount < 0) {
		snprintf(message, sizeof(message), "invalid header in binary bedfile %s!", binfile);
		throw NwayException(message);
	}

	init();

	// The byte counts are widened before multiplying, as the product of two
	// ints overflows for large files
	in.read((char*) chr1hash, (std::streamsize) chrhashsize * BED_CW);
	in.read((char*) chr2hash, (std::streamsize) chrhashsize * BED_CW);
	in.read((char*) entries, (std::streamsize) entrysize * entrycount);

	// A short read sets failbit: the file ended before all data was there
	if (in.fail()) {
		snprintf(message, sizeof(message), "truncated binary bedfile %s!", binfile);
		throw NwayException(message);
	}

	in.close();
}

// Number of columns of a bed line: a leading column that is not stored,
// chr1 start1 end1, chr2 start2 end2 and the strand
static const int BED_COLUMNS = 8;

/**
 * Split a line in place into its first BED_COLUMNS columns. Columns are
 * separated by blanks or tabs; a carriage return counts as a separator
 * too, so lines ending in CR LF are handled like plain ones.
 *
 * @param line the line; separators in it are overwritten with NUL
 * @param columns receives pointers to the columns found
 * @return the number of columns found, 0 for a blank line
 */
static int splitBedLine(char* line, char* columns[]) {
	static const char* separators = " \t\r";

	int found = 0;
	for (char* token = strtok(line, separators);
			token != NULL && found < BED_COLUMNS;
			token = strtok(NULL, separators)) {
		columns[found++] = token;
	}
	return found;
}

/**
 * Load a bed file into memory.
 *
 * The file is read twice: the first pass counts the entries and finds the
 * longest chromosome names, which determine the size of the buffers; the
 * second pass fills the entries. Finally the hash chains are built. Blank
 * lines are skipped and lines may be arbitrarily long.
 *
 * @param bedfile path of the text file
 * @throws NwayException if the file cannot be opened or a non-blank line
 *         has fewer than 8 columns
 */
void BedFile::loadBed(char* bedfile) {
	char message[128];

	std::ifstream in(bedfile);

	if (!in.good()) {
		// snprintf truncates instead of overrunning the buffer on long paths
		snprintf(message, sizeof(message), "bedfile %s cannot be read!", bedfile);
		throw NwayException(message);
	}

	std::string line;
	char* columns[BED_COLUMNS];

	// The results of this pass must not depend on what the members held
	// before
	chr1size = 0;
	chr2size = 0;
	entrycount = 0;

	// Fill the chromosome sizes. Looping on the result of getline (instead
	// of on eof()) ends the loop on any stream failure.
	int lineno = 0;
	while (std::getline(in, line)) {
		lineno++;
		if (line.empty()) {
			continue;
		}

		const int found = splitBedLine(&line[0], columns);
		if (found == 0) {
			continue;
		}
		if (found < BED_COLUMNS) {
			snprintf(message, sizeof(message),
					"line %d of bedfile %s has too few columns!", lineno, bedfile);
			throw NwayException(message);
		}

		chr1size = std::max((int) strlen(columns[1]), chr1size);
		chr2size = std::max((int) strlen(columns[4]), chr2size);
		entrycount++;
	}

	chr1size = alignchrsize(chr1size);
	chr2size = alignchrsize(chr2size);

	// Rewind to beginning
	in.clear();
	in.seekg(0, in.beg);

	init();

	// Read data again; never store more entries than memory was allocated
	// for
	int index = 0;
	while (index < entrycount && std::getline(in, line)) {
		if (line.empty() || splitBedLine(&line[0], columns) < BED_COLUMNS) {
			continue;
		}

		char* entry = getEntry(index++);
		setChr1(entry, columns[1]);
		setStart1(entry, atoi(columns[2]));
		setEnd1(entry, atoi(columns[3]));
		setNext1(entry, ENDOFLIST);
		setChr2(entry, columns[4]);
		setStart2(entry, atoi(columns[5]));
		setEnd2(entry, atoi(columns[6]));
		setNext2(entry, ENDOFLIST);
		setStrand(entry, *columns[7]);
	}

	in.close();

	// Both passes normally see the same lines; should the second one have
	// delivered fewer, only the entries that were really filled are kept
	entrycount = index;

	createHashChains();
}

/**
 * Write the internal data into a file
 */
void BedFile::write(char* binfile) {
	ofstream out(binfile, std::ofstream::binary);

	out.write(BED_ID, 8);
	out.write((char*) &chrhashsize, (int) sizeof(chrhashsize));
	out.write((char*) &chr1size, (int) sizeof(chr1size));
	out.write((char*) &chr2size, (int) sizeof(chr2size));
	out.write((char*) &entrycount, (int) sizeof(entrycount));
	out.write((char*) chr1hash, chrhashsize * BED_CW);
	out.write((char*) chr2hash, chrhashsize * BED_CW);
	out.write((char*) entries, entrysize * entrycount);

	out.close();
}

