#ifndef __EXPBIN_H
#define __EXPBIN_H
#include <stdlib.h>
#include <stdio.h>

/* Relative weight of the pseudocounts used for dinucleotides, expressed
   as pseudocounts per real (observed) count: a value of 0.1 means
   0.1 pseudocounts per real count. */
#define PRIORWEIGHT 0.1

/* Number of bins.
   NOTE(review): presumably the length of the per-bin "breaks" arrays in
   Binning -- confirm against the implementation file. */
#define N_BINS 20 

/* State for sorting sequences into bins, either by their G+C count
   (mode 1) or by expected k-mer frequency (mode 2). Which group of
   mode-specific members is used depends on `mode`. */
typedef struct {
  /* ---- members used in every mode ---- */
  int mode;                  /* binning mode: 1 = by G+C, 2 = by expected k-mer frequency */

  char **seqs;               /* the sequences, as an array of strings */
  int n_seqs;                /* number of sequences held in seqs */

  int k;                     /* length of a k-mer */
  int n_kmers;               /* number of k-mers */

  /* ---- mode 1 only: binning by G+C ---- */
  unsigned short *gc;        /* G+C count of each sequence */
  unsigned short *breaks1;   /* upper G+C limit of each bin */
  unsigned short *bins1;     /* bin number assigned to each sequence */

  /* ---- mode 2 only: binning by expected k-mer frequency ---- */
  int **counts;              /* observed mono-/dinucleotide counts over all sequences */
  int tot[2];                /* total number of mono-/dinucleotides over all sequences */
  double **freqs;            /* mono-/dinucleotide frequencies over all sequences */
  float **f_exp;             /* expected k-mer frequencies for each sequence */
  float **breaks2;           /* upper expected-k-mer-frequency limit of each bin */
  unsigned short **bins2;    /* bin number for each (k-mer, sequence) pair */

} Binning;

/* Construction and teardown of a Binning.
   new_Binning takes the sequences, their number, the k-mer length and a
   fourth int.
   NOTE(review): the fourth argument is presumably the binning mode --
   confirm against the definition before naming it here. */
Binning *new_Binning(char **seqs, int n_seqs, int k, int);
void destroy_Binning(Binning *binning);

/* Bin definition, one routine per binning criterion. */
void define_bins_byExpFreq(Binning *binning);
void define_bins_byGC(Binning *binning);

/* Bin lookup: by number of G+C, by k-mer code plus expected frequency,
   or by k-mer code plus sequence index. */
int get_bin_for_gc(Binning *binning, int n_gc);
int get_bin_for_kmer_expfreq(Binning *binning, int kmer_code, float exp_freq);
int get_bin_for_seq(Binning *binning, int kmer_code, int seq_index);

/* Comparison callbacks used by qsort(). */
int compare_doubles(const void *a, const void *b);
int compare_floats(const void *a, const void *b);
int compare_unsigned_short_ints(const void *a, const void *b);

/* Counts k-mers of length k in seq; result is an array of length 16
   that receives the counts. */
int count_kmers(int k, char *seq, unsigned short *result);

/* Expected frequency of kmer, given mononucleotide and dinucleotide
   frequencies. */
float expected_freq(Binning *binning, char *kmer, double *mono_freqs, double *di_freqs);

#endif
