// Copyright (c) 2000-2003 Whitehead Institute for Biomedical Research
// 



#ifndef CHAR_TRANSLATIONS
#define CHAR_TRANSLATIONS

#ifndef IOSTRM
#define IOSTRM
#include <iostream>
#include <fstream>
#endif

#ifdef __DECCXX_VER
#include <string>
#else
#include <cstring>
#endif

#include "system/Assert.h"

// Complement lookup table, indexed by (ASCII) character code.
// 'A'/'a' -> 'T', 'C'/'c' -> 'G', 'G'/'g' -> 'C', 'T'/'t' -> 'A'; the result
// is always upper case. Every other entry, including 'N'/'n', is '-'.
// Callers must index with a value in 0..255 (e.g. cast to unsigned char).
const char complement[256] = {
  // codes 0-63: no bases
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',

  // codes 64-95: upper-case letters ('A'=65, 'C'=67, 'G'=71, 'T'=84)
  '-','T','-','G','-','-','-','C','-','-','-','-','-','-','-','-',
  '-','-','-','-','A','-','-','-','-','-','-','-','-','-','-','-',

  // codes 96-127: lower-case letters ('a'=97, 'c'=99, 'g'=103, 't'=116)
  '-','T','-','G','-','-','-','C','-','-','-','-','-','-','-','-',
  '-','-','-','-','A','-','-','-','-','-','-','-','-','-','-','-',
  // codes 128-255: no bases
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-'};

// Maps a numeric base code to its character:
// 0 -> 'A', 1 -> 'C', 2 -> 'G', 3 -> 'T', 4 -> 'N'.
// Every other index (5..255) yields '-'.
const char num2char[256] = {
  // indices 0-4 are the only meaningful entries
  'A','C','G','T','N','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-',
  '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-'};

// Maps an (ASCII) character code to a numeric base code, case-insensitively:
// 'A'/'a' -> 0, 'C'/'c' -> 1, 'G'/'g' -> 2, 'T'/'t' -> 3, 'N'/'n' -> 4.
// Every other character maps to 155.
// Callers must index with a value in 0..255 (e.g. cast to unsigned char).
const unsigned char char2num[256] = {
  // codes 0-63: no bases
  155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,
  155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,
  155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,
  155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,
  // codes 64-95: 'A'=65, 'C'=67, 'G'=71, 'N'=78, 'T'=84
  155, 0 ,155, 1 ,155,155,155, 2 ,155,155,155,155,155,155, 4 ,155,
  155,155,155,155, 3 ,155,155,155,155,155,155,155,155,155,155,155,
  // codes 96-127: 'a'=97, 'c'=99, 'g'=103, 'n'=110, 't'=116
  155, 0 ,155, 1 ,155,155,155, 2 ,155,155,155,155,155,155, 4 ,155,
  155,155,155,155, 3 ,155,155,155,155,155,155,155,155,155,155,155,
  // codes 128-255: no bases
  155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,
  155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,
  155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,
  155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,
  155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,
  155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,
  155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,
  155,155,155,155,155,155,155,155,155,155,155,155,155,155,155,155};

// Decodes one byte of the "simple" encoding into a NUL-terminated string of
// zero to three bases. With the digit order A=0, C=1, G=2, T=3, N=4 (the same
// numbering as num2char) and the first base as the most significant digit:
//   codes   0-124 : triplets, code = 25*b0 + 5*b1 + b2
//   codes 125-149 : pairs,    code = 125 + 5*b0 + b1
//   codes 150-154 : single bases, code = 150 + b0
//   codes 155-254 : unused, filled with the placeholder "foo"
//   code  255     : the empty string
const char char2string_simple[256][4] = {
  "AAA", "AAC", "AAG", "AAT", "AAN", "ACA", "ACC", "ACG", "ACT", "ACN", "AGA", "AGC", "AGG", "AGT", "AGN", "ATA",
  "ATC", "ATG", "ATT", "ATN", "ANA", "ANC", "ANG", "ANT", "ANN", "CAA", "CAC", "CAG", "CAT", "CAN", "CCA", "CCC",
  "CCG", "CCT", "CCN", "CGA", "CGC", "CGG", "CGT", "CGN", "CTA", "CTC", "CTG", "CTT", "CTN", "CNA", "CNC", "CNG",
  "CNT", "CNN", "GAA", "GAC", "GAG", "GAT", "GAN", "GCA", "GCC", "GCG", "GCT", "GCN", "GGA", "GGC", "GGG", "GGT",
  "GGN", "GTA", "GTC", "GTG", "GTT", "GTN", "GNA", "GNC", "GNG", "GNT", "GNN", "TAA", "TAC", "TAG", "TAT", "TAN",
  "TCA", "TCC", "TCG", "TCT", "TCN", "TGA", "TGC", "TGG", "TGT", "TGN", "TTA", "TTC", "TTG", "TTT", "TTN", "TNA",
  "TNC", "TNG", "TNT", "TNN", "NAA", "NAC", "NAG", "NAT", "NAN", "NCA", "NCC", "NCG", "NCT", "NCN", "NGA", "NGC",
  // the triplets end at code 124 ("NNN"); the pairs start at code 125
  "NGG", "NGT", "NGN", "NTA", "NTC", "NTG", "NTT", "NTN", "NNA", "NNC", "NNG", "NNT", "NNN", "AA\0", "AC\0", "AG\0",
  "AT\0", "AN\0", "CA\0", "CC\0", "CG\0", "CT\0", "CN\0", "GA\0", "GC\0", "GG\0", "GT\0", "GN\0", "TA\0", "TC\0", "TG\0", "TT\0",
  // the pairs end at code 149 ("NN"); single bases are 150-154; "foo" from 155 on
  "TN\0", "NA\0", "NC\0", "NG\0", "NT\0", "NN\0", "A\0\0", "C\0\0", "G\0\0", "T\0\0", "N\0\0", "foo", "foo", "foo", "foo", "foo",
  "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo",
  "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo",
  "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo",
  "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo",
  "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo",
  // code 255 decodes to the empty string
  "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "\0\0\0"
};

// Decodes one byte of the "compact" encoding into a NUL-terminated string of
// zero to three bases. Unlike the simple encoding, all N-free strings come
// first within each length group (digit order A=0, C=1, G=2, T=3):
//   codes   0-63  : N-free triplets, code = 16*b0 + 4*b1 + b2
//   codes  64-124 : triplets containing at least one N
//   codes 125-127 : unused ("foo")
//   codes 128-143 : N-free pairs, code = 128 + 4*b0 + b1
//   codes 144-152 : pairs containing at least one N
//   codes 153-191 : unused ("foo")
//   codes 192-196 : single bases A, C, G, T, N
//   codes 197-254 : unused ("foo")
//   code  255     : the empty string
const char char2string_compact[256][4] = {
  "AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT",
  "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT",
  "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT",
  "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT", // 64 valid triplets;
  
  // codes 64-79: N in the third position only; 80-95: second only; 96-111: first only;
  // codes 112-124: two or three N's; 125-127 unused
  "AAN", "ACN", "AGN", "ATN", "CAN", "CCN", "CGN", "CTN", "GAN", "GCN", "GGN", "GTN", "TAN", "TCN", "TGN", "TTN",
  "ANA", "ANC", "ANG", "ANT", "CNA", "CNC", "CNG", "CNT", "GNA", "GNC", "GNG", "GNT", "TNA", "TNC", "TNG", "TNT",
  "NAA", "NAC", "NAG", "NAT", "NCA", "NCC", "NCG", "NCT", "NGA", "NGC", "NGG", "NGT", "NTA", "NTC", "NTG", "NTT",
  "ANN", "CNN", "GNN", "TNN", "NAN", "NCN", "NGN", "NTN", "NNA", "NNC", "NNG", "NNT", "NNN", "foo", "foo", "foo",
  
  // codes 128-143
  "AA\0", "AC\0", "AG\0", "AT\0", "CA\0", "CC\0", "CG\0", "CT\0", "GA\0", "GC\0", "GG\0", "GT\0", "TA\0", "TC\0", "TG\0", "TT\0", // 16 valid pairs;
  
  // codes 144-152: pairs containing N; 153-191 unused
  "AN\0", "CN\0", "GN\0", "TN\0", "NA\0", "NC\0", "NG\0", "NT\0", "NN\0", "foo", "foo", "foo", "foo", "foo", "foo", "foo",
  "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo",
  "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo",
  
  // codes 192-196: single bases (N is stored too, at 196); 197-207 unused
  "A\0\0", "C\0\0", "G\0\0", "T\0\0", "N\0\0", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", // 4 valid single nucs;
  
  // codes 208-254 unused; code 255 decodes to the empty string
  "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo",
  "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo",
  "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "\0\0\0"
};

// Reserved code 255 (0xFF). It is the last entry of both simple2compact and
// compact2simple, and entry 255 of the char2string tables is the empty string.
const unsigned char EOCH = 0xFF;

// Translates a "simple" code into the "compact" code that decodes to the same
// string, i.e. char2string_compact[simple2compact[c]] equals
// char2string_simple[c] for every used simple code c (0..154).
// Unused simple codes (155..254) map to 125, an unused compact code whose
// string is "foo"; 255 maps to EOCH.
const unsigned char simple2compact[256] = {
  // simple codes 0-124: triplets
  0, 1, 2, 3, 64, 4, 5, 6, 7, 65, 8, 9, 10, 11, 66, 12,
  13, 14, 15, 67, 80, 81, 82, 83, 112, 16, 17, 18, 19, 68, 20, 21,
  22, 23, 69, 24, 25, 26, 27, 70, 28, 29, 30, 31, 71, 84, 85, 86,
  87, 113, 32, 33, 34, 35, 72, 36, 37, 38, 39, 73, 40, 41, 42, 43,
  74, 44, 45, 46, 47, 75, 88, 89, 90, 91, 114, 48, 49, 50, 51, 76,
  52, 53, 54, 55, 77, 56, 57, 58, 59, 78, 60, 61, 62, 63, 79, 92,
  93, 94, 95, 115, 96, 97, 98, 99, 116, 100, 101, 102, 103, 117, 104, 105,
  // simple codes 125-149: pairs (start at the 14th entry of the next row)
  106, 107, 118, 108, 109, 110, 111, 119, 120, 121, 122, 123, 124, 128, 129, 130,
  131, 144, 132, 133, 134, 135, 145, 136, 137, 138, 139, 146, 140, 141, 142, 143,
  // simple codes 150-154: single bases -> 192-196; unused codes -> 125
  147, 148, 149, 150, 151, 152, 192, 193, 194, 195, 196, 125, 125, 125, 125, 125,
  125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125,
  125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125,
  125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125,
  125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125,
  125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125,
  125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, EOCH
};



// Translates a "compact" code into the "simple" code that decodes to the same
// string, i.e. char2string_simple[compact2simple[c]] equals
// char2string_compact[c] for every used compact code c.
// Unused compact codes map to 155, an unused simple code whose string is
// "foo"; 255 maps to EOCH.
const unsigned char compact2simple[256] = {
  // compact codes 0-63: N-free triplets
  0, 1, 2, 3, 5, 6, 7, 8, 10, 11, 12, 13, 15, 16, 17, 18,
  25, 26, 27, 28, 30, 31, 32, 33, 35, 36, 37, 38, 40, 41, 42, 43,
  50, 51, 52, 53, 55, 56, 57, 58, 60, 61, 62, 63, 65, 66, 67, 68,
  75, 76, 77, 78, 80, 81, 82, 83, 85, 86, 87, 88, 90, 91, 92, 93,
  // compact codes 64-124: triplets containing N; 125-127 unused
  4, 9, 14, 19, 29, 34, 39, 44, 54, 59, 64, 69, 79, 84, 89, 94,
  20, 21, 22, 23, 45, 46, 47, 48, 70, 71, 72, 73, 95, 96, 97, 98,
  100, 101, 102, 103, 105, 106, 107, 108, 110, 111, 112, 113, 115, 116, 117, 118,
  24, 49, 74, 99, 104, 109, 114, 119, 120, 121, 122, 123, 124, 155, 155, 155,
  // compact codes 128-143: N-free pairs; 144-152: pairs containing N
  125, 126, 127, 128, 130, 131, 132, 133, 135, 136, 137, 138, 140, 141, 142, 143,
  129, 134, 139, 144, 145, 146, 147, 148, 149, 155, 155, 155, 155, 155, 155, 155,
  155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155,
  155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155,
  // compact codes 192-196: single bases
  150, 151, 152, 153, 154, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155,
  155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155,
  155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155,
  155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, EOCH
};



// Returns the base that follows t1 in the cyclic order A -> C -> G -> T -> A.
//
// t1 is looked up in char2num, advanced by one modulo 4, and translated back
// through num2char, so the result is always one of the upper-case characters
// 'A', 'C', 'G', 'T' (converted to T). Lower-case input is accepted.
// Because of the modulo, 'N'/'n' (code 4) yields 'C' and any non-base
// character (code 155) yields 'A'.
template <class T>
inline T NEXTBASE(const T& t1)
{
  // Index through unsigned char: where plain char is signed, a byte >= 0x80
  // would otherwise become a negative subscript and read outside char2num.
  const unsigned char code = char2num[static_cast<unsigned char>(t1)];
  return num2char[ (code + 1) % 4 ];
}


// NOTE(review): only the declarations are visible in this header; the
// descriptions below are inferred from names and signatures and must be
// confirmed against the definitions.

// Presumably the number of packed bytes needed to hold a text of length
// strl -- TODO confirm.
int carraySz(int strl);

// Presumably pack the NUL-terminated text ctext into carr using the simple /
// compact byte encoding; the meaning of the int result is not visible here.
int textToCharSimple  (const char *ctext, unsigned char *carr);
int textToCharCompact (const char *ctext, unsigned char *carr);

// Overloads taking the text length textl explicitly instead of relying on a
// terminator -- TODO confirm.
int textToCharSimple  (int textl, const char *ctext, unsigned char *carr);
int textToCharCompact (int textl, const char *ctext, unsigned char *carr);

// Presumably unpack the encoded bytes in carr into the text buffer ctext;
// the required size of ctext is not visible here -- verify against callers.
int charSimpleToText  (unsigned char *carr, char *ctext);
int charCompactToText (unsigned char *carr, char *ctext);

// Both take a mutable buffer, so they presumably rewrite ctext in place
// (upper-casing / reverse-complementing) -- TODO confirm.
int capitalization   (char *ctext);
int reverseComplement(char *ctext);


#endif



