# Functions that convert a genotype table to various formats (mstruct, BayeScan 2.1)
genotable_to_mstruct= function(genotable,diploid=TRUE) {
# life will be easier if data are rounded and w/o NAs, shouldn't affect anything.
genotable = round(na.omit(genotable))
vars = apply(genotable,1,var)
genotable = genotable[vars>0,]
n_ind = ncol(genotable)
n_loc = nrow(genotable)

if (diploid) {
  ploidy=2
} else {
  ploidy=1
  }

# just printing, can redirect to file
cat(n_ind,'\n',sep='')
cat(n_loc,'\n',sep='')
cat(ploidy,'\n',sep='')

for (ind in colnames(genotable)) {
  cat(genotable[,ind])
  cat('\n')
  if (diploid) { # ath is diploid but homozygous!!!
    cat(genotable[,ind])
    cat('\n')
  }
}
}

# helper fn for next
give_highest = function(vec) {
  return(which(vec==max(vec)))
}

# take a structurish ancestry assignment file and use the 
# highest freqs to assign pops, then spit out populations accordingly
# in bayescan format.
pops_and_genos_to_bayescan = function(popfile, genofile, ploidy=2, threshold=70) {
  # first get pop assignments
  pops = read.table(popfile)
  assignments = apply(pops,1,give_highest)
  npops = ncol(pops)
  
  # next populate some important vars
  genos = read.table(genofile,header=T)
  strs = rownames(genos)
  # it over strs, collect shape of data
  str_tables = list()
  for(str in strs) {
    alleles = table(t(genos[str,]))
    
    # throw out empty strs
    if (length(alleles)==0 | sum(alleles) < threshold) {
      next
    }
    
    # make an alleles over individuals table counting alleles for each STR
    # later, can just pull rows from this table
    this_table = matrix(rep(0,ncol(genos) * length(alleles)),ncol=length(alleles))
    colnames(this_table) = names(alleles)
    rownames(this_table) = colnames(genos)
    for (ind in colnames(genos)) {
      allele = as.character(genos[str,ind])
      # ignore missing data
      if(!(is.na(allele))) {
        this_table[ind,allele] = ploidy
      }
    }
    str_tables[[str]] = this_table
  }
  # print out the basic info about the file
  
  cat('[loci]=', length(str_tables),'\n\n', sep='')
  cat('[populations]=',npops,'\n\n', sep='')
  
  # now finally go through pops and pull out relevant data, print to screen
  for (pop in 1:npops) {
    inds = which(assignments==pop)
    indices = c()
    cat('[pop]=',pop,'\n',sep='')
    for(table in 1:length(str_tables)) {
      allele_counts = colSums(str_tables[[table]][inds,,drop=FALSE])
      num_allele=length(allele_counts)
      this_pop = sum(allele_counts)
      cat(table, this_pop, num_allele, allele_counts,'\n', sep='\t')
      indices = rbind(indices,c(table,names(str_tables)[table]))
    }
  }
  write.table(indices,file='str_indices_for_bayescan.txt',quote=FALSE,row.names=FALSE,col.names=FALSE)
}