import numpy as np
import pandas as pd
import sys

def _binary_entropy(p):
    """Return -(p ln p + (1 - p) ln(1 - p)) elementwise, with 0 * ln 0 = 0."""
    p = np.asarray(p, dtype=float)
    q = 1.0 - p
    # np.where evaluates both branches, so silence the 0 * log(0) warnings;
    # the offending entries are replaced by the exact limit value 0.
    with np.errstate(divide="ignore", invalid="ignore"):
        p_term = np.where(p > 0, p * np.log(p), 0.0)
        q_term = np.where(q > 0, q * np.log(q), 0.0)
    return -(p_term + q_term)


def calculate_informativeness(frq_strat_file):
    """Compute a per-SNP informativeness value from a stratified frequency table.

    The table is read as whitespace-separated text with a header row and must
    provide the columns ``SNP``, ``MAF`` and ``NCHROBS``. Rows are grouped by
    ``SNP``; each row of a group is treated as one cluster with allele
    frequency ``MAF`` and weight ``NCHROBS / 2``.

    For every SNP the result is::

        In = H(p_pooled) - sum_k(w_k * H(p_k)) / sum_k(w_k)

    where ``H`` is the two-allele entropy in nats and ``p_pooled`` is the
    weight-averaged frequency. The value is 0 when all clusters share the same
    frequency and ``ln 2`` for two equally weighted clusters fixed for
    opposite alleles.

    Args:
        frq_strat_file: Path (or file-like object) accepted by
            ``pandas.read_csv``.

    Returns:
        A ``pandas.DataFrame`` with columns ``SNP`` and ``In``, one row per
        SNP in the sorted order produced by ``groupby``. ``In`` is NaN for a
        SNP with no usable cluster (no observations or missing frequency).
    """
    df = pd.read_csv(frq_strat_file, sep=r"\s+", header=0)

    results = []
    for snp, group in df.groupby("SNP"):
        n_k = group["NCHROBS"].to_numpy(dtype=float) / 2
        p_ik = group["MAF"].to_numpy(dtype=float)

        # Clusters without observations carry no weight, and a missing
        # frequency would otherwise turn the whole SNP into NaN.
        usable = (n_k > 0) & np.isfinite(p_ik)
        if not usable.any():
            results.append({"SNP": snp, "In": float("nan")})
            continue
        n_k = n_k[usable]
        p_ik = p_ik[usable]

        p_i = np.average(p_ik, weights=n_k)
        entropy_total = float(_binary_entropy(p_i))
        entropy_within = float(np.average(_binary_entropy(p_ik), weights=n_k))

        # Informativeness is the entropy lost by knowing the cluster:
        # pooled entropy minus the weighted mean within-cluster entropy.
        results.append({"SNP": snp, "In": entropy_total - entropy_within})

    # Explicit columns keep the schema stable even when no SNP was read.
    return pd.DataFrame(results, columns=["SNP", "In"])


def main():
    """Command-line entry point.

    Expects exactly one argument, the path of the stratified frequency file.
    Prints the resulting table and writes it as tab-separated text next to
    the input: a trailing ``.frq.strat`` is replaced by
    ``_informativeness.txt``; for any other file name the suffix is appended,
    so the input file is never overwritten.

    Exits with status 1 after printing usage when the argument count is wrong.
    """
    if len(sys.argv) != 2:
        print("usage: python calculate_informativeness.py <frq_strat_file>")
        print("example: python In.py Data.frq.strat")
        sys.exit(1)

    frq_strat_file = sys.argv[1]
    informativeness_df = calculate_informativeness(frq_strat_file)
    print(informativeness_df)

    # str.replace() would return the input path unchanged when the expected
    # extension is absent, and to_csv() would then clobber the input file.
    # Strip the extension only when it is really the trailing suffix.
    suffix = ".frq.strat"
    if frq_strat_file.endswith(suffix):
        stem = frq_strat_file[: -len(suffix)]
    else:
        stem = frq_strat_file
    output_file = stem + "_informativeness.txt"
    informativeness_df.to_csv(output_file, sep="\t", index=False)

# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
