import sys
import gzip
import random
import os


# Minimum score for a CpG to count as methylated in a sample (inclusive).
# NOTE(review): 127 is presumably the midpoint of a 0-255 score scale, i.e.
# roughly a 50% methylation probability -- confirm against the tool that
# produces the score files.
METHYLATION_SCORE_THRESHOLD = 127

_STRANDS = ("+", "-")


def read_sample_paths(list_path):
    """Return the score-file paths listed one per line in *list_path*.

    Surrounding whitespace (including the line terminator) is stripped and
    blank lines are ignored, so they can neither leak into sample names nor
    be opened as an empty path.
    """
    with open(list_path, "r") as handle:
        stripped = (line.strip() for line in handle)
        return [path for path in stripped if path]


def sample_name(path):
    """Return the sample label for *path*: its base name up to the first dot."""
    return os.path.basename(path).split(".")[0]


def load_cpg_index(lines):
    """Build the CpG lookup table from the lines of the index file.

    The first line is treated as a header and skipped.  Every other non-blank
    line must hold exactly three whitespace-separated fields: node, position
    and strand.  Lines whose strand is neither "+" nor "-" register the node
    but no site.

    Returns:
        A dict mapping node -> {(position, strand): set()} where each
        (initially empty) set will collect the indices of the samples in
        which that site is methylated.  Keying on the (position, strand)
        pair keeps both strands of the same position distinct, including
        position 0, which a sign-based encoding cannot represent.

    Raises:
        ValueError: if a line does not have three fields or the position is
            not an integer.
    """
    rows = iter(lines)
    next(rows, None)  # discard the header line (no-op on an empty file)

    cpgs = {}
    for line in rows:
        fields = line.split()
        if not fields:
            continue
        node, pos, strand = fields
        pos = abs(int(pos))
        sites = cpgs.setdefault(node, {})
        if strand in _STRANDS:
            sites.setdefault((pos, strand), set())
    return cpgs


def mark_methylated(cpgs, sample_index, lines,
                    threshold=METHYLATION_SCORE_THRESHOLD):
    """Record *sample_index* on every indexed site methylated in one sample.

    Each non-blank line of *lines* is split on whitespace; column 0 is the
    node, column 1 the position, column 2 the strand and column 4 the score
    (column 3 is not used).  A site is marked when it is present in *cpgs*
    and its score is at least *threshold*.

    Raises:
        IndexError: if a non-blank line has fewer than five fields.
        ValueError: if the position or score cannot be parsed as a number.
    """
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        node = fields[0]
        pos = int(fields[1])
        strand = fields[2]
        score = float(fields[4])

        if strand not in _STRANDS:
            # Fall back to the sign of the position column for records that
            # carry no explicit "+"/"-" strand symbol.
            strand = "-" if pos < 0 else "+"

        if score >= threshold:
            hits = cpgs.get(node, {}).get((abs(pos), strand))
            if hits is not None:
                hits.add(sample_index)


def iter_matrix_rows(cpgs, n_samples):
    """Yield one CSV row per site that is methylated in at least one sample.

    A row is "node,pos,strand" followed by one 0/1 flag per sample index in
    range(n_samples).  Rows follow the order in which nodes and sites were
    first seen in the index.
    """
    for node, sites in cpgs.items():
        for (pos, strand), hits in sites.items():
            if not hits:
                continue
            flags = ["1" if i in hits else "0" for i in range(n_samples)]
            yield ",".join([node, str(pos), strand] + flags)


def main(argv):
    """Print the per-sample CpG methylation matrix as CSV on stdout.

    argv[1] is the gzip-compressed CpG index and argv[2] a text file listing
    one per-sample score file per line.
    """
    sample_paths = read_sample_paths(argv[2])
    print("node,pos,strand," + ",".join(sample_name(p) for p in sample_paths))

    with gzip.open(argv[1], "rt", encoding="utf-8") as index_file:
        cpgs = load_cpg_index(index_file)

    for sample_index, path in enumerate(sample_paths):
        with open(path, "r") as score_file:
            mark_methylated(cpgs, sample_index, score_file)

    for row in iter_matrix_rows(cpgs, len(sample_paths)):
        sys.stdout.write(row + "\n")


if __name__ == "__main__":
    main(sys.argv)
