import os
import subprocess
import csv
import warnings

# Directories holding the per-chromosome FASTA files of the CHM13 and CHM1 assemblies.
chm13_dir = "/mnt/sda/CommonData/Human_Genomes/chm13/chm13_each"
chm1_dir = "/mnt/sda/CommonData/Human_Genomes/chm1/chm1_each"

# Disabled alternative input directories (the `*_cen` data set).
# NOTE(review): these two paths look swapped relative to the variable names
# and start with a space -- verify before re-enabling.
# chm13_dir = " /mnt/d/Result/RaMA/data/human_genome/chm1/chm1_cen"
# chm1_dir = " /mnt/d/Result/RaMA/data/human_genome/chm13/chm13_cen"

# FASTA file names to benchmark: chrX first, then chr1 through chr22.
chromosomes = ["chrX.fasta"]
chromosomes.extend(f"chr{number}.fasta" for number in range(1, 23))

# Root directory for all benchmark output.
output_base_dir = "./result/"


# Command builders. Each returns a shell command string that wraps the tool in
# `/usr/bin/time -v`; `r` and `q` are the two input FASTA paths.
def _rama_command(r, q, out_dir):
    """Build the RaMA command; results go to the directory `out_dir`."""
    return f"/usr/bin/time -v ./software/RaMA -r {r} -q {q} -o {out_dir} -t 32"


def _minimap2_command(r, q, out_file):
    """Build the minimap2 command; stdout is redirected to `out_file`."""
    return f"/usr/bin/time -v ./software/minimap2 -a {r} {q} > {out_file} -t 32"


def _wfmash_command(r, q, out_file):
    """Build the wfmash command; stdout is redirected to `out_file`."""
    return f"/usr/bin/time -v ./software/wfmash {r} {q} > {out_file} -t 32"


def _unialigner_command(r, q, out_dir):
    """Build the UniAligner (tandem_aligner) command; results go to `out_dir`."""
    return f"/usr/bin/time -v ./software/tandem_aligner --first {r} --second {q} -o {out_dir}"


# Maps a tool name to the builder that produces its command line.
software_commands = {
    "RaMA": _rama_command,
    "minimap2": _minimap2_command,
    "wfmash": _wfmash_command,
    "UniAligner": _unialigner_command,
}

def write_to_csv(software, chromosome, real_time, memory_usage):
    """Append one benchmark measurement to ./csv/benchmark_results.csv.

    Args:
        software: Name of the tool that was run.
        chromosome: Chromosome label written to the "Chromosome" column.
        real_time: Value for the "Real_Time" column.
        memory_usage: Value for the "Memory_Usage" column.

    The target directory is created when missing, and the header row is
    written whenever the file does not exist yet or is still empty.
    """
    csv_file = "./csv/benchmark_results.csv"

    # Without this the first write fails with FileNotFoundError and the
    # measurement of a finished run is lost.
    os.makedirs(os.path.dirname(csv_file), exist_ok=True)

    # An existing but empty file still needs its header.
    needs_header = not os.path.isfile(csv_file) or os.path.getsize(csv_file) == 0

    with open(csv_file, mode='a', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(["Software", "Chromosome", "Real_Time", "Memory_Usage"])
        writer.writerow([software, chromosome, real_time, memory_usage])

import re

def parse_time_output(output):
    """Extract wall-clock time and peak memory from `/usr/bin/time -v` output.

    Args:
        output: Text of the report produced by `/usr/bin/time -v`.

    Returns:
        A ``(real_time, memory_usage)`` tuple. ``real_time`` is the elapsed
        time in seconds; ``memory_usage`` is the "Maximum resident set size"
        value converted from KB to GB. Either element is ``None`` when its
        line is absent (or, for the time, when no time value is found).

    Raises:
        ValueError: If the "Maximum resident set size" value is not an integer.
    """
    real_time = None
    memory_usage = None

    # Accepted forms: h:mm:ss and m:ss (fractional seconds optional) and a
    # bare s.ss. GNU time omits the fraction once a run reaches one hour
    # ("1:02:03"), so requiring a decimal point would drop every long run.
    time_pattern = re.compile(
        r"(?:(?P<hours>\d+):)?(?P<minutes>\d+):(?P<seconds>\d+(?:\.\d+)?)"
        r"|(?P<bare>\d+\.\d+)"
    )

    for line in output.splitlines():
        if "Elapsed (wall clock) time" in line:
            match = time_pattern.search(line)
            if match:
                if match.group("bare") is not None:
                    real_time = float(match.group("bare"))
                else:
                    # The hours group is absent for the m:ss form.
                    hours = int(match.group("hours") or 0)
                    minutes = int(match.group("minutes"))
                    seconds = float(match.group("seconds"))
                    real_time = hours * 3600 + minutes * 60 + seconds

        if "Maximum resident set size" in line:
            # The value after the last colon is in KB; convert to GB.
            memory_usage_kb = int(line.split(":")[-1].strip())
            memory_usage = memory_usage_kb / (1024 * 1024)

    return real_time, memory_usage





def run_software(software, chromosome):
    """Run one aligner on one chromosome pair and record its time and memory.

    Args:
        software: Key into `software_commands` selecting the tool to run.
        chromosome: FASTA file name (e.g. "chr1.fasta"); the same name is
            looked up in both `chm13_dir` and `chm1_dir`.

    The command runs through the shell and its stderr is parsed with
    `parse_time_output`. A measurement is appended to the CSV only when the
    command exits with status 0 and both values were parsed. A failed run is
    left unrecorded so that it is not later mistaken for a completed one.
    """
    r = os.path.join(chm13_dir, chromosome)
    q = os.path.join(chm1_dir, chromosome)
    chrom_name = chromosome.split('.')[0]
    out_dir = os.path.join(output_base_dir, software, chrom_name)
    os.makedirs(out_dir, exist_ok=True)

    # minimap2 and wfmash write to stdout, so they get a file to redirect to;
    # the other tools take the output directory itself.
    if software == "minimap2":
        target = os.path.join(out_dir, "result.sam")
    elif software == "wfmash":
        target = os.path.join(out_dir, "result.paf")
    else:
        target = out_dir
    cmd = software_commands[software](r, q, target)

    print(f"Running {cmd}")
    # shell=True is required: some commands rely on `>` redirection.
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)

    if result.returncode != 0:
        # Report the failing exit code and the captured stderr, then stop.
        warnings.warn(f"Warning: {software} returned a non-zero exit code ({result.returncode}).")
        print(f"Error output: {result.stderr}")
        return

    real_time, memory_usage = parse_time_output(result.stderr)
    # Compare against None explicitly: 0.0 is a valid measurement.
    if real_time is not None and memory_usage is not None:
        write_to_csv(software, chrom_name, real_time, memory_usage)
        print(f"{software} for {chromosome} finished: Time {real_time}, Memory {memory_usage}")
    else:
        print(f"Failed to retrieve time/memory data for {software} on {chromosome}")

def already_processed(software, chromosome):
    """Report whether the results CSV already holds a row for this run.

    Used to resume an interrupted benchmark.

    Args:
        software: Tool name to look for in the first column.
        chromosome: Chromosome label to look for in the second column.

    Returns:
        True if some row starts with ``[software, chromosome]``; False when
        no such row exists or the CSV file is missing.
    """
    csv_file = "./csv/benchmark_results.csv"
    if not os.path.exists(csv_file):
        return False
    with open(csv_file, mode='r', newline='') as file:
        # Slicing instead of indexing keeps blank or truncated rows from
        # raising IndexError.
        return any(row[:2] == [software, chromosome] for row in csv.reader(file))

def run_all(software):
    """Run `software` on every entry of `chromosomes`, skipping finished ones.

    Args:
        software: Tool name passed to `already_processed` and `run_software`.
    """
    for fasta_name in chromosomes:
        # The CSV stores the label without the file extension.
        chrom_label = fasta_name.split('.')[0]
        if not already_processed(software, chrom_label):
            run_software(software, fasta_name)
        else:
            print(f"Skipping {fasta_name} for {software}, already processed.")

if __name__ == "__main__":
    # The four aligners are benchmarked as separate stages; only the
    # uncommented (banner, tool) entries are executed.
    stages = [
        ("Starting RaMA...", "RaMA"),
        # ("Starting minimap2...", "minimap2"),
        # ("Starting wfmash...", "wfmash"),
        # ("Starting UniAligner...", "UniAligner"),
    ]
    for banner, tool in stages:
        print(banner)
        run_all(tool)
