import os
import re
import pandas as pd

# Extract the run time and memory usage recorded in a log file
def extract_time_memory_from_log(log_file):
    """Parse the total run time and memory usage from a TandemAligner log.

    Only lines containing the completion message
    ``INFO: Thank you for using TandemAligner!`` are inspected. On such a
    line, the first ``H:M:S`` token is read as the elapsed time and the first
    ``<number>Mb`` / ``<number>Gb`` token as the memory usage. If several
    lines carry the completion message, the values from the last one win.

    Args:
        log_file: Path of the log file to read.

    Returns:
        A dict with two keys:
        ``"total_time"`` - elapsed time in seconds, and
        ``"total_memory"`` - memory in GB (``Mb`` values are divided by 1024).
        A value stays ``0`` when the corresponding token is not found, which
        includes logs that never reached the completion message.

    Raises:
        OSError: If the log file cannot be opened.
    """
    metrics = {
        "total_time": 0,
        "total_memory": 0,
    }

    completion_marker = "INFO: Thank you for using TandemAligner!"
    # Compile once, outside the per-line loop.
    time_pattern = re.compile(r"\d+:\d+:\d+")
    memory_pattern = re.compile(r"(\d+\.\d+|\d+)([MG]b)")

    # Stream the file line by line instead of loading it whole, and decode
    # leniently so a stray non-UTF-8 byte elsewhere in the log cannot abort
    # the parse; the tokens of interest are plain ASCII.
    with open(log_file, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            if completion_marker not in line:
                continue

            time_match = time_pattern.search(line)
            if time_match:
                hours, minutes, seconds = map(int, time_match.group(0).split(":"))
                metrics["total_time"] = hours * 3600 + minutes * 60 + seconds

            memory_match = memory_pattern.search(line)
            if memory_match:
                memory_value = float(memory_match.group(1))
                # The pattern only admits "Mb" or "Gb", so anything that is
                # not "Mb" is already expressed in GB.
                if memory_match.group(2) == "Mb":
                    memory_value /= 1024
                metrics["total_memory"] = memory_value

    return metrics

# Compute per-chromosome averages over repeated runs
def calculate_averages(log_dir, output_csv, num_runs=10):
    """Average time and memory per chromosome over repeated runs; write a CSV.

    For each chromosome (``chr1`` .. ``chr22`` and ``chrX``) the logs at
    ``<log_dir>/run_<k>/<chromosome>/tandem_aligner.log`` for
    ``k = 1 .. num_runs`` are parsed with ``extract_time_memory_from_log``.
    Runs whose log file does not exist are skipped; chromosomes with no log
    at all are left out of the output.

    Args:
        log_dir: Root directory containing the ``run_<k>`` sub-directories.
        output_csv: Path of the CSV file to write. Missing parent
            directories are created.
        num_runs: Number of repeated runs to look for (default 10).

    Returns:
        None. The result is written to ``output_csv`` and a confirmation
        line is printed.
    """
    chromosome_names = [f"chr{i}" for i in range(1, 23)] + ["chrX"]

    result_data = []

    for chromosome_name in chromosome_names:
        metrics_list = []

        for run in range(1, num_runs + 1):
            log_file = os.path.join(
                log_dir, f"run_{run}", chromosome_name, "tandem_aligner.log"
            )
            if os.path.exists(log_file):
                metrics_list.append(extract_time_memory_from_log(log_file))

        if not metrics_list:
            continue

        # Mean of every metric across the runs that produced a log.
        avg_metrics = {
            key: sum(m[key] for m in metrics_list) / len(metrics_list)
            for key in metrics_list[0]
        }
        avg_metrics["chromosome"] = chromosome_name
        result_data.append(avg_metrics)

    df = pd.DataFrame(result_data)

    # dirname() is "" when output_csv is a bare filename; os.makedirs("")
    # would raise, so only create a directory when there is one to create.
    output_folder = os.path.dirname(output_csv)
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    df.to_csv(output_csv, index=False)
    print(f"Results written to {output_csv}")

# Run the aggregation: read the UniAligner run logs under `log_dir` and
# write the per-chromosome averages to `output_csv`.
log_dir = "/mnt/d/Result/RaMA/exp/new_chm13_chm1/result/UniAligner"
output_csv = "/mnt/d/Result/RaMA/exp/new_chm13_chm1/csv/UniAligner_performance.csv"

calculate_averages(log_dir=log_dir, output_csv=output_csv)
