import pandas as pd

# Load the per-chromosome performance tables for RaMA and UniAligner.
rama_df = pd.read_csv('./csv/RaMA_performance.csv')
unialigner_df = pd.read_csv('./csv/UniAligner_performance.csv')

# The ratios below pair rows of the two tables, so refuse to continue unless
# both tables carry an identical 'chromosome' column.
chromosomes_match = rama_df['chromosome'].equals(unialigner_df['chromosome'])
if not chromosomes_match:
    raise ValueError("RaMA和UniAligner数据集中的染色体不匹配")

# Per-chromosome time ratios: UniAligner total time divided by RaMA time.
uni_total_time = unialigner_df['total_time']
rama_df['speed_ratio_rare_alignment'] = uni_total_time.div(rama_df['rare_alignment_time'])
rama_df['speed_ratio_total_time'] = uni_total_time.div(rama_df['total_time'])

# Per-chromosome memory ratios: RaMA memory divided by UniAligner total memory.
rama_df['memory_ratio_rare_alignment'] = rama_df['rare_alignment_memory'].div(
    unialigner_df['total_memory']
)
rama_df['memory_ratio_total_memory'] = rama_df['total_memory'].div(
    unialigner_df['total_memory']
)

# Time statistics: mean raw timings, then the mean of the per-chromosome ratios.
avg_rama_rare_alignment_time = rama_df['rare_alignment_time'].mean()
avg_rama_total_time = rama_df['total_time'].mean()
avg_unialigner_total_time = unialigner_df['total_time'].mean()
avg_speed_ratio_rare_alignment = rama_df['speed_ratio_rare_alignment'].mean()
avg_speed_ratio_total_time = rama_df['speed_ratio_total_time'].mean()

# Memory statistics: mean raw usage, then the mean of the per-chromosome ratios.
avg_rama_rare_alignment_memory = rama_df['rare_alignment_memory'].mean()
avg_rama_total_memory = rama_df['total_memory'].mean()
avg_unialigner_total_memory = unialigner_df['total_memory'].mean()
avg_memory_ratio_rare_alignment = rama_df['memory_ratio_rare_alignment'].mean()
avg_memory_ratio_total_memory = rama_df['memory_ratio_total_memory'].mean()

# Ratios of the means (as opposed to the means of the ratios computed above).
speed_ratio_rare_alignment = avg_unialigner_total_time / avg_rama_rare_alignment_time
speed_ratio_total_time = avg_unialigner_total_time / avg_rama_total_time
memory_ratio_rare_alignment = avg_rama_rare_alignment_memory / avg_unialigner_total_memory
memory_ratio_total_memory = avg_rama_total_memory / avg_unialigner_total_memory

# Report: raw averages first, then ratios of averages, then averaged ratios.
print(f"RaMA的rare alignment时间平均值: {avg_rama_rare_alignment_time}")
print(f"RaMA的总时间平均值: {avg_rama_total_time}")
print(f"UniAligner的总时间平均值: {avg_unialigner_total_time}")
print(f"RaMA的rare alignment内存平均值: {avg_rama_rare_alignment_memory}")
print(f"RaMA的总内存平均值: {avg_rama_total_memory}")
print(f"UniAligner的总内存平均值: {avg_unialigner_total_memory}")
print(f"UniAligner与RaMA rare alignment时间的加速比（基于平均值）: {speed_ratio_rare_alignment}")
print(f"UniAligner与RaMA总时间的加速比（基于平均值）: {speed_ratio_total_time}")
print(f"UniAligner与RaMA rare alignment内存的加速比（基于平均值）: {memory_ratio_rare_alignment}")
print(f"UniAligner与RaMA总内存的加速比（基于平均值）: {memory_ratio_total_memory}")
print(f"每个染色体的rare alignment时间加速比的平均值: {avg_speed_ratio_rare_alignment}")
print(f"每个染色体的总时间加速比的平均值: {avg_speed_ratio_total_time}")
print(f"每个染色体的rare alignment内存加速比的平均值: {avg_memory_ratio_rare_alignment}")
print(f"每个染色体的总内存加速比的平均值: {avg_memory_ratio_total_memory}")

# Summary table: each metric name is kept next to its value so the two output
# columns cannot drift out of step.
summary_rows = [
    ('Avg_RaMA_Rare_Alignment_Time', avg_rama_rare_alignment_time),
    ('Avg_RaMA_Total_Time', avg_rama_total_time),
    ('Avg_UniAligner_Total_Time', avg_unialigner_total_time),
    ('Speed_Ratio_Rare_Alignment', speed_ratio_rare_alignment),
    ('Speed_Ratio_Total_Time', speed_ratio_total_time),
    ('Avg_Speed_Ratio_Rare_Alignment_Per_Chromosome', avg_speed_ratio_rare_alignment),
    ('Avg_Speed_Ratio_Total_Time_Per_Chromosome', avg_speed_ratio_total_time),
    ('Avg_RaMA_Rare_Alignment_Memory', avg_rama_rare_alignment_memory),
    ('Avg_RaMA_Total_Memory', avg_rama_total_memory),
    ('Avg_UniAligner_Total_Memory', avg_unialigner_total_memory),
    ('Memory_Ratio_Rare_Alignment', memory_ratio_rare_alignment),
    ('Memory_Ratio_Total_Memory', memory_ratio_total_memory),
    ('Avg_Memory_Ratio_Rare_Alignment_Per_Chromosome', avg_memory_ratio_rare_alignment),
    ('Avg_Memory_Ratio_Total_Memory_Per_Chromosome', avg_memory_ratio_total_memory),
]
result_df = pd.DataFrame({
    'Metric': [metric for metric, _ in summary_rows],
    'Value': [value for _, value in summary_rows],
})

# Write the summary next to the input tables, without the index column.
result_df.to_csv('./csv/RaMA_UniAligner_speed_memory_comparison_with_chromosomes.csv', index=False)
