import os
import sys

import pandas as pd

# Define the base path to the chr directory
base_path = './result/chr/'

# List of chromosome directories (chr1 to chr22, chrX)
chromosome_dirs = [f'chr{i}' for i in range(1, 23)] + ['chrX']

# Initialize dictionaries to store the results
match_length_sums = {}
anchor_counts = {}
coverage = {}

# Loop over each chromosome directory
for chromosome in chromosome_dirs:
    # Define the paths to the final_anchor.csv and RaMA.log files within the chromosome directory
    csv_path = os.path.join(base_path, chromosome, 'final_anchor.csv')
    log_path = os.path.join(base_path, chromosome, 'RaMA.log')
    
    # Check if both files exist (to avoid missing file errors)
    if os.path.exists(csv_path) and os.path.exists(log_path):
        # Read the CSV file
        df = pd.read_csv(csv_path)
        
        # Calculate the sum of MatchLength column
        match_length_sum = df['MatchLength'].sum()
        
        # Calculate the count of anchors (number of rows)
        anchor_count = len(df)
        
        # Read the RaMA.log file and extract the sequence length from the second line
        with open(log_path, 'r') as log_file:
            log_lines = log_file.readlines()
            # Extract the sequence length from the second line
            sequence_length = int(log_lines[1].split()[-1])
        
        # Store the results in the dictionaries
        match_length_sums[chromosome] = match_length_sum
        anchor_counts[chromosome] = anchor_count
        coverage[chromosome] = match_length_sum / sequence_length

# Convert the results into a DataFrame
match_length_df = pd.DataFrame({
    'Chromosome': list(match_length_sums.keys()),
    'Anchor Length Sum': list(match_length_sums.values()),
    'Anchor Count': list(anchor_counts.values()),
    'Coverage': list(coverage.values())
})

# Define the output CSV path
output_csv_path = './csv/chr_match_length_anchor_counts.csv'

# Save the DataFrame to CSV
match_length_df.to_csv(output_csv_path, index=False)

print("CSV file created successfully.")
