import pandas as pd
import sys

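## Two command line arguments are expected:
## 1. The patient identifier (patient1 ... patient6); it selects the input
##    directory and the cluster-to-subclone label mapping
## 2. The sample name; the input is read from
##    ../Genotype/<patient>/<sample>.assigned and the output is written to
##    <patient>_<sample>_barcode_assignments.tsv in the current directory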


# Per-patient mapping from the raw cluster labels found in the ".assigned"
# file to the subclone names written to the output table.
SUBCLONE_LABELS = {
    "patient1": {
        "UNASSIGN": "Unassigned",
        "C0": "SC1",
        "C3": "SC2",
        "C2": "SC3",
        "C1": "SC4",
        "C4": "SC5",
        "C5": "SC6",
    },
    "patient2": {
        "UNASSIGN": "Unassigned",
        "C0": "SC2",
        "C1": "SC4",
        "C2": "SC1",
        "C3": "SC3",
    },
    "patient3": {
        "UNASSIGN": "Unassigned",
        "C0": "SC2",
        "C1": "SC4",
        "C2": "SC3",
        "C3": "SC1",
        "C4": "SC5",
    },
    "patient4": {
        "UNASSIGN": "Unassigned",
        "C0": "SC3",
        "C1": "SC2",
        "C2": "SC1",
        "C3": "SC4",
        "C4": "SC5",
    },
    "patient5": {
        "UNASSIGN": "Unassigned",
        "C0": "SC5",
        "C1": "SC3",
        "C2": "SC1",
        "C3": "SC4",
        "C4": "SC2",
    },
    "patient6": {
        "UNASSIGN": "Unassigned",
        "C0": "SC3",
        "C1": "SC2",
        "C2": "SC4",
        "C3": "SC1",
        "C4": "SC5",
    },
}


def subclone_labels_for(patient: str) -> dict:
    """Return the cluster-label -> subclone-name mapping for *patient*.

    Raises:
        ValueError: if *patient* has no entry in ``SUBCLONE_LABELS``.
    """
    try:
        return SUBCLONE_LABELS[patient]
    except KeyError:
        known = ", ".join(sorted(SUBCLONE_LABELS))
        raise ValueError(
            f"Unknown patient {patient!r}; expected one of: {known}"
        ) from None


def relabel_assignments(df: pd.DataFrame, patient: str) -> pd.DataFrame:
    """Return a two-column barcode/subclone table for *patient*.

    Keeps only the first two columns of *df*, names them ``Cell_Barcode`` and
    ``Subclone_Assignment``, and rewrites the second column through the
    patient's label mapping. Values that are not keys of the mapping are left
    unchanged. The input frame is not modified.

    Raises:
        ValueError: if *patient* is unknown or *df* has fewer than two columns.
    """
    replacement_dict = subclone_labels_for(patient)
    if df.shape[1] < 2:
        raise ValueError(
            f"Expected at least 2 columns in the input table, got {df.shape[1]}"
        )

    # Copy so that renaming/relabelling never touches the caller's frame.
    out = df.iloc[:, :2].copy()
    out.columns = ["Cell_Barcode", "Subclone_Assignment"]
    out["Subclone_Assignment"] = out["Subclone_Assignment"].replace(replacement_dict)
    return out


def main(argv=None) -> None:
    """Relabel one sample's assignments and write them to a .tsv file.

    Args:
        argv: ``[patient, sample, ...]``; defaults to ``sys.argv[1:]``.
            Arguments beyond the first two are ignored.

    Reads ``../Genotype/<patient>/<sample>.assigned`` (tab-separated) and
    writes ``<patient>_<sample>_barcode_assignments.tsv`` to the current
    working directory. Exits with a message on missing arguments or an
    unknown patient.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <patient> <sample>")
    patient, sample = args[0], args[1]

    # Resolve the mapping first so an unknown patient fails before any I/O.
    try:
        replacement_dict = subclone_labels_for(patient)
    except ValueError as err:
        sys.exit(str(err))

    input_file = f"../Genotype/{patient}/{sample}.assigned"
    print("Reading input file:", input_file)
    df = pd.read_csv(input_file, sep='\t')

    print("Selecting the first two columns and renaming them")
    print("Mapping the values in the 'Subclone Assignment' column")
    print(replacement_dict)
    df = relabel_assignments(df, patient)

    output_file = f"{patient}_{sample}_barcode_assignments.tsv"
    print(f"Saving the updated dataframe to {output_file}")
    df.to_csv(output_file, sep='\t', index=False)


if __name__ == "__main__":
    main()