#!/usr/bin/env bash

# Run the script directly: ./Run_Mediation.sh

# File that collects one summary line per analysed triplet.
aggregate_file='aggregate_results.txt'
# Truncate it (creating it if absent) so every run starts from an empty file.
: > "${aggregate_file}"

# 'triplets' holds every (dep, ind, med) combination to analyse. Each element is
# one string of three space-separated fields: dependent variable, independent
# variable, mediator. Entries are appended in groups keyed by the suffix of the
# third field; overall order is preserved.
triplets=()

# *_liver
triplets+=(
    'Week10 4_44390424 COQ2_liver'
    'Week16 3_8099052 XM_027453326.2|ESF1|chr3|+_liver'
    'Week28 3_17472979 MSH2_liver'
)

# *_ovary
triplets+=(
    'Week2 3_83730307 LOC110353100_ovary'
    'Week2 3_82902910 UBE3D_ovary'
    'Week2 10_1058602 LOC110353173_ovary'
    'Week2 10_4585629 chr10:3719263:3719661:clu_8557_+_ovary'
    'Week16 3_8099756 NDUFAF5_ovary'
    'Week18 4_17219482 SLC7A2_ovary'
    'Week22 19_12742334 LOC119713219_ovary'
    'Week23 19_12232120 LOC119713219_ovary'
    'Week25 19_12742334 LOC119713219_ovary'
    'Week26 19_12194222 ADPRM_ovary'
    'Week29 19_12742334 LOC119713219_ovary'
)

# *_spleen
triplets+=(
    'Week2 2_46465844 DSG2_spleen'
    'Week2 5_37405951 SLC39A9_spleen'
    'Week2 10_1596887 TMEM255A_spleen'
    'Week2 10_6993683 XM_038184569.1|LOC119712977|chr10|-_spleen'
    'Week17 19_11807282 LOC101790890_spleen'
    'Week18 4_20038124 XM_005008984.5|PDLIM3|chr4|+_spleen'
    'Week19 8_22446164 chr8:22505840:22506729:clu_51304_-_spleen'
    'Week22 19_12842799 GLP2R_spleen'
    'Week23 19_12245235 GLP2R_spleen'
    'Week24 19_12842799 GLP2R_spleen'
    'Week24 19_11807282 LOC101790890_spleen'
    'Week25 19_12842799 GLP2R_spleen'
    'Week25 19_11807282 LOC101790890_spleen'
    'Week27 19_11807282 LOC101790890_spleen'
    'Week28 3_17526707 LCLAT1_spleen'
    'Week28 3_17858648 chr3:17495270:17495812:clu_36215_+_spleen'
)

# *_shell
triplets+=(
    'Week2 10_4585629 chr10:3719263:3719661:clu_8557_+_shell'
    'Week10 22_2412909 XM_027443409.2|ACOT7|chr22|-_shell'
    'Week22 19_11463219 LOC101790890_shell'
    'Week23 19_11949700 XM_027471250.2|DNAH9|chr19|-_shell'
    'Week24 19_11949700 XM_027471250.2|DNAH9|chr19|-_shell'
    'Week25 19_11949700 XM_027471250.2|DNAH9|chr19|-_shell'
    'Week27 19_11949700 XM_027471250.2|DNAH9|chr19|-_shell'
    'Week28 3_16731473 LOC101800576_shell'
    'Week28 19_11463219 LOC101790890_shell'
    'Week29 19_11463219 LOC101790890_shell'
)
# ... more can be added with further triplets+=( ... ) groups

# Run the mediation analysis for every (dep, ind, med) triplet and append one
# summary line per triplet to "$aggregate_file".

# Print four consecutive whitespace-separated fields, joined by commas, from
# every line of the R output whose label matches a pattern.
#   $1 - text printed by causal_mediation.R
#   $2 - extended regex for the row label (matched after optional leading whitespace)
#   $3 - 1-based index of the first field to print
# Prints nothing when no line matches.
extract_metric() {
    local output=$1 label=$2 first=$3
    printf '%s\n' "$output" \
        | grep -E "^[[:space:]]*${label}" \
        | awk -v first="$first" '{print $first","$(first+1)","$(first+2)","$(first+3)}'
}

# Invoke causal_mediation.R for one triplet and pass its stdout through.
#   $1 - dependent variable, $2 - independent variable, $3 - mediator
# DATA_INPUT.txt: Rows = Individuals; Columns = All variables (Independent, Dependent, Mediator)
# COV.txt: Rows = Individuals; Columns = All covariates
# Add "--normalize" or "--center" to normalize and center the variables if needed
run_mediation() {
    local dep=$1 ind=$2 med=$3
    # Each continuation backslash must be the very last character on its line.
    # Whitespace after it ends the command early, and the remaining options
    # would then be run as a separate (non-existent) command.
    Rscript causal_mediation.R \
        --dependent "$dep" \
        --independent "$ind" \
        --mediator "$med" \
        --data_file "DATA_INPUT.txt" \
        --cov_file "COV.txt" \
        --output_file "dummy" \
        --boot_sims 1000
}

# Analyse one triplet and print its one-line summary on stdout.
#   $1 - dependent variable, $2 - independent variable, $3 - mediator
# Warnings go to stderr. If any of the four metrics cannot be extracted, the
# summary is "<dep> <ind> <med>: Extraction Failed".
mediation_result_line() {
    local dep=$1 ind=$2 med=$3
    local result acme ade total prop

    # Declared separately above so the exit status of Rscript is not masked.
    if ! result=$(run_mediation "$dep" "$ind" "$med"); then
        echo "Warning: Rscript exited with a non-zero status for $dep $ind $med." >&2
    fi

    # For debugging: print the result content
    # echo "$result" >&2

    # Extract key metrics. Assumes each row is "<label> v1 v2 v3 v4"; one-word
    # labels put the first value in field 2, two-word labels in field 3.
    # Adjust the field numbers if the R output format differs.
    acme=$(extract_metric "$result" 'ACME' 2)
    ade=$(extract_metric "$result" 'ADE' 2)
    total=$(extract_metric "$result" 'Total Effect' 3)
    prop=$(extract_metric "$result" 'Prop\. Mediated' 3)

    # Mark extraction failure if any field is empty
    if [[ -z "$acme" || -z "$ade" || -z "$total" || -z "$prop" ]]; then
        echo "Warning: Extraction failed for $dep $ind $med. Check Rscript output." >&2
        printf '%s\n' "$dep $ind $med: Extraction Failed"
    else
        printf '%s\n' "$dep $ind $med: ACME($acme); ADE($ade); Total Effect($total); Prop. Mediated($prop)"
    fi
}

# Loop through each combination in the array
for triple in "${triplets[@]}"; do
    # Split into dep, ind, med; the trailing "_" absorbs any unexpected extra
    # fields so that med is always exactly the third field.
    read -r dep ind med _ <<< "$triple"

    result_line=$(mediation_result_line "$dep" "$ind" "$med")

    # Append the result line to the aggregate file
    printf '%s\n' "$result_line" >> "$aggregate_file"

    echo "Done: $dep / $ind / $med"
done

echo "All results aggregated in $aggregate_file"
