#!/bin/bash
# Batch process GWAS files from T1 to T29: update the first column header to 'SNP', perform LD clumping,
# and merge the per-trait clumping results into a single tab-delimited file.

# ---- Inputs ----
# Prefix of the PLINK binary fileset handed to --bfile.
geno="/home/dell/cleandata/DEPLS_mapcall/DEPLSmolQTLmapping"
# Folder that holds the per-trait GWAS summary tables (gwas.T<i>.sig.region.txt).
gwas_dir="/home/dell/xiyang/DEPLS/CDLLATESTGWAS/file_for_coloc"

# ---- Output ----
# Merged table of all clumping results; relative path, so it is created in
# the current working directory.
combined_file="combined_clumped_results.txt"

# Start from a clean slate: discard a merged table left behind by an earlier run,
# because results are appended to it below.
[[ ! -f "${combined_file}" ]] || rm "${combined_file}"

# 1 while the merged table still lacks its header row; set to 0 once it is written.
first_result=1

#######################################
# Print the first line of a PLINK .clumped report as one tab-separated row
# with an extra trailing "GWAS" column.
# Default awk field splitting is used on purpose: it ignores leading and
# trailing blanks, so padded lines do not produce an empty first column.
# Arguments: $1 - path to the .clumped file
# Outputs:   the converted header row on stdout
#######################################
clumped_header_tsv() {
    awk 'BEGIN{OFS="\t"} NR==1 {$1=$1; print $0, "GWAS"; exit}' "$1"
}

#######################################
# Print every data line (line 2 onwards) of a PLINK .clumped report as a
# tab-separated row with the trait label appended as the last column.
# Blank lines are dropped so they do not end up as empty rows in the
# merged table.
# Arguments: $1 - path to the .clumped file
#            $2 - trait label to append (e.g. T7)
# Outputs:   the converted rows on stdout
#######################################
clumped_rows_tsv() {
    awk -v gwas="$2" 'BEGIN{OFS="\t"} NR>1 && NF {$1=$1; print $0, gwas}' "$1"
}

# Loop through files from T1 to T29
for i in {1..29}; do
    # Original GWAS file for this trait (e.g., gwas.T1.sig.region.txt)
    orig_file="${gwas_dir}/gwas.T${i}.sig.region.txt"

    if [[ ! -f "${orig_file}" ]]; then
        echo "File ${orig_file} not found, skipping..."
        continue
    fi

    updated_file="${gwas_dir}/gwas.T${i}.sig.region.updated.txt"

    # (Re)create the copy whose first header field is 'SNP' when it is missing
    # or older than its source. It is written to a temporary name and renamed
    # only on success, so an interrupted run never leaves a truncated file
    # that a later run would silently reuse.
    if [[ ! -f "${updated_file}" || "${orig_file}" -nt "${updated_file}" ]]; then
        echo "Creating updated file ${updated_file} with first column header changed to 'SNP'..."
        tmp_file="${updated_file}.tmp.$$"
        if ! { awk 'BEGIN{FS=OFS="\t"} NR==1{$1="SNP"} 1' "${orig_file}" > "${tmp_file}" \
               && mv -f -- "${tmp_file}" "${updated_file}"; }; then
            echo "Failed to create ${updated_file}, skipping..." >&2
            rm -f -- "${tmp_file}"
            continue
        fi
    else
        echo "Updated file ${updated_file} already exists, using it..."
    fi

    # PLINK output prefix and the report expected from --clump
    out_prefix="clumped_results_T${i}"
    clumped_file="${out_prefix}.clumped"

    # Remove a report left by a previous run; otherwise a failed or empty
    # clumping for this trait would merge stale results below.
    rm -f -- "${clumped_file}"

    echo "Processing ${updated_file} with PLINK..."

    if ! plink --bfile "${geno}" \
               --allow-extra-chr \
               --chr-set 34 \
               --clump "${updated_file}" \
               --clump-field p_value \
               --clump-p1 2.56478019218206E-09 \
               --clump-p2 1E-8 \
               --clump-r2 0.01 \
               --clump-kb 1000 \
               --out "${out_prefix}"; then
        echo "PLINK exited with an error for ${updated_file} (see ${out_prefix}.log)" >&2
    fi

    # No report means there is nothing to merge for this trait
    if [[ ! -f "${clumped_file}" ]]; then
        echo "Clumped result file ${clumped_file} not found, skipping merging..."
        continue
    fi

    # The first available report supplies the header row of the merged table
    if [[ "${first_result}" -eq 1 ]]; then
        clumped_header_tsv "${clumped_file}" > "${combined_file}"
        first_result=0
    fi

    # Append this trait's rows, tagged with its label
    clumped_rows_tsv "${clumped_file}" "T${i}" >> "${combined_file}"
done

# Closing status: announce completion and name the merged output file.
printf '%s\n' \
    "Batch analysis and merging completed" \
    "Combined result file: ${combined_file}"
