function variant_seq_struct = CreateRandomGCVariants(seq,coordinates,n_each_bin)
%CREATERANDOMGCVARIANTS Create random GC-content variants of a sequence region.
%   variant_seq_struct = CreateRandomGCVariants(seq,coordinates,n_each_bin)
%
%   The region seq(coordinates(1):coordinates(2)) is replaced by randomly
%   mutated versions of itself, one group ("bin") for every possible number
%   of G/C nucleotides in the region (0 .. region length). The parts of seq
%   upstream and downstream of the region are copied unchanged.
%
%   Inputs:
%     seq         - nucleotide sequence (char row vector); it is converted
%                   to upper case before use.
%     coordinates - [first last], 1-based inclusive indices of the region
%                   to modify. Only the first two elements are used.
%     n_each_bin  - number of sequences to generate in each GC% bin.
%
%   Output:
%     variant_seq_struct.GC_bin(i).variants - cell array with n_each_bin
%                   full-length sequences generated for a target of i-1
%                   G/C nucleotides in the region.
%     variant_seq_struct.GC_bin(i).GC       - label of the bin, e.g. '50% GC'.
%
%   1.3.12 addition - make sure that all sequences have the same number of
%   mutations relative to the wild type sequence (the same
%   max_num_mutations is requested for every variant).
%
%   Errors:
%     CreateRandomGCVariants:badCoordinates - the region is reversed,
%     empty, or not fully contained in seq.

seq = upper(seq);

% A reversed or out-of-range region would otherwise either fail with a
% bare indexing error or silently produce overlapping flanks (and an
% output longer than the input), so reject it up front.
if numel(coordinates) < 2 || coordinates(1) < 1 || ...
        coordinates(2) > length(seq) || coordinates(1) > coordinates(2)
    error('CreateRandomGCVariants:badCoordinates', ...
        ['coordinates must satisfy 1 <= coordinates(1) <= coordinates(2) ' ...
        '<= length(seq) (= %d).'], length(seq));
end

region_start = coordinates(1);
region_end = coordinates(2);

upstream_seq = seq(1:(region_start-1));
downstream_seq = seq((region_end+1):end);

seq_2_modify = seq(region_start:region_end);
region_length = length(seq_2_modify);

% Calculate the maximal number of mutations: the largest number of
% positions that ever has to change GC state (reaching 0% or 100% GC).
wt_num_GC = length(regexp(seq_2_modify,'[GC]'));
max_num_mutations = max(wt_num_GC,region_length-wt_num_GC);

% Number of possible GC variants (bins): 0, 1, ..., region_length G/C.
num_of_bins = region_length + 1;

for i = 1 : num_of_bins

    target_num_GC = i-1;

    % Preallocate instead of growing the cell inside the inner loop.
    curr_variants = cell(1,n_each_bin);

    for j = 1 : n_each_bin
        mutated_region = CreateGCVariant(seq_2_modify,wt_num_GC,target_num_GC,max_num_mutations);
        curr_variants{j} = [upstream_seq mutated_region downstream_seq];
    end

    variant_seq_struct.GC_bin(i).variants = curr_variants;
    variant_seq_struct.GC_bin(i).GC = [num2str(100*((target_num_GC)./region_length)) '% GC'];

end


function mut_seq = CreateGCVariant(wt_seq,wt_num_GC,target_num_GC,max_num_mutations)
%CREATEGCVARIANT Mutate wt_seq so that it holds target_num_GC G/C nucleotides.
%   mut_seq = CreateGCVariant(wt_seq,wt_num_GC,target_num_GC,max_num_mutations)
%
%   Two kinds of point mutations are applied at randomly chosen positions:
%     * GC-changing mutations (A/T <-> G/C) at abs(wt_num_GC-target_num_GC)
%       positions, moving the GC count from wt_num_GC to target_num_GC;
%     * GC-preserving mutations (A<->T, G<->C) at
%       max_num_mutations - abs(wt_num_GC-target_num_GC) of the remaining
%       positions, so the total number of mutated positions equals
%       max_num_mutations whatever the target is.
%
%   Inputs:
%     wt_seq            - wild type sequence (upper-case A/C/G/T chars).
%     wt_num_GC         - number of G/C nucleotides in wt_seq.
%     target_num_GC     - requested number of G/C nucleotides in mut_seq.
%     max_num_mutations - total number of positions to mutate.
%
%   Output:
%     mut_seq - mutated copy of wt_seq (same length).
%
%   Errors:
%     CreateGCVariant:cannotReachTarget  - wt_seq has too few A/T (or G/C)
%                                          positions to reach the target.
%     CreateGCVariant:tooManyMutations   - not enough untouched positions
%                                          for the GC-preserving mutations.
%     CreateGCVariant:invalidNucleotide  - a position selected for a
%                                          GC-preserving mutation is not
%                                          one of A, C, G, T.

mut_seq = wt_seq;

num_mut_change = abs(wt_num_GC-target_num_GC);
num_mut_preserve = max_num_mutations - num_mut_change;

% Positions whose mutation moves the GC count in the required direction.
if target_num_GC > wt_num_GC
    possible_changing_loc = regexp(wt_seq,'[AT]');   % need more G/C
elseif target_num_GC < wt_num_GC
    possible_changing_loc = regexp(wt_seq,'[GC]');   % need fewer G/C
else
    possible_changing_loc = zeros(1,0);              % GC count already matches
end

% Without this check the indexing below fails with an uninformative
% "index exceeds" error (e.g. when wt_seq contains N or lower-case letters).
if num_mut_change > numel(possible_changing_loc)
    error('CreateGCVariant:cannotReachTarget', ...
        ['Cannot move from %d to %d G/C nucleotides: only %d suitable ' ...
        'positions found. Check that the sequence contains only A, C, G, T.'], ...
        wt_num_GC,target_num_GC,numel(possible_changing_loc));
end

possible_changing_loc = possible_changing_loc(randperm(length(possible_changing_loc)));
chosen_changing_loc = possible_changing_loc(1:num_mut_change);

% GC-preserving mutations may only hit positions that were not selected
% for a GC-changing mutation.
is_untouched = true(1,length(mut_seq));
is_untouched(chosen_changing_loc) = false;
possible_preserving_loc = find(is_untouched);

if num_mut_preserve > numel(possible_preserving_loc)
    error('CreateGCVariant:tooManyMutations', ...
        ['%d GC-preserving mutations requested but only %d positions ' ...
        'are available.'],num_mut_preserve,numel(possible_preserving_loc));
end

possible_preserving_loc = possible_preserving_loc(randperm(length(possible_preserving_loc)));
% A non-positive num_mut_preserve yields an empty selection here.
chosen_preserving_loc = possible_preserving_loc(1:num_mut_preserve);

% The preserving step has no defined partner for anything but A/C/G/T.
if any(~ismember(mut_seq(chosen_preserving_loc),'ACGT'))
    error('CreateGCVariant:invalidNucleotide', ...
        'The sequence contains characters other than A, C, G, T.');
end

% Mutate to change GC
for i = 1 : length(chosen_changing_loc)
    loc = chosen_changing_loc(i);
    mut_seq(loc) = MutNucChangeGC(mut_seq(loc));
end

% Mutate to preserve GC
for i = 1 : length(chosen_preserving_loc)
    loc = chosen_preserving_loc(i);
    mut_seq(loc) = MutNucPreserveGC(mut_seq(loc));
end


function new_nuc = MutNucChangeGC(old_nuc)
%MUTNUCCHANGEGC Replace a nucleotide by a random one of the opposite GC class.
%   new_nuc = MutNucChangeGC(old_nuc)
%
%   'A' or 'T' is replaced by 'G' or 'C' (picked at random with randi);
%   'G' or 'C' is replaced by 'A' or 'T' (picked at random with randi).
%
%   Input:
%     old_nuc - single upper-case nucleotide: 'A', 'C', 'G' or 'T'.
%
%   Output:
%     new_nuc - the replacement nucleotide.
%
%   Errors:
%     MutNucChangeGC:invalidNucleotide - old_nuc is not A, C, G or T.
%     (Without this branch the output would be left unassigned.)

AT = 'AT';
GC = 'GC';

switch old_nuc
    case {'A','T'}
        new_nuc = GC(randi([1 2]));
    case {'G','C'}
        new_nuc = AT(randi([1 2]));
    otherwise
        error('MutNucChangeGC:invalidNucleotide', ...
            'Expected one of A, C, G or T but got ''%s''.',old_nuc);
end

function new_nuc = MutNucPreserveGC(old_nuc)
%MUTNUCPRESERVEGC Replace a nucleotide by the other one of the same GC class.
%   new_nuc = MutNucPreserveGC(old_nuc)
%
%   The mapping is A -> T, T -> A, G -> C, C -> G, so the nucleotide
%   changes while the G/C count of the sequence stays the same.
%
%   Input:
%     old_nuc - single upper-case nucleotide: 'A', 'C', 'G' or 'T'.
%
%   Output:
%     new_nuc - the replacement nucleotide.
%
%   Errors:
%     MutNucPreserveGC:invalidNucleotide - old_nuc is not A, C, G or T.
%     (Without this branch the output would be left unassigned.)

switch old_nuc
    case {'A'}
        new_nuc = 'T';
    case {'T'}
        new_nuc = 'A';
    case {'G'}
        new_nuc = 'C';
    case {'C'}
        new_nuc = 'G';
    otherwise
        error('MutNucPreserveGC:invalidNucleotide', ...
            'Expected one of A, C, G or T but got ''%s''.',old_nuc);
end

