%% after DESeq2:
% genes that are differentially expressed significantly (FDR < 0.05, log2 fold change > 1)
%
% Reads the three DESeq2 result tables (sp, cis, trans) and stores each one in
% sigGenes.<type> as a (genes x experiments) matrix whose rows follow allOrfs.
% Genes that are absent from a table stay NaN.
% NOTE(review): the table entries are presumably 0/1 significance flags (they
% are summed as counts further down) - confirm against the R script.

genes_sp = readtable([homeDir, 'R_DESeq/DESeq_out/genes.sp.csv'], 'PreserveVariableNames', true);
genes_cis = readtable([homeDir, 'R_DESeq/DESeq_out/genes.cis.csv'], 'PreserveVariableNames', true);
genes_trans = readtable([homeDir, 'R_DESeq/DESeq_out/genes.trans.csv'], 'PreserveVariableNames', true);

% First column holds the ORF names, the remaining columns are experiments.
experimentName = genes_sp.Properties.VariableNames(2:end);
experiment_names = strrep(experimentName, '_', ' ');   % labels used for plotting
Nexps = length(experimentName);

orfs = genes_sp.Var1;
idxOrf = find(ismember(allOrfs, orfs));   % kept for any later code that still uses it

nGenes = 6701;   % number of rows of the gene-indexed matrices used throughout
types = {'sp', 'cis', 'trans'};
deseqTables = {genes_sp, genes_cis, genes_trans};

sigGenes = struct;
for i = 1:length(types)
    currTable = deseqTables{i};
    % Map every table row to its own position in allOrfs. Indexing with
    % find(ismember(allOrfs, orfs)) would only be correct if the table rows
    % happened to be sorted exactly like allOrfs, and if all three tables
    % shared the same row order.
    [inAllOrfs, rowInAllOrfs] = ismember(currTable{:, 1}, allOrfs);
    if ~all(inAllOrfs)
        warning('%d rows of the %s table are not in allOrfs and were skipped', ...
            sum(~inAllOrfs), types{i});
    end
    sigGenes.(types{i}) = nan(nGenes, Nexps);
    sigGenes.(types{i})(rowInAllOrfs(inAllOrfs), :) = table2array(currTable(inAllOrfs, 2:end));
end

% genes that change between batches: filter out of diff exp genes
batchEffectGenes = readcell([homeDir, 'revision/sup_notes/batchEffectGenes.xlsx']);
batchEffectGenes_idx = find(ismember(allGenes, batchEffectGenes));
for i = 1:length(types)
    sigGenes.(types{i})(batchEffectGenes_idx, :) = nan;
end
%% plot # differentially-expressed genes per experiment
% Grouped bar chart: one group per experiment, one bar per effect type.
% Rows of the count matrix are sp (shown as 'overall'), cis and trans.
nSigPerExperiment = [sum(sigGenes.sp, 'omitnan'); ...
                     sum(sigGenes.cis, 'omitnan'); ...
                     sum(sigGenes.trans, 'omitnan')];
figure;
bar(nSigPerExperiment');
set(gca, 'xtick', 1:length(experiment_names), 'xticklabels', experiment_names);
xtickangle(45);
ylabel('# differentially-expressed genes');
legend({'overall', 'cis', 'trans'});
set(gca, 'fontsize', fs);
set(gcf, 'color', 'w');
%%
% For each effect type, histogram of the number of experiments in which a
% gene is significant. Genes that are significant in no experiment are left out.
% NOTE: the x ticks (1:6) and the y limit (900) are fixed values.

titles = {'overall', 'cis', 'trans'};
figure;
for i = 1:3
    subplot(1,3,i);
    nExpsPerGene = sum(sigGenes.(types{i}), 2, 'omitnan');
    nExpsPerGene = nExpsPerGene(nExpsPerGene >= 1);   % keep genes hit at least once
    histogram(nExpsPerGene);
    title(titles{i});
    xlabel('# experiments');
    ylabel('# differentially-expressed genes');
    ylim([0 900]);
    set(gca, 'xtick', 1:6, 'fontsize', fs);
end
set(gcf, 'color', 'w');
%% cis/trans per comparable sample
% Work on a copy of expStruct; nC is the number of columns (samples) in the
% cer expression matrix.
currExpStruct = expStruct;
nC = size(currExpStruct.cer.exp, 2);

% Sample-name table; underscores are replaced by spaces in every name.
comparable_samples = strrep( ...
    table2array(readtable([homeDir, 'Screen/read_organize_data/comparable_samples.xlsx'])), ...
    '_', ' ');

% Per-gene, per-sample differences: cer vs par, and hyc vs hyp.
cis = currExpStruct.hyc.exp - currExpStruct.hyp.exp;
sp = currExpStruct.cer.exp - currExpStruct.par.exp;

% cis: cross replicates
% Builds cis_switched, the hyc - hyp difference taken from the replicates
% listed in cis_switched_replicates.xlsx, and from it trans = sp - cis_switched.
cis_switched_replicates = readtable([homeDir, 'gene_gene_corr/cistrans_cross_replicates/cis_switched_replicates.xlsx'], 'ReadVariableNames', false);
cis_switched_replicates = cis_switched_replicates.Var1;
cis_switched_replicates = strrep(cis_switched_replicates, '_', ' ');
cis_switched_replicates_hyb = strrep(cis_switched_replicates, 'hyc', 'hyb');
cis_switched_replicates_hyp = strrep(cis_switched_replicates, 'hyc', 'hyp');

% Samples whose listed replicate is the sample itself (column 3 of
% comparable_samples equals the 'hyb' label). The positions are taken from
% the same comparison that builds the name list, so no_switching_idx(k)
% always refers to no_switching{k}, even if a label occurs more than once.
isNoSwitch = strcmp(comparable_samples(1:nC, 3), cis_switched_replicates_hyb(1:nC));
no_switching = cis_switched_replicates_hyb(isNoSwitch);
no_switching_idx = find(isNoSwitch);

% NOTE(review): ismember_smart is defined elsewhere; this assumes it returns
% the matching columns in the order of its second argument - confirm.
exp_hyc_switched = dataS.WCDhyc(:, ismember_smart(dataLabels_newname.WCDhyc, cis_switched_replicates));
exp_hyp_switched = dataS.WCDhyp(:, ismember_smart(dataLabels_newname.WCDhyp, cis_switched_replicates_hyp));
cis_switched = exp_hyc_switched - exp_hyp_switched;

% samples that don't have repeats: take the median cis effect of that
% time point (over all comparable samples of the time point)
medianCis = nan(size(cis, 1), 3);   % one column per time point (TP1..TP3)
tp1 = find(contains(comparable_samples(:, 3), 'TP1'));
tp2 = find(contains(comparable_samples(:, 3), 'TP2'));
tp3 = find(contains(comparable_samples(:, 3), 'TP3'));
medianCis(:, 1) = median(cis(:, tp1), 2, 'omitnan');
medianCis(:, 2) = median(cis(:, tp2), 2, 'omitnan');
medianCis(:, 3) = median(cis(:, tp3), 2, 'omitnan');

% Positions within no_switching, by time point
no_switching_tp1 = find(contains(no_switching, 'TP1'));
no_switching_tp2 = find(contains(no_switching, 'TP2'));
no_switching_tp3 = find(contains(no_switching, 'TP3'));
cis_switched(:, no_switching_idx(no_switching_tp1)) = repmat(medianCis(:, 1), 1, length(no_switching_tp1));
cis_switched(:, no_switching_idx(no_switching_tp2)) = repmat(medianCis(:, 2), 1, length(no_switching_tp2));
cis_switched(:, no_switching_idx(no_switching_tp3)) = repmat(medianCis(:, 3), 1, length(no_switching_tp3));

trans = sp - cis_switched;

% Collect the three per-sample effect matrices in one struct.
comparable_samples_de = struct('sp', sp, 'cis', cis, 'trans', trans);

% Correlation (and its p-value) between the cis and trans columns of each
% sample; rows containing NaN are ignored pairwise.
corr_cis_trans = nan(nC, 1);
corr_cis_trans_p = nan(nC, 1);
for i = 1:nC
    [rho, pval] = corr(cis(:, i), trans(:, i), 'rows', 'pairwise');
    corr_cis_trans(i) = rho;
    corr_cis_trans_p(i) = pval;
end
%% plot correlation cis, trans
% Histogram of the per-sample cis/trans correlation coefficients.
figure;
histogram(corr_cis_trans);
xlabel('correlation cis, trans');
ylabel('# comparable samples');
set(gcf, 'color', 'w');
set(gca, 'fontsize', fs);
axis square;
%%
% Adjust the correlation p-values with fdr_bh and print how many adjusted
% p-values are below 0.01 (no semicolon on purpose: the count is displayed).
[h, crit_p, adj_ci_cvrg, adj_p] = fdr_bh(corr_cis_trans_p);
nnz(adj_p < 0.01)

%%
% Correlations in ascending order; samples below the threshold th are
% labelled with their name (column 3 of comparable_samples).
th = -0.4;
[s, sortdidx] = sort(corr_cis_trans);
lowcorr = find(s < th);
figure;
scatter(1:length(s), s);
lowcorrNames = comparable_samples(sortdidx(lowcorr), 3);
text(1:numel(lowcorr), s(lowcorr), lowcorrNames);

%%
% Per-gene median effect over samples (NaNs ignored); columns: sp, cis, trans.
medianEffect = [median(comparable_samples_de.sp, 2, 'omitnan'), ...
                median(comparable_samples_de.cis, 2, 'omitnan'), ...
                median(comparable_samples_de.trans, 2, 'omitnan')];

% Output struct, extended with the gene lists further down.
DE = struct;
DE.comparable_samples_de = comparable_samples_de;
DE.medianEffect = medianEffect;

%% significant genes in at least one experiment
N=1;   % minimal number of experiments in which a gene must be significant

% Number of NaN entries per gene in the cer expression matrix.
nanGenes = sum(isnan(expStruct.cer.exp), 2);

% Row indices of genes significant in >= N experiments, keeping only genes
% without any NaN in the cer expression matrix.
spGenes = find(sum(sigGenes.sp, 2, 'omitnan') >= N);
spGenes = spGenes(nanGenes(spGenes) == 0);
cisGenes = find(sum(sigGenes.cis, 2, 'omitnan') >= N);
cisGenes = cisGenes(nanGenes(cisGenes) == 0);
transGenes = find(sum(sigGenes.trans, 2, 'omitnan') >= N);
transGenes = transGenes(nanGenes(transGenes) == 0);

disp({sprintf('# sp genes = %d', length(spGenes)), ...
    sprintf('# cis genes = %d', length(cisGenes)), ...
    sprintf('# trans genes = %d', length(transGenes))});

% Store the gene lists and write every field of DE as a variable of DE.mat
% (in the current folder).
DE.spGenes = spGenes;
DE.cisGenes = cisGenes;
DE.transGenes = transGenes;
save('DE.mat', '-struct', 'DE');

%% venn diagram of DE
% Three-circle diagram with A = sp, B = cis, C = trans.
% vennX expects a seven element vector of:
%               |A|
%               |A and B|
%               |B|
%               |B and C|
%               |C|
%               |C and A|
%               |A and B and C|
areas = {spGenes, cisGenes, transGenes};
% intersects: {A and B, A and C, B and C, A and B and C}
intersects = {intersect(spGenes, cisGenes), intersect(spGenes, transGenes), intersect(cisGenes, transGenes), ...
    mintersect(spGenes, cisGenes, transGenes)};
A = cellfun(@length, areas);
I = cellfun(@length, intersects);
% |B and C| is I(3) (cis & trans) and |C and A| is I(2) (sp & trans); passing
% I(2) before I(3) would swap the two pairwise overlaps in the diagram.
figure; vennX([A(1) I(1) A(2) I(3) A(3) I(2) I(4)], .05)
