% load and filter data
% Configuration section: declares the globals shared with the helper
% scripts/functions and sets the thresholds used by the rest of this script.
% NOTE(review): this section reads dataS (and F) before anything in this file
% assigns them, so both must already exist in the workspace / global scope.
global PCMs expCorr F Flabels intint ifFlt corrThrs dataLabels NCorrThrs NgenesToOL corrThList
ifFlt = 0; % 0 -> use the 'PCM' field, nonzero -> use 'PCMflt' (see the overlap section); also passed to corr_of_corr
geneThrs = 10; % not referenced in this file; presumably read by the scripts executed via run - TODO confirm
NCorrThrs = 20; % min number of genes to compute correlation
corrThrs = 0.2; % global; not referenced in this file - presumably a correlation cutoff used by the helpers, TODO confirm
NgenesToOL = [3, 10, 50, 100, 200, 500, 1000, 2000, 4000]; % values passed one at a time as the third argument of OLngenes
corrThList = -1:.1:1; % global; grid of values from -1 to 1 in steps of 0.1
expThrs = min(min(dataS.WCDcer)); % smallest value in dataS.WCDcer (for a 2-D matrix)
% One copy of expThrs per entry of F. NOTE(review): F is a global that is not
% assigned in this file; if it is still empty here, expThrsList is 1-by-0.
expThrsList = repmat(expThrs, 1, length(F));
minExp = expThrs;
% screenS | dataS
% Starting point of the pipeline: 'screenS' runs the loader script in the
% "load data" section, 'dataS' skips it.
startingFrom = 'dataS';
fltBadGenes=1; % not referenced in this file; presumably consumed by a helper script - TODO confirm
%% load data
disp('loading data...');
% Only the 'screenS' starting point does any work here: it sets the dataset
% types and expression thresholds for 8 datasets and then runs the loader
% script. For 'dataS' (or any other value) this section is a no-op.
if strcmp(startingFrom, 'screenS')
    datasetType = repmat({'seq'}, 1, 8);
    expThrsList = log2(10) * ones(1, 8);
    run load_filter_datasets_only_my_data_plusNoPi.m
end

%% compare PCM of two datasets
% Runs PCMs_generate.m as a script, i.e. in this workspace, so it sees the
% variables and globals set above.
% NOTE(review): presumably this fills the global PCMs struct (and intint) -
% confirm against PCMs_generate.m.
disp('generating Pairwise correlation matrices...');
run PCMs_generate.m;

%% expected correlations
% Runs expCorr_generate.m in this workspace.
% NOTE(review): presumably this fills the global expCorr struct - confirm
% against expCorr_generate.m.
disp('generating expected correlation values...');
run expCorr_generate.m;

% Report how many genes are in intint after the two generation steps.
disp(['# genes: ', num2str(length(intint))]);
%%
% use good genes that agree with external datasets
% T = readtable('genes_for_gene_gene_correlation.xlsx');
% T = readtable('genes_for_gene_gene_correlation_NEW_082019.xlsx');
% goodGenes = find(ismember(allGenes, T.gene(T.is_a_good_gene==1)));
% badGenes = find(ismember(allGenes, T.gene(T.is_a_good_gene==0)));
% badGenes = [badGenes;GP.gene_table.TAF11];
% genesToTake = PCMs.(F{1}).genes;
% genesToTake(ismember(genesToTake, badGenes)) = [];
% %% execute: narrow the matrix to include only common genes
% for i = 1:length(F)
%     idx = find(ismember(PCMs.(F{i}).genes, genesToTake));
%     PCMs.(F{i}).data = PCMs.(F{i}).data(:, idx);
%     PCMs.(F{i}).PCM = PCMs.(F{i}).PCM(idx, idx);
%     PCMs.(F{i}).PCMflt = PCMs.(F{i}).PCMflt(idx, idx);
%     PCMs.(F{i}).pval = PCMs.(F{i}).pval(idx, idx);
%     PCMs.(F{i}).genes = PCMs.(F{i}).genes(idx);
%     
%     expCorr.(F{i}).genes = expCorr.(F{i}).genes(idx);
%     expCorr.(F{i}).corrDiag = expCorr.(F{i}).corrDiag(idx, :);
%     expCorr.(F{i}).PCMhalves = {expCorr.(F{i}).PCMhalves{1}(idx, idx), expCorr.(F{i}).PCMhalves{2}(idx, idx)};
% %     expCorr.(F{i}).OL = expCorr.(F{i}).OL(idx, :);
% %     expCorr.(F{i}).corrByTh = expCorr.(F{i}).corrByTh(idx, :);
% end
% 
% intint = genesToTake;
% disp(['# genes: ', num2str(length(intint))]);
%% divergence table
% Observed / expected values returned by corr_of_corr for the parental pair
% (datasets 1 and 2) and the hybrid pair (datasets 3 and 4). Divergence is the
% relative shortfall of the observed value from the expected one:
% (expected - observed) / expected.
% NOTE(review): the dataset order cer, par, hyc, hyp is taken from the column
% names used for divT below - confirm it matches the order of F.
[obsP, expP, pvalP] = corr_of_corr(1,2, ifFlt);
ParentsDiv = (expP - obsP)./expP;
[obsH, expH, pvalH] = corr_of_corr(3,4, ifFlt);
HybridDiv = (expH - obsH)./expH;
% Cross comparisons (1 vs 3, 2 vs 4): only the observed value is kept.
[obsCerHyc] = corr_of_corr(1,3, ifFlt);
[obsParHyp] = corr_of_corr(2,4, ifFlt);

% Per gene and dataset: number of non-zero entries in that gene's PCMflt row.
% Preallocate for every dataset in F so the array does not grow inside the
% loop, and keep at least the 4 columns that the divT table reads.
Ngenes = nan(length(intint), max(4, length(F)));
for i = 1:length(F)
    Ngenes(:, i) = sum(PCMs.(F{i}).PCMflt ~= 0, 2);
end

% overlap of top correlated genes
% Choose which matrix of each dataset is compared.
if ifFlt
    measure = 'PCMflt';
else
    measure = 'PCM';
end
comps = {[1,2], [3,4], [1,3], [2,4]};
compsTitle = {'cerpar', 'hychyp', 'cerhyc', 'parhyp'};
% OLs(:, j, i): result of OLngenes for comparison i with NgenesToOL(j).
OLs = nan(length(intint), length(NgenesToOL), length(comps));
% Loop over every entry of comps (was hard-coded to 4) so that the loop
% always matches the size OLs was allocated with.
for i = 1:length(comps)
    for j = 1:length(NgenesToOL)
        OLs(:, j, i) = OLngenes(PCMs.(F{comps{i}(1)}).(measure), PCMs.(F{comps{i}(2)}).(measure), NgenesToOL(j));
    end
end

%%
% Assemble the per-gene summary table divT and expose it as a global.
global divT;

% Column names, in the same order as the data arguments passed to table().
divColNames = {'geneID', 'gene', ...
    'div_cer_par', 'div_hyc_hyp', ...
    'obs_cer_par', 'obs_hyc_hyp', 'obs_cer_hyc', 'obs_par_hyp', ...
    'exp_cer', 'exp_par', 'exp_hyc', 'exp_hyp', ...
    'Ngenes_cer', 'Ngenes_par', 'Ngenes_hyc', 'Ngenes_hyp'};

% Row-wise NaN-ignoring median of corrDiag for each of the first four
% datasets in F; each cell becomes one exp_* column.
expMedians = arrayfun(@(k) nanmedian(expCorr.(F{k}).corrDiag, 2), 1:4, ...
    'UniformOutput', false);

divT = table(intint, GP.gene_infoR64.name(intint), ...
    ParentsDiv, HybridDiv, ...
    obsP, obsH, obsCerHyc, obsParHyp, ...
    expMedians{:}, ...
    Ngenes(:, 1), Ngenes(:, 2), Ngenes(:, 3), Ngenes(:, 4), ...
    'variableNames', divColNames);

disp('divT is ready');

%% save PCM
% Tag the saved results with today's date written as dd_mmm_yyyy:
% datestr format 1 is 'dd-mmm-yyyy', and the dashes are replaced with
% underscores before the tag is passed to the save routine.
name = strrep(datestr(datetime, 1), '-', '_');
compact_save_gene_gene_correlation(name);