%% expression between samples
% Configuration for the cross-sample expression comparison: input files,
% coordinate-transform constants, the gene panel and the sample labels.
% Every per-sample list below (transcript_data, center_region, offset,
% samples) uses the same sample order.

% Root folder shared by all input files.
raw_data_dir='D:\uci_remnant\muscle_merfish_2023\genome_reserach\revision_2\raw_data';

% One row per sample: [batch, region]. The file paths and the sample labels
% are all derived from this table so they cannot get out of sync.
batch_region=[1,0; 1,2; 2,0; 2,1; 3,0; 3,1; 3,2; 4,0; 4,1; 4,2];
n_samples=size(batch_region,1);

transcript_data=cell(n_samples,1);  % detected-transcript CSV per sample
center_region=cell(n_samples,1);    % .mat file with the center-region boundary per sample
samples=cell(1,n_samples);          % display label per sample
for k=1:n_samples
    batch_id=batch_region(k,1);
    region_id=batch_region(k,2);
    % '\\' in a sprintf format produces one backslash; the %s argument is
    % inserted verbatim, so the backslashes in raw_data_dir are untouched.
    transcript_data{k}=sprintf('%s\\transcript_data\\batch%d_region%d_detected_transcripts.csv',raw_data_dir,batch_id,region_id);
    center_region{k}=sprintf('%s\\selected_center_region/batch%d/region%d/centerRegionBoundaries.mat',raw_data_dir,batch_id,region_id);
    samples{k}=sprintf('batch%d-region%d',batch_id,region_id);
end

% Per-sample [x,y] shift that is added to the transcript coordinates after
% they have been scaled (see the counting loop).
offset={[11,11],[11,11],[10,10],[10,10],[10,10],[10,10],[9,9],[10,10],[9,9],[14,10]};
% Multiplied onto the transcript coordinates together with scalee1.
% NOTE(review): presumably pixels per micron of the full-resolution image - confirm.
micron2pix=9.26;
% NOTE(review): presumably the downscale factor of the image on which the
% center-region boundaries were drawn - confirm.
scalee1=0.1;

% Gene panel; column g of gene_per_sample corresponds to allGenes{g}.
allGenes={ ...
    'ACTA1', 'ACTN2', 'ADAM15', 'ADAM19', 'ANKLE2', 'ATF6B', 'BAK1', 'BAX', 'BEX1', 'BGN', ...
    'BMP1', 'BMP4', 'CASQ2', 'CCL2', 'CCNA1', 'CCNB1', 'CCND2', 'CCNL1', 'CD70', 'CDK1', ...
    'CITED2', 'COL18A1', 'COL4A1', 'COL4A2', 'COL5A1', 'COL5A2', 'COL5A3', 'COL6A1', 'COL6A2', 'COL8A1', ...
    'CXCL1', 'DBET', 'DCTN1', 'DNMT1', 'DNMT3B', 'DUXA', 'DUXAP8', 'DUXB', 'EIF2AK3', 'FADD', ...
    'FAS', 'FN1', 'GDF15', 'GREM1', 'GSN', 'H3.Y', 'HOXA11', 'HSPA5', 'HSPA6', 'HSPB8', ...
    'HYOU1', 'IFIT3', 'IFITM1', 'IFITM2', 'IGFBP3', 'ITGA10', 'ITGA11', 'KDM4E', 'KHDC1L', 'KLF17', ...
    'KLHL41', 'LAMA5', 'LAMB1', 'LAMB2', 'LAMC1', 'LEUTX', 'LMOD2', 'LOX', 'LOXL1', 'LOXL2', ...
    'LOXL3', 'LRATD1', 'LRIF1', 'MEF2C', 'MFAP4', 'MMP14', 'MMP19', 'MMP2', 'MSC', 'MTCH1', ...
    'MYDGF', 'MYH3', 'MYH6', 'MYH7', 'MYH8', 'MYL2', 'MYL3', 'MYMX', 'MYOG', 'MYOM3', ...
    'NCOA3', 'NEB', 'NKX2-5', 'NOG', 'PCOLCE', 'PDGFA', 'PNRC2', 'PRAMEF12', 'PRAMEF19', 'PRAMEF20', ...
    'PRR15', 'PRR32', 'PTAR1', 'PTX3', 'RBBP6', 'RBP7', 'RFPL1', 'RFPL2', 'RFPL4B', 'RPN2', ...
    'SEC31A', 'SERPINE1', 'SERPINE2', 'SLC14A1', 'SLC34A2', 'SLC38A1', 'SLC9A3R2', 'SMCHD1', 'SMPX', 'SOX4', ...
    'SOX9', 'TAF11L11', 'TCAP', 'TGFB1', 'TGFBR1', 'TIMP3', 'TNNI2', 'TRIM72', 'TTN', 'TWIST1', ...
    'VMO1', 'WFS1', 'WIPI1', 'XBP1', 'YIF1A', 'ZNF280A', 'ZNF296', 'ZNF596', 'ZNF705G', 'ZSCAN4'};
%% 1. count gene per sample
% gene_per_sample(i,g) is the number of detected transcripts of allGenes{g}
% whose transformed coordinates fall inside the center-region polygon of
% sample i. Transcripts whose gene name is not in allGenes are ignored.

n_genes=numel(allGenes);
gene_per_sample=zeros(numel(transcript_data),n_genes);

tic;
for i=1:length(transcript_data)
    % 1.1. load transcript data
    tbl=readtable(transcript_data{i});
    coor=table2array(tbl(:,{'global_x','global_y'}));
    gene=table2array(tbl(:,{'gene'}));

    % Shift so the minimum x/y becomes 1, scale by micron2pix*scalee1, then
    % apply the per-sample offset.
    % NOTE(review): presumably this maps transcripts into the frame in which
    % regionBoundaryIn was drawn - confirm.
    coor=(coor-min(coor,[],1)+1)*micron2pix*scalee1;
    coor=coor+offset{i};

    % Load into a struct instead of straight into the workspace: a bare
    % load() can overwrite script variables, and if a file lacked
    % regionBoundaryIn the polygon of the previous sample would be silently
    % reused. With the struct, a missing variable is an error here.
    boundary_file=load(center_region{i},'regionBoundaryIn');
    regionBoundaryIn=boundary_file.regionBoundaryIn;

    coor_inrange_idx=inpolygon(coor(:,1),coor(:,2),regionBoundaryIn(:,1),regionBoundaryIn(:,2));
    gene_inregion=gene(coor_inrange_idx);

    % 1.2. tally the in-region transcripts per panel gene
    % One ismember call maps every transcript to its column in allGenes
    % (0 when the gene is not in the panel); accumarray then counts the hits
    % per column. This replaces a per-transcript ismember loop.
    [in_panel,panel_idx]=ismember(gene_inregion,allGenes);
    panel_idx=panel_idx(in_panel);
    gene_per_sample(i,:)=accumarray(panel_idx(:),1,[n_genes,1]).';
    toc;
end

%% 2. per genotype expression scatter plot
% For each genotype group, open one figure and draw a scatter plot of the
% per-gene counts for every pair of samples in that group, annotated with
% the correlation coefficient and p-value returned by corr.
% load('D:\Lujia_UCI_PhD_files_from_drive_D\Kyoko_Xiangduo_MERFISH_cell_cluster_project\round20\cross_sample_count_per_gene.mat')

% Genotype code per sample, in the same order as `samples`:
%   0 - NOTE(review): unlabeled in the original script, presumably control - confirm
%   1 - FSHD
%   2 - DEL5
sample_sign=[1,0,0,1,1,0,2,1,2,0];
genotype_codes=[0,1,2];

for g=1:numel(genotype_codes)
    samp=find(sample_sign==genotype_codes(g));

    % Number of unordered sample pairs in this group. Keep at least 6 panel
    % columns so the figures look as before, but grow the grid when a group
    % has more than 4 samples instead of running past a fixed 1x6 layout.
    n_pairs=numel(samp)*(numel(samp)-1)/2;
    n_cols=max(6,n_pairs);

    ctt=1;  % index of the next subplot panel
    figure;
    for i=1:length(samp)-1
        for j=i+1:length(samp)
            count_a=gene_per_sample(samp(i),:);
            count_b=gene_per_sample(samp(j),:);

            subplot(1,n_cols,ctt);
            plot(count_a,count_b,'.','markerSize',10);
            title([samples{samp(i)},' vs. ',samples{samp(j)}])
            % corr expects column vectors
            [r,p]=corr(count_a',count_b');
            legend({['r=',num2str(r),' ,p=',num2str(p)]});
            ctt=ctt+1;
        end
    end
end