%% Script for the in vitro/in vivo comparison
% This script: (i) loads the in vivo data and the in vitro Kds;
%              (ii) plots the model landscape (using the in vivo fitted mRNA/protein concentrations);
%              (iii) plots the measured in vivo landscape;
%              (iv) returns the z-scores.

%% (i) loads the data and in vitro Kds
    %% load in vivo iCLIP measurement data
    % Each replicate file is read as a comma-separated table with one header
    % line; the 11th (numeric) column is used as that replicate's signal.
    vivoFiles  = {'JKRS13_vivo_DR_raw_peak_data.csv', ...
                  'lujh23a_vivo_DR_raw_peak_data.csv', ...
                  'lujh32_vivo_DR_raw_peak_data.csv'};
    vivoFormat = '%s %s %f %f %s %s %s %f %f %f %f';
    vivoSignal = cell(1, numel(vivoFiles));
    for r = 1:numel(vivoFiles)
        FID = fopen(vivoFiles{r});
        if FID == -1
            error('Could not open in vivo data file "%s".', vivoFiles{r});
        end
        % D is intentionally left holding the columns of the LAST file read:
        % the intron annotation step further down takes D{7} and D{8} from it.
        D = textscan(FID, vivoFormat, 'HeaderLines', 1, 'delimiter', ',');
        fclose(FID);   % release the handle (the files were previously left open)
        vivoSignal{r} = D{11};
    end
    [Invivo1, Invivo2, Invivo3] = deal(vivoSignal{:});

    % data preprocess: 
        % add a small value to deal with the zero counts 
        Invivo = [Invivo1 Invivo2 Invivo3] + 1;
        % select the interested binding sites (rows listed in Exclude are dropped)
        Exclude = [167:339 745:799];
        keepRows = ~ismember(1:size(Invivo, 1), Exclude);
        Invivo = Invivo(keepRows, :);
        % normalize each replicate (column) by its median to account for
        % different sequencing depths. The row count and the median dimension
        % are given explicitly: length() returns the LARGEST dimension and
        % median() switches to the row direction for a single-row input, so
        % both went wrong when fewer than three binding sites were kept.
        Invivo = Invivo ./ repmat(median(Invivo, 1), size(Invivo, 1), 1);

    % mark each intron with the binding sites included:
        % read the intron information from data file
        % (D holds the textscan output of the most recently read in vivo file)
        Minigene = D{7};
        intronno = D{8};
        % Build one '<minigene>.<intron number>' label per binding site.
        % num2str is applied per element: called on the whole column it returns
        % a blank-padded character matrix rather than one string per row.
        intronLabel = arrayfun(@(v) num2str(v), intronno, 'UniformOutput', false);
        intron = strcat(Minigene, '.', intronLabel);
        % select the interested binding sites
        intron = intron(~ismember(1:size(intron, 1), Exclude), :);
        % denote the intron no. (each one will be allowed a different level):
        % the index starts at 1 and increases by one every time the label
        % differs from the label of the preceding binding site.
        % bintron is rebuilt from scratch as a 1-by-N row vector, so entries of
        % a longer bintron left in the workspace by an earlier run cannot
        % survive (the element-wise assignment only overwrote the first N).
        isNewIntron = ~strcmp(intron(2:end), intron(1:end-1));
        bintron = cumsum([1, reshape(isNewIntron, 1, [])]);

    %% load in vitro Kd and scaling factor information from in vitro fitting result
       % The file is read as three numeric comma-separated columns with one
       % header line; column 2 becomes pstandard and column 3 becomes SFfit.
       kdFile = 'Kd_SF_fromInvitroFit.csv';
       FID = fopen(kdFile);
       if FID == -1
           error('Could not open in vitro fit file "%s".', kdFile);
       end
       D = textscan(FID,'%f %f %f','HeaderLines',1,'delimiter',',');
       fclose(FID);   % release the handle (the file was previously left open)
       pstandard = D{2};
       SFfit = D{3};

       % select the interested binding sites and normalize the scaling factor
       % (SFfit is divided by its own median after the excluded rows are removed)
       SFfit = SFfit(~ismember(1:size(SFfit, 1), Exclude), :);
       SFfit = SFfit./median(SFfit);
       pstandard = pstandard(~ismember(1:size(pstandard, 1), Exclude), :);


    %% load the free U protein and mRNA level estimated from in vivo fitting
       % The file is read as comma-separated text with no header line: two text
       % columns followed by one numeric column; the numeric column becomes pr.
       levelFile = 'U_mRNA_level_fromInvivoFit.csv';
       FID = fopen(levelFile);
       if FID == -1
           error('Could not open in vivo fit file "%s".', levelFile);
       end
       D = textscan(FID,'%s %s %f','HeaderLines',0,'delimiter',',');
       fclose(FID);   % release the handle (the file was previously left open)
       pr = D{3};
    % Or: provide protein and intron (mRNA) level as an input vector: Ulevel is a 1 by 1 vector, intronlevel is a 29 by 1 vector.
       % pr=log([Ulevel;intronlevel]); 
   
%% calculate the in vivo binding landscape from the provided Ulevel and intron level
    % FitInvivoAll3Replicate is defined outside this script; its second output
    % is kept as the model landscape (Fitori). The first output (res) is not
    % used anywhere in this script.
    [res, RNAU2] = FitInvivoAll3Replicate(pr,pstandard,Invivo,bintron,SFfit);
    % number of binding sites kept after exclusion. This was hard-coded as 571;
    % deriving it from the data gives the same value whenever Invivo has 571
    % rows, which the z-score step at the end of the script already requires
    % for its dimensions to agree.
    n = size(Invivo, 1);
    Fitori = RNAU2;

%% (ii) plots the model landscape 
    % n (number of binding sites to plot) is set in the preceding section.
    siteIdx = 1:n;
    h = figure(1);
    h1 = semilogy(siteIdx, (Fitori(siteIdx)'), 'k.-', 'MarkerSize', 5);
    
%% (iii) plots the measured in vivo landscape: mean+/-standard deviation.
    hold on;
    % since we assume the iCLIP signal follows a log-normal error model, we fit a zero-order function on log scale to estimate the constant variance from three in vivo replicates
    Linvivo = log(Invivo);
    meanLog = mean(Linvivo');          % per-site mean over the replicates (row vector)
    sigmaA = polyfit(meanLog, std(Linvivo'), 0);
    
    % plot the measured mean signal landscape
    h2 = semilogy(siteIdx, exp(mean(Linvivo(siteIdx,:)')), '.', 'MarkerSize', 5);
    % specify the in vivo signal standard deviation region
    x = exp(meanLog + sigmaA);         % upper edge: mean + 1 sd (log scale)
    y = exp(meanLog - sigmaA);         % lower edge: mean - 1 sd (log scale)
    X = [siteIdx, fliplr(siteIdx)];             % x values going out and then back
    Y = [x(siteIdx), fliplr(y(siteIdx))];       % upper edge out, lower edge back
    
    % plot the one standard deviation above and below the mean in vivo signal
    fill(X, Y, 'k', 'FaceAlpha', .3, 'LineStyle', '-', 'EdgeColor', 'none');
    set(gca, 'yscale', 'log', 'XMinorTick', 'on');
    % The legend is created only after every object has been drawn: on MATLAB
    % releases where legends update automatically, creating it before fill()
    % gave the shaded patch an extra, unlabeled legend entry.
    legend([h1,h2], {'binding landscape from model','mean binding landscape from iCLIP measurement'});
    
    % figure denotation and save
    xlim([0 n+1]);
    box on;
    xlabel('Binding sites on mRNA');
    ylabel('iCLIP signal');
    saveas(h, 'invivoiCLIP_landscape_model_experiment.fig');
    close(h);
    
%% (iv) returns the z-scores
	% z-score per binding site: (mean measured log signal over the three
	% replicates - log model signal) / sigmaA, returned as a row vector.
	% NOTE(review): the variable name zscore hides the Statistics Toolbox
	% function of the same name for the rest of the session; it is kept
	% because it is this script's output.
	measuredMeanLog = mean(Linvivo(:,1:3), 2).';
	modelLog = log(Fitori(1:n,1).');
	zscore = (measuredMeanLog - modelLog) ./ sigmaA;




