#calculations performed in text for g_d_overlap figure discussion

# Load the tab-separated table of log10 P values; the first row holds the
# column names and character columns are kept as strings (no factors).
logP <- read.table(
	"log10P_human.txt",
	header = TRUE,
	sep = "\t",
	stringsAsFactors = FALSE
)

library(psych)

# Modified version of psych::corr.test, which provides more accurate P values and even more accurate -log10P values:

# Pairwise correlation test adapted from psych::corr.test. P values are
# derived from log-scale tail probabilities of the t distribution so that very
# small values keep their precision, and -log10(P) is computed directly from
# the log upper tail so it stays finite even where P itself underflows to 0.
#
# Arguments:
#   x          matrix or data frame whose columns are correlated.
#   y          optional second matrix/data frame; when NULL the columns of x
#              are correlated with each other (symmetric case).
#   use        passed to cor() as `use`.
#   method     passed to cor() as `method`.
#   adjust     method handed to p.adjust(); "none" skips the adjustment.
#   alpha      significance level used for the confidence intervals.
#   ci         if TRUE, build confidence intervals via the Fisher z transform
#              (needs fisherz() and fisherz2r() from psych).
#   minlength  passed to abbreviate() when building the CI row labels.
#
# Side effect: prints the unadjusted P matrix and the -log10(P) matrix.
#
# Returns a list of class c("psych", "corr.test") with elements
#   r, n, t, se  correlations, sample size(s), t statistics, standard errors;
#   p            P values; in the symmetric case the entries above the
#                diagonal are adjusted, otherwise all entries are adjusted;
#   accurate_P   the unadjusted two-sided P values (the matrix that is printed);
#   log10P       -log10 of the unadjusted two-sided P values;
#   sef, ci, ci.adj  Fisher-z standard error and CI tables (NULL if ci = FALSE);
#   adjust, sym, Call  bookkeeping.
corr_test <- function (x, y = NULL, use = "pairwise", method = "pearson", 
    adjust = "holm", alpha = 0.05, ci = TRUE, minlength = 5) 
{
    cl <- match.call()
    sym <- is.null(y)
    if (sym) {
        r <- cor(x, use = use, method = method)
        n <- t(!is.na(x)) %*% (!is.na(x))
    }
    else {
        r <- cor(x, y, use = use, method = method)
        n <- t(!is.na(x)) %*% (!is.na(y))
    }
    # Collapse n to a scalar when every pair shares the same sample size.
    if ((use == "complete") || (min(n) == max(n))) 
        n <- min(n)
    t <- (r * sqrt(n - 2))/sqrt(1 - r^2)
    # 2 * (1 - pt(|t|)) written as -2 * expm1(log(pt(|t|))) to limit cancellation.
    p <- -2 * expm1(pt(abs(t), (n - 2), log.p = TRUE))
    print(paste0("Accurate P vals:"))
    print(p)
    # -log10(2 * upper tail), evaluated entirely on the log scale.
    log10P <- -(log(2) + pt(abs(t), (n - 2), log.p = TRUE, lower.tail = FALSE))/log(10)
    print(paste0("Even more accurate-log10P vals:"))
    print(log10P)
    se <- sqrt((1 - r * r)/(n - 2))
    nvar <- ncol(r)
    p[p > 1] <- 1
    # Keep the unadjusted values: `p` is (partly) overwritten by p.adjust() below.
    accurate_P <- p
    if (adjust != "none") {
        if (sym) {
            above_diag <- upper.tri(p, diag = FALSE)
            p[above_diag] <- p.adjust(p[above_diag], adjust)
        }
        else {
            p[] <- p.adjust(p, adjust)
        }
    }
    if (ci) {
        if (min(n) < 4) {
            warning("Number of subjects must be greater than 3 to find confidence intervals.")
        }
        upper_q <- 1 - alpha/2
        dif <- qnorm(upper_q)
        if (sym) {
            below_diag <- lower.tri(r)
            z <- fisherz(r[below_diag])
            ncors <- nvar * (nvar - 1)/2
            if (adjust != "holm") {
                dif.corrected <- qnorm(1 - alpha/(2 * ncors))
            }
            else {
                ord <- order(abs(z), decreasing = FALSE)
                dif.corrected <- qnorm(1 - alpha/(2 * order(ord)))
            }
            if (is.matrix(n)) {
                sef <- 1/sqrt(n[lower.tri(n)] - 3)
            }
            else {
                sef <- 1/sqrt(n - 3)
            }
            lower <- fisherz2r(z - dif * sef)
            upper <- fisherz2r(z + dif * sef)
            lower.corrected <- fisherz2r(z - dif.corrected * sef)
            upper.corrected <- fisherz2r(z + dif.corrected * sef)
            ci_table <- data.frame(lower = lower, r = r[below_diag], 
                upper = upper, p = p[lower.tri(p)])
            ci.adj <- data.frame(lower.adj = lower.corrected, 
                upper.adj = upper.corrected)
            cnR <- abbreviate(colnames(r), minlength = minlength)
            # Label each lower-triangle entry "<column var>-<row var>", in the
            # same column-major order as r[lower.tri(r)]; nothing to label
            # when there is only one variable.
            pair_idx <- which(below_diag, arr.ind = TRUE)
            if (nrow(pair_idx) > 0) {
                rownames(ci_table) <- paste(cnR[pair_idx[, "col"]], 
                    cnR[pair_idx[, "row"]], sep = "-")
            }
        }
        else {
            n.x <- NCOL(x)
            n.y <- NCOL(y)
            z <- fisherz(r)
            if (adjust != "holm") {
                dif.corrected <- qnorm(1 - (1 - upper_q)/(n.x * 
                  n.y))
            }
            else {
                ord <- order(abs(z), decreasing = FALSE)
                dif.corrected <- qnorm(1 - (1 - upper_q)/(order(ord)))
            }
            sef <- 1/sqrt(n - 3)
            lower <- as.vector(fisherz2r(z - dif * sef))
            upper <- as.vector(fisherz2r(z + dif * sef))
            lower.corrected <- fisherz2r(z - dif.corrected * sef)
            upper.corrected <- fisherz2r(z + dif.corrected * sef)
            ci_table <- data.frame(lower = lower, r = as.vector(r), 
                upper = upper, p = as.vector(p))
            ci.adj <- data.frame(lower.adj = as.vector(lower.corrected), 
                r = as.vector(r), upper.adj = as.vector(upper.corrected))
            cnR <- abbreviate(rownames(r), minlength = minlength)
            cnC <- abbreviate(colnames(r), minlength = minlength)
            # Labels follow as.vector(r): x varies fastest within each y column.
            rownames(ci_table) <- paste(cnR[rep(seq_len(n.x), times = n.y)], 
                cnC[rep(seq_len(n.y), each = n.x)], sep = "-")
        }
    }
    else {
        ci_table <- sef <- ci.adj <- NULL
    }
    result <- list(r = r, n = n, t = t, p = p, accurate_P = accurate_P, 
        log10P = log10P, se = se, sef = sef, 
        adjust = adjust, sym = sym, ci = ci_table, ci.adj = ci.adj, 
        Call = cl)
    class(result) <- c("psych", "corr.test")
    return(result)
}


#--------correlations (and P values) between logP values g and d ------------
#-------- down sample every 1 Mb to remove autocorrelation ---------------

# Note parenthetically that Ix mostly correlates with g 75 nM as well as d_w2 to d_avg, esp d_w6, suggesting Ix mostly driven by drug effects.

# Pearson correlations between columns 5 to 16 of logP, using every 100th row
# and pairwise-complete observations.
cor(
	logP[seq(1, nrow(logP), by = 100), 5:16],
	use = "pairwise.complete.obs",
	method = "pearson"
)
              # log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg log10p_d_w1 log10p_d_w2 log10p_d_w3 log10p_d_w4 log10p_d_w6 log10p_d_avg log10p_g_d_Ix
# log10p_g_0nM   1.000000000   0.98681379    0.87640494    0.21452613   0.85634878  0.22768443  0.16589639  0.01029153 -0.05199594 -0.10428034 -0.006678492   -0.16148531
# log10p_g_8nM   0.986813787   1.00000000    0.94030995    0.32273313   0.92534673  0.23755159  0.10528527 -0.04083682 -0.09131331 -0.13447775 -0.054765336   -0.16961179
# log10p_g_25nM  0.876404938   0.94030995    1.00000000    0.58921282   0.99908868  0.24468618  0.02149171 -0.05810726 -0.06996565 -0.07979553 -0.061839879   -0.05871879
# log10p_g_75nM  0.214526129   0.32273313    0.58921282    1.00000000   0.62110842  0.12417921  0.04572648  0.27329997  0.37763343  0.46935390  0.301496169    0.59905976
# log10p_g_avg   0.856348778   0.92534673    0.99908868    0.62110842   1.00000000  0.24367556  0.01573477 -0.05235527 -0.05873352 -0.06370388 -0.054622087   -0.03566823
# log10p_d_w1    0.227684428   0.23755159    0.24468618    0.12417921   0.24367556  1.00000000  0.47194529  0.10557001  0.01251672 -0.01926074  0.075118943   -0.01017604
# log10p_d_w2    0.165896387   0.10528527    0.02149171    0.04572648   0.01573477  0.47194529  1.00000000  0.83377147  0.69836937  0.58894065  0.798572156    0.37472937
# log10p_d_w3    0.010291529  -0.04083682   -0.05810726    0.27329997  -0.05235527  0.10557001  0.83377147  1.00000000  0.97334083  0.91196427  0.997888752    0.72708052
# log10p_d_w4   -0.051995942  -0.09131331   -0.06996565    0.37763343  -0.05873352  0.01251672  0.69836937  0.97334083  1.00000000  0.97424496  0.985917820    0.83326923
# log10p_d_w6   -0.104280338  -0.13447775   -0.07979553    0.46935390  -0.06370388 -0.01926074  0.58894065  0.91196427  0.97424496  1.00000000  0.932573940    0.93082702
# log10p_d_avg  -0.006678492  -0.05476534   -0.06183988    0.30149617  -0.05462209  0.07511894  0.79857216  0.99788875  0.98591782  0.93257394  1.000000000    0.75706362
# log10p_g_d_Ix -0.161485307  -0.16961179   -0.05871879    0.59905976  -0.03566823 -0.01017604  0.37472937  0.72708052  0.83326923  0.93082702  0.757063624    1.00000000



# corr.test gives quick R values and P values for dataframe, but only 2 dec places. For more accurate correls use cor(), for P values use cor.test on selected columns. Or use results from chart.Correlation() (shown in Fig in paper).

# psych::corr.test on every 100th row of columns 5 to 16, with no
# multiple-testing adjustment (adjust = "none"). The expression is left
# verbatim because the printed "Call:" line echoes it.
corr.test(logP[seq(1,nrow(logP),by=100),c(5:16)], method="pearson",use = "pairwise.complete.obs", adjust = "none")
# Call:corr.test(x = logP[seq(1, nrow(logP), by = 100), c(5:16)], use = "pairwise.complete.obs", 
    # method = "pearson", adjust = "none")
# Correlation matrix 
              # log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg log10p_d_w1 log10p_d_w2 log10p_d_w3 log10p_d_w4 log10p_d_w6 log10p_d_avg log10p_g_d_Ix
# log10p_g_0nM          1.00         0.99          0.88          0.21         0.86        0.23        0.17        0.01       -0.05       -0.10        -0.01         -0.16
# log10p_g_8nM          0.99         1.00          0.94          0.32         0.93        0.24        0.11       -0.04       -0.09       -0.13        -0.05         -0.17
# log10p_g_25nM         0.88         0.94          1.00          0.59         1.00        0.24        0.02       -0.06       -0.07       -0.08        -0.06         -0.06
# log10p_g_75nM         0.21         0.32          0.59          1.00         0.62        0.12        0.05        0.27        0.38        0.47         0.30          0.60
# log10p_g_avg          0.86         0.93          1.00          0.62         1.00        0.24        0.02       -0.05       -0.06       -0.06        -0.05         -0.04
# log10p_d_w1           0.23         0.24          0.24          0.12         0.24        1.00        0.47        0.11        0.01       -0.02         0.08         -0.01
# log10p_d_w2           0.17         0.11          0.02          0.05         0.02        0.47        1.00        0.83        0.70        0.59         0.80          0.37
# log10p_d_w3           0.01        -0.04         -0.06          0.27        -0.05        0.11        0.83        1.00        0.97        0.91         1.00          0.73
# log10p_d_w4          -0.05        -0.09         -0.07          0.38        -0.06        0.01        0.70        0.97        1.00        0.97         0.99          0.83
# log10p_d_w6          -0.10        -0.13         -0.08          0.47        -0.06       -0.02        0.59        0.91        0.97        1.00         0.93          0.93
# log10p_d_avg         -0.01        -0.05         -0.06          0.30        -0.05        0.08        0.80        1.00        0.99        0.93         1.00          0.76
# log10p_g_d_Ix        -0.16        -0.17         -0.06          0.60        -0.04       -0.01        0.37        0.73        0.83        0.93         0.76          1.00
# Sample Size 
# [1] 3054
# Probability values (Entries above the diagonal are adjusted for multiple tests.) 
              # log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg log10p_d_w1 log10p_d_w2 log10p_d_w3 log10p_d_w4 log10p_d_w6 log10p_d_avg log10p_g_d_Ix
# log10p_g_0nM          0.00         0.00          0.00          0.00         0.00        0.00        0.00        0.57        0.00        0.00         0.71          0.00
# log10p_g_8nM          0.00         0.00          0.00          0.00         0.00        0.00        0.00        0.02        0.00        0.00         0.00          0.00
# log10p_g_25nM         0.00         0.00          0.00          0.00         0.00        0.00        0.24        0.00        0.00        0.00         0.00          0.00
# log10p_g_75nM         0.00         0.00          0.00          0.00         0.00        0.00        0.01        0.00        0.00        0.00         0.00          0.00
# log10p_g_avg          0.00         0.00          0.00          0.00         0.00        0.00        0.38        0.00        0.00        0.00         0.00          0.05
# log10p_d_w1           0.00         0.00          0.00          0.00         0.00        0.00        0.00        0.00        0.49        0.29         0.00          0.57
# log10p_d_w2           0.00         0.00          0.24          0.01         0.38        0.00        0.00        0.00        0.00        0.00         0.00          0.00
# log10p_d_w3           0.57         0.02          0.00          0.00         0.00        0.00        0.00        0.00        0.00        0.00         0.00          0.00
# log10p_d_w4           0.00         0.00          0.00          0.00         0.00        0.49        0.00        0.00        0.00        0.00         0.00          0.00
# log10p_d_w6           0.00         0.00          0.00          0.00         0.00        0.29        0.00        0.00        0.00        0.00         0.00          0.00
# log10p_d_avg          0.71         0.00          0.00          0.00         0.00        0.00        0.00        0.00        0.00        0.00         0.00          0.00
# log10p_g_d_Ix         0.00         0.00          0.00          0.00         0.05        0.57        0.00        0.00        0.00        0.00         0.00          0.00

 # To see confidence intervals of the correlations, print with the short=FALSE option

# Same data and options passed to the locally defined corr_test(), which also
# prints the unadjusted P matrix and the -log10(P) matrix before the usual
# summary. The expression is left verbatim because the printed "Call:" line
# echoes it.
corr_test(logP[seq(1,nrow(logP),by=100),c(5:16)], method="pearson",use = "pairwise.complete.obs", adjust = "none")
# [1] "Accurate P vals:"
              # log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg   log10p_d_w1   log10p_d_w2  log10p_d_w3   log10p_d_w4   log10p_d_w6 log10p_d_avg log10p_g_d_Ix
# log10p_g_0nM  0.000000e+00 0.000000e+00  0.000000e+00  3.991574e-33 0.000000e+00  3.318515e-37  2.752360e-20 5.696798e-01  4.050266e-03  7.638851e-09 7.121824e-01  2.719893e-19
# log10p_g_8nM  0.000000e+00 0.000000e+00  0.000000e+00  5.697480e-75 0.000000e+00  1.933262e-40  5.469661e-09 2.402164e-02  4.309658e-07  8.488196e-14 2.465610e-03  3.804043e-21
# log10p_g_25nM 0.000000e+00 0.000000e+00  0.000000e+00 5.919166e-285 0.000000e+00  7.122208e-43  2.350905e-01 1.315477e-03  1.089820e-04  1.010208e-05 6.277522e-04  1.168540e-03
# log10p_g_75nM 3.991574e-33 5.697480e-75 5.919166e-285  0.000000e+00 0.000000e+00  5.723157e-12  1.149530e-02 1.877249e-53 4.114012e-104 3.757593e-167 3.314005e-65 6.011142e-297
# log10p_g_avg  0.000000e+00 0.000000e+00  0.000000e+00  0.000000e+00 0.000000e+00  1.593025e-42  3.847113e-01 3.802347e-03  1.165196e-03  4.273277e-04 2.531092e-03  4.872915e-02
# log10p_d_w1   3.318515e-37 1.933262e-40  7.122208e-43  5.723157e-12 1.593025e-42  0.000000e+00 3.132861e-169 4.972924e-09  4.892794e-01  2.872972e-01 3.246177e-05  5.740186e-01
# log10p_d_w2   2.752360e-20 5.469661e-09  2.350905e-01  1.149530e-02 3.847113e-01 3.132861e-169  0.000000e+00 0.000000e+00  0.000000e+00 1.252862e-284 0.000000e+00 2.015153e-102
# log10p_d_w3   5.696798e-01 2.402164e-02  1.315477e-03  1.877249e-53 3.802347e-03  4.972924e-09  0.000000e+00 0.000000e+00  0.000000e+00  0.000000e+00 0.000000e+00  0.000000e+00
# log10p_d_w4   4.050266e-03 4.309658e-07  1.089820e-04 4.114012e-104 1.165196e-03  4.892794e-01  0.000000e+00 0.000000e+00  0.000000e+00  0.000000e+00 0.000000e+00  0.000000e+00
# log10p_d_w6   7.638851e-09 8.488196e-14  1.010208e-05 3.757593e-167 4.273277e-04  2.872972e-01 1.252862e-284 0.000000e+00  0.000000e+00  0.000000e+00 0.000000e+00  0.000000e+00
# log10p_d_avg  7.121824e-01 2.465610e-03  6.277522e-04  3.314005e-65 2.531092e-03  3.246177e-05  0.000000e+00 0.000000e+00  0.000000e+00  0.000000e+00 0.000000e+00  0.000000e+00
# log10p_g_d_Ix 2.719893e-19 3.804043e-21  1.168540e-03 6.011142e-297 4.872915e-02  5.740186e-01 2.015153e-102 0.000000e+00  0.000000e+00  0.000000e+00 0.000000e+00  0.000000e+00
# [1] "Even more accurate-log10P vals:"
              # log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg log10p_d_w1 log10p_d_w2  log10p_d_w3 log10p_d_w4  log10p_d_w6 log10p_d_avg log10p_g_d_Ix
# log10p_g_0nM           Inf  2415.543551   970.2930865      32.39886   877.744546  36.4790562  19.5602948    0.2443692    2.392516    8.1169719    0.1474087     18.565448
# log10p_g_8nM  2415.5435507          Inf  1430.4960478      74.24432  1287.373553  39.7137092   8.2620396    1.6193973    6.365557   13.0711846    2.6080757     20.419755
# log10p_g_25nM  970.2930865  1430.496048           Inf     284.22774  4182.312870  42.1473853   0.6287649    2.8809166    3.962645    4.9955890    3.2022118      2.932357
# log10p_g_75nM   32.3988559    74.244317   284.2277395           Inf   324.646753  11.2423643   1.9394798   52.7264780  103.385734  166.4250903   64.4796469    296.221043
# log10p_g_avg   877.7445459  1287.373553  4182.3128696     324.64675          Inf  41.7977775   0.4148650    2.4199483    2.933601    3.3692390    2.5966920      1.312211
# log10p_d_w1     36.4790562    39.713709    42.1473853      11.24236    41.797778         Inf 168.5040589    8.3033882    0.310443    0.5416686    4.4886278      0.241074
# log10p_d_w2     19.5602948     8.262040     0.6287649       1.93948     0.414865 168.5040589         Inf  789.1000058  444.976039  283.9020966  674.6364948    101.695692
# log10p_d_w3      0.2443692     1.619397     2.8809166      52.72648     2.419948   8.3033882 789.1000058          Inf 1953.508221 1182.7126838 3625.9206382    500.174384
# log10p_d_w4      2.3925164     6.365557     3.9626454     103.38573     2.933601   0.3104430 444.9760392 1953.5082213         Inf 1976.0712094 2372.2750811    787.281939
# log10p_d_w6      8.1169719    13.071185     4.9955890     166.42509     3.369239   0.5416686 283.9020966 1182.7126838 1976.071209          Inf 1352.3751799   1336.021843
# log10p_d_avg     0.1474087     2.608076     3.2022118      64.47965     2.596692   4.4886278 674.6364948 3625.9206382 2372.275081 1352.3751799          Inf    565.912290
# log10p_g_d_Ix   18.5654482    20.419755     2.9323565     296.22104     1.312211   0.2410740 101.6956919  500.1743836  787.281939 1336.0218426  565.9122898           Inf
# Call:corr_test(x = logP[seq(1, nrow(logP), by = 100), c(5:16)], use = "pairwise.complete.obs", 
    # method = "pearson", adjust = "none")
    
# <<<<<<<<<<<<<<< use in paper >>>>>>>>>>>>>>>>>>>>>>>>
# Correlation matrix 
              # log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg log10p_d_w1 log10p_d_w2 log10p_d_w3 log10p_d_w4 log10p_d_w6 log10p_d_avg log10p_g_d_Ix
# log10p_g_0nM          1.00         0.99          0.88          0.21         0.86        0.23        0.17        0.01       -0.05       -0.10        -0.01         -0.16
# log10p_g_8nM          0.99         1.00          0.94          0.32         0.93        0.24        0.11       -0.04       -0.09       -0.13        -0.05         -0.17
# log10p_g_25nM         0.88         0.94          1.00          0.59         1.00        0.24        0.02       -0.06       -0.07       -0.08        -0.06         -0.06
# log10p_g_75nM         0.21         0.32          0.59          1.00         0.62        0.12        0.05        0.27        0.38        0.47         0.30          0.60
# log10p_g_avg          0.86         0.93          1.00          0.62         1.00        0.24        0.02       -0.05       -0.06       -0.06        -0.05         -0.04
# log10p_d_w1           0.23         0.24          0.24          0.12         0.24        1.00        0.47        0.11        0.01       -0.02         0.08         -0.01
# log10p_d_w2           0.17         0.11          0.02          0.05         0.02        0.47        1.00        0.83        0.70        0.59         0.80          0.37
# log10p_d_w3           0.01        -0.04         -0.06          0.27        -0.05        0.11        0.83        1.00        0.97        0.91         1.00          0.73
# log10p_d_w4          -0.05        -0.09         -0.07          0.38        -0.06        0.01        0.70        0.97        1.00        0.97         0.99          0.83
# log10p_d_w6          -0.10        -0.13         -0.08          0.47        -0.06       -0.02        0.59        0.91        0.97        1.00         0.93          0.93
# log10p_d_avg         -0.01        -0.05         -0.06          0.30        -0.05        0.08        0.80        1.00        0.99        0.93         1.00          0.76
# log10p_g_d_Ix        -0.16        -0.17         -0.06          0.60        -0.04       -0.01        0.37        0.73        0.83        0.93         0.76          1.00
# Sample Size 
# [1] 3054
# ^^^^^^^^^^^^^^ use in paper ^^^^^^^^^^^^^^^^^^^^^^^

# Probability values (Entries above the diagonal are adjusted for multiple tests.) 
              # log10p_g_0nM log10p_g_8nM log10p_g_25nM log10p_g_75nM log10p_g_avg log10p_d_w1 log10p_d_w2 log10p_d_w3 log10p_d_w4 log10p_d_w6 log10p_d_avg log10p_g_d_Ix
# log10p_g_0nM          0.00         0.00          0.00          0.00         0.00        0.00        0.00        0.57        0.00        0.00         0.71          0.00
# log10p_g_8nM          0.00         0.00          0.00          0.00         0.00        0.00        0.00        0.02        0.00        0.00         0.00          0.00
# log10p_g_25nM         0.00         0.00          0.00          0.00         0.00        0.00        0.24        0.00        0.00        0.00         0.00          0.00
# log10p_g_75nM         0.00         0.00          0.00          0.00         0.00        0.00        0.01        0.00        0.00        0.00         0.00          0.00
# log10p_g_avg          0.00         0.00          0.00          0.00         0.00        0.00        0.38        0.00        0.00        0.00         0.00          0.05
# log10p_d_w1           0.00         0.00          0.00          0.00         0.00        0.00        0.00        0.00        0.49        0.29         0.00          0.57
# log10p_d_w2           0.00         0.00          0.24          0.01         0.38        0.00        0.00        0.00        0.00        0.00         0.00          0.00
# log10p_d_w3           0.57         0.02          0.00          0.00         0.00        0.00        0.00        0.00        0.00        0.00         0.00          0.00
# log10p_d_w4           0.00         0.00          0.00          0.00         0.00        0.49        0.00        0.00        0.00        0.00         0.00          0.00
# log10p_d_w6           0.00         0.00          0.00          0.00         0.00        0.29        0.00        0.00        0.00        0.00         0.00          0.00
# log10p_d_avg          0.71         0.00          0.00          0.00         0.00        0.00        0.00        0.00        0.00        0.00         0.00          0.00
# log10p_g_d_Ix         0.00         0.00          0.00          0.00         0.05        0.57        0.00        0.00        0.00        0.00         0.00          0.00

 # To see confidence intervals of the correlations, print with the short=FALSE option



# ----------------------------------------- Compare g vs g, d vs d, and g vs d sharing ------------------------ 


# Use corr value. Could also use differences in log10P vals of correlations, but R value seems simple and conservative.
# Also, in g_d_comb_fish_1.R, some p vals are 0, so cannot compare -log10P, and some odds ratios are Inf, so cannot compare ensemble odds ratios, either (except for specific comparisons without 0 and/or Inf.) However, elected to use selected individual odds ratios for comparisons.
# Gives very similar results to g_d_comb_fish_1.R


# Standard error of the mean of x, ignoring NA values: the square root of the
# sample variance divided by the number of non-missing observations.
sem <- function(x) {
	is_observed <- !is.na(x)
	sample_variance <- var(x, na.rm = TRUE)
	sqrt(sample_variance / sum(is_observed))
}

# Print a Welch two-sample t-test of a against b, then the unrounded P value
# and, for each sample, its mean, SEM, SD and count of non-missing values.
# Everything is written to the console with print().
compare <- function(a,b) {
	welch <- t.test(a,b)
	print(welch)

	# Emit one "<label><value>" line per summary statistic.
	say <- function(label, value) print(paste0(label, value))

	say("exact P value = ", welch$p.value)

	say("mean of a = ", mean(a, na.rm = TRUE))
	say("sem of a = ", sem(a))
	say("sd of a = ", sd(a, na.rm = TRUE))
	say("number in a = ", sum(!is.na(a)))

	say("mean of b = ", mean(b, na.rm = TRUE))
	say("sem of b = ", sem(b))
	say("sd of b = ", sd(b, na.rm = TRUE))
	say("number in b = ", sum(!is.na(b)))
}


# Correlation matrix reused by all comparisons below: Pearson correlations of
# columns 5 to 16 of logP on every 100th row, pairwise-complete observations.
g_and_d_cor <- cor(
	logP[seq(1, nrow(logP), by = 100), 5:16],
	use = "pairwise.complete.obs",
	method = "pearson"
)

# compare g-g and d-d sharing: no significant difference

compare(
	# g-g: upper triangle of the 5 x 5 g block
	g_and_d_cor[1:5, 1:5][upper.tri(g_and_d_cor[1:5, 1:5])],
	# d-d: upper triangle of the 6 x 6 d block
	g_and_d_cor[6:11, 6:11][upper.tri(g_and_d_cor[6:11, 6:11])]
)

	# Welch Two Sample t-test

# data:  a and b
# t = 0.81673, df = 22.787, p-value = 0.4225
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -0.1694082  0.3902569
# sample estimates:
# mean of x mean of y 
# 0.7331893 0.6227650 

# [1] "exact P value = 0.422538439928698"
# [1] "mean of a = 0.733189336398883"
# [1] "sem of a = 0.0895238879647901"
# [1] "sd of a = 0.283099390962473"
# [1] "number in a = 10"
# [1] "mean of b = 0.622764961580445"
# [1] "sem of b = 0.10131757312663"
# [1] "sd of b = 0.392401273397592"
# [1] "number in b = 15"




# Compare g-g and g-d sharing, significant difference, g-g higher than g-d:

compare(
	# g-g: upper triangle of the 5 x 5 g block
	g_and_d_cor[1:5, 1:5][upper.tri(g_and_d_cor[1:5, 1:5])],
	# g-d: every entry of the 5 x 6 g-by-d block
	as.vector(g_and_d_cor[1:5, 6:11])
)

	# Welch Two Sample t-test

# data:  a and b
# t = 7.1068, df = 11.06, p-value = 1.919e-05
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # 0.4629868 0.8780242
# sample estimates:
 # mean of x  mean of y 
# 0.73318934 0.06268385 

# [1] "exact P value = 1.91929054407828e-05"
# [1] "mean of a = 0.733189336398883"
# [1] "sem of a = 0.0895238879647901"
# [1] "sd of a = 0.283099390962473"
# [1] "number in a = 10"
# [1] "mean of b = 0.0626838507310454"
# [1] "sem of b = 0.0297784120837997"
# [1] "sd of b = 0.163103080249815"
# [1] "number in b = 30"




# Compare d-d and g-d sharing, significant difference, d-d higher than g-d

compare(
	# d-d: upper triangle of the 6 x 6 d block
	g_and_d_cor[6:11, 6:11][upper.tri(g_and_d_cor[6:11, 6:11])],
	# g-d: every entry of the 5 x 6 g-by-d block
	as.vector(g_and_d_cor[1:5, 6:11])
)

	# Welch Two Sample t-test

# data:  a and b
# t = 5.3036, df = 16.464, p-value = 6.487e-05
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # 0.3367242 0.7834380
# sample estimates:
 # mean of x  mean of y 
# 0.62276496 0.06268385 

# [1] "exact P value = 6.48667272123993e-05"
# [1] "mean of a = 0.622764961580445"
# [1] "sem of a = 0.10131757312663"
# [1] "sd of a = 0.392401273397592"
# [1] "number in a = 15"
# [1] "mean of b = 0.0626838507310454"
# [1] "sem of b = 0.0297784120837997"
# [1] "sd of b = 0.163103080249815"
# [1] "number in b = 30"





# Main source of g-d sharing is from g_75nM, as one might expect!

# Show the g (rows 1 to 5) by d (columns 6 to 11) block of the correlation matrix.
g_and_d_cor[1:5, 6:11, drop = FALSE]
              # log10p_d_w1 log10p_d_w2 log10p_d_w3 log10p_d_w4 log10p_d_w6 log10p_d_avg
# log10p_g_0nM    0.2276844  0.16589639  0.01029153 -0.05199594 -0.10428034 -0.006678492
# log10p_g_8nM    0.2375516  0.10528527 -0.04083682 -0.09131331 -0.13447775 -0.054765336
# log10p_g_25nM   0.2446862  0.02149171 -0.05810726 -0.06996565 -0.07979553 -0.061839879
# log10p_g_75nM   0.1241792  0.04572648  0.27329997  0.37763343  0.46935390  0.301496169
# log10p_g_avg    0.2436756  0.01573477 -0.05235527 -0.05873352 -0.06370388 -0.054622087


# And the g-d sharing for g 75 nM is significantly higher than for the rest of g-d:
# <<<<<<<<<< use in paper >>>>>>>>>>>>>>>>>>

compare(
	# g 75 nM row of the g-d block
	as.vector(g_and_d_cor["log10p_g_75nM", 6:11]),
	# remaining g rows (1 to 3 and 5) of the g-d block
	as.vector(g_and_d_cor[c(1:3, 5), 6:11])
)

	# Welch Two Sample t-test

# data:  a and b
# t = 3.678, df = 6.5721, p-value = 0.00881
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # 0.08825725 0.41823694
# sample estimates:
 # mean of x  mean of y 
# 0.26528153 0.01203443 

# [1] "exact P value = 0.00880990379702155"
# [1] "mean of a = 0.26528152553972"
# [1] "sem of a = 0.0642283838675373"
# [1] "sd of a = 0.157326767479073"
# [1] "number in a = 6"
# [1] "mean of b = 0.0120344320288767"
# [1] "sem of b = 0.024813567036567"
# [1] "sd of b = 0.121561155875867"
# [1] "number in b = 24"

# ^^^^^^^^^^^^^^ use in paper ^^^^^^^^^^^^^^^^^^^^^^^^^^

# ----------------------------------------- Compare Ix vs g, Ix vs d ------------------------ 

# Very clearly, Ix mostly overlaps with d, not g (except for g 75 nM)

# Check Ix overlap using correlations. Could also use differences in P values (or logP values), but correlation seems simple and conservative.
# Also, in g_d_comb_fish_1.R, some p vals are 0, so cannot compare -log10P 


# Cor Ix vs g significantly lower than Ix vs d:

compare(
	# Ix row against the g columns (1 to 5)
	as.vector(g_and_d_cor["log10p_g_d_Ix", 1:5]),
	# Ix row against the d columns (6 to 11)
	as.vector(g_and_d_cor["log10p_g_d_Ix", 6:11])
)

	# Welch Two Sample t-test <<<<<<<<<<<<< use in paper

# data:  a and b
# t = -2.7839, df = 8.9029, p-value = 0.02148
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -1.029268 -0.105566
# sample estimates:
 # mean of x  mean of y 
# 0.03471513 0.60213229 

# [1] "exact P value = 0.02148240312299"
# [1] "mean of a = 0.0347151274814249"
# [1] "sem of a = 0.143598863617821"
# [1] "sd of a = 0.32109682054117"
# [1] "number in a = 5"
# [1] "mean of b = 0.602132286309171"
# [1] "sem of b = 0.144650598828787"
# [1] "sd of b = 0.354320158118559"
# [1] "number in b = 6"


# Ix vs g 75nM significantly higher R than Ix vs g 0nM, 8nM, 25nM, avg

# One-sample t-test: the Ix correlations with g columns 1 to 3 and 5 are tested
# against mu, the single Ix correlation with g column 4 (g 75 nM).
# The expression is left verbatim because the "data:" line of the output
# echoes it.
t.test(g_and_d_cor["log10p_g_d_Ix",c(1:3,5)],mu=g_and_d_cor["log10p_g_d_Ix",c(4)])

	# One Sample t-test

# data:  g_and_d_cor["log10p_g_d_Ix", c(1:3, 5)]
# t = -20.43, df = 3, p-value = 0.0002564
# alternative hypothesis: true mean is not equal to 0.5990598
# 95 percent confidence interval:
 # -0.21625609  0.00351403
# sample estimates:
# mean of x 
# -0.106371 




# Two sample t test with pooled variance. More conservative and also more appropriate.

# Two-sample t-test with pooled variance (var.equal = TRUE). The second sample
# is the single Ix correlation with g column 4 (g 75 nM), so the variance
# estimate comes from the first sample only.
# The expression is left verbatim because the "data:" line of the output
# echoes it.
t.test(g_and_d_cor["log10p_g_d_Ix",c(1:3,5)],g_and_d_cor["log10p_g_d_Ix",c(4)],var.equal=TRUE)

	# Two Sample t-test <<<<<<<<<<<<< use in paper

# data:  g_and_d_cor["log10p_g_d_Ix", c(1:3, 5)] and g_and_d_cor["log10p_g_d_Ix", c(4)]
# t = -9.1368, df = 3, p-value = 0.002771
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -0.9511412 -0.4597203
# sample estimates:
 # mean of x  mean of y 
# -0.1063710  0.5990598






# Mean of the Ix correlations with g columns 1 to 3 and 5 (all g except 75 nM).
mean(
	g_and_d_cor["log10p_g_d_Ix", c(1:3, 5)]
)
# [1] -0.106371 <<<<<<<<<<<<< use in paper

# Standard error of the mean of the same four correlations.
sem(
	g_and_d_cor["log10p_g_d_Ix", c(1:3, 5)]
)
# [1] 0.03452849 <<<<<<<<<<<<< use in paper































