# Function cf https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#testing-for-overdispersioncomputing-overdispersion-factor


# Pearson chi-square overdispersion check for a fitted model.
#
# model: a fitted model object that supports df.residual() and
#        residuals(type = "pearson").
#
# Returns a named numeric vector with elements
#   chisq - sum of squared Pearson residuals
#   ratio - chisq divided by the residual degrees of freedom
#   rdf   - residual degrees of freedom
#   p     - upper-tail chi-square probability of chisq on rdf df
overdisp_fun <- function(model) {
    resid_df <- df.residual(model)
    pearson_resid <- residuals(model, type = "pearson")
    chisq_stat <- sum(pearson_resid^2)
    c(
        chisq = chisq_stat,
        ratio = chisq_stat / resid_df,
        rdf = resid_df,
        p = pchisq(chisq_stat, df = resid_df, lower.tail = FALSE)
    )
}

# Likelihood-ratio style comparison of two fitted models.
#
# m1, m2: fitted model objects with a logLik() method whose result carries
#         a "df" attribute.
# REML:   passed positionally as the second argument of logLik(); whether it
#         has any effect depends on the logLik method of the model class.
#
# Returns a named numeric vector:
#   chi.sq - 2 * (logLik(m1) - logLik(m2))
#   df     - difference in the "df" attributes (m1 minus m2)
#   p      - upper-tail chi-square probability of chi.sq on df
delta_logLik <- function(m1,m2,REML=TRUE) {
	ll_first <- logLik(m1, REML)
	ll_second <- logLik(m2, REML)

	stat <- c(chi.sq = NA_real_, df = NA_real_, p = NA_real_)
	stat["chi.sq"] <- 2 * ((ll_first - ll_second)[1])
	stat["df"] <- attr(ll_first, "df") - attr(ll_second, "df")
	stat["p"] <- pchisq(
		q = stat["chi.sq"],
		df = stat["df"],
		lower.tail = FALSE
	)[1]
	stat
}

# clone <- read.table("clone.txt",header=TRUE,stringsAsFactors=FALSE,sep="\t")
# clone$Pool <- as.factor(clone$Pool)


# m1_pois <- gam(round(RH_clones) ~ gamma * dil + s(Pool, bs = "re"), data = clone, family = poisson, method = "REML")
# m1_neg_binom <- gam(round(RH_clones) ~ gamma * dil + s(Pool, bs = "re"), data = clone, family = nb, method = "REML")

# overdisp_fun(m1_pois)
       # # chisq         ratio           rdf             p 
 # # 5.699763e+02  3.797075e+01  1.501093e+01 8.595096e-112


# overdisp_fun(m1_neg_binom)
     # # chisq      ratio        rdf          p 
# # 15.3604385  0.9896293 15.5214064  0.4637013 

# negative binomial clearly fits better

# Get same answers, but with fewer parameters (no p value), from msme function. Also ratio in Ben Bolker function is same as dispersion. So Ben Bolker function is superior. cf https://stats.stackexchange.com/questions/66586/is-there-a-test-to-determine-whether-glm-overdispersion-is-significant

# library(msme)
# Loading required package: lattice

# P__disp(m1_pois)
# # pearson.chi2   dispersion 
   # # 569.97628     37.97075 
   
# P__disp(m1_neg_binom)
# # pearson.chi2   dispersion 
  # # 15.3604385    0.9896293 
  
 # Cannot compare pois and nb using anova and LLR, because the simpler model (nb, based on df) often has a better log-likelihood than the more complex model (pois). So simply use REML, AIC or BIC to compare.
# cf http://r.789695.n4.nabble.com/Why-there-is-no-p-value-from-likelihood-ratio-test-using-anova-in-GAM-model-fitting-td888781.html

# ---------- Do neg binomial separated by 1 Mb ------------------

 library(mgcv)
 library(multcomp)

 # Load the tab-delimited inputs (all with a header row, strings kept as
 # character).
 copy_raw <- read.table(
   "RH_human_gseq.txt",
   header = TRUE, sep = "\t", stringsAsFactors = FALSE
 )
 cell <- read.table(
   "cell_label_info.txt",
   header = TRUE, sep = "\t", stringsAsFactors = FALSE
 )

 # Per-column totals over every column from the fifth onwards
 # (columns 5+ are treated as the per-sample count columns).
 sum_reads <- colSums(copy_raw[, seq(5, ncol(copy_raw))])

 # uses mapped human reads, cf human_AIC_1.R
 reads <- read.table(
   "RH_pool_human_total_align.txt",
   header = TRUE, sep = "\t", stringsAsFactors = FALSE
 )

 # down sample copy_raw to prevent autocorrelation: keep every 100th row
 copy_raw_sub <- copy_raw[seq(from = 1, to = nrow(copy_raw), by = 100), ]


# takes about 30 min
# read in "overdisp_1.txt" if wish to save time

# Start the wall-clock timer for the reshape + model-fitting stage.
ptm <- proc.time()

# Columns 5 onwards are reshaped into long format: one row per
# (original row, sample column) pair, with the value in `copy` and the
# originating column name in `RH_ID`.
sample_cols <- colnames(copy_raw_sub)[5:ncol(copy_raw_sub)]

copy_raw_sub_l <- reshape(copy_raw_sub,
  varying = sample_cols,
  v.names = "copy",
  timevar = "RH_ID",
  times = sample_cols,
  # Size the row names from the data instead of a hard-coded 1:1e6 so the
  # reshape cannot run out of row names on larger inputs.
  new.row.names = seq_len(nrow(copy_raw_sub) * length(sample_cols)),
  direction = "long")

# Derive a numeric label from substrings of the sample IDs.
# ids:      character vector of sample IDs
# patterns: regular expressions, tried in order
# values:   label assigned where the corresponding pattern matches
# Unmatched IDs keep the default label 0; a later match overrides an earlier
# one. Assigning into a plain vector (rather than `df[rows, ]$col <- v`)
# is a no-op when a pattern matches nothing, instead of raising an error.
label_by_pattern <- function(ids, patterns, values) {
  labels <- numeric(length(ids))
  for (k in seq_along(patterns)) {
    labels[grepl(patterns[k], ids)] <- values[k]
  }
  labels
}

# Labels encoded in the sample IDs as "_w<week>_", "_d<conc>" and "RH<pool>_".
week_values <- c(0, 1, 2, 3, 4, 6)
conc_values <- c(0, 8, 25, 75)
pool_values <- 1:6

copy_raw_sub_l$week <- label_by_pattern(
  copy_raw_sub_l$RH_ID, paste0("_w", week_values, "_"), week_values)
copy_raw_sub_l$conc <- label_by_pattern(
  copy_raw_sub_l$RH_ID, paste0("_d", conc_values), conc_values)
copy_raw_sub_l$pool <- label_by_pattern(
  copy_raw_sub_l$RH_ID, paste0("RH", pool_values, "_"), pool_values)

# Attach cell labels (merge on the columns the two tables share).
copy_raw_sub_l <- merge(copy_raw_sub_l,cell)
# Per-sample column totals, looked up by sample ID.
copy_raw_sub_l$sum_reads <- sum_reads[copy_raw_sub_l$RH_ID]
copy_raw_sub_l  <- merge(copy_raw_sub_l,reads[,c(1:5,9)])
# NOTE(review): the renamed column is addressed by position (13); this
# depends on the column layout of the inputs -- confirm if they change.
colnames(copy_raw_sub_l)[13] <- "total_reads"

# pool and cell are used as random-effect grouping factors in the models.
copy_raw_sub_l$pool <- as.factor(copy_raw_sub_l$pool)
copy_raw_sub_l$cell <- as.factor(copy_raw_sub_l$cell)


# log10p_raw_sub <- data.frame(
							# Chromosome = as.character(), 
							# posS = integer(), 
							# posE = integer(), 
							# pos = numeric(), 
							# log10p_g_0nM = numeric(),
							# log10p_g_8nM = numeric(),
							# log10p_g_25nM = numeric(),
							# log10p_g_75nM = numeric(),
							# log10p_g_avg = numeric(),
							# log10p_d_w1 = numeric(),
							# log10p_d_w2 = numeric(),
							# log10p_d_w3 = numeric(),
							# log10p_d_w4 = numeric(),
							# log10p_d_w6 = numeric(),
							# log10p_d_avg = numeric(),
							# log10p_g_d_Ix = numeric(),
							# coef_g_0nM = numeric(),
							# coef_g_8nM = numeric(),
							# coef_g_25nM = numeric(),
							# coef_g_75nM = numeric(),
							# coef_g_avg = numeric(),
							# coef_d_w1 = numeric(),
							# coef_d_w2 = numeric(),
							# coef_d_w3 = numeric(),
							# coef_d_w4 = numeric(),
							# coef_d_w6 = numeric(),
							# coef_d_avg = numeric(),
							# coef_g_d_Ix = numeric(), 
							# stringsAsFactors=FALSE
							# )
							
# Empty results table: one numeric column per statistic recorded for each
# fitted locus. Rows are added by the fitting loop.
ans_columns <- c(
	# Poisson model: Pearson overdispersion check
	"pois_chisq", "pois_ratio", "pois_rdf", "pois_p",
	# Poisson model: random-effect smooth terms (pool, cell)
	"pois_pool_edf", "pois_pool_Ref.df", "pois_pool_Chi.sq", "pois_pool_p_value",
	"pois_cell_edf", "pois_cell_Ref.df", "pois_cell_Chi.sq", "pois_cell_p_value",
	# Poisson model: information criteria
	"pois_AIC", "pois_BIC", "pois_REML",
	# Negative binomial model: Pearson overdispersion check
	"nb_chisq", "nb_ratio", "nb_rdf", "nb_p",
	# Negative binomial model: random-effect smooth terms (pool, cell)
	"nb_pool_edf", "nb_pool_Ref.df", "nb_pool_Chi.sq", "nb_pool_p_value",
	"nb_cell_edf", "nb_cell_Ref.df", "nb_cell_Chi.sq", "nb_cell_p_value",
	# anova() of the full nb model against reduced nb models
	"nb_re_anova_Chi.sq", "nb_re_anova_df", "nb_re_anova_p",
	"nb_pool_anova_Chi.sq", "nb_pool_anova_df", "nb_pool_anova_p",
	"nb_cell_anova_Chi.sq", "nb_cell_anova_df", "nb_cell_anova_p",
	# Information criteria: full nb model and reduced nb models
	"nb_AIC", "nb_BIC", "nb_REML",
	"nb_no_re_AIC", "nb_no_re_BIC", "nb_no_re_REML",
	"nb_no_pool_AIC", "nb_no_pool_BIC", "nb_no_pool_REML",
	"nb_no_cell_AIC", "nb_no_cell_BIC", "nb_no_cell_REML",
	# delta_logLik() of the full nb model against reduced nb models
	"nb_no_re_logLik_Chi.sq", "nb_no_re_logLik_df", "nb_no_re_logLik_p",
	"nb_no_pool_logLik_Chi.sq", "nb_no_pool_logLik_df", "nb_no_pool_logLik_p",
	"nb_no_cell_logLik_Chi.sq", "nb_no_cell_logLik_df", "nb_no_cell_logLik_p"
)

ans <- do.call(
	data.frame,
	setNames(rep(list(numeric()), length(ans_columns)), ans_columns)
)

# Number of per-locus fits; `id` is presumably the per-row index that
# reshape() adds when converting to long format -- confirm against the input.
ngroup <- max(copy_raw_sub_l$id)

# Model formulas, defined once. All share the week * conc fixed effects and
# the log(total_reads) offset; they differ only in which random-effect
# smooths (pool, cell) are included.
f_full    <- copy ~ week * conc + s(pool, bs = "re") + s(cell, bs = "re") + offset(log(total_reads))
f_no_re   <- copy ~ week * conc + offset(log(total_reads))
f_no_pool <- copy ~ week * conc + s(cell, bs = "re") + offset(log(total_reads))
f_no_cell <- copy ~ week * conc + s(pool, bs = "re") + offset(log(total_reads))

# Column groups of `ans` that are filled from a single vector-valued result.
disp_stats  <- c("chisq", "ratio", "rdf", "p")
smooth_cols <- c("edf", "Ref.df", "Chi.sq", "p_value")
lr_stats    <- c("chi.sq", "df", "p")
lr_cols     <- c("Chi.sq", "df", "p")

# Fit Poisson and negative binomial GAMs for every id and record the
# statistics in row i of `ans`. Results are assigned in the same order as
# they are computed, so a failure part-way through an iteration leaves the
# earlier columns of that row filled. An error in any step is reported and
# the loop moves on to the next id.
for (i in seq_len(ngroup)) {
	cat("i = ",i,"/",ngroup, "\n")
	tryCatch ({

		# Data for this id, extracted once and shared by all five fits.
		locus_data <- subset(copy_raw_sub_l, copy_raw_sub_l$id == i)

		m1_pois       <- gam(f_full,    data = locus_data, family = poisson, method = "REML")
		m1_nb         <- gam(f_full,    data = locus_data, family = nb,      method = "REML")
		m1_nb_no_re   <- gam(f_no_re,   data = locus_data, family = nb,      method = "REML")
		m1_nb_no_pool <- gam(f_no_pool, data = locus_data, family = nb,      method = "REML")
		m1_nb_no_cell <- gam(f_no_cell, data = locus_data, family = nb,      method = "REML")

		# ---- Poisson model ----
		ans[i, paste0("pois_", disp_stats)] <- overdisp_fun(m1_pois)
		pois_summary <- summary(m1_pois)
		# s.table rows follow the order of the smooths in the formula:
		# row 1 = s(pool), row 2 = s(cell).
		ans[i, paste0("pois_pool_", smooth_cols)] <- pois_summary$s.table[1, ]
		ans[i, paste0("pois_cell_", smooth_cols)] <- pois_summary$s.table[2, ]
		ans[i, "pois_AIC"] <- AIC(m1_pois)
		ans[i, "pois_BIC"] <- BIC(m1_pois)
		ans[i, "pois_REML"] <- pois_summary$sp.criterion

		# ---- Negative binomial model ----
		ans[i, paste0("nb_", disp_stats)] <- overdisp_fun(m1_nb)
		nb_summary <- summary(m1_nb)
		ans[i, paste0("nb_pool_", smooth_cols)] <- nb_summary$s.table[1, ]
		ans[i, paste0("nb_cell_", smooth_cols)] <- nb_summary$s.table[2, ]

		# ---- anova() of the full nb model against each reduced model ----
		# Each table is computed once; row 2 holds the comparison. The
		# p-value column is addressed by its full name rather than relying
		# on partial matching of `$Pr`.
		aov_re <- anova(m1_nb, m1_nb_no_re, test = "Chisq")
		ans[i, "nb_re_anova_Chi.sq"] <- aov_re$Deviance[2]
		ans[i, "nb_re_anova_df"] <- aov_re$Df[2]
		ans[i, "nb_re_anova_p"] <- aov_re[["Pr(>Chi)"]][2]

		aov_pool <- anova(m1_nb, m1_nb_no_pool, test = "Chisq")
		ans[i, "nb_pool_anova_Chi.sq"] <- aov_pool$Deviance[2]
		ans[i, "nb_pool_anova_df"] <- aov_pool$Df[2]
		ans[i, "nb_pool_anova_p"] <- aov_pool[["Pr(>Chi)"]][2]

		aov_cell <- anova(m1_nb, m1_nb_no_cell, test = "Chisq")
		ans[i, "nb_cell_anova_Chi.sq"] <- aov_cell$Deviance[2]
		ans[i, "nb_cell_anova_df"] <- aov_cell$Df[2]
		ans[i, "nb_cell_anova_p"] <- aov_cell[["Pr(>Chi)"]][2]

		# ---- Information criteria ----
		ans[i, "nb_AIC"] <- AIC(m1_nb)
		ans[i, "nb_BIC"] <- BIC(m1_nb)
		ans[i, "nb_REML"] <- nb_summary$sp.criterion

		ans[i, "nb_no_re_AIC"] <- AIC(m1_nb_no_re)
		ans[i, "nb_no_re_BIC"] <- BIC(m1_nb_no_re)
		ans[i, "nb_no_re_REML"] <- summary(m1_nb_no_re)$sp.criterion

		ans[i, "nb_no_pool_AIC"] <- AIC(m1_nb_no_pool)
		ans[i, "nb_no_pool_BIC"] <- BIC(m1_nb_no_pool)
		ans[i, "nb_no_pool_REML"] <- summary(m1_nb_no_pool)$sp.criterion

		ans[i, "nb_no_cell_AIC"] <- AIC(m1_nb_no_cell)
		ans[i, "nb_no_cell_BIC"] <- BIC(m1_nb_no_cell)
		ans[i, "nb_no_cell_REML"] <- summary(m1_nb_no_cell)$sp.criterion

		# ---- delta_logLik() of the full nb model against each reduced model ----
		# Each comparison is computed once and its three statistics stored.
		ans[i, paste0("nb_no_re_logLik_", lr_cols)] <-
			delta_logLik(m1_nb, m1_nb_no_re)[lr_stats]
		ans[i, paste0("nb_no_pool_logLik_", lr_cols)] <-
			delta_logLik(m1_nb, m1_nb_no_pool)[lr_stats]
		ans[i, paste0("nb_no_cell_logLik_", lr_cols)] <-
			delta_logLik(m1_nb, m1_nb_no_cell)[lr_stats]

		# ---- Disabled: glht contrasts for growth, drug and interaction ----
		# glht_growth <- glht(m1, linfct = c(
		# 			"week == 0",
		# 			"week + 8*week:conc == 0",
		# 			"week + 25*week:conc == 0",
		# 			"week + 75*week:conc == 0",
		# 			"week + (27)*week:conc == 0"))
		#
		# glht_drug <- glht(m1, linfct = c(
		# 			"conc + 1*week:conc == 0",
		# 			"conc + 2*week:conc == 0",
		# 			"conc + 3*week:conc == 0",
		# 			"conc + 4*week:conc == 0",
		# 			"conc + 6*week:conc == 0",
		# 			"conc + (3.2)*week:conc == 0"))
		#
		# glht_omni <- glht(m1)
		#
		# growth_stat <- summary(glht_growth,test = adjusted("none"))$test$tstat
		# drug_stat <- summary(glht_drug,test = adjusted("none"))$test$tstat
		# Ix_stat <- summary(glht_omni,test = adjusted("none"))$test$tstat["week:conc"]
		# growth_coef <- summary(glht_growth,test = adjusted("none"))$test$coefficients
		# drug_coef <- summary(glht_drug,test = adjusted("none"))$test$coefficients
		# Ix_coef <- summary(glht_omni,test = adjusted("none"))$test$coefficients["week:conc"]
		#
		# log10p_raw_sub[i,] <- c(
		# 			copy_raw_sub[i,1:4],
		# 			-log10(2*pnorm(-abs(growth_stat))),
		# 			-log10(2*pnorm(-abs(drug_stat))),
		# 			-log10(2*pnorm(-abs(Ix_stat))),
		# 			growth_coef,
		# 			drug_coef,
		# 			Ix_coef
		# 			)
		}, error = function(e) {cat ("Error on line ", i, ": ", conditionMessage(e),"\n")})
}

# write.table(ans,"overdisp_1.txt",quote=FALSE, sep="\t",row.names=FALSE)

# outputName=paste("task-",j,".RData",sep="")
# outputPath=file.path("Output",outputName)
# save("log10p_raw_sub",file=outputPath)

# Elapsed time since the timer was started before the reshape step.
print(proc.time() - ptm)

# ----------- Numbers to quote in paper -----------------

# Preview the first six rows of the results table.
head(ans, n = 6L)
    # pois_chisq   pois_ratio  pois_rdf        pois_p pois_pool_edf pois_pool_Ref.df pois_pool_Chi.sq pois_pool_p_value pois_cell_edf pois_cell_Ref.df
# 1 2.553513e-14 2.300462e-16 111.00000  1.000000e+00  3.226586e-15                5     2.575295e-42      1.0000000000  3.962824e-15               28
# 2 3.913225e+02 4.601656e+00  85.03949  8.019032e-41  4.473757e+00                5     1.499833e+04      0.0002422434  2.148676e+01               28
# 3 1.010093e+03 1.209957e+01  83.48176 5.346683e-159  4.454257e+00                5     1.315750e+05      0.0179856498  2.306399e+01               28
# 4 2.512479e+03 3.016522e+01  83.29060  0.000000e+00  4.399216e+00                5     9.205866e+05      0.0002570769  2.331019e+01               28
# 5 4.271548e+03 5.132672e+01  83.22269  0.000000e+00  4.747892e+00                5     7.659483e+06      0.0009902399  2.302941e+01               28
# 6 3.817987e+03 4.583622e+01  83.29628  0.000000e+00  4.813680e+00                5     7.473904e+06      0.0001625913  2.289004e+01               28
  # pois_cell_Chi.sq pois_cell_p_value pois_AIC   pois_BIC  pois_REML     nb_chisq     nb_ratio    nb_rdf      nb_p  nb_pool_edf nb_pool_Ref.df nb_pool_Chi.sq
# 1     6.295684e-57      1.000000e+00    8.000   18.97973  -58.53478 2.553513e-14 2.300462e-16 111.00000 1.0000000 3.649667e-15              5   5.318917e-43
# 2     1.712204e+03      2.878052e-01 1263.853 1346.60818  683.43114 9.203594e+01 1.010102e+00  91.11546 0.4532998 4.527411e+00              5   6.034175e+02
# 3     2.341632e+04      1.483593e-01 1732.160 1818.91133  934.98054 9.282825e+01 1.037708e+00  89.45507 0.3825469 4.619711e+00              5   1.252544e+03
# 4     1.162197e+05      2.198547e-02 3155.553 3242.71644 1655.01685 9.142477e+01 9.951998e-01  91.86574 0.4933606 4.645261e+00              5   7.135586e+02
# 5     4.416407e+05      1.352667e-03 4565.984 4653.30711 2370.69495 9.949542e+01 1.018547e+00  97.68367 0.4299609 4.900526e+00              5   7.848553e+02
# 6     1.008487e+06      1.450120e-07 4595.493 4682.63518 2383.54600 9.019402e+01 9.702598e-01  92.95862 0.5618893 4.895267e+00              5   1.956480e+03
  # nb_pool_p_value  nb_cell_edf nb_cell_Ref.df nb_cell_Chi.sq nb_cell_p_value nb_re_anova_Chi.sq nb_re_anova_df nb_re_anova_p nb_pool_anova_Chi.sq
# 1    1.000000e+00 1.067189e-14             28   1.069296e-56    1.000000e+00       5.099595e-14  -4.263256e-14            NA         3.020157e-15
# 2    2.426736e-09 1.535712e+01             28   9.537556e+01    1.252862e-01      -1.884852e+01  -2.570948e+01    0.83219673         3.635460e+00
# 3    1.440996e-11 1.692522e+01             28   4.956535e+02    1.119103e-04      -2.849024e+01  -2.666900e+01    0.36886952         3.725212e+00
# 4    3.450044e-14 1.448900e+01             28   2.293588e+02    6.119906e-05      -2.921665e+01  -2.549863e+01    0.27768266         5.082095e+00
# 5    5.174544e-54 8.415804e+00             28   5.639532e+01    6.766772e-04      -2.816339e+01  -2.075717e+01    0.12785527         1.300312e+01
# 6    6.710276e-51 1.314611e+01             28   3.069486e+02    8.386214e-05      -3.401191e+01  -2.448041e+01    0.09512435         8.519877e+00
  # nb_pool_anova_df nb_pool_anova_p nb_cell_anova_Chi.sq nb_cell_anova_df nb_cell_anova_p   nb_AIC     nb_BIC   nb_REML nb_no_re_AIC nb_no_re_BIC nb_no_re_REML
# 1    -1.421085e-14              NA         5.099595e-14    -2.842171e-14              NA   10.000   23.72466 -58.53478       10.000     23.72466     -58.53478
# 2     1.832836e+00      0.14169418        -1.316821e+01    -2.060878e+01       0.8907388 1128.924 1199.92365 591.59795     1234.381   1248.10591     630.79570
# 3     1.155351e+00      0.06637436        -1.822749e+01    -2.160592e+01       0.6702010 1145.254 1220.34965 600.01688     1296.900   1310.62459     658.33668
# 4     2.128778e+00      0.08823727        -1.515122e+01    -2.041665e+01       0.7884045 1222.383 1292.25275 632.60649     1353.155   1366.87960     685.23224
# 5     7.154990e+00      0.07746834        -8.845594e+00    -1.571298e+01       0.9108035 1370.307 1427.23977 702.92019     1538.785   1552.50984     777.02171
# 6     3.511078e+00      0.05363167        -1.333249e+01    -1.945082e+01       0.8408397 1345.701 1412.42042 696.31030     1562.661   1576.38616     789.09693
  # nb_no_pool_AIC nb_no_pool_BIC nb_no_pool_REML nb_no_cell_AIC nb_no_cell_BIC nb_no_cell_REML nb_no_re_logLik_Chi.sq nb_no_re_logLik_df nb_no_re_logLik_p
# 1         10.000       23.72466       -58.53478         10.000       23.72466       -58.53478                 0.0000       2.842171e-14      1.000000e+00
# 2       1132.216     1212.88225       599.41519       1157.150     1184.35073       599.15678               147.1881       2.086559e+01      5.241458e-21
# 3       1146.751     1230.58425       608.80181       1182.456     1209.78950       610.11521               196.3615       2.235787e+01      8.732706e-30
# 4       1224.359     1305.90393       641.73368       1240.613     1267.97452       637.46563               171.6795       2.045395e+01      6.732933e-26
# 5       1372.536     1457.19490       720.82211       1370.506     1397.91377       703.95145               199.9607       1.574113e+01      5.762473e-34
# 6       1348.497     1435.08093       715.32293       1360.930     1388.34036       700.61246               255.5731       1.930636e+01      3.448332e-43
  # nb_no_pool_logLik_Chi.sq nb_no_pool_logLik_df nb_no_pool_logLik_p nb_no_cell_logLik_Chi.sq nb_no_cell_logLik_df nb_no_cell_logLik_p
# 1             5.151435e-14         1.065814e-14        1.636709e-13                  0.00000         1.687539e-14        1.000000e+00
# 2            -3.751600e+00        -3.521695e+00                 NaN                 60.13815         1.595620e+01        4.807598e-07
# 3            -4.869052e+00        -3.183113e+00                 NaN                 72.00244         1.740016e+01        1.319174e-08
# 4            -6.531437e+00        -4.253510e+00                 NaN                 49.20211         1.548607e+01        2.222651e-05
# 5            -1.797282e+01        -1.010087e+01                 NaN                 21.71200         1.075632e+01        2.390374e-02
# 6            -1.167705e+01        -7.236682e+00                 NaN                 43.87046         1.432065e+01        7.602171e-05

# Standard error of the mean of x, ignoring missing values:
# sqrt(variance of the non-missing values / number of non-missing values).
sem <- function(x) {
	n_present <- sum(!is.na(x))
	sqrt(var(x, na.rm = TRUE) / n_present)
}

# Print a Welch two-sample t-test of a against b, its unrounded p-value, and
# the mean, sem, sd and non-missing count of each sample.
#
# a, b: numeric vectors (missing values are ignored in the summaries).
# Called for its printed output.
compare <- function(a,b) {
	# Print the four descriptive lines for one sample under the given label.
	describe <- function(label, values) {
		print(paste0("mean of ", label, " = ", mean(values, na.rm = TRUE)))
		print(paste0("sem of ", label, " = ", sem(values)))
		print(paste0("sd of ", label, " = ", sd(values, na.rm = TRUE)))
		print(paste0("number in ", label, " = ", sum(!is.na(values))))
	}

	welch <- t.test(a,b)
	print(welch)

	print(paste0("exact P value = ", welch$p.value))

	describe("a", a)
	describe("b", b)
}

# --------------- Dispersion ---------------------------

# Compare the Pearson chi^2 of the dispersion check: Poisson vs negative binomial
compare(a = ans[["pois_chisq"]], b = ans[["nb_chisq"]])

# Welch Two Sample t-test

# data:  a and b
# t = 69.382, df = 3112.7, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # 2479.844 2624.080
# sample estimates:
 # mean of x  mean of y 
# 2641.40343   89.44151 

# [1] "exact P value = 0"
# [1] "mean of a = 2641.40343472942"
# [1] "sem of a = 36.7790429872683"
# [1] "sd of a = 2052.05966860706"
# [1] "number in a = 3113"
# [1] "mean of b = 89.4415093104334"
# [1] "sem of b = 0.38671157925323"
# [1] "sd of b = 21.5762883075451"
# [1] "number in b = 3113"

# Dispersion index (overdispersion ratio): Poisson vs negative binomial
compare(a = ans[["pois_ratio"]], b = ans[["nb_ratio"]])
	# Welch Two Sample t-test

# data:  a and b
# t = 69.357, df = 3112.5, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # 29.83815 31.57429
# sample estimates:
 # mean of x  mean of y 
# 31.6689606  0.9627408 

# [1] "exact P value = 0"
# [1] "mean of a = 31.6689605743029" <<<<<<<<<< use in paper
# [1] "sem of a = 0.442711504429489" <<<<<<<<<< use in paper
# [1] "sd of a = 24.7007629693516"
# [1] "number in a = 3113"
# [1] "mean of b = 0.962740825625776" <<<<<<<<<< use in paper
# [1] "sem of b = 0.00400022443033653" <<<<<<<<<< use in paper
# [1] "sd of b = 0.22318958167867"
# [1] "number in b = 3113"

# Is the Poisson dispersion ratio significantly different from 1?
t.test(ans$pois_ratio, mu = 1)

	# One Sample t-test

# data:  ans$pois_ratio
# t = 69.275, df = 3112, p-value < 2.2e-16
# alternative hypothesis: true mean is not equal to 1
# 95 percent confidence interval:
 # 30.80092 32.53700
# sample estimates:
# mean of x 
 # 31.66896 

# Unrounded p-value of the same test
t.test(ans$pois_ratio, mu = 1)[["p.value"]]
# [1] 0

# Is the nb dispersion (ratio) significantly different from 1?
t.test(ans$nb_ratio, mu = 1)

	# One Sample t-test

# data:  ans$nb_ratio
# t = -9.3143, df = 3112, p-value < 2.2e-16
# alternative hypothesis: true mean is not equal to 1
# 95 percent confidence interval:
 # 0.9548975 0.9705842
# sample estimates:
# mean of x 
# 0.9627408 

# Unrounded p-value of the same test
t.test(ans$nb_ratio, mu = 1)[["p.value"]]
# [1] 2.253838e-20

# Compare the dispersion-check p values: Poisson vs negative binomial
compare(a = ans[["pois_p"]], b = ans[["nb_p"]])

	# Welch Two Sample t-test

# data:  a and b
# t = -82.266, df = 6100.8, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -0.4664593 -0.4447458
# sample estimates:
 # mean of x  mean of y 
# 0.04335479 0.49895737 

# [1] "exact P value = 0"
# [1] "mean of a = 0.0433547887392583"
# [1] "sem of a = 0.00362721975108041"
# [1] "sd of a = 0.202378059781046"
# [1] "number in a = 3113"
# [1] "mean of b = 0.498957369996145"
# [1] "sem of b = 0.00418503379845924"
# [1] "sd of b = 0.233500884526779"
# [1] "number in b = 3113"

# Poisson dispersion p values; count how many are exactly zero.
pois_pval <- ans[["pois_p"]]
sum(pois_pval == 0)
# [1] 1847

# Replace exact zeros by .Machine$double.xmin so that -log10() stays finite.
pois_pval <- replace(pois_pval, pois_pval == 0, .Machine$double.xmin)

# Compare on the -log10 scale
compare(a = -log10(pois_pval), b = -log10(ans[["nb_p"]]))

	# Welch Two Sample t-test

# data:  a and b
# t = 147.97, df = 3112.1, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # 245.9435 252.5487
# sample estimates:
  # mean of x   mean of y 
# 249.6135267   0.3674259 

# [1] "exact P value = 0"
# [1] "mean of a = 249.613526691682"
# [1] "sem of a = 1.68439322235068"
# [1] "sd of a = 93.9794817080319"
# [1] "number in a = 3113"
# [1] "mean of b = 0.367425878434801"
# [1] "sem of b = 0.00496737164694143"
# [1] "sd of b = 0.277150849716218"
# [1] "number in b = 3113"


# get values for pois and nb dispersion ratios rdf

# # compare(ans$pois_rdf,ans$nb_rdf)

	# Welch Two Sample t-test

# data:  a and b
# t = -59.75, df = 6224, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -8.954378 -8.385477
# sample estimates:
# mean of x mean of y 
 # 84.91997  93.58990 

# [1] "exact P value = 0"
# [1] "mean of a = 84.9199726873044"
# [1] "sem of a = 0.102730980262942"
# [1] "sd of a = 5.73179953015705"
# [1] "number in a = 3113"
# [1] "mean of b = 93.5899003056276"
# [1] "sem of b = 0.102474529594287"
# [1] "sd of b = 5.71749105360655"
# [1] "number in b = 3113"


# --------- Random effects in nb ---------------------


# Significance pool random effect in nb, direct from gam
# unfortunately gam does not give df 
# Ref.df, or reference degrees of freedom, gives closest df and is quoted in paper:
# https://stackoverflow.com/questions/53773685/formal-quotation-of-smooth-random-terms-in-mgcvgam-mixed-model
# See also: https://stats.stackexchange.com/questions/70871/generalised-additive-model-what-is-ref-df-in-rs-output


# Pool random effect in the nb model, taken from summary.gam(): Chi.sq
mean(ans[["nb_pool_Chi.sq"]])
# [1] 873.6206 <<<<<<<<<<< use in paper

sem(ans[["nb_pool_Chi.sq"]])
# [1] 14.24601 <<<<<<<<<<< use in paper

# Reference degrees of freedom
mean(ans[["nb_pool_Ref.df"]])
# [1] 5 <<<<<<<<<<< use in paper

sem(ans[["nb_pool_Ref.df"]])
# [1] 0 

# Effective degrees of freedom
mean(ans[["nb_pool_edf"]])
# [1] 4.284518 

sem(ans[["nb_pool_edf"]])
# [1] 0.01978414 

# p values, raw and on the -log10 scale
mean(ans[["nb_pool_p_value"]])
# [1] 0.0501568

sem(ans[["nb_pool_p_value"]])
# [1] 0.003719157

mean(-log10(ans[["nb_pool_p_value"]]))
# [1] 18.00037 <<<<<<<<<<< use in paper

sem(-log10(ans[["nb_pool_p_value"]]))
# [1] 0.3054683 <<<<<<<<<<< use in paper





# Significance of the cell random effect in nb, direct from gam.
# Gives a less significant mean -log10P than pool.

# Chi.sq
mean(ans[["nb_cell_Chi.sq"]])
# [1] 226.4421 <<<<<<<<<<< use in paper

sem(ans[["nb_cell_Chi.sq"]])
# [1] 4.382417 <<<<<<<<<<< use in paper

# Reference degrees of freedom
mean(ans[["nb_cell_Ref.df"]])
# [1] 28 <<<<<<<<<<< use in paper

sem(ans[["nb_cell_Ref.df"]])
# [1] 0

# Effective degrees of freedom
mean(ans[["nb_cell_edf"]])
# [1] 13.12558 

sem(ans[["nb_cell_edf"]])
# [1] 0.09627365 

# p values, raw and on the -log10 scale
mean(ans[["nb_cell_p_value"]])
# [1] 0.07686681

sem(ans[["nb_cell_p_value"]])
# [1] 0.003976513

mean(-log10(ans[["nb_cell_p_value"]]))
# [1] 2.526998 <<<<<<<<<<< use in paper

sem(-log10(ans[["nb_cell_p_value"]]))
# [1] 0.03308612 <<<<<<<<<<< use in paper









# examine p value for random effects using anova and LLR
# mean p vals are insignificant, a finding I commonly get using anova and gam for random effects
# chi sq vals can be negative, implying that adding extra random effect terms makes model worse!
# seems anova works well for lmer and glmmTMB, but not gam. 
# summary.gam() provides P vals for random effects but no df, however is the most reliable method when using gam object.

# anova(): full nb model vs nb model without any random effects
mean(ans[["nb_re_anova_Chi.sq"]])
# [1] -25.05257 

sem(ans[["nb_re_anova_Chi.sq"]])
# [1] 0.1568541 

mean(ans[["nb_re_anova_df"]])
# [1] -23.09131 

sem(ans[["nb_re_anova_df"]])
# [1] 0.1180582 

mean(ans[["nb_re_anova_p"]], na.rm = TRUE)
# [1] 0.392495 

sem(ans[["nb_re_anova_p"]])
# [1] 0.004189943 

mean(-log10(ans[["nb_re_anova_p"]]), na.rm = TRUE)
# [1] 0.5246692

sem(-log10(ans[["nb_re_anova_p"]]))
# [1] 0.00741481


# percent significant (denominator counts every row, including NA p values)
sum(ans[["nb_re_anova_p"]] < 0.05, na.rm = TRUE) / nrow(ans)
# [1] 0.03983296

# total sig:
sum(ans[["nb_re_anova_p"]] < 0.05, na.rm = TRUE)
# [1] 124

# total:
nrow(ans)
# [1] 3113





# anova(): full nb model vs nb model without the pool random effect
mean(ans[["nb_pool_anova_Chi.sq"]])
# [1] 6.058241

sem(ans[["nb_pool_anova_Chi.sq"]])
# [1] 0.07777886

mean(ans[["nb_pool_anova_df"]])
# [1] 3.227457

sem(ans[["nb_pool_anova_df"]])
# [1] 0.07701499

mean(ans[["nb_pool_anova_p"]], na.rm = TRUE)
# [1] 0.1052777

sem(ans[["nb_pool_anova_p"]])
# [1] 0.002350412

mean(-log10(ans[["nb_pool_anova_p"]]), na.rm = TRUE)
# [1] 1.132965

sem(-log10(ans[["nb_pool_anova_p"]]))
# [1] 0.006465135


# percent significant (denominator counts every row, including NA p values)
sum(ans[["nb_pool_anova_p"]] < 0.05, na.rm = TRUE) / nrow(ans)
# [1] 0.2534533

# total sig:
sum(ans[["nb_pool_anova_p"]] < 0.05, na.rm = TRUE)
# [1] 789

# total:
nrow(ans)
# [1] 3113









# anova(): full nb model vs nb model without the cell random effect
mean(ans[["nb_cell_anova_Chi.sq"]])
# [1] -14.14227

sem(ans[["nb_cell_anova_Chi.sq"]])
# [1] 0.1089434

mean(ans[["nb_cell_anova_df"]])
# [1] -18.22555

sem(ans[["nb_cell_anova_df"]])
# [1] 0.1047915

mean(ans[["nb_cell_anova_p"]], na.rm = TRUE)
# [1] 0.7236124

sem(ans[["nb_cell_anova_p"]])
# [1] 0.003358573

mean(-log10(ans[["nb_cell_anova_p"]]), na.rm = TRUE)
# [1] 0.1722439

sem(-log10(ans[["nb_cell_anova_p"]]))
# [1] 0.004116155


# percent significant (denominator counts every row, including NA p values)
sum(ans[["nb_cell_anova_p"]] < 0.05, na.rm = TRUE) / nrow(ans)
# 0.01349181

# total sig:
sum(ans[["nb_cell_anova_p"]] < 0.05, na.rm = TRUE)
# [1] 42

# total:
nrow(ans)
# [1] 3113












# delta_logLik() to assess random effects:
# full nb model vs nb model without any random effects

mean(ans[,"nb_no_re_logLik_Chi.sq"])
# [1] 165.8081 

sem(ans[,"nb_no_re_logLik_Chi.sq"])
# [1] 1.051246 

mean(ans[,"nb_no_re_logLik_df"])
# [1] 18.78761 

sem(ans[,"nb_no_re_logLik_df"])
# [1] 0.1011574 

mean(ans[,"nb_no_re_logLik_p"])
# [1] 0.0427092

# Fixed: this statement previously took the sem of the _df column (a
# copy-paste slip), so the value recorded for it, 0.1011574, is the sem of
# df. Re-run to record the sem of the p values.
sem(ans[,"nb_no_re_logLik_p"])

mean(-log10(ans[,"nb_no_re_logLik_p"]))
# [1] 24.92573 

# Fixed: previously computed on the _df column; the value recorded for it,
# 0.05360768, is therefore not the sem of -log10(p). Re-run to record it.
sem(-log10(ans[,"nb_no_re_logLik_p"]))







# delta_logLik(): full nb model vs nb model without the pool random effect.
# Many are invalid: lots of NaN p values because chi-square and df have
# inconsistent signs.
mean(ans[,"nb_no_pool_logLik_Chi.sq"], na.rm=TRUE)
# [1] -7.70946

sem(ans[,"nb_no_pool_logLik_Chi.sq"])
# [1] 0.1095698

mean(ans[,"nb_no_pool_logLik_df"], na.rm=TRUE)
# [1] -4.790925

sem(ans[,"nb_no_pool_logLik_df"])
# [1] 0.07041886

mean(ans[,"nb_no_pool_logLik_p"], na.rm=TRUE)
# [1] 0.3530599

# Fixed: this statement previously took the sem of the _df column (a
# copy-paste slip), so the value recorded for it, 0.07041886, is the sem of
# df. Re-run to record the sem of the p values.
sem(ans[,"nb_no_pool_logLik_p"])

mean(-log10(ans[,"nb_no_pool_logLik_p"]), na.rm=TRUE)
# [1] 7.520444

# Fixed: previously computed on the _df column; the value recorded for it,
# 0.4441669, is therefore not the sem of -log10(p). Re-run to record it.
sem(-log10(ans[,"nb_no_pool_logLik_p"]))









# delta_logLik(): full nb model vs nb model without the cell random effect.
# Many are invalid: lots of NaN p values because chi-square and df have
# inconsistent signs.
mean(ans[,"nb_no_cell_logLik_Chi.sq"], na.rm=TRUE)
# [1] 51.79754

sem(ans[,"nb_no_cell_logLik_Chi.sq"])
# [1] 0.536196

mean(ans[,"nb_no_cell_logLik_df"], na.rm=TRUE)
# [1] 14.07902

sem(ans[,"nb_no_cell_logLik_df"])
# [1] 0.08937531

mean(ans[,"nb_no_cell_logLik_p"], na.rm=TRUE)
# [1] 0.06014905

# Fixed: this statement previously took the sem of the _df column (a
# copy-paste slip), so the value recorded for it, 0.08937531, is the sem of
# df. Re-run to record the sem of the p values.
sem(ans[,"nb_no_cell_logLik_p"])

mean(-log10(ans[,"nb_no_cell_logLik_p"]), na.rm=TRUE)
# [1] 5.531392

# Fixed: previously computed on the _df column; the value recorded for it,
# 0.05441566, is therefore not the sem of -log10(p). Re-run to record it.
sem(-log10(ans[,"nb_no_cell_logLik_p"]))






# ---------- Compare Pois and nb AIC, BIC and REML ---------------------------
# NB LLR and anova does not work comparing pois and nb, because nb is smaller model (more resid df) but lower resid deviance than pois, so LLR does not give P value. see http://r.789695.n4.nabble.com/Why-there-is-no-p-value-from-likelihood-ratio-test-using-anova-in-GAM-model-fitting-td888781.html
# Simon Wood suggests comparing using AIC in this situation.

# For example, using the id left over in `i` from the fitting loop:

i
# [1] 1200

# Refit the nb and Poisson models for that id and compare them with anova().
anova(
	gam(
		copy ~ week * conc + s(pool, bs = "re") + s(cell, bs = "re") + offset(log(total_reads)),
		data = subset(copy_raw_sub_l,copy_raw_sub_l$id == i),
		family = nb,
		method = "REML"
	),
	gam(
		copy ~ week * conc + s(pool, bs = "re") + s(cell, bs = "re") + offset(log(total_reads)),
		data = subset(copy_raw_sub_l,copy_raw_sub_l$id == i),
		family = poisson,
		method = "REML"
	),
	test = "Chisq"
)
# Analysis of Deviance Table

# Model 1: copy ~ week * conc + s(pool, bs = "re") + s(cell, bs = "re") + 
    # offset(log(total_reads))
# Model 2: copy ~ week * conc + s(pool, bs = "re") + s(cell, bs = "re") + 
    # offset(log(total_reads))
  # Resid. Df Resid. Dev     Df Deviance Pr(>Chi)
# 1    94.376     108.60                         
# 2    83.791     376.16 10.585  -267.56    

# All good, suggesting nb >> pois


# AIC: Poisson vs negative binomial
compare(a = ans[["pois_AIC"]], b = ans[["nb_AIC"]])

	# Welch Two Sample t-test

# data:  a and b
# t = 57.122, df = 3233.6, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # 2092.468 2241.219
# sample estimates:
# mean of x mean of y 
 # 3349.193  1182.350 

# [1] "exact P value = 0"
# [1] "mean of a = 3349.19335184892"
# [1] "sem of a = 37.5679495741619"
# [1] "sd of a = 2096.07613172773"
# [1] "number in a = 3113"
# [1] "mean of b = 1182.34996241425"
# [1] "sem of b = 5.25212445251675"
# [1] "sd of b = 293.038423192398"
# [1] "number in b = 3113"


# compare pois and nb BIC

# BIC across all rows of ans: Poisson fits (a) vs negative binomial fits (b).
compare(
  ans[["pois_BIC"]],
  ans[["nb_BIC"]]
)

	# Welch Two Sample t-test

# data:  a and b
# t = 57.376, df = 3241.8, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # 2109.728 2259.021
# sample estimates:
# mean of x mean of y 
 # 3432.020  1247.645 

# [1] "exact P value = 0"
# [1] "mean of a = 3432.01959734414"
# [1] "sem of a = 37.6804082885142"
# [1] "sd of a = 2102.35068303092"
# [1] "number in a = 3113"
# [1] "mean of b = 1247.64532395214"
# [1] "sem of b = 5.44302414895041"
# [1] "sd of b = 303.68953143185"
# [1] "number in b = 3113"

# compare pois and nb REML

# REML score across all rows of ans: Poisson fits (a) vs negative binomial
# fits (b).
compare(
  ans[["pois_REML"]],
  ans[["nb_REML"]]
)

	# Welch Two Sample t-test

# data:  a and b
# t = 58.437, df = 3255.1, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # 1091.955 1167.774
# sample estimates:
# mean of x mean of y 
# 1740.0268  610.1625 

# [1] "exact P value = 0"
# [1] "mean of a = 1740.0267776085"
# [1] "sem of a = 19.1161982319899"
# [1] "sd of a = 1066.57422876781"
# [1] "number in a = 3113"
# [1] "mean of b = 610.162495755686"
# [1] "sem of b = 2.89958882382376"
# [1] "sd of b = 161.780427048421"
# [1] "number in b = 3113"

# compare nb with and w/o random effects, AIC

# AIC: full nb model (a) vs nb model without random effects (b).
# NOTE(review): a and b are columns of the same data frame, so the values look
# paired row by row; the recorded output is an unpaired Welch test -- confirm
# that is intended before quoting it.
compare(
  ans[["nb_AIC"]],
  ans[["nb_no_re_AIC"]]
)
	# Welch Two Sample t-test

# data:  a and b
# t = -16.539, df = 6182.1, p-value < 2.2e-16  <<<<<<<<<<< use in paper
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -143.4324 -113.0333
# sample estimates:
# mean of x mean of y 
 # 1182.350  1310.583 

# [1] "exact P value = 3.73028720374828e-60"  <<<<<<<<<<< use in paper
# [1] "mean of a = 1182.34996241425" <<<<<<<<<<< use in paper
# [1] "sem of a = 5.25212445251675"  <<<<<<<<<<< use in paper
# [1] "sd of a = 293.038423192398"
# [1] "number in a = 3113"
# [1] "mean of b = 1310.58282301606"  <<<<<<<<<<< use in paper
# [1] "sem of b = 5.70368296473334"  <<<<<<<<<<< use in paper
# [1] "sd of b = 318.232798458134"
# [1] "number in b = 3113"


# AIC: full nb model (a) vs the nb_no_pool variant (b) -- presumably the fit
# without the pool random effect; verify where ans is built.
compare(
  ans[["nb_AIC"]],
  ans[["nb_no_pool_AIC"]]
)

	# Welch Two Sample t-test

# data:  a and b
# t = -0.25205, df = 6224, p-value = 0.801
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -16.43482  12.69004
# sample estimates:
# mean of x mean of y 
 # 1182.350  1184.222 

# [1] "exact P value = 0.801007044051018"
# [1] "mean of a = 1182.34996241425"
# [1] "sem of a = 5.25212445251675"
# [1] "sd of a = 293.038423192398"
# [1] "number in a = 3113"
# [1] "mean of b = 1184.22235178495"
# [1] "sem of b = 5.25336335594768"
# [1] "sd of b = 293.107546898656"
# [1] "number in b = 3113"


# AIC: full nb model (a) vs the nb_no_cell variant (b) -- presumably the fit
# without the cell random effect; verify where ans is built.
compare(
  ans[["nb_AIC"]],
  ans[["nb_no_cell_AIC"]]
)

	# Welch Two Sample t-test

# data:  a and b
# t = -3.1474, df = 6221, p-value = 0.001655
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -38.363458  -8.915559
# sample estimates:
# mean of x mean of y 
 # 1182.350  1205.989 

# [1] "exact P value = 0.00165529105089654"
# [1] "mean of a = 1182.34996241425"
# [1] "sem of a = 5.25212445251675"
# [1] "sd of a = 293.038423192398"
# [1] "number in a = 3113"
# [1] "mean of b = 1205.98947120428"
# [1] "sem of b = 5.36924035678283"
# [1] "sd of b = 299.572818983503"
# [1] "number in b = 3113"


# compare nb with and w/o random effects, BIC

# BIC: full nb model (a) vs nb model without random effects (b).
compare(
  ans[["nb_BIC"]],
  ans[["nb_no_re_BIC"]]
)

	# Welch Two Sample t-test

# data:  a and b
# t = -9.7237, df = 6210.4, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -92.11766 -61.20666
# sample estimates:
# mean of x mean of y 
 # 1247.645  1324.307 

# [1] "exact P value = 3.43831967801675e-22"
# [1] "mean of a = 1247.64532395214"
# [1] "sem of a = 5.44302414895041"
# [1] "sd of a = 303.68953143185"
# [1] "number in a = 3113"
# [1] "mean of b = 1324.30748365787"
# [1] "sem of b = 5.70368296473334"
# [1] "sd of b = 318.232798458134"
# [1] "number in b = 3113"



# BIC: full nb model (a) vs the nb_no_pool variant (b) -- presumably the fit
# without the pool random effect; verify where ans is built.
compare(
  ans[["nb_BIC"]],
  ans[["nb_no_pool_BIC"]]
)

	# Welch Two Sample t-test

# data:  a and b
# t = -1.9471, df = 6223.9, p-value = 0.05157
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -30.1487676   0.1024615
# sample estimates:
# mean of x mean of y 
 # 1247.645  1262.668 

# [1] "exact P value = 0.0515717713478629"
# [1] "mean of a = 1247.64532395214"
# [1] "sem of a = 5.44302414895041"
# [1] "sd of a = 303.68953143185"
# [1] "number in a = 3113"
# [1] "mean of b = 1262.66847699711"
# [1] "sem of b = 5.46872229443031"
# [1] "sd of b = 305.123340569177"
# [1] "number in b = 3113"



# BIC: full nb model (a) vs the nb_no_cell variant (b) -- presumably the fit
# without the cell random effect; verify where ans is built.
compare(
  ans[["nb_BIC"]],
  ans[["nb_no_cell_BIC"]]
)

	# Welch Two Sample t-test

# data:  a and b
# t = 1.9552, df = 6223.8, p-value = 0.0506
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -0.03912367 30.05199064
# sample estimates:
# mean of x mean of y 
 # 1247.645  1232.639 

# [1] "exact P value = 0.0505985386356398"
# [1] "mean of a = 1247.64532395214"
# [1] "sem of a = 5.44302414895041"
# [1] "sd of a = 303.68953143185"
# [1] "number in a = 3113"
# [1] "mean of b = 1232.6388904679"
# [1] "sem of b = 5.41095089444882"
# [1] "sd of b = 301.900027772756"
# [1] "number in b = 3113"






# compare nb with and w/o random effects, REML

# REML score: full nb model (a) vs nb model without random effects (b).
compare(
  ans[["nb_REML"]],
  ans[["nb_no_re_REML"]]
)

	# Welch Two Sample t-test

# data:  a and b
# t = -12.169, df = 6201.2, p-value < 2.2e-16
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -59.77721 -43.18966
# sample estimates:
# mean of x mean of y 
 # 610.1625  661.6459 

# [1] "exact P value = 1.10069721051489e-33"
# [1] "mean of a = 610.162495755686"
# [1] "sem of a = 2.89958882382376"
# [1] "sd of a = 161.780427048421"
# [1] "number in a = 3113"
# [1] "mean of b = 661.645932922874"
# [1] "sem of b = 3.08087558020809"
# [1] "sd of b = 171.895188363925"
# [1] "number in b = 3113"



# REML score: full nb model (a) vs the nb_no_pool variant (b) -- presumably
# the fit without the pool random effect; verify where ans is built.
compare(
  ans[["nb_REML"]],
  ans[["nb_no_pool_REML"]]
)

	# Welch Two Sample t-test

# data:  a and b
# t = -2.2644, df = 6223.5, p-value = 0.02358
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -17.401969  -1.252581
# sample estimates:
# mean of x mean of y 
 # 610.1625  619.4898 

# [1] "exact P value = 0.0235811850724093"
# [1] "mean of a = 610.162495755686"
# [1] "sem of a = 2.89958882382376"
# [1] "sd of a = 161.780427048421"
# [1] "number in a = 3113"
# [1] "mean of b = 619.489770721726"
# [1] "sem of b = 2.92552250548677"
# [1] "sd of b = 163.227377753952"
# [1] "number in b = 3113"




# REML score: full nb model (a) vs the nb_no_cell variant (b) -- presumably
# the fit without the cell random effect; verify where ans is built.
compare(
  ans[["nb_REML"]],
  ans[["nb_no_cell_REML"]]
)

	# Welch Two Sample t-test

# data:  a and b
# t = -1.7061, df = 6223.1, p-value = 0.08804
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
 # -15.123961   1.048885
# sample estimates:
# mean of x mean of y 
 # 610.1625  617.2000 

# [1] "exact P value = 0.0880449161544133"
# [1] "mean of a = 610.162495755686"
# [1] "sem of a = 2.89958882382376"
# [1] "sd of a = 161.780427048421"
# [1] "number in a = 3113"
# [1] "mean of b = 617.200033919361"
# [1] "sem of b = 2.93393998316469"
# [1] "sd of b = 163.697024733625"
# [1] "number in b = 3113"
















































