# Poisson regression using D. virilis and D. melanogaster data v. 16nov2013 # part of the ms "Efficient identification of Y chromosome sequences in the human and Drosophila genomes" AB Carvalho and AG Clark # Genome Research VOLUME 23 ISSUE 11 NOVEMBER 2013 PAGES: 1894-1907 #implements the Poisson regression test for the equality of the gain and loss rates #data file structure: #branchlength, events, gainloss, branch_name dataset <- "AF_vir_mel" #Procedure 1: Assumption-free (D. virilis + D. melanogaster) #dataset <- "HGL_vir" #Procedure 2: Homogeneous gain loss (D. virilis data) #dataset <- "HGL_mel" #Procedure 2: Homogeneous gain loss (D. melanogaster data) #dataset <- "HGL_vir_mel" #Procedure 2: Homogeneous gain loss (D. virilis + D. melanogaster) if (dataset == "AF_vir_mel" ) { bias <- 1.000 ;d<-read.table("AF_vir_mel_data.txt", header=T, sep=",")} if (dataset == "HGL_vir" ) { bias <- 1.185 ;d<-read.table("HGL_vir_data.txt", header=T, sep=",") } if (dataset == "HGL_mel" ) { bias <- 1.436 ;d<-read.table("HGL_mel_data.txt", header=T, sep=",") } if (dataset == "HGL_vir_mel" ) { bias <- 1.321 ;d<-read.table("HGL_vir_mel_data.txt", header=T, sep=",")} names(d) attach(d) factor(gainloss) gainloss_test2<-glm(events ~ offset(log(branchlength)) + gainloss, family=poisson) summary(gainloss_test2) #the following line test for the fit of the data to a Poisson distribuition 1 - pchisq(deviance(gainloss_test2), df.residual(gainloss_test2)) coef_regr <- coefficients(summary(gainloss_test2)) beta1 <- coef_regr[2,1] #this is the Poisson regression coefficient for "gainloss" Std_Err_beta1 <- coef_regr[2,2] #this is Std. Error of the Poisson regression coefficient for "gainloss" z <- beta1 / Std_Err_beta1 #this is the standard test for Ho = 0 adjusted_z <- (abs(beta1) - log(bias)) / Std_Err_beta1 #this is the test for Ho = log(bias) noquote (c("P value for gain/loss ratio <> ", bias , 2*(1-pnorm(adjusted_z)))) gain_loss_ratio <- exp(abs(beta1)) noquote (c("gain/loss ratio = ", gain_loss_ratio )) adjusted_beta1 <- (abs(beta1) - log(bias)) adjusted_gain_loss_ratio <- exp(adjusted_beta1) noquote (c("unbiased gain/loss ratio = ", adjusted_gain_loss_ratio )) noquote (c("95% confidence interval for the unbiased gain/loss ratio:")) exp( adjusted_beta1 - 1.96 * Std_Err_beta1 ) exp( adjusted_beta1 + 1.96 * Std_Err_beta1 )