# Flowering time validation experiments - combined analysis
# 4/3/2017
# Omits T-DNA lines that turned out not to be of interest in later
# GWAS versions, etc.
library(lme4)
library(beeswarm)
library(lmerTest)

# Experiment 'late_113': seeds derived from an ALMOST CERTAINLY homozygous
# SALK_113736 strain (though i've done too much pcr to believe in truth anymore)
late_113_expt <- read.csv('validations/113736_homo_021417.csv', header = TRUE)

# Recode the numeric genotype codes into line names (1 -> Col, 2 -> 113736).
# Both masks are computed from the codes as read from the file.
late_113_expt$genotype_label <- local({
  codes <- late_113_expt$genotype_label
  labels <- codes
  labels[codes == 1] <- 'Col'
  labels[codes == 2] <- '113736'
  labels
})

# Tag every row with the experiment it came from.
late_113_expt <- cbind(late_113_expt,
                       expt = rep('late_113', nrow(late_113_expt)))

# Experiment 'newseeds': run after bulking lines, before checking on segregation
newseeds <- read.csv('validations/strwa_tdnas_validate_expt_newseeds_012017.csv',
                     header = TRUE)

# Recode genotype codes into line names (1 -> Col, 3 -> 66521). Code-4 plants
# had to be genotyped to sort out homozygous mutants, so only those scored
# 'mu/mu' in X113736_geno are labelled 113736; every other code is left as is.
newseeds$genotype_label <- local({
  codes <- newseeds$genotype_label
  homozygous_mutant <- newseeds$X113736_geno == 'mu/mu'
  labels <- codes
  labels[codes == 1] <- 'Col'
  labels[codes == 3] <- '66521'
  labels[codes == 4 & homozygous_mutant] <- '113736'
  labels
})

# Tag every row with the experiment it came from.
newseeds <- cbind(newseeds, expt = rep('newseeds', nrow(newseeds)))

# Experiment 'agl65_only': repeat with unbulked seeds, only the agl65 T-DNA
agl65_only <- read.csv('validations/last_tdna_010317.csv', header = TRUE)

# Recode the numeric genotype codes into line names (1 -> Col, 2 -> 66521).
agl65_only$genotype_label[agl65_only$genotype_label == 1] <- 'Col'
agl65_only$genotype_label[agl65_only$genotype_label == 2] <- '66521'

# Tag every row with the experiment it came from. The label vector must be
# sized by THIS data frame: it was previously sized by nrow(newseeds), which
# makes cbind() error or silently recycle rows whenever the two files differ
# in row count.
agl65_only <- cbind(agl65_only, rep('agl65_only', nrow(agl65_only)))
colnames(agl65_only)[ncol(agl65_only)] <- 'expt'

# Experiment 'last_expt': one more experiment (originally noted as forthcoming)
last_expt <- read.csv('validations/last_tdnas_strwa_031317.csv', header = TRUE)

# Recode genotype codes into line names. The coding differs from the other
# experiments here: 2 -> Col, 4 -> 113736, 3 -> agl65.
# NOTE(review): code 3 is labelled 'agl65', not '66521' as in the other
# experiments, so these rows are dropped by the later genotype filter
# (which keeps only 'Col', '66521', '113736'). Confirm this is intentional.
last_expt$genotype_label <- local({
  codes <- last_expt$genotype_label
  labels <- codes
  labels[codes == 2] <- 'Col'
  labels[codes == 4] <- '113736'
  labels[codes == 3] <- 'agl65'
  labels
})

# Tag every row with the experiment it came from.
last_expt <- cbind(last_expt, expt = rep('last_expt', nrow(last_expt)))

# Stack the four experiments, keeping only the columns present in agl65_only
# (in that order) so that all pieces line up for rbind().
all_expts <- do.call(
  rbind,
  lapply(
    list(late_113_expt, newseeds, agl65_only, last_expt),
    function(one_expt) one_expt[, colnames(agl65_only)]
  )
)

# CS66521 is stock number for agl65-1, SALK_66521 is T3 line from which homozygous AT4G01390 mutants isolated
# Keep only the three genotypes of interest, then drop any row that has an NA
# in any of the retained columns.
is_target_genotype <- all_expts$genotype_label %in% c('Col', '66521', '113736')
all_expts <- na.omit(all_expts[is_target_genotype, ])

# Regress out experimental (expt) and positional (tray_row) effects with
# random-intercept-only models; the residuals are what gets plotted below.
RLN_resid_mod <- lmer(RLN ~ (1|tray_row) + (1|expt), data = all_expts)
DTF_resid_mod <- lmer(DTF ~ (1|tray_row) + (1|expt), data = all_expts)

RLN_resids <- resid(RLN_resid_mod)
DTF_resids <- resid(DTF_resid_mod)

# Side-by-side beeswarm plots of the residuals per genotype (DTF on the left,
# RLN on the right). Note that `meds` holds the per-genotype MEANS despite its
# name; they are drawn as short red horizontal bars centred on each group.
par(mfrow = c(1, 2))

# Days to flowering
pl <- beeswarm(DTF_resids ~ all_expts$genotype_label,
               pch = 19, las = 2, cex = .6,
               xlab = '', ylab = 'Days to flowering (residuals)')
meds <- by(DTF_resids, all_expts$genotype_label, mean)
segments(x0 = unique(round(pl$x)) - .2, y0 = meds,
         x1 = unique(round(pl$x)) + .2, y1 = meds,
         col = 'red', lwd = 2)

# Rosette leaf number
pl <- beeswarm(RLN_resids ~ all_expts$genotype_label,
               pch = 19, las = 2, cex = .6,
               xlab = '', ylab = 'Rosette leaves at flowering (residuals)')
meds <- by(RLN_resids, all_expts$genotype_label, mean)
segments(x0 = unique(round(pl$x)) - .2, y0 = meds,
         x1 = unique(round(pl$x)) + .2, y1 = meds,
         col = 'red', lwd = 2)

# Back to a single-panel layout.
par(mfrow = c(1, 1))

# Models including genotype as a fixed effect (Col is the reference level),
# with tray row and experiment as random intercepts.
DTF_geno_mod <- lmer(
  DTF ~ factor(genotype_label, levels=c('Col','66521','113736')) + (1|tray_row) + (1|expt),
  data = all_expts
)
RLN_geno_mod <- lmer(
  RLN ~ factor(genotype_label, levels=c('Col','66521','113736')) + (1|tray_row) + (1|expt),
  data = all_expts
)

# Colour lookup keyed by genotype label, used to colour the residual scatter.
cols <- c('Col' = 'red', '113736' = 'green', '66521' = 'blue')

plot(DTF_resids, RLN_resids, col = cols[all_expts$genotype_label])

# Report the fitted genotype models.
print(summary(DTF_geno_mod))
print(summary(RLN_geno_mod))

