# Helper functions shared by the analysis scripts.
source(file.path('primary_data_analysis', '00_library_of_functions.R'))

# Input data for the panels below.
# NOTE(review): objects used later without being defined in this script
# (e.g. pol2modelingclust, ordRows, optimized_models) are presumably restored
# by the two load() calls or by the sourced library -- confirm.
pseudogenes <- readRDS(file.path('data', 'pseudogenes.rds'))
load(file.path('data', 'pol2modelingclustering.RData'))
load(file.path('data', 'RNAPII_modeling_elaborated.RData'))

##############
## panel A #####
#############

# Figure S3A (4 x 4 in PDF): whatever plotAIC() draws for the clustering object.
pdf(file=file.path('figures', 'S3A.pdf'), width=4, height=4)
plotAIC(pol2modelingclust)
dev.off()

################
## panel B-C #######
#################

# Figure S3B-C: one row of four rnapii_plot() panels per entry, in ordRows order.
pseudogenes_ordered <- pseudogenes[ordRows]
optimized_models_ordered <- optimized_models[ordRows]

# The page height (0.8 in per row) is derived from the number of rows actually
# drawn, so it always agrees with the mfrow layout set just below.
pdf(file.path('figures','S3BC.pdf'), height=length(pseudogenes_ordered)*.8, width=6)
par(mfrow=c(length(pseudogenes_ordered),4), mar=c(1,2,0,0.5)+.1)
for(i in seq_along(pseudogenes_ordered)) {
	# xaxis is TRUE only for the last row (presumably so the time axis is
	# drawn once, at the bottom of the page -- see rnapii_plot)
	rnapii_plot(tpts, pseudogenes_ordered[[i]], optimized_models_ordered[[i]], lwd=2.5,
		xaxis=(i==length(pseudogenes_ordered)), panels=1:4)
}
dev.off()

################
## panel D ########
################

# Restore the four bootstrap tranches into the workspace.
# NOTE(review): these files have an .rds extension but are read with load(),
# which only works if they were written with save() rather than saveRDS() -- confirm.
# Presumably they provide testSets1..4 and trainingSets1..4, which are
# combined below but never assigned in this script.
load(file=file.path('data','bootstraps_tranche1.rds'))
load(file=file.path('data','bootstraps_tranche2.rds'))
load(file=file.path('data','bootstraps_tranche3.rds'))
load(file=file.path('data','bootstraps_tranche4.rds'))

# Each model file is expected to restore an object called `trainingSetModels`;
# it is copied to a numbered name right away because the next load()
# overwrites it. The load/copy order must therefore not be changed.
load(file=file.path('data','trainingSetModels1.rds'))
trainingSetModels1 <- trainingSetModels
load(file=file.path('data','trainingSetModels2.rds'))
trainingSetModels2 <- trainingSetModels
load(file=file.path('data','trainingSetModels3.rds'))
trainingSetModels3 <- trainingSetModels
load(file=file.path('data','trainingSetModels4.rds'))
trainingSetModels4 <- trainingSetModels

# Concatenate the tranches in the same order (1..4) for all three collections,
# so that element i of each refers to the same bootstrap.
testSets <- c(testSets1,testSets2,testSets3,testSets4)
trainingSets <- c(trainingSets1,trainingSets2,trainingSets3,trainingSets4)
trainingSetModels <- c(trainingSetModels1,trainingSetModels2,trainingSetModels3,trainingSetModels4)

# Time points at which model() is evaluated for the bootstrap models.
# NOTE(review): `tpts` is already used by the panel B-C code above, so an
# object of that name presumably comes from the loaded workspaces; this
# assignment replaces it from here on.
tpts <- c(0,1/6,1/3,1/2,2,4)

# Build a matrix with the dimensions of `mat` in which every row holds the
# column means of `mat` (the "predict the column mean" baseline).
#   mat:     numeric matrix (or data frame) with at least one row
#   returns: nrow(mat) x ncol(mat) numeric matrix without dimnames
meanMatrix <- function(mat) {
	# colMeans() replaces apply(mat, 2, mean); byrow=TRUE lays the repeated
	# vector of column means out one full set per row
	matrix(rep(colMeans(mat), nrow(mat)),
		nrow=nrow(mat), ncol=ncol(mat), byrow=TRUE)
}
# Fraction of test-set variance explained by each bootstrap model.
# For bootstrap i, the model trainingSetModels[[i]] is evaluated at tpts,
# rescaled column by column with colMeans(test)/colMeans(train), and compared
# with testSets[[i]]. The reference is the sum of squares of the test set
# around its own column means, so the value is (SS_ref - SS_model)/SS_ref.
# vapply() guarantees a numeric vector with one value per bootstrap.
varexplVVVV_norm <- vapply(seq_along(trainingSetModels), function(i) {
	tryCatch({
		xmod <- model(tpts, trainingSetModels[[i]])
		xtrain <- trainingSets[[i]]
		xexp <- testSets[[i]]
		# t(t(x) * v) multiplies column j of x by v[j]
		xmod_norm <- t(t(xmod)*colMeans(xexp)/colMeans(xtrain))
		value_VVVV_norm <- sum((xmod_norm - xexp)^2)
		value_KKKK <- sum((xexp - meanMatrix(xexp))^2)
		(value_KKKK-value_VVVV_norm)/value_KKKK
		# a bootstrap that cannot be evaluated is deliberately scored as missing
		}, error=function(e) NA_real_)
	}, numeric(1))
# Negative values (model worse than the column-mean baseline) are set to
# missing, which keeps them out of the plot drawn with ylim=c(0,1).
varexplVVVV_norm[varexplVVVV_norm<0] <- NA
# Group the values in consecutive runs of 10 (presumably 10 bootstraps per
# cluster -- confirm) and reorder the groups by ordRows. The number of groups
# is derived from the vector length instead of being fixed at 12.
varexplVVVV_norm <- unlist(split(varexplVVVV_norm,
	rep(seq_len(length(varexplVVVV_norm) %/% 10), each=10))[ordRows])

# Figure S3D: explained variance of every bootstrap, one column of points
# per cluster (10 points each, in ordRows order).

# positions of the clusters with more than 50 members
clIdx <- which(pol2modelingclust$kmeans$size>50)

pdf(file.path('figures','S3D.pdf'), height=4, width=6.5)
par(mar=c(5,4,2,2)+.1)
set.seed(1) # for jitter
plot(
	jitter(rep(seq_along(ordRows),each=10)),
	varexplVVVV_norm,
	ylim=c(0,1), pch=20, cex=1,
	xlab='Clusters',
	ylab='Fraction of explained variance'
	)
axis(1,at=seq_along(ordRows),labels=seq_along(ordRows))
# top axis (labels only): number of members of each displayed cluster
axis(3,at=seq_along(ordRows),
	labels=table(pol2modelingclust$kmeans$cluster)[clIdx][ordRows], tick=FALSE, cex.axis=.7)
# dotted separators halfway between neighbouring cluster positions
# (1.5, 2.5, ..., length(ordRows) - 0.5)
abline(v=seq_along(ordRows)[-1]-0.5, lty=3)
dev.off()
