# Work from the directory that holds the saved RF models: the helper script sourced
# here and every relative 'Chromatin/...' path used later are resolved against it.
rf_model_dir <- '/Volumes/BB_Backup_3/BD_aging_project/2018-09_revision_analyses/Machine_learning/rerun_with_same_package_version//Model_RData/RF/'
setwd(rf_model_dir)
source('parsing_functions_vRF_newCaret.R')

# load input data (two saved workspaces: the main one and the cerebellum one)
input_rdata_files <- c(
  '/Volumes/BB_Backup_3/BD_aging_project/Machine_learning_aging/Predict_Fold_change/2016-11-21_Complete_feature_matrices_FOLD_CHANGE_NA_RM.RData',
  '/Volumes/BB_Backup_3/BD_aging_project/Machine_learning_aging/Predict_Fold_change/2017-03-20_Complete_feature_matrices_CEREB_FOLD_CHANGE_NA_RM.RData'
)
for (input_rdata in input_rdata_files) {
  load(input_rdata)
}

# 2018-09-20
# run for new caret version (6.0-80)


#####################################
##########       Liver       ########
#####################################

### 1. Model trained with sampling ###
# Load the saved sampling model, then score it on the liver rows that are
# excluded by my.liver.training.idx (negative indexing = held-out rows).
load("Chromatin/2018-09-17_Liver_RF_model_withSampling_aging_changes_FDR0.1.RData")
liver_holdout <- my.liver.features.v2[-my.liver.training.idx, ]
get_rf_metrics_and_summarize_withSAMP("Liver", my.liver.rf.samp.fit, liver_holdout)

### 2. Model trained without the constant class
# The held-out rows are passed through remove_cst() (from the sourced helper file;
# presumably drops the constant class) BEFORE the noCST model file is loaded.
my.no.cst.testing <- remove_cst(liver_holdout)
load("Chromatin/2018-09-17_Liver_RF_model_noCONSTANT_aging_changes_FDR0.1.RData")
get_rf_metrics_and_summarize_noCST("Liver", my.liver.rf.noCST.fit, my.no.cst.testing)
#####################################
#####################################


#####################################
##########       Heart       ########
#####################################

### 1. Model trained with sampling ###
# Load the saved sampling model, then score it on the heart rows that are
# excluded by my.heart.training.idx (negative indexing = held-out rows).
load("Chromatin/2018-09-17_Heart_RF_model_withSampling_aging_changes_FDR0.1.RData")
heart_holdout <- my.heart.features.v2[-my.heart.training.idx, ]
get_rf_metrics_and_summarize_withSAMP("Heart", my.heart.rf.samp.fit, heart_holdout)

### 2. Model trained without the constant class
# The held-out rows are passed through remove_cst() (from the sourced helper file;
# presumably drops the constant class) BEFORE the noCST model file is loaded.
my.no.cst.testing <- remove_cst(heart_holdout)

load("Chromatin/2018-09-17_Heart_RF_model_noCONSTANT_aging_changes_FDR0.1.RData")
get_rf_metrics_and_summarize_noCST("Heart", my.heart.rf.noCST.fit, my.no.cst.testing)
#####################################
#####################################


#####################################
##########    Cerebellum     ########
#####################################

### 1. Model trained with sampling ###
# Load the saved sampling model, then score it on the cerebellum rows that are
# excluded by my.cereb.training.idx (negative indexing = held-out rows).
load("Chromatin/2018-09-17_cereb_RF_model_withSampling_aging_changes_FDR0.1.RData")
cereb_holdout <- my.cereb.features.v2[-my.cereb.training.idx, ]
get_rf_metrics_and_summarize_withSAMP("Cerebellum", my.cereb.rf.samp.fit, cereb_holdout)

### 2. Model trained without the constant class
# The held-out rows are passed through remove_cst() (from the sourced helper file;
# presumably drops the constant class) BEFORE the noCST model file is loaded.
my.no.cst.testing <- remove_cst(cereb_holdout)

load("Chromatin/2018-09-17_cereb_RF_model_noCONSTANT_aging_changes_FDR0.1.RData")
get_rf_metrics_and_summarize_noCST("Cerebellum", my.cereb.rf.noCST.fit, my.no.cst.testing)
#####################################
#####################################


#####################################
##########       OB       ########
#####################################

### 1. Model trained with sampling ###
# Load the saved sampling model, then score it on the olfactory bulb rows that are
# excluded by my.OB.training.idx (negative indexing = held-out rows).
# Mind the mixed case: the feature table is 'my.ob.*', index and fits are 'my.OB.*'.
load("Chromatin/2018-09-17_OB_withBrainExtra_RF_model_withSampling_aging_changes_FDR0.1.RData")
ob_holdout <- my.ob.features.v2[-my.OB.training.idx, ]
get_rf_metrics_and_summarize_withSAMP("Olfactory_Bulb", my.OB.rf.samp.fit, ob_holdout)

### 2. Model trained without the constant class
# The held-out rows are passed through remove_cst() (from the sourced helper file;
# presumably drops the constant class) BEFORE the noCST model file is loaded.
my.no.cst.testing <- remove_cst(ob_holdout)

load("Chromatin/2018-09-17_OB_withBrainExtra_RF_model_noCONSTANT_aging_changes_FDR0.1.RData")
get_rf_metrics_and_summarize_noCST("Olfactory_Bulb", my.OB.rf.noCST.fit, my.no.cst.testing)
#####################################
#####################################



#################################################################################################################################
################################################   Cross Tissue predictions   ###################################################
#### Per the original note, OB is the only tissue with "None" left in SE_3m after filtering.
#### Rows with SE_3m == "None" are dropped from the OB table handed to the cross-tissue
#### accuracy functions below (the OB sampling model itself is still scored on the v2 table).
# > summary(factor(my.ob.data$SE_3m))
# None   Super Typical 
# 632     132    3704 

# OB tables restricted to rows whose SE_3m is not "None":
#   v3      - all such rows
#   v3.test - such rows whose position is not listed in my.OB.training.idx
ob_not_none <- my.ob.features.v2$SE_3m != "None"
my.ob.features.v3      <- my.ob.features.v2[ob_not_none, ]
my.ob.features.v3.test <- my.ob.features.v2[setdiff(which(ob_not_none), my.OB.training.idx), ]

# inputs for the noCST models: each full tissue table passed through remove_cst() ...
my.no.cst.liver <- remove_cst(my.liver.features.v2)
my.no.cst.heart <- remove_cst(my.heart.features.v2)
my.no.cst.cereb <- remove_cst(my.cereb.features.v2)
my.no.cst.ob    <- remove_cst(my.ob.features.v3)

# ... and the same for the held-out rows only (used for the tissue a model was trained on)
my.no.cst.liver.test <- remove_cst(my.liver.features.v2[-my.liver.training.idx, ])
my.no.cst.heart.test <- remove_cst(my.heart.features.v2[-my.heart.training.idx, ])
my.no.cst.cereb.test <- remove_cst(my.cereb.features.v2[-my.cereb.training.idx, ])
my.no.cst.ob.test    <- remove_cst(my.ob.features.v3.test)


#####################################
##########       Liver       ########
#####################################

### 1. Model trained with sampling ###
# Data arguments go in the order liver, heart, cerebellum, OB. Only the liver
# slot (the model's own tissue) is restricted to the held-out rows.
load("Chromatin/2018-09-17_Liver_RF_model_withSampling_aging_changes_FDR0.1.RData")
liver_holdout <- my.liver.features.v2[-my.liver.training.idx, ]
CT_acc_withSAMP("Liver", my.liver.rf.samp.fit,
                liver_holdout,
                my.heart.features.v2,
                my.cereb.features.v2,
                my.ob.features.v3)

### 2. Model trained without the constant class
# Same layout, using the remove_cst() versions of the tables.
load("Chromatin/2018-09-17_Liver_RF_model_noCONSTANT_aging_changes_FDR0.1.RData")
CT_acc_noCST("Liver", my.liver.rf.noCST.fit,
             my.no.cst.liver.test,
             my.no.cst.heart,
             my.no.cst.cereb,
             my.no.cst.ob)


#####################################
##########       Heart       ########
#####################################

### 1. Model trained with sampling ###
# Data arguments go in the order liver, heart, cerebellum, OB. Only the heart
# slot (the model's own tissue) is restricted to the held-out rows.
load("Chromatin/2018-09-17_Heart_RF_model_withSampling_aging_changes_FDR0.1.RData")
heart_holdout <- my.heart.features.v2[-my.heart.training.idx, ]
CT_acc_withSAMP("Heart", my.heart.rf.samp.fit,
                my.liver.features.v2,
                heart_holdout,
                my.cereb.features.v2,
                my.ob.features.v3)


### 2. Model trained without the constant class
# Same layout, using the remove_cst() versions of the tables.
load("Chromatin/2018-09-17_Heart_RF_model_noCONSTANT_aging_changes_FDR0.1.RData")
CT_acc_noCST("Heart", my.heart.rf.noCST.fit,
             my.no.cst.liver,
             my.no.cst.heart.test,
             my.no.cst.cereb,
             my.no.cst.ob)



#####################################
##########    Cerebellum     ########
#####################################

### 1. Model trained with sampling ###
# Data arguments go in the order liver, heart, cerebellum, OB. Only the cerebellum
# slot (the model's own tissue) is restricted to the held-out rows.
load("Chromatin/2018-09-17_cereb_RF_model_withSampling_aging_changes_FDR0.1.RData")
cereb_holdout <- my.cereb.features.v2[-my.cereb.training.idx, ]
CT_acc_withSAMP("Cerebellum", my.cereb.rf.samp.fit,
                my.liver.features.v2,
                my.heart.features.v2,
                cereb_holdout,
                my.ob.features.v3)

### 2. Model trained without the constant class
# Same layout, using the remove_cst() versions of the tables.
load("Chromatin/2018-09-17_cereb_RF_model_noCONSTANT_aging_changes_FDR0.1.RData")
CT_acc_noCST("Cerebellum", my.cereb.rf.noCST.fit,
             my.no.cst.liver,
             my.no.cst.heart,
             my.no.cst.cereb.test,
             my.no.cst.ob)


#####################################
##########       OB       ########
#####################################

### 1. Model trained with sampling ###
# Data arguments go in the order liver, heart, cerebellum, OB. Only the OB
# slot (the model's own tissue) is restricted to the held-out rows.
# NOTE(review): the OB slot uses held-out rows of my.ob.features.v2, i.e. rows with
# SE_3m == "None" are kept here, whereas the noCST call below is fed data derived
# from my.ob.features.v3.test - confirm this asymmetry is intended.
load("Chromatin/2018-09-17_OB_withBrainExtra_RF_model_withSampling_aging_changes_FDR0.1.RData")
ob_holdout <- my.ob.features.v2[-my.OB.training.idx, ]
CT_acc_withSAMP("Olfactory_Bulb", my.OB.rf.samp.fit,
                my.liver.features.v2,
                my.heart.features.v2,
                my.cereb.features.v2,
                ob_holdout)


### 2. Model trained without the constant class
# Same layout, using the remove_cst() versions of the tables.
load("Chromatin/2018-09-17_OB_withBrainExtra_RF_model_noCONSTANT_aging_changes_FDR0.1.RData")
CT_acc_noCST("Olfactory_Bulb", my.OB.rf.noCST.fit,
             my.no.cst.liver,
             my.no.cst.heart,
             my.no.cst.cereb,
             my.no.cst.ob.test)


####################################################################################################################################################
# make heatmap of cross tissues accuracies
library('pheatmap')
my.accuracy.colors <- c("floralwhite","lightsalmon","indianred1","firebrick3","firebrick","firebrick4")

# Assemble one accuracy table from the four per-model metric tables.
#   my.heart, my.liver, my.cereb, my.ob : data frames with an `accuracy` column,
#                                         one per trained model
# Returns a data frame with one column per trained model (in the order heart, liver,
# cerebellum, OB) and the row names of `my.ob`.
# NOTE(review): the column labels are built from the ROW names of the OB table, so they
# only match the heart/liver/cerebellum/OB column order if the metric files list the
# tested tissues in that same order - confirm against the files.
build_ct_accuracy_table <- function(my.heart, my.liver, my.cereb, my.ob) {
  my.acc <- data.frame(cbind(my.heart$accuracy,
                             my.liver$accuracy,
                             my.cereb$accuracy,
                             my.ob$accuracy))
  rownames(my.acc) <- rownames(my.ob)
  colnames(my.acc) <- paste(rownames(my.acc), "trained_model", sep = "_")
  my.acc
}

# Append the two reference rows used to anchor the colour scale.
#   my.accuracies : table from build_ct_accuracy_table()
#   my.random     : values for the "random accuracy" row (one per column)
#   my.perfect    : values for the "perfect accuracy" row (one per column)
# Returns the table with the two labelled rows added at the bottom.
append_reference_rows <- function(my.accuracies, my.random, my.perfect) {
  my.all <- rbind(my.accuracies, my.random, my.perfect)
  # label the two appended rows by position instead of a hard-coded 5:6
  rownames(my.all)[nrow(my.accuracies) + 1:2] <- c("random accuracy", "perfect accuracy")
  my.all
}

# Draw one unclustered heatmap into a date-stamped PDF in the working directory.
#   my.accuracies : table to plot
#   my.pdf.suffix : file name part joined to Sys.Date() with "_" (the suffixes used below
#                   start with "_" themselves, so the names contain "__"; kept so that
#                   output file names stay unchanged)
#   my.title      : plot title
#   my.colors     : colours interpolated into a 50-step palette
plot_ct_accuracy_heatmap <- function(my.accuracies, my.pdf.suffix, my.title, my.colors) {
  pdf(paste(Sys.Date(), my.pdf.suffix, sep = "_"), onefile = FALSE)
  # close the PDF device even if pheatmap() fails, so no device is left open
  on.exit(dev.off(), add = TRUE)
  pheatmap(my.accuracies, cluster_rows = FALSE, cluster_cols = FALSE,
           color = colorRampPalette(my.colors)(50),
           main = my.title)
  invisible(NULL)
}

# Reference rows shared by both heatmaps (one value per trained-model column).
# NOTE(review): 0.5 is also used as the "random" reference for the 3-class model - confirm.
my.random  <- rep(0.5, 4)
my.perfect <- rep(1, 4)

# NOTE(review): the input file names below carry a fixed 2018-09-20 date stamp; they are
# presumably the outputs of the CT_acc_* calls earlier in this script - verify before
# re-running on another date.

### 1. with Samplings
my.ob.samp    <- read.table('Chromatin/cross_tissues_accuracies/2018-09-20_Olfactory_Bulb_chromatin_model_cross_tissue_AVERAGE_RF_metrics_classification_withSampling.txt')
my.cereb.samp <- read.table('Chromatin/cross_tissues_accuracies/2018-09-20_Cerebellum_chromatin_model_cross_tissue_AVERAGE_RF_metrics_classification_withSampling.txt')
my.heart.samp <- read.table('Chromatin/cross_tissues_accuracies/2018-09-20_Heart_chromatin_model_cross_tissue_AVERAGE_RF_metrics_classification_withSampling.txt')
my.liver.samp <- read.table('Chromatin/cross_tissues_accuracies/2018-09-20_Liver_chromatin_model_cross_tissue_AVERAGE_RF_metrics_classification_withSampling.txt')

my.samp.accuracies     <- build_ct_accuracy_table(my.heart.samp, my.liver.samp, my.cereb.samp, my.ob.samp)
my.samp.accuracies.all <- append_reference_rows(my.samp.accuracies, my.random, my.perfect)

# plot combined accuracy heatmaps
plot_ct_accuracy_heatmap(my.samp.accuracies.all,
                         "_cross_tissue_RF_accuracies_with_sampling_heatmap.pdf",
                         "3-class classification with sampling",
                         my.accuracy.colors)

### 2. without constant class
my.ob.cst    <- read.table('Chromatin/cross_tissues_accuracies/2018-09-20_Olfactory_Bulb_chromatin_model_cross_tissue_RF_metrics_classification_noCST.txt')
my.cereb.cst <- read.table('Chromatin/cross_tissues_accuracies/2018-09-20_Cerebellum_chromatin_model_cross_tissue_RF_metrics_classification_noCST.txt')
my.heart.cst <- read.table('Chromatin/cross_tissues_accuracies/2018-09-20_Heart_chromatin_model_cross_tissue_RF_metrics_classification_noCST.txt')
my.liver.cst <- read.table('Chromatin/cross_tissues_accuracies/2018-09-20_Liver_chromatin_model_cross_tissue_RF_metrics_classification_noCST.txt')

my.cst.accuracies     <- build_ct_accuracy_table(my.heart.cst, my.liver.cst, my.cereb.cst, my.ob.cst)
my.cst.accuracies.all <- append_reference_rows(my.cst.accuracies, my.random, my.perfect)

# plot combined accuracy heatmaps
plot_ct_accuracy_heatmap(my.cst.accuracies.all,
                         "_cross_tissue_RF_accuracies_noCST_heatmap.pdf",
                         "2-class classification (no sampling)",
                         my.accuracy.colors)

