## ---- include = FALSE---------------------------------------------------------
# Global knitr chunk options: collapse source and output into a single block
# and prefix every printed output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----load-TOP-package, eval=TRUE, message=FALSE, warning=FALSE----------------
library(TOP)

## ----load-combined-data, eval=TRUE, include=FALSE-----------------------------
# Columns kept for prediction: site coordinates, motif name, PWM score,
# strand, p-value, and the five bin columns (bin1 ... bin5).
cols <- c(
  "chr", "start", "end", "name", "pwm.score", "strand", "p.value",
  paste0("bin", seq_len(5))
)

# Read the example data set installed with the TOP package and keep only the
# columns listed in `cols`.
data <- readRDS(
  system.file(
    "extdata/example_data",
    "CTCF.K562.ATAC.chip.example.data.rds",
    package = "TOP"
  )
)[, cols]

## ---- eval=TRUE---------------------------------------------------------------
# Show the first three rows of the input data.
head(data, n = 3)

## ----predict-TOP-use-pretrained-coef, eval=TRUE-------------------------------
# Predict for CTCF in K562 with the packaged 'ATAC' model, requesting the
# 'bottom' level, the non-logistic model, and the 'asinh' transform.
TOP_result <- predict_TOP(
  data,
  tf_name = "CTCF",
  cell_type = "K562",
  use_model = "ATAC",
  level = "bottom",
  logistic_model = FALSE,
  transform = "asinh"
)

## ----predict-TOP-mean-coef-best, eval=TRUE------------------------------------
# Same call as for the 'bottom' level, but with level = "best".
# The result overwrites any previous `TOP_result`.
TOP_result <- predict_TOP(
  data,
  tf_name = "CTCF",
  cell_type = "K562",
  use_model = "ATAC",
  level = "best",
  logistic_model = FALSE,
  transform = "asinh"
)

## ----load-TOP-posterior-mean-coef, eval=TRUE----------------------------------
# Read the posterior-mean coefficient file for the ATAC model that is
# installed with the TOP package.
TOP_coef <- readRDS(
  system.file(
    "extdata/trained_model_coef/ATAC",
    "TOP_M5_posterior_mean_coef.rds",
    package = "TOP"
  )
)

## ----predict-TOP-mean-coef-best-2, eval=TRUE----------------------------------
# Predict with explicitly supplied coefficients (`TOP_coef`) rather than
# naming a packaged model via `use_model`; level = "best".
TOP_result <- predict_TOP(
  data,
  TOP_coef = TOP_coef,
  tf_name = "CTCF",
  cell_type = "K562",
  level = "best",
  logistic_model = FALSE,
  transform = "asinh"
)

## -----------------------------------------------------------------------------
# Pull the `predictions` element out of the latest result and preview the
# first five entries.
TOP_predictions <- TOP_result$predictions
head(TOP_predictions, n = 5)

## ----plot-predicted-measured, eval=TRUE, fig.width=5, fig.height=5------------
# Reload the full example data set so that the `chip` column is available
# for comparison against the predictions.
data_chip <- readRDS(
  system.file(
    "extdata/example_data",
    "CTCF.K562.ATAC.chip.example.data.rds",
    package = "TOP"
  )
)

# Scatter plot of asinh-transformed `chip` values against asinh-transformed
# predicted values, both axes limited to [0, 8].
# NOTE(review): the y-axis label says "bottom level" while `TOP_predictions`
# comes from a level = 'best' call -- confirm the two coincide here.
scatterplot_predictions(
  x = asinh(data_chip$chip),
  y = asinh(TOP_predictions$predicted),
  title = "Predicting CTCF occupancy in K562 cell",
  xlab = "asinh(measured occupancy)",
  ylab = "asinh(predicted occupancy using bottom level coefficients)",
  xlim = c(0, 8),
  ylim = c(0, 8)
)

## ----predict-TOP-mean-coef-middle, eval=TRUE----------------------------------
# Predict with the supplied coefficients at the 'middle' level; no
# `cell_type` is passed in this call.
TOP_middle_result <- predict_TOP(
  data,
  TOP_coef = TOP_coef,
  tf_name = "CTCF",
  level = "middle",
  logistic_model = FALSE,
  transform = "asinh"
)

# Keep the `predictions` element for plotting.
TOP_middle_predictions <- TOP_middle_result$predictions

## ----plot-predicted-measured-middle, eval=TRUE, fig.width=5, fig.height=5-----
# Scatter plot of asinh-transformed `chip` values against the asinh-transformed
# middle-level predictions, both axes limited to [0, 8].
scatterplot_predictions(
  x = asinh(data_chip$chip),
  y = asinh(TOP_middle_predictions$predicted),
  title = "Predicting CTCF occupancy in K562 cell",
  xlab = "asinh(measured occupancy)",
  ylab = "asinh(predicted occupancy using middle level coefficients)",
  xlim = c(0, 8),
  ylim = c(0, 8)
)

## ----predict-TOP-logistic-mean-coef, eval=TRUE--------------------------------
# Run predict_TOP with logistic_model = TRUE (no `transform` argument is
# passed), using the packaged 'ATAC' model at level = "best".
TOP_result <- predict_TOP(
  data,
  tf_name = "CTCF",
  cell_type = "K562",
  use_model = "ATAC",
  level = "best",
  logistic_model = TRUE
)

# Extract the predictions and preview the first five entries.
logistic_predicted <- TOP_result$predictions
head(logistic_predicted, n = 5)

