# This is an almost verbatim copy of the reviewer's script, except that
# 
# 1. File paths have been changed.
# 
# 2. Code has been added to ensure that rows in 'catalog' and 'input' are in the
# same order.

# Mirror everything printed below into 'script-out.txt' while still echoing it
# to the console (split = TRUE). The sink is closed at the end of this script.
sink("script-out.txt", split = TRUE)

# library() stops immediately if NMF is not installed; require() would merely
# return FALSE and the script would only fail later, at the fcnnls() call.
library(NMF)

# Signature catalog. The Before/Ref/After/Var columns identify each row; the
# columns from the 5th onwards are used as the numeric signature matrix H.
# NOTE(review): this assumes the four key columns are the first four columns
# of the file -- confirm against the TSV.
catalog <- read.delim("COSMIC_plus_cisplatin_DT40.tsv", sep = "\t")

# New code: We attach rownames to catalog, of the form
# <Before><Ref><After>><Var>, so that input rows can be matched to it by name.
rownames(catalog) <-
  paste0(catalog$Before, catalog$Ref, catalog$After, ">", catalog$Var)

# drop = FALSE keeps the column name even if the catalog held one signature.
H <- data.matrix(catalog[, 5:ncol(catalog), drop = FALSE])

inputfiles <- Sys.glob("input catalogs/*")

# The glob is relative to the working directory; make an empty match visible
# instead of silently producing no output.
if (length(inputfiles) == 0) {
  warning("No files matched 'input catalogs/*'; nothing to process.",
          call. = FALSE)
}

for (f in inputfiles) {
  cat(sprintf("\nProcessing %s \n", basename(f)))
  # read the file
  input <- read.delim(f, sep = "\t")

  # New code: key the input rows the same way as the catalog rows ...
  rownames(input) <-
    paste0(input$Before, input$Ref, input$After, ">", input$Var)

  # ... and refuse to continue if the input lacks a catalog row. Indexing a
  # data frame by a missing row name yields an all-NA row rather than an
  # error, which would silently corrupt the matrix passed to fcnnls().
  missing_keys <- setdiff(rownames(catalog), rownames(input))
  if (length(missing_keys) > 0) {
    stop(
      sprintf(
        "%s lacks %d mutation type(s) present in the catalog, e.g. %s",
        basename(f), length(missing_keys), missing_keys[1]
      ),
      call. = FALSE
    )
  }

  # New code: put the input rows in the same order as the catalog rows.
  input <- input[rownames(catalog), ]

  # derive the V matrix that we will solve to; drop = FALSE keeps the sample
  # name as a column name when the file contains a single sample column
  V <- data.matrix(input[, 5:ncol(input), drop = FALSE])

  # solve (fcnnls is provided by NMF); soln$x has one column per column of V
  soln <- fcnnls(H, V)

  # convert each column to fractions of its column total
  psoln <- apply(soln$x, 2, function(v) v / sum(v))

  # print a summary of the fractions for every row whose name does not contain
  # 'Sign'; drop = FALSE keeps the matrix shape (and hence the 'Cisplatin'
  # column after transposing) when only one row or one sample remains
  cisdf <- data.frame(
    t(psoln[!grepl("Sign", rownames(psoln)), , drop = FALSE])
  )
  print(summary(cisdf))

  # print the samples that are possibly cisplatin positive (>5%)
  cat(sprintf("\nSamples with more than 5%% cisplatin\n"))
  print(subset(cisdf, Cisplatin > 0.05))
}

# Close the diversion opened at the top so later output is not still being
# written to 'script-out.txt' (relevant when the script is source()d).
sink()
  