--- title: "periscope: sub-genomic RNA identification in SARS-CoV-2 Genomic Sequencing Data" author: Matthew D Parker, Benjamin B Lindsey, Shay Leary, Silvana Gaudieri, Abha Chopra, Matthew Wyles, Adrienn Angyal, Luke R Green, Paul Parsons, Rachel M Tucker, Rebecca Brown, Danielle Groves, Katie Johnson, Laura Carrilero, Joe Heffer, David Partridge, Cariad Evans, Mohammad Raza, Alexander J Keeley, Nikki Smith, Ana Da Silva Filipe, James Shepherd, Chris Davis, Alain Kohl, Elihu Aranday-Cortes, Lily Tong, Emma C Thomson, Dennis Wang, Simon Mallal, Thushan I de Silva output: html_document: self_contained: no --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = FALSE, warning = FALSE,dev = c('pdf', 'png')) require(ggplot2) require(reshape2) require(scales) require(tidyverse) require(ftplottools) require(viridis) require(ggsignif) require(ggpubr) require(GGally) require(FactoMineR) require(factoextra) require(png) navy=rgb(25,62,114,maxColorValue = 255) coral=rgb(234,93,78,maxColorValue = 255) orange=rgb(242,147,48,maxColorValue = 255) yellow=rgb(254,212,122,maxColorValue = 255) purple=rgb(102,103,173,maxColorValue = 255) aqua=rgb(46,169,176,maxColorValue = 255) green=rgb(70,168,108,maxColorValue = 255) grey=rgb(172,188,195,maxColorValue = 255) # Define some functions reorder_within <- function(x, by, within, fun = mean, sep = "___", ...) { new_x <- paste(x, within, sep = sep) stats::reorder(new_x, by, FUN = fun) } #============================== # reorder x axis #============================== scale_x_reordered <- function(..., sep = "___") { reg <- paste0(sep, ".+$") ggplot2::scale_x_discrete(labels = function(x) gsub(reg, "", x), ...) } #============================== # scale between 0 and 1 #============================== normalize <- function(x){ return((x-min(x)) / (max(x)-min(x))) } #============================== # format ggpairs #============================== gpairs_lower <- function(g){ g$plots <- g$plots[-(1:g$nrow)] g$yAxisLabels <- g$yAxisLabels[-1] g$nrow <- g$nrow -1 g$plots <- g$plots[-(seq(g$ncol, length(g$plots), by = g$ncol))] g$xAxisLabels <- g$xAxisLabels[-g$ncol] g$ncol <- g$ncol - 1 g } #============================== # Add regression line to ggpairs & R/p values #============================== add_stat_smooth <- function(data, mapping, ...){ p <- ggplot(data = data, mapping = mapping) + geom_point() + stat_smooth(method=lm, fill="grey70", color="grey50", ...)+ stat_cor(geom="text",size=3,method="pearson",fill='#FFFFFF',color='#4D4845',alpha=1,label.y=y,vjust=1,label.padding=unit(0.2, "lines"),aes(label=paste("R",as.character(..r..),sep="~`=`~")))+ stat_cor(geom="text",size=3,method="pearson",fill='#FFFFFF',color='#4D4845',alpha=1,label.y=y,label.padding=unit(0.2, "lines"),aes(label=paste("p",as.character(..p..*n),sep="~`=`~")),vjust=2.1)+ theme( ) # ggpubr::stat_regline_equation(geom="label",color="grey18",aes(label = paste(..rr.label..)),fill='white',color=grey,size=3,alpha=0.5) p } #============================== # Make certain labels on an axis bold #============================== #https://stackoverflow.com/questions/39694490/highlighting-individual-axis-labels-in-bold-using-ggplot2 colorado <- function(src, boulder) { if (!is.factor(src)) src <- factor(src) # make sure it's a factor src_levels <- levels(src) # retrieve the levels in their order brave <- boulder %in% src_levels # make sure everything we want to make bold is actually in the factor levels if (all(brave)) { # if so b_pos <- purrr::map_int(boulder, ~which(.==src_levels)) # then find out where they are b_vec <- rep("plain", length(src_levels)) # make'm all plain first b_vec[b_pos] <- "bold" # make our targets bold b_vec # return the new vector } else { stop("All elements of 'boulder' must be in src") } } ``` This R markdown document is provided to re-create most of the figures and analyses in the above titled publication. Figures for the manuscript were edited for presentation purposes for the final version using Adobe Illustrator, using the raw figures generated here. # Main Figures ## Figure 2 #### In Vivo and In Vitro Dectection and Quantification of Canonical Sub-Genomic RNA ```{r figure_2_sub_genomic_rna , fig.width=8.25, fig.height=11.75, echo=F,warnings=F,message=F} #============================== # Change column names for nicer display #============================== column_names <- c( 'sgRPTg_HQ' = 'HQ sgRNA Reads per\n1000 Genomic Reads', 'sgRPTg_LQ' = 'LQ sgRNA Reads per\n1000 Genomic Reads', 'sgRPTg_LLQ' = 'LLQ sgRNA Reads per\n1000 Genomic Reads', 'sgRPHT_HQ' = 'sgRPHT\nHQ', 'sgRPHT_LQ' = 'sgRPHT\nLQ', 'sgRPHT_LLQ' = 'sgRPHT\nLLQ', 'sgRNA_HQ_count' = 'sgRNA HQ\nRaw Count', 'sgRNA_LQ_count' = 'sgRNA LQ\nRaw Count', 'sgRNA_LLQ_count' = 'sgRNA LLQ\nRaw Count', 'sgRNA_LLQ_count' = 'sgRNA LLQ\nRaw Count' ) #============================== # Sheffield ARTIC nanopore Cohort #============================== sg = read.csv("Supplemental_File_S1.csv",comment.char='#') # We exclude ORF1a as it does not produce sgRNA and N* which is the noncanonical sgRNA produced in KR mutants sg = sg %>% filter(orf!="ORF1a") %>% filter(orf!="N*") b = ggplot(sg,aes(reorder(orf,-gRNA_count,median),gRNA_count))+ geom_boxplot(color=navy)+ ft_theme()+ scale_y_continuous("gRNA Reads")+ theme( panel.background=element_rect(fill="grey97",color="white"), legend.position = "None", plot.margin = margin(t=0, l=0.5, r=0, b=0.5, unit = "cm"), strip.text.x = element_text(face = "bold",colour = '#4D4845'), axis.text.x=element_text(angle=90,hjust=1,size=8) )+ scale_color_manual("Quality",values=c(navy,navy,grey))+ scale_x_discrete("ORF") c = ggplot(sg,aes(reorder(orf,-gRHPT,median),gRHPT))+ geom_boxplot(color=navy)+ ft_theme()+ scale_y_continuous("gRPHT")+ theme( panel.background=element_rect(fill="grey97",color="white"), legend.position = "None", strip.text.x = element_text(face = "bold",colour = '#4D4845'), plot.margin = margin(t=0, l=0.5, r=0, b=0.1, unit = "cm"), axis.text.x=element_text(angle=90,hjust=1,size=8) )+ scale_color_manual("Quality",values=c(navy,navy,grey))+ scale_x_discrete("ORF") m=melt(sg%>%select(sample,orf,sgRNA_HQ_count,sgRNA_LQ_count,sgRNA_LLQ_count)) d = ggplot(m,aes(x=reorder_within(orf,-value,variable,median),value,color=variable))+ geom_boxplot()+ ft_theme()+ scale_y_continuous(expression(bold(paste("sgRNA Reads (log"["10"],")"))),trans=scales::pseudo_log_trans(base = 10),breaks=c(0,10,100,1000,10000),expand=c(0,0.1))+ annotation_logticks(sides = "l",outside=T) + facet_wrap(~variable,ncol=3,scales="free_x",labeller = as_labeller(column_names))+ theme( panel.background=element_rect(fill="grey97",color="white"), legend.position="none", panel.spacing.x=unit(1, "lines"), strip.text.x = element_text(face = "bold",colour = '#4D4845'), plot.margin = margin(t=0, l=0.5, r=0, b=0.1, unit = "cm"), axis.text.x=element_text(angle=90,hjust=1,size=8,vjust=0.5), axis.text.y = element_text(margin = margin(r = 5)) )+ scale_color_manual("Quality",values=c(navy,navy,grey))+ scale_x_reordered("ORF")+ coord_cartesian(clip = "off") m=melt(sg%>%filter(!is.na(sgRPHT_HQ))%>%select(sample,orf,sgRPHT_HQ,sgRPHT_LQ,sgRPHT_LLQ)) e = ggplot(m,aes(x=reorder_within(orf,-value,variable,median),value,color=variable))+ geom_boxplot()+ ft_theme()+ scale_y_continuous(expression(bold(paste("sgRPHT (log"["10"],")"))),trans=scales::pseudo_log_trans(base = 10),breaks=c(0,10,100,1000,10000),expand=c(0,0.1))+ facet_wrap(~variable,ncol=3,scales="free_x",labeller = as_labeller(column_names))+ annotation_logticks(sides = "l",outside=T) + theme( panel.background=element_rect(fill="grey97",color="white"), legend.position = "none", plot.margin = margin(t=0, l=0.5, r=0, b=0.1, unit = "cm"), legend.justification = "center", panel.spacing.x=unit(1, "lines"), strip.text.x = element_text(face = "bold",colour = '#4D4845'), axis.text.x=element_text(angle=90,hjust=1,size=8,vjust=0.5), axis.text.y = element_text(margin = margin(r = 5)) )+ scale_color_manual("Quality",values=c(navy,navy,grey))+ scale_x_reordered("ORF")+ coord_cartesian(clip = "off") m=melt(sg %>% select(sample,orf,sgRPTg_HQ,sgRPTg_LQ)) m$orf = str_replace(m$orf,"ORF10","ORF10*") m$location="SHEF" glasgow = read.csv("Supplemental_File_F7.csv",comment.char='#') g=melt(glasgow%>%filter(orf!="ORF1a")%>%filter(orf!="ORF10")%>%filter(orf!="N*")%>%filter(!is.na(sgRPTg_HQ))%>%select(sample,orf,sgRPTg_HQ,sgRPTg_LQ)) g$location="GLAS" all_loc=rbind(m,g) all_loc$location<- factor(all_loc$location,levels=c("SHEF","GLAS")) a = ggplot(all_loc,aes(x=reorder_within(orf,-value,variable,median),value,color=location))+ geom_boxplot()+ ft_theme()+ scale_y_continuous(expression(bold(paste("sgRPTg (log"["10"],")"))),trans=scales::pseudo_log_trans(base = 10),breaks=c(0,10,100,1000,10000),expand=c(0,0.1))+ annotation_logticks(sides = "l",outside=T) + facet_wrap(~variable,ncol=3,scales="free_x",labeller = as_labeller(column_names))+ theme( panel.background=element_rect(fill="grey97",color="white"), legend.position = "bottom", plot.margin = margin(t=0, l=0.5, r=0, b=0.1, unit = "cm"), legend.justification = "center", panel.spacing.x=unit(1, "lines"), strip.text.x = element_text(face = "bold",colour = '#4D4845'), axis.text.x=element_text(angle=90,hjust=1), axis.text.y = element_text(margin = margin(r = 5)) )+ scale_color_manual("Location",values=c(navy,coral))+ scale_x_reordered("ORF")+ coord_cartesian(clip = "off") #============================== # In Vitro Data - comparisson with Illumina #============================== #============================== # Illumina #============================== cell_illumina = read.csv("Supplemental_File_S5.csv",comment.char='#') cell_illumina = cell_illumina %>% separate(sample,c("line","virus","time","s"),sep="_") cell_illumina$line = str_replace(cell_illumina$line,"VAT","veroE6\nACE2\nTMPRSS2") cell_illumina$line = str_replace(cell_illumina$line,"VA","veroE6\nACE2") cell_illumina$line = str_replace(cell_illumina$line,"V","veroE6") cell_illumina$line = str_replace(cell_illumina$line,"vero","Vero") illumina_normalised = ggplot(cell_illumina%>%filter(orf!="ORF10") %>% filter(orf!='ORF1a') %>% filter(orf!="N*"), aes(time,sgRPHT,group=orf,color=orf))+ geom_point()+ geom_line()+ ggtitle("Illumina Metagenomic")+ facet_grid(virus~line)+ ft_theme()+ scale_y_continuous(expression(bold(paste("Sum Normalized sgRNA (log"["10"],")"))),trans=scales::pseudo_log_trans(base = 10),breaks=c(0,10,100,3000))+ scale_x_discrete("Time (h)")+ theme( panel.background=element_rect(fill="grey97",color="white"), plot.title = element_text(hjust = 0.5,colour = '#4D4845', size=10 ), legend.justification = "center", legend.position="bottom", panel.spacing.x=unit(0.1, "lines"), legend.key.size = unit(0.5, "cm"), legend.key.height = unit(0.5, "cm"), plot.margin = margin(t=0.2, l=0.5, r=0, b=0.1, unit = "cm"), strip.text.y = element_text(face = "bold",colour = '#4D4845', size=8), strip.text.x = element_text(face = "bold",colour = '#4D4845', size=8) )+ scale_color_manual("ORF",values=c(navy,coral,orange,yellow,purple,aqua,green,grey,"black")) #============================== # ONT #============================== cell_ont = read.csv("Supplemental_File_S6.csv",comment.char='#') cell_ont = cell_ont %>% separate(sample,c("line","virus","time","s"),sep="_") cell_ont$line = str_replace(cell_ont$line,"VAT","veroE6\nACE2\nTMPRSS2") cell_ont$line = str_replace(cell_ont$line,"VA","veroE6\nACE2") cell_ont$line = str_replace(cell_ont$line,"V","veroE6") cell_ont$line = str_replace(cell_ont$line,"vero","Vero") ont_normalised = ggplot(cell_ont%>%filter(orf!="ORF10") %>% filter(orf!='ORF1a') %>% filter(orf!='N*'),aes(time,sgRPHT_HQ+sgRPHT_LQ,group=orf,color=orf))+ geom_point()+ geom_line()+ facet_grid(virus~line)+ ggtitle("ONT ARTIC")+ ft_theme()+ scale_y_continuous(expression(bold(paste("Sum Normalized sgRNA (log"["10"],")"))),trans=scales::pseudo_log_trans(base = 10),breaks=c(0,10,100,300))+ scale_x_discrete("Time (h)")+ theme( panel.background=element_rect(fill="grey97",color="white"), legend.position = "none", panel.spacing.x=unit(0.1, "lines"), legend.key.size = unit(0.5, "cm"), legend.key.height = unit(0.5, "cm"), plot.margin = margin(t=0, l=0.5, r=0, b=0.1, unit = "cm"), plot.title = element_text(hjust = 0.5,colour = '#4D4845', size=10 ), strip.text.y = element_text(face = "bold",colour = '#4D4845', size=8), strip.text.x = element_text(face = "bold",colour = '#4D4845', size=8) )+ scale_color_manual("ORF",values=c(navy,coral,orange,yellow,purple,aqua,green,grey,"black")) #============================== # Comparisson Plot #============================== ill=cell_illumina%>%filter(orf!="ORF10") %>% filter(orf!='ORF1a') %>% group_by(line,virus,time) %>% select(line,virus,time,sgRPHT) %>% summarise(sum=sum(sgRPHT)) ill$class="Illumina Metagenomic" ill$sum_norm=normalize(ill$sum) ont=cell_ont %>% filter(orf!='ORF1a') %>% group_by(line,virus,time) %>% select(line,virus,time,sgRPHT_HQ,sgRPHT_LQ) %>% summarise(sum=sum(sgRPHT_HQ+sgRPHT_LQ)) ont$class="ONT Artic" ont$sum_norm=normalize(ont$sum) all=rbind(ill,ont) f = ggplot(all,aes(time,sum_norm,group=class,color=class))+ geom_point()+ geom_line()+ facet_grid(virus~line)+ ft_theme()+ scale_y_continuous("Scaled Normalized\nTotal sgRNA",breaks=c(0,0.5,1))+ scale_x_discrete("Time (h)")+ theme( panel.background=element_rect(fill="grey97",color="white"), legend.position = "bottom", legend.key.size = unit(0.5, "cm"), legend.key.height = unit(0.5, "cm"), panel.spacing.x=unit(0.1, "lines"), plot.margin = margin(t=0, l=0.5, r=0, b=0.5, unit = "cm"), strip.text.x = element_text(face = "bold",colour = '#4D4845',size=8), strip.text.y = element_text(face = "bold",colour = '#4D4845',size=8) )+scale_color_manual("Data",values=c(orange,navy)) legend <- get_legend(illumina_normalised) illumina_normalised = illumina_normalised+theme(legend.position = "none") col1_row1 = ggarrange(b,c,labels=c("B","C"),nrow=1) col1 = ggarrange(col1_row1,d,e,labels = c("","D","E"),nrow=3,heights=c(1,1,1,1)) col2 = ggarrange(f,ont_normalised,illumina_normalised,legend,labels=c('F','G','',''),ncol=1,nrow=4,heights=c(0.9,0.9,0.9,0.15)) row2 = ggarrange(col1,col2,widths=c(0.8,1)) figure2 = ggarrange(a,row2,nrow=2,heights=c(0.4,1),labels=c("A","")) figure2 ``` ## Figure 3 #### Technical Replicates, Detection Limit, and Batch Effects ```{r figure_3_technical_replicates , fig.width=8.25, fig.height=11.75, echo=F,warnings=F,message=F} #============================== # Technical Replicates 1 #============================== sg = read.csv("Supplemental_File_S1.csv",comment.char='#') rep=read.table("Supplemental_File_S4.csv",sep=",",header=T) rep = rep %>% filter(orf != "ORF1a") rep$orf <- factor(rep$orf,levels=c("N*","S","ORF3a","E","M","ORF6","ORF7a","ORF8","N","ORF10")) rep$sample <- factor(rep$sample,levels=c("SHEF-C2AC0","repeat1","repeat2","repeat3","SHEF-C1F86","repeat4","repeat5","repeat6")) rep$sample = str_replace(rep$sample,"repeat","Repeat ") rep$sample = str_replace(rep$sample,"-","\n") rep$class = ifelse(rep$sample=="SHEF-C2AC0","Superscript","Lunarscript") rep$class = ifelse(rep$sample=="repeat1","Superscript",rep$class) rep$class = ifelse(rep$sample=="repeat2","Superscript",rep$class) rep$class = ifelse(rep$sample=="repeat3","Superscript",rep$class) rep$class = factor(rep$class,levels=c("Superscript","Lunarscript")) y=120 n=6 a=ggpairs(rep%>% filter(orf!="ORF1a")%>% filter(orf!="N*")%>% filter(orf!="ORF10")%>% filter(sample %in% c("SHEF\nC2AC0","Repeat 1","Repeat 2","Repeat 3"))%>% select(sample,orf,sgRPTg_HQ)%>% spread(sample,sgRPTg_HQ) ,columns=c(2,3,4,5),xlab="sgRPTg (High Quality)",ylab="sgRPTg (High Quality)",axisLabels = "show" ,lower = list(mapping = aes(color = orf),continuous = add_stat_smooth ),upper=list(continuous='blank'),diag = list(continuous = "blankDiag"))+ ft_theme()+ theme( panel.background=element_rect(fill="grey97",color="white"), panel.spacing.x=unit(0.1, "lines"), plot.margin = margin(t=0, l=0.5, r=0.5, b=0.5, unit = "cm"), legend.position = "None", strip.text.x = element_text(face = "bold",colour = '#4D4845'), strip.text.y = element_text(face = "bold",colour = '#4D4845'), axis.text.x=element_text(angle=45,hjust=1))+scale_color_manual("ORF",values=c(navy,coral,green,purple,yellow,orange,grey,aqua,"black"))+ scale_x_continuous("sgRPTg (High Quality)")+ scale_y_continuous("sgRPTg (High Quality)")+ coord_cartesian(ylim=c(0,120)) a=gpairs_lower(a) #============================== # Technical Replicates 2 #============================== y=40 b=ggpairs(rep%>% filter(orf!="ORF1a")%>% filter(orf!="N*")%>% filter(orf!="ORF10")%>% filter(sample %in% c("SHEF\nC1F86","Repeat 4","Repeat 5","Repeat 6"))%>% select(sample,orf,sgRPTg_HQ)%>% spread(sample,sgRPTg_HQ) ,columns=c(2,3,4,5),xlab="sgRPTg (High Quality)",ylab="sgRPTg (High Quality)",axisLabels = "show" ,lower = list(mapping = aes(color = orf),continuous = add_stat_smooth),upper=list(continuous='blank'),diag = list(continuous = "blankDiag"))+ ft_theme()+ theme(panel.background=element_rect(fill="grey97",color="white"), panel.spacing.x=unit(0.1, "lines"), plot.margin = margin(t=0, l=0.5, r=0.5, b=0.5, unit = "cm"), legend.position = "None", strip.text.x = element_text(face = "bold",colour = '#4D4845'), strip.text.y = element_text(face = "bold",colour = '#4D4845'), axis.text.x=element_text(angle=45,hjust=1))+scale_color_manual("ORF",values=c(navy,coral,green,purple,yellow,orange,grey,aqua,"black"))+ scale_x_continuous("sgRPTg (High Quality)")+ scale_y_continuous("sgRPTg (High Quality)")+ coord_cartesian(ylim=c(0,40)) b=gpairs_lower(b) #============================== # Downsampling #============================== y=log10(11000) down=read.csv("Supplemental_File_S3.csv",comment.char='#') down$reads <- factor(down$reads,levels=c("5000","10000","50000","100000","200000","500000")) down$orf <- factor(down$orf,levels=c("N*","ORF1a","S","ORF3a","E","M","ORF6","ORF7a","ORF8","N","ORF10")) spread_down=down%>% filter(orf!="ORF1a")%>% filter(orf!="N*")%>% select(sample,reads,orf,sgRPTg_HQ)%>% spread(reads,sgRPTg_HQ) %>% replace_na(list("5000" = 0)) %>% replace_na(list("10000" = 0)) %>% replace_na(list("50000" = 0)) %>% replace_na(list("100000" = 0)) %>% replace_na(list("200000" = 0)) %>% replace_na(list("500000" = 0)) n=15 g = ggpairs(spread_down ,columns=c(3,4,5,6,7,8),xlab=expression(bold(paste("sgRPTg (High Quality - log"["10"],")"))),ylab=expression(bold(paste("sgRPTg (High Quality - log"["10"],")"))),legend = c(2,1) ,lower = list(mapping = aes(color = orf),continuous = add_stat_smooth,axisLabels = "show"),diag = list(continuous = "blankDiag"),upper = list(continuous = "blank"))+ ft_theme()+ scale_x_continuous(expression(bold(paste("sgRPTg (High Quality - log"["10"],")"))),trans=scales::pseudo_log_trans(base = 10),breaks=c(0,10,100,1000,10000))+ scale_y_continuous(expression(bold(paste("sgRPTg (High Quality - log"["10"],")"))),trans=scales::pseudo_log_trans(base = 10),breaks=c(0,10,100,1000,10000))+ theme(panel.background=element_rect(fill="grey97",color="white"), axis.title = element_text(face = "bold",colour = '#4D4845'), legend.margin = margin(t = 3, r = 2, b = 2, l = 2, unit = "pt"), legend.position="bottom", panel.spacing.x=unit(0.1, "lines"), legend.justification="center", strip.text.x = element_text(face = "bold",colour = '#4D4845'), strip.text.y = element_text(face = "bold",colour = '#4D4845'), axis.text.x=element_text(angle=45,hjust=1))+ scale_color_manual("ORF",values=c(navy,coral,green,purple,yellow,orange,grey,aqua,"black"))+ coord_cartesian(ylim = c(1,10000)) g = gpairs_lower(g) #============================== # PCA Analysis #============================== # Run information runs=read.csv("Supplemental_File_S8.csv",comment.char='#') # CT Values ct = read.csv("Supplemental_File_S9.csv",comment.char='#') all=merge(sg,runs) all = merge(all,ct,all=T) all$primer <- factor(all$primer,levels=c("V3","V1")) all = all %>% separate(nanopore,c("date","time","device","flow_cell","id"),sep="_") fa=as.data.frame(all %>% filter(orf!="ORF1a") %>% dplyr::select(sample,orf,sgRPTg_HQ,sgRPTg_LQ)) %>% mutate(sgRPTg=sgRPTg_HQ+sgRPTg_LQ) fa=fa%>% group_by(orf) %>% mutate(sgRPTg=normalize(sgRPTg)) s = fa %>%select(sample,orf,sgRPTg) %>% spread(orf,sgRPTg) s = as.data.frame(s) s <- s %>% mutate_all(funs(replace_na(.,0))) sx=s[2:10]-rowMeans(s[2:10]) row.names(s)=s$sample labels = unique(as.data.frame(all %>% filter(orf!="ORF1a") %>% dplyr::select(sample,id,primer,device,mapped_reads,E_CT_normalised))) pca.res=PCA(sx, scale.unit = F, graph=F) #============================== # Primers #============================== c = fviz_pca_ind(pca.res,geom.ind = "")+ ft_theme()+ geom_point(alpha=0.5,aes(color=labels$primer))+ scale_color_manual("Artic Primers",values=c(navy,coral))+ theme( legend.title = element_blank(), legend.text = element_text(size=8), plot.title = element_blank(), legend.margin = margin(t=3), legend.position = "bottom" ) #============================== # Run #============================== d = fviz_pca_ind(pca.res,col.ind = as.factor(labels$id),geom="")+ ft_theme()+ theme( legend.position = "None", plot.title = element_blank() )+ geom_point(alpha=0.5,shape=19,aes(color=labels$id))+ scale_color_viridis("Run",discrete = T,option = "magma")+ scale_fill_viridis("Run",discrete = T,option = "magma") #============================== # Mapped Reads #============================== e = fviz_pca_ind(pca.res, alpha.var=0,geom = c(""))+ ft_theme()+ ggtitle("")+ geom_point(alpha=0.5,aes(color=labels$mapped_reads/100000))+ scale_color_gradientn("Mapped Read Count/100k",colors=c(navy,yellow,coral))+ theme( legend.title = element_blank(), legend.text = element_text(angle=45,hjust=1,size=8), plot.title = element_blank(), legend.key.size = unit(0.5, "cm"), legend.key.height = unit(0.1, "cm"), legend.margin = margin(t=3), legend.position = "bottom" ) #============================== # ECT #============================== f = fviz_pca_ind(pca.res, alpha.var=0,geom = c(""))+ ft_theme()+ ggtitle("")+ geom_point(alpha=0.5,aes(color=labels$E_CT_normalised))+scale_color_gradientn("Normalized Ect",colors=c(navy,yellow,coral))+theme(legend.text = element_text(angle=45,hjust=1))+ theme( legend.title = element_blank(), legend.justification = "center", legend.text = element_text(size=8), plot.title = element_blank(), legend.key.size = unit(0.5, "cm"), legend.key.height = unit(0.1, "cm"), legend.margin = margin(t=3), legend.position = "bottom" ) row1=ggarrange(ggmatrix_gtable(a),ggmatrix_gtable(b),labels=c("A","B")) row2 = ggarrange(c,d,e,f,labels=c("C","D","E","F"),ncol=4,nrow=1) row3= ggarrange(ggmatrix_gtable(g),labels=c("G")) figure3 = ggarrange(row1,row2,row3,nrow=3,ncol=1,heights=c(0.5,0.4,1)) figure3 ``` ## Figure 4 #### Noncanonical sub-genomic RNA ```{r figure_4_non_canonical_sgRNA , fig.width=8.25, fig.height=11.75, echo=F,warnings=F,message=F} #============================== # Noncanonical sgRNA, with comparison to Glasgow ONT #============================== novel_glas=read.csv("Supplemental_File_S10.csv",comment.char='#') novel_glas = novel_glas %>% separate(orf,c("novel","pos"),sep="_") novel_glas$pos=as.numeric(as.character(novel_glas$pos)) novel_glas$location="GLAS" novel_glas$y=-0.05 novel_shef=read.csv("Supplemental_File_S2.csv",comment.char='#') #novel_shef = novel_shef %>% separate(orf,c("novel","pos"),sep="_") novel_shef$pos=as.numeric(as.character(novel_shef$pos)) novel_shef$location="SHEF" novel_shef$y=0.05 novel=rbind(novel_glas,novel_shef) novel=novel %>% filter(pos > 50) novel$start=50 summary = novel%>%mutate(total=(as.numeric(as.character(nsgRNA_HQ_count))+ as.numeric(as.character(nsgRNA_LQ_count))))%>%group_by(start,y,location,pos)%>%summarise(n=n(),m=sum(total)) full=ggplot(mapping=aes(x=start,y=y,xend=pos,yend=y))+ geom_curve(data=summary%>%filter(location=="SHEF")%>%filter(n>5),lineend = "round",curvature=-0.3,color=grey,size=0.1)+ geom_curve(data=summary%>%filter(location=="GLAS")%>%filter(n>3),lineend = "round",curvature=0.3,color=grey,size=0.1)+ scale_alpha_continuous("Number of Samples")+ ft_theme()+ scale_x_continuous("Position")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=200,ymin=-0.05,ymax=0.05,color="grey30",fill="grey30")+ annotate(geom="rect",xmin=200,xmax=13483,ymin=-0.05,ymax=0.05,color="grey50",fill="grey50")+ annotate(geom="text",x=7000,y=0,color="white",label="ORF1a",fontface='bold')+ annotate(geom="rect",xmin=13483,xmax=30000,ymin=-0.05,ymax=0.05,color="grey60",fill="grey60")+ annotate(geom="text",x=17000,y=0,color="white",label="ORF1b",fontface='bold')+ annotate(geom="rect",xmin=21532,xmax=30000,ymin=-0.05,ymax=0.05,color=navy,fill=navy)+ annotate(geom="text",x=23500,y=0,color="white",label="S",fontface='bold')+ annotate(geom="rect",xmin=25361,xmax=30000,ymin=-0.05,ymax=0.05,color=coral,fill=coral)+ annotate(geom="text",x=25750,y=0,color="white",label="3a",fontface='bold')+ annotate(geom="rect",xmin=26213,xmax=30000,ymin=-0.05,ymax=0.05,color=green,fill=green)+ annotate(geom="rect",xmin=26449,xmax=30000,ymin=-0.05,ymax=0.05,color=purple,fill=purple)+ annotate(geom="text",x=26700,y=0,color="white",label="M",fontface='bold')+ annotate(geom="rect",xmin=27017,xmax=30000,ymin=-0.05,ymax=0.05,color=yellow,fill=yellow)+ annotate(geom="rect",xmin=27364,xmax=30000,ymin=-0.05,ymax=0.05,color=orange,fill=orange)+ annotate(geom="rect",xmin=27864,xmax=30000,ymin=-0.05,ymax=0.05,color=grey,fill=grey)+ annotate(geom="rect",xmin=28235,xmax=30000,ymin=-0.05,ymax=0.05,color=aqua,fill=aqua)+ annotate(geom="text",x=28800,y=0,color="white",label="N",fontface='bold')+ annotate(geom="rect",xmin=29510,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,fill="white",alpha=0.3)+ geom_point(alpha=0.8,data=summary%>%filter(location=="SHEF")%>%filter(n>5)%>%arrange(n),aes(x=pos,y=0.05,size=m,color=n))+ geom_point(alpha=0.8,data=summary%>%filter(location=="GLAS")%>%filter(n>3),aes(x=pos,y=-0.05,size=m,color=n))+ theme( axis.text.y=element_blank(), axis.title.y=element_blank(), axis.line.y = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "right", legend.justification = "center", legend.direction = "vertical", legend.key.size = unit(0.5, "cm"), legend.key.height = unit(0.3, "cm"), )+ scale_color_gradient("Number of\nSamples",low=navy,high=coral)+ annotate(geom="text",label="Sheffield ONT ARTIC (n>5)",fontface='bold',x=1000,y=0.5,color="#4D4845", hjust = 0)+ annotate(geom="text",label="Glasgow ONT ARTIC (n>3)",fontface='bold',x=1000,y=-0.5,color="#4D4845", hjust = 0)+ scale_size_continuous("Number of\nReads") zoom = full+ coord_cartesian(xlim=c(22000,30000),ylim=c(-0.2,0.2))+ annotate(geom="text",x=26330,y=0,color="white",label="E",fontface='bold')+ annotate(geom="text",x=27170,y=0,color="white",label="6",fontface='bold')+ annotate(geom="text",x=27600,y=0,color="white",label="7a",fontface='bold')+ annotate(geom="text",x=28010,y=0,color="white",label="8",fontface='bold')+ annotate(geom="text",x=29750,y=0,color="white",label="10",fontface='bold')+ theme( legend.position="none", axis.title.x = element_blank(), panel.background = element_rect(colour="#4D4845"), plot.title = element_text(size=10,colour="#4D4845") )+ggtitle("Zoom 22000-30000") #============================== # Glasgow bait data comparison #============================== novel_glasgow_bait = read.csv("Supplemental_File_S11.csv",comment.char='#') novel_glasgow_bait = novel_glasgow_bait %>% separate(orf,c("novel","pos"),sep="_") novel_glasgow_bait$pos=as.numeric(as.character(novel_glasgow_bait$pos)) novel_glasgow_bait$location="GLAS" novel_glasgow_bait$y=-0.05 novel_bait=rbind( novel_glasgow_bait %>% select(sample,pos,sgRNA_count,location,y), novel_shef %>% mutate(sgRNA_count=nsgRNA_HQ_count + nsgRNA_LQ_count) %>% select(sample,pos,sgRNA_count,location,y) ) novel_bait=novel_bait %>% filter(pos > 50) novel_bait$start=50 summary_bait = novel_bait%>%group_by(start,y,location,pos)%>%summarise(n=n(),m=sum(sgRNA_count)) bait=ggplot(mapping=aes(x=start,y=y,xend=pos,yend=y))+ geom_curve(data=summary_bait%>%filter(n>5)%>%filter(location=="SHEF"),lineend = "round",curvature=-0.3,color=grey,size=0.1)+ geom_curve(data=summary_bait%>%filter(location=="GLAS"),lineend = "round",curvature=0.3,color=grey,size=0.1)+ # geom_curve(data=interactions,curvature=-0.4,color=green)+ scale_alpha_continuous("Number of Samples")+ ft_theme()+ scale_x_continuous("Position")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=200,ymin=-0.05,ymax=0.05,color="grey30",fill="grey30")+ annotate(geom="rect",xmin=200,xmax=13483,ymin=-0.05,ymax=0.05,color="grey50",fill="grey50")+ annotate(geom="text",x=7000,y=0,color="white",label="ORF1a",fontface='bold')+ annotate(geom="rect",xmin=13483,xmax=30000,ymin=-0.05,ymax=0.05,color="grey60",fill="grey60")+ annotate(geom="text",x=17000,y=0,color="white",label="ORF1b",fontface='bold')+ annotate(geom="rect",xmin=21532,xmax=30000,ymin=-0.05,ymax=0.05,color=navy,fill=navy)+ annotate(geom="text",x=23500,y=0,color="white",label="S",fontface='bold')+ annotate(geom="rect",xmin=25361,xmax=30000,ymin=-0.05,ymax=0.05,color=coral,fill=coral)+ annotate(geom="text",x=25750,y=0,color="white",label="3a",fontface='bold')+ annotate(geom="rect",xmin=26213,xmax=30000,ymin=-0.05,ymax=0.05,color=green,fill=green)+ annotate(geom="rect",xmin=26449,xmax=30000,ymin=-0.05,ymax=0.05,color=purple,fill=purple)+ annotate(geom="text",x=26700,y=0,color="white",label="M",fontface='bold')+ annotate(geom="rect",xmin=27017,xmax=30000,ymin=-0.05,ymax=0.05,color=yellow,fill=yellow)+ annotate(geom="rect",xmin=27364,xmax=30000,ymin=-0.05,ymax=0.05,color=orange,fill=orange)+ annotate(geom="rect",xmin=27864,xmax=30000,ymin=-0.05,ymax=0.05,color=grey,fill=grey)+ annotate(geom="rect",xmin=28235,xmax=30000,ymin=-0.05,ymax=0.05,color=aqua,fill=aqua)+ annotate(geom="text",x=28800,y=0,color="white",label="N",fontface='bold')+ annotate(geom="rect",xmin=29510,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,fill="white",alpha=0.3)+ geom_point(alpha=0.8,data=summary_bait%>%filter(n>5)%>%filter(location=="SHEF")%>%arrange(n),aes(x=pos,y=0.05,size=m,color=n))+ geom_point(alpha=0.8,data=summary_bait%>%filter(location=="GLAS"),aes(x=pos,y=-0.05,size=m,color=n))+ theme( axis.text.y=element_blank(), axis.title.y=element_blank(), axis.line.y = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "right", legend.justification = "center", legend.direction = "vertical" )+ scale_color_gradient("Number of Samples",low=navy,high=coral)+ annotate(geom="text",label="Sheffield ONT ARTIC (n>5)",fontface='bold',x=22000,y=0.15,color="#4D4845", hjust = 0)+ annotate(geom="text",label="Glasgow Illumina Bait",fontface='bold',x=22000,y=-0.15,color="#4D4845", hjust = 0)+ scale_size_continuous("Number of Reads")+ coord_cartesian(xlim=c(22000,30000),ylim=c(-0.2,0.2))+ annotate(geom="text",x=26330,y=0,color="white",label="E",fontface='bold')+ annotate(geom="text",x=27170,y=0,color="white",label="6",fontface='bold')+ annotate(geom="text",x=27600,y=0,color="white",label="7a",fontface='bold')+ annotate(geom="text",x=28010,y=0,color="white",label="8",fontface='bold')+ annotate(geom="text",x=29750,y=0,color="white",label="10",fontface='bold')+ theme( legend.position="none", axis.title.x = element_blank(), # panel.background = element_rect(colour="#4D4845"), ) img1 <- readPNG("images/figure_4_reads.png") reads = ggplot() + background_image(img1) + # This ensures that the image leaves some space at the edges theme(plot.margin = margin(t=0.5, l=0.5, r=0.5, b=0.5, unit = "cm")) orfs = c("S","ORF3a","E","M","ORF6","ORF7a","N","27544") counts = c(75,5,19,282,84,16,751,177) nov = data.frame(orfs,counts) nov$orfs=factor(nov$orfs,levels=c("S","ORF3a","E","M","ORF6","ORF7a","N","27544")) counts = ggplot(nov,aes(reorder(orfs,-counts),counts,fill=orfs))+ geom_bar(stat="identity",color="white")+ ft_theme()+ theme( legend.position = "None", axis.text.x=element_text(angle=90,hjust=1,vjust=0.5,size=10) )+ scale_y_continuous("sgRNA Raw Count",expand=c(0,0))+ scale_x_discrete("ORF")+ scale_fill_manual(values=c(navy,coral,green,purple,yellow,orange,aqua,grey)) #============================== # In Vitro Data - Illumnina #============================== cell_illumina = read.csv("Supplemental_File_S5.csv",comment.char='#') cell_illumina = cell_illumina %>% separate(sample,c("line","virus","time","s"),sep="_") cell_illumina$line = str_replace(cell_illumina$line,"VAT","ACE2\nTMPRSS2") cell_illumina$line = str_replace(cell_illumina$line,"VA","ACE2") cell_illumina$line = str_replace(cell_illumina$line,"V","WT") cell_illumina$line<- factor(cell_illumina$line,levels=c("WT","ACE2","ACE2\nTMPRSS2")) illumina_normalised = ggplot(cell_illumina %>% filter(orf!='ORF1a') %>% filter(orf!='ORF10') %>% filter(orf!="N*"), aes(time,sgRPHT,group=orf,color=orf))+ geom_point()+ geom_line()+ ggtitle("Illumina\nMetagenomic")+ facet_grid(virus~line)+ ft_theme()+ scale_y_continuous(expression(bold(paste("Sum Normalized nsgRNA (log"["10"],")"))),trans=scales::pseudo_log_trans(base = 10),breaks=c(0,10,100,3000))+ scale_x_discrete("Time (h)")+ theme( panel.background=element_rect(fill="grey97",color="white"), plot.title = element_text(hjust = 0.5,colour = '#4D4845', size=10 ), legend.justification = "center", legend.position="bottom", panel.spacing.x=unit(0.1, "lines"), strip.text.y = element_text(face = "bold",colour = '#4D4845', size=8), strip.text.x = element_text(face = "bold",colour = '#4D4845', size=8) )+ scale_color_manual("ORF",values=c(navy,coral,orange,yellow,purple,aqua,green,grey,"black")) #============================== # In Vitro Data - ONT #============================== cell_ont = read.csv("Supplemental_File_F6.csv",comment.char='#') cell_ont = cell_ont %>% separate(sample,c("line","virus","time","s"),sep="_") cell_ont$line = str_replace(cell_ont$line,"VAT","ACE2\nTMPRSS2") cell_ont$line = str_replace(cell_ont$line,"VA","ACE2") cell_ont$line = str_replace(cell_ont$line,"V","WT") cell_ont$line<- factor(cell_ont$line,levels=c("WT","ACE2","ACE2\nTMPRSS2")) ont_normalised = ggplot(cell_ont %>% filter(orf!='ORF1a') %>% filter(orf!='ORF10') %>% filter(orf!='N*'),aes(time,sgRPHT_HQ+sgRPHT_LQ,group=orf,color=orf))+ geom_point()+ geom_line()+ facet_grid(virus~line)+ ggtitle("ONT ARTIC")+ ft_theme()+ scale_y_continuous(expression(bold(paste("Sum Normalized nsgRNA (log"["10"],")"))),trans=scales::pseudo_log_trans(base = 10),breaks=c(0,10,100,300))+ scale_x_discrete("Time (h)")+ theme( panel.background=element_rect(fill="grey97",color="white"), legend.position = "none", panel.spacing.x=unit(0.1, "lines"), plot.title = element_text(hjust = 0.5,colour = '#4D4845', size=10 ), strip.text.y = element_text(face = "bold",colour = '#4D4845', size=8), strip.text.x = element_text(face = "bold",colour = '#4D4845', size=8) )+ scale_color_manual("ORF",values=c(navy,coral,orange,yellow,purple,aqua,green,grey,"black")) #============================== # In Vitro Data - All #============================== ill=cell_illumina %>% filter(orf!='ORF1a') %>% group_by(line,virus,time) %>% select(line,virus,time,sgRPHT) %>% summarise(sum=sum(sgRPHT)) ill$class="Illumina\nMetagenomic" ill$sum_norm=normalize(ill$sum) ont=cell_ont %>% filter(orf!='ORF1a') %>% group_by(line,virus,time) %>% select(line,virus,time,sgRPHT_HQ,sgRPHT_LQ) %>% summarise(sum=sum(sgRPHT_HQ+sgRPHT_LQ)) ont$class="ONT Artic" ont$sum_norm=normalize(ont$sum) all=rbind(ill,ont) #============================== # Noncanonical in cell lines (E,F,G) #============================== novel = read.csv("Supplemental_File_S12.csv",comment.char='#') novel = novel %>% separate(sample,c("line","virus","time","s"),sep="_") novel = novel %>% separate(orf,c("novel","pos"),sep="_") novel$y=-0.05 novel$start=50 sum = novel %>% group_by(line,virus,time) %>% summarise(count=sum(sgRPHT)) sum$count=normalize(sum$count) sum$class="Illumina\nMetagenomic" novel$class="Illumina\nMetagenomic" novel_ont = read.csv("Supplemental_File_S13.csv",comment.char='#') novel_ont$y=0.05 novel_ont = novel_ont %>% separate(sample,c("line","virus","time","s"),sep="_") novel_ont = novel_ont %>% separate(orf,c("novel","pos"),sep="_") sum_ont = novel_ont %>% group_by(line,virus,time) %>% summarise(count=sum(nsgRPHT_HQ+nsgRPHT_LQ)) sum_ont$count = normalize(sum_ont$count) sum_ont$class="ONT Artic" # novel_ont$class="ONT ARTIC" # novel_ont$start=50 # novel=rbind(novel%>%select(line,virus,time,pos,sgRNA_count,class,y,start),novel_ont%>%mutate(sgRNA_count=(nsgRNA_HQ_count+nsgRNA_LQ_count)/(mapped_reads/100000)) %>% select(line,virus,time,pos,sgRNA_count,class,y,start)) # novel$pos=as.numeric(as.character(novel$pos)) all_sum=rbind(sum,sum_ont) all_sum$line = str_replace(all_sum$line,"VAT","ACE2\nTMPRSS2") all_sum$line = str_replace(all_sum$line,"VA","ACE2") all_sum$line = str_replace(all_sum$line,"V","WT") all_sum$line<- factor(all_sum$line,levels=c("WT","ACE2","ACE2\nTMPRSS2")) e=ggplot(all_sum,aes(time,count,group=class,color=class))+ geom_point(data=all,aes(time,sum_norm),alpha=0.5)+ geom_line(data=all,aes(time,sum_norm),alpha=0.5,linetype="dashed")+ geom_point()+ geom_line()+ facet_wrap(virus~line,ncol=9)+ ft_theme()+ theme( panel.background=element_rect(fill="grey97",color="white"), strip.text.x = element_text(size=8,face = "bold",margin = margin(0,0,0,0),colour = '#4D4845'), legend.position = "right", legend.title = element_text(size=9), legend.text = element_text(size=9), legend.justification = "center", legend.direction = "vertical", panel.spacing.x=unit(0.1, "lines"), )+ scale_color_manual("Technology",values=c(orange,navy))+ scale_x_discrete("Time (h)")+ scale_y_continuous("Scaled Normalized\nTotal sgRNA",breaks=c(0,0.5,1)) row2 = ggarrange(reads,counts,labels=c("B","C"),widths=c(1,0.4)) row4 = ggarrange(e,labels=c("E"),ncol=1) ggarrange(full,zoom,row2,bait,row4,labels=c("A",'','','D',''),nrow=5,heights=c(0.5,0.3,0.65,0.2,0.35)) ``` ## Figure 5 #### Variants in sub-genomic RNA ```{r figure_5_variants , fig.width=8.25, fig.height=11.75, echo=F,warnings=F,message=F} #============================== # all varinats plot #============================== variants=read.table("Supplemental_File_S16.csv",sep=",",header=T) positions=variants %>% filter(!is.na(value)) %>% filter(variable %in% c("sgRNA_HQ")) %>% group_by(position,value) variants_f=variants%>%filter(position %in% positions$position) variants_f$variable = str_replace(variants_f$variable,"sgRNA_HQ","Sub-genomic RNA (HQ)") variants_f$variable = str_replace(variants_f$variable,"gRNA","Genomic RNA") variants_f = na.omit(variants_f%>%filter(position>1000)%>%filter(variable %in% c("Genomic RNA","Sub-genomic RNA (HQ)"))) variants_f$position=as.factor(variants_f$position) a = ggplot(variants_f,aes(position,value,color=base,fill=base))+ geom_bar(stat="identity",position="fill")+ scale_y_continuous("Proportion of bases in\nall samples with variant")+ ft_theme()+ scale_fill_manual("Base",values=c(green,navy,orange,coral))+ scale_x_discrete("Position (>1000)")+ annotate(geom = "rect",xmin="27046",xmax="27046",ymin=0,ymax=1)+ scale_color_manual("Base",values=c(green,navy,orange,coral))+ facet_wrap(~variable,ncol=1,nrow = 2,shrink=T,drop = T)+ theme(axis.text.x = element_text(angle=45,hjust=1,size=8,face=colorado(variants_f$position,c("27046","28256"))), axis.title.x = element_text(margin=margin(t=5,r=0,b=5,l=0,unit="pt")), strip.text.x = element_text(face = "bold",hjust=0,color="#4D4845"), legend.position = "bottom", legend.justification = "center" )+ coord_cartesian(clip="off")+ annotate(geom="tile",x="27046",y=0.5,fill=NA,color="white",size=1)+ annotate(geom="tile",x="28256",y=0.5,fill=NA,color="white",size=1) sg = read.csv("Supplemental_File_S1.csv",comment.char='#') n_variant_m=melt(sg%>%filter(orf!="ORF1a")%>%filter(orf!="N*")%>%filter(orf!="ORF1a")%>%filter(!is.na(sgRPHT_HQ))%>%mutate(color=ifelse(sample %in% c("SHEF-C81D4","SHEF-C2DB8","SHEF-CD6EF","SHEF-C49C1","SHEF-CEA6A","SHEF-C0C35","SHEF-CD6EF","SHEF-C3BF9","SHEF-C04C4","SHEF-BFEE8"),"True","False"))%>%mutate(HQ_LQ=sgRPTg_HQ+sgRPTg_LQ)%>%select(sample,orf,color,HQ_LQ)) e = ggplot(n_variant_m%>%filter(orf!="ORF1a")%>%filter(orf!="N*")%>%filter(orf=="ORF6"),aes(color,value))+ geom_boxplot(outlier.shape = NA,aes(color=color))+ geom_jitter(height = 0,aes(color=color),alpha=0.3)+ ft_theme()+ scale_y_continuous(expression(bold(paste("sgRPTg (log"["10"],")"))),trans=scales::pseudo_log_trans(base = 10),breaks=c(0,10,100,1000,10000))+ theme( panel.background=element_rect(fill="grey97",color="white"), legend.position = "None", strip.text.x = element_text(face = "bold"), axis.text.x=element_text(angle=90,hjust=1,vjust=0.1) )+ scale_color_manual("Quality",values=c(navy,coral,green))+ scale_x_discrete("27046 Variant\nStatus") n_variant_m=melt(sg%>%filter(orf!="ORF1a")%>%filter(orf!="N*")%>%filter(orf!="ORF1a")%>%filter(!is.na(sgRPHT_HQ))%>%mutate(color=ifelse(sample %in% c("SHEF-C0C35"),"True","False"))%>%mutate(HQ_LQ=sgRPTg_HQ+sgRPTg_LQ)%>%select(sample,orf,color,HQ_LQ)) c = ggplot(n_variant_m%>%filter(orf!="ORF1a")%>%filter(orf!="N*")%>%filter(orf=="ORF6"),aes(color,value))+ geom_boxplot(outlier.shape = NA,aes(color=color))+ geom_jitter(height = 0,aes(color=color),alpha=0.3)+ ft_theme()+ scale_y_continuous(expression(bold(paste("sgRPTg (log"["10"],")"))),trans=scales::pseudo_log_trans(base = 10),breaks=c(0,10,100,1000,10000))+ theme( panel.background=element_rect(fill="grey97",color="white"), legend.position = "None", strip.text.x = element_text(face = "bold"), axis.text.x=element_text(angle=90,hjust=1,vjust=0.1) )+ scale_color_manual("Quality",values=c(navy,coral,green))+ scale_x_discrete("28256 Variant\nStatus") #============================== # Get images from file #============================== two_seven <- readPNG("images/figure_5_variant_27046.png") two_seven = ggplot() + background_image(two_seven) + # This ensures that the image leaves some space at the edges theme(plot.margin = margin(t=1, l=1, r=1, b=1, unit = "cm")) two_eight <- readPNG("images/figure_5_variant_28256.png") two_eight = ggplot() + background_image(two_eight) + # This ensures that the image leaves some space at the edges theme(plot.margin = margin(t=1, l=1, r=1, b=1, unit = "cm")) row2=ggarrange(two_eight,c,labels=c("B","C"),nrow=1,widths = c(1,0.4)) row3=ggarrange(two_seven,e,labels=c("D","E"),nrow=1,widths = c(1,0.4)) ggarrange(a,row2,row3,labels=c("A","",""),nrow=3,heights=c(1,0.9,0.9)) ``` # Supplementary Figures #### Supplementary Figure S1 - Size of reads in SHEF ONT Artic data broken down by periscope assigned class in a representituve sample (SHEF-BFDBE) ```{r supp_figure_s1_read_lengths , fig.width=7, fig.height=4, echo=F,warnings=F,message=F} #============================== # Read lengths plot #============================== read_lengths=read.csv("Supplemental_File_S15.csv",sep="\t") ggplot(read_lengths,aes(length,color=class,fill=class))+ geom_density(alpha=0.5)+ ft_theme()+ theme( panel.background=element_rect(fill="grey97",color="white") )+ scale_x_continuous("Read Length",limits=c(0,1000))+ scale_y_continuous("Density")+ scale_color_manual("Read Class",values=c(navy,coral))+ scale_fill_manual("Read Class",values=c(navy,coral)) ``` #### Supplementary Figure S4 - Mapped Read Counts ```{r supp_figure_s4_mapped_reads , fig.width=10, fig.height=5, echo=F,warnings=F,message=F} #============================== # Clinical Isolate data #============================== shef = read.csv("Supplemental_File_S1.csv",comment.char='#') shef_mapped_reads = unique(shef %>% select(sample,mapped_reads) ) shef_mapped_reads$location = "SHEF" glas = read.csv("Supplemental_File_F7.csv",comment.char='#') glas_mapped_reads = unique(glas %>% select(sample,mapped_reads) ) glas_mapped_reads$location = "GLAS" shef_med=round(median(shef_mapped_reads$mapped_reads),2) glas_med=round(median(glas_mapped_reads$mapped_reads),2) all_mapped=rbind(glas_mapped_reads,shef_mapped_reads) a=ggplot( all_mapped, aes(location,mapped_reads,color=location) )+ geom_boxplot(outlier.shape = NA)+ geom_jitter(height = 0,alpha=0.1)+ ft_theme()+ geom_hline(color=grey,yintercept = 200000)+ scale_y_log10(expression(bold(paste("Mapped Read Count (log"["10"],")"))),limits=c(1,10000000))+ scale_x_discrete("Sequencing Centre")+ scale_color_manual("Sequencing Centre",values=c(navy,coral))+ theme( legend.position = "none", panel.background=element_rect(fill="grey97",color="white") ) + annotate(geom="text",label=paste("Median",shef_med,sep="\n"),x="SHEF",y=1000,fontface='bold',color="#4D4845",size=4)+ annotate(geom="text",label=paste("Median",glas_med,sep="\n"),x="GLAS",y=1000,fontface='bold',color="#4D4845",size=4)+ annotation_logticks(sides = "l") b=ggplot( all_mapped, aes(mapped_reads,color=location,fill=location) )+ geom_density(alpha=0.5)+ ft_theme()+ scale_y_continuous("Density")+ scale_x_log10("",limits=c(1,10000000))+ scale_color_manual("Sequencing\nCentre",values=c(navy,coral))+ scale_fill_manual("Sequencing\nCentre",values=c(navy,coral))+ theme( panel.background=element_rect(fill="grey97",color="white"), axis.title.y = element_blank(), axis.text.y = element_blank(), legend.position = "right", legend.justification = "center", legend.direction = "vertical", legend.margin=margin(unit(x=c(10,10,10,10),units="mm")) ) + geom_vline(color=grey,xintercept = 200000)+ coord_flip() #============================== # Cell Line Data #============================== novel = read.csv("Supplemental_File_S12.csv",comment.char='#') novel$class="Illumina\nMetagenomic" novel = unique(novel %>% select(sample,mapped_reads,class) ) novel_ont = read.csv("Supplemental_File_S13.csv",comment.char='#') novel_ont$class = "ONT Artic" novel_ont = unique(novel_ont %>% select(sample,mapped_reads,class) ) cell_mapped = rbind(novel,novel_ont) ill_med=round(median(novel$mapped_reads),2) ont_med=round(median(novel_ont$mapped_reads),2) c=ggplot( cell_mapped, aes(class,mapped_reads,color=class) )+ geom_boxplot(outlier.shape = NA)+ geom_jitter(height = 0,alpha=0.5)+ ft_theme()+ geom_hline(color=grey,yintercept = 200000)+ scale_y_log10(expression(bold(paste("Mapped Read Count (log"["10"],")"))),limits=c(1,10000000))+ scale_x_discrete("Sequencing Technoloy")+ scale_color_manual("Sequencing Technology",values=c(navy,coral))+ theme( legend.position = "none", panel.background=element_rect(fill="grey97",color="white"), axis.text.x = element_text(size=8) ) + annotate(geom="text",label=paste("Median",ill_med,sep="\n"),x="Illumina\nMetagenomic",y=1000,fontface='bold',color="#4D4845",size=4)+ annotate(geom="text",label=paste("Median",ont_med,sep="\n"),x="ONT Artic",y=1000,fontface='bold',color="#4D4845",size=4)+ annotation_logticks(sides = "l") d=ggplot( cell_mapped, aes(mapped_reads,color=class,fill=class) )+ geom_density(alpha=0.5)+ ft_theme()+ scale_y_continuous("Density")+ scale_x_log10("",limits=c(1,10000000))+ scale_color_manual("Sequencing\nTechnology",values=c(navy,coral))+ scale_fill_manual("Sequencing\nTechnology",values=c(navy,coral))+ theme( panel.background=element_rect(fill="grey97",color="white"), axis.title.y = element_blank(), axis.text.y = element_blank(), legend.position = "right", legend.justification = "center", legend.direction = "vertical", axis.text.x = element_text(size=8), legend.margin=margin(unit(x=c(10,10,10,10),units="mm")) ) + geom_vline(color=grey,xintercept = 200000)+ coord_flip() patients = ggarrange(a,b,widths = c(0.9,1),align="h") cell_lines = ggarrange(c,d,widths = c(0.8,1),align="h") ggarrange(patients,cell_lines,labels=c("A","B"),widths =c(0.9,1) ) ``` #### Supplementary Figure S5 - Sub-Genomic RNA Proportions ```{r supp_figure_s5_sgRNA_proportions , fig.width=7,fig.height=9, echo=F,warnings=F,message=F} #============================== # Clinial Isolates Canonical ONT #============================== shef_canonical_ont = read.csv("Supplemental_File_S1.csv",comment.char='#') shef_canonical_ont$location = "SHEF" shef_canonical_ont$class = "ONT Artic" shef_canonical_ont$status = "Canonical" shef_canonical_ont = shef_canonical_ont %>% filter(orf!='ORF1a') %>% group_by(sample,location,class,status,mapped_reads) %>% summarise(raw_reads=sum(sgRNA_HQ_count+sgRNA_LQ_count)) glas_canonical_ont = read.csv("Supplemental_File_F7.csv",comment.char='#') glas_canonical_ont$location = "GLAS" glas_canonical_ont$class = "ONT Artic" glas_canonical_ont$status = "Canonical" glas_canonical_ont = glas_canonical_ont %>% filter(orf!='ORF1a') %>% group_by(sample,location,class,status,mapped_reads) %>% summarise(raw_reads=sum(sgRNA_HQ_count+sgRNA_LQ_count)) #============================== # Clinial Isolates Noncanonical ONT #============================== glas_non_canonical_ont=read.csv("Supplemental_File_S10.csv",comment.char='#') glas_non_canonical_ont$location = "GLAS" glas_non_canonical_ont$class = "ONT Artic" glas_non_canonical_ont$status = "Noncanonical" glas_non_canonical_ont = glas_non_canonical_ont %>% group_by(sample,location,class,status,mapped_reads) %>% summarise(raw_reads=sum(nsgRNA_HQ_count + nsgRNA_LQ_count)) shef_non_canonical_ont=read.csv("Supplemental_File_S2.csv",comment.char='#') shef_non_canonical_ont$location = "SHEF" shef_non_canonical_ont$class = "ONT Artic" shef_non_canonical_ont$status = "Noncanonical" shef_non_canonical_ont = shef_non_canonical_ont %>% group_by(sample,location,class,status,mapped_reads) %>% summarise(raw_reads=sum(nsgRNA_HQ_count + nsgRNA_LQ_count)) #============================== # Clinial Isolates Illumina #============================== non_canonical_illumina = read.csv("Supplemental_File_S11.csv",comment.char='#') non_canonical_illumina$location = "GLAS" non_canonical_illumina$class = "Illumina Bait" non_canonical_illumina$status = "Noncanonical" non_canonical_illumina = non_canonical_illumina %>% group_by(sample,location,class,status,mapped_reads) %>% summarise(raw_reads=sum(sgRNA_count)) canonical_illumina = read.csv("supplementary_file_f14_glasgow_bait_capure.csv",comment.char='#') canonical_illumina$location = "GLAS" canonical_illumina$class = "Illumina Bait" canonical_illumina$status = "Canonical" canonical_illumina = canonical_illumina %>% filter(orf!='ORF1a') %>% group_by(sample,location,class,status,mapped_reads) %>% summarise(raw_reads=sum(sgRNA_count)) all_clinical = rbind(shef_canonical_ont,glas_canonical_ont,shef_non_canonical_ont,glas_non_canonical_ont,canonical_illumina,non_canonical_illumina) all_clinical$percent = (all_clinical$raw_reads/all_clinical$mapped_reads) * 100 #============================== # Cell Lines Canonical ONT #============================== cell_canonical_ont = read.csv("Supplemental_File_S6.csv",comment.char='#') cell_canonical_ont$location = "IN VITRO" cell_canonical_ont$class = "ONT Artic" cell_canonical_ont$status = "Canonical" cell_canonical_ont = cell_canonical_ont %>% group_by(sample,location,class,status,mapped_reads) %>% summarise(raw_reads=sum(sgRNA_HQ_count+sgRNA_LQ_count)) #============================== # Cell Lines Noncanonical ONT #============================== cell_non_canonical_ont = read.csv("Supplemental_File_S13.csv",comment.char='#') cell_non_canonical_ont$location = "IN VITRO" cell_non_canonical_ont$class = "ONT Artic" cell_non_canonical_ont$status = "Noncanonical" cell_non_canonical_ont = cell_non_canonical_ont %>% group_by(sample,location,class,status,mapped_reads) %>% summarise(raw_reads=sum(nsgRNA_HQ_count + nsgRNA_LQ_count)) all_cell_ont = rbind(cell_canonical_ont,cell_non_canonical_ont) all_cell_ont$percent = (all_cell_ont$raw_reads/all_cell_ont$mapped_reads) * 100 all_ont=rbind(all_clinical,all_cell_ont) #============================== # Cell Lines Canonical Illumina #============================== cell_canonical_illumina = read.csv("Supplemental_File_S5.csv",comment.char='#') cell_canonical_illumina$location = "IN VITRO" cell_canonical_illumina$class = "Illumina\nMetagenomic" cell_canonical_illumina$status = "Canonical" cell_canonical_illumina = cell_canonical_illumina %>% filter(orf!='ORF1a') %>% group_by(sample,location,class,status,mapped_reads) %>% summarise(raw_reads=sum(sgRNA_count)) #============================== # Cell Lines Noncanonical Illumina #============================== cell_non_canonical_illumina = read.csv("Supplemental_File_S12.csv",comment.char='#') cell_non_canonical_illumina$location = "IN VITRO" cell_non_canonical_illumina$class = "Illumina\nMetagenomic" cell_non_canonical_illumina$status = "Noncanonical" cell_non_canonical_illumina = cell_non_canonical_illumina %>% group_by(sample,location,class,status,mapped_reads) %>% summarise(raw_reads=sum(sgRNA_count)) all_cell_illumina = rbind(cell_canonical_illumina,cell_non_canonical_illumina) all_cell_illumina$percent = (all_cell_illumina$raw_reads/all_cell_ont$mapped_reads) * 100 all = rbind(all_ont,all_cell_illumina) a=ggplot(all,aes(location,percent,color=status))+ geom_boxplot(outlier.shape = NA)+ geom_jitter(height=0,alpha=0.3)+ facet_grid(status~class,scales="free_x",space="free")+ scale_y_log10(expression(bold(paste("sgRNA as a Percent of Total Reads (log"["10"],")"))),labels = comma)+ scale_x_discrete("Dataset")+ ft_theme()+ annotation_logticks(sides="l",color="#4D4845",outside=T)+ scale_color_manual("sgRNA Type",values=c(navy,coral))+ theme( panel.background=element_rect(fill="grey97",color="white"), strip.text.x = element_text(face = "bold",colour = '#4D4845'), strip.text.y = element_text(face = "bold",colour = '#4D4845'), panel.spacing.x=unit(1, "lines"), axis.text.y = element_text(margin = margin(r = 5)) )+ coord_cartesian(clip = "off") #============================== # Get percentage of sgRNA that are Noncanonical #============================== non_percent = all %>% group_by(sample,location,class) %>% summarise(value=(raw_reads[status=="Noncanonical"]/(raw_reads[status=="Canonical"]+raw_reads[status=="Noncanonical"]))*100) b=ggplot(non_percent,aes(location,value),color=coral)+ geom_boxplot(outlier.shape = NA,color=coral)+ geom_jitter(height=0,alpha=0.3,color=coral)+ facet_grid(~class,scales="free_x",space="free")+ scale_y_log10(expression(bold(paste("Percentage of Total sgRNA that is Noncanonical (log"["10"],")"))),labels = comma)+ scale_x_discrete("Dataset")+ ft_theme()+ annotation_logticks(sides="l",color="#4D4845",outside=T)+ theme( panel.background=element_rect(fill="grey97",color="white"), strip.text.x = element_text(face = "bold",colour = '#4D4845'), strip.text.y = element_text(face = "bold",colour = '#4D4845'), panel.spacing.x=unit(1, "lines"), axis.text.y = element_text(margin = margin(r = 5)) )+ coord_cartesian(clip = "off") ggarrange(a,b,labels=c("A","B"),nrow=2) ``` #### Supplementary Figure S6 - Number of samples with at the most frequently represented noncanonical sub-genomic RNAs ```{r supp_figure_s6_non_canonical_sgRNA_counts , fig.width=8.25, fig.height=3, echo=F,warnings=F,message=F} #============================== # Non-cannonical counts #============================== shef_non_canonical_ont=read.csv("Supplemental_File_S2.csv",comment.char='#') ggplot(shef_non_canonical_ont %>% filter(pos>50) %>% group_by(pos) %>% summarise(n=n()) %>% filter(n>100),aes(reorder(as.factor(pos),-n),n))+ geom_bar(stat="identity",fill=navy,color=navy,width = 0.8)+ ft_theme()+ scale_x_discrete("Read Start Position")+ scale_y_continuous("Number of Samples")+ theme(axis.text.x=element_text(angle=45,hjust=1), panel.background=element_rect(fill="grey97",color="white")) ``` #### Supplementary Figure S7 - Read-Level evidence for noncanonical sub-genomic RNA at 5785 and 10639 ```{r supp_figure_s7_highly_expressed_non_canonical_sgRNA, fig.width=8.25, fig.height=7, echo=F,warnings=F,message=F} #============================== # Get images from file #============================== five <- readPNG("images/supp_figure_5785.png") five = ggplot() + background_image(five) + # This ensures that the image leaves some space at the edges theme(plot.margin = margin(t=1, l=1, r=1, b=1, unit = "cm")) ten <- readPNG("images/supp_figure_10639.png") ten = ggplot() + background_image(ten) + # This ensures that the image leaves some space at the edges theme(plot.margin = margin(t=1, l=1, r=1, b=1, unit = "cm")) #============================== # 10639 #============================== orfs = c("S","ORF3a","M","N","10639") counts = c(80,9,219,154,136) nov = data.frame(orfs,counts) nov$orfs=factor(nov$orfs,levels=c("S","ORF3a","E","M","ORF6","ORF7a","N","10639")) ten_bars = ggplot(nov,aes(reorder(orfs,-counts),counts,fill=orfs,color=orfs))+ geom_bar(stat="identity")+ ft_theme()+ theme( legend.position = "None", axis.text.x=element_text(angle=45,hjust=1) )+ scale_y_continuous("Sub-genomic RNA Count")+ scale_x_discrete("Sub-genomic RNA")+ scale_fill_manual(values=c(navy,coral,purple,aqua,grey,orange,aqua,grey))+ scale_color_manual(values=c(navy,coral,purple,aqua,grey,orange,aqua,grey)) counts = c(22,1,74,741,41,1,8,943) #============================== # 5785 #============================== orfs = c("S","ORF3a","E","M","ORF6","ORF7a","ORF8","N","5785") counts = c(22,1,74,741,41,1,8,943,31) nov = data.frame(orfs,counts) nov$orfs=factor(nov$orfs,levels=c("S","ORF3a","E","M","ORF6","ORF7a","ORF8","N","5785")) five_bars = ggplot(nov,aes(reorder(orfs,-counts),counts,fill=orfs,color=orfs))+ geom_bar(stat="identity")+ ft_theme()+ theme( legend.position = "None", axis.text.x=element_text(angle=45,hjust=1,size=8) )+ scale_y_continuous("Sub-genomic RNA Count")+ scale_x_discrete("Sub-genomic RNA")+ scale_fill_manual(values=c(navy,coral,green,purple,yellow,orange,grey,aqua,grey))+ scale_color_manual(values=c(navy,coral,green,purple,yellow,orange,grey,aqua,grey)) ggarrange(ten,ten_bars,five,five_bars,nrow=2,ncol=2,labels=c("A","B","C","D"),widths = c(1,0.3),heights = c(0.9,1)) ``` #### Supplementary Figure S8 - sgRNA from Illumina Bait Based Capture ```{r supp_figure_s8_illumina_bait_capture , fig.width=8.25, fig.height=11.75, echo=F,warnings=F,message=F} #============================== # Clinical Samples Illumina Bait Based Capture - Canonical #============================== bait = read.csv("Supplemental_File_S14.csv",comment.char='#') # bait = bait %>% filter(orf!="N*") coverage=ggplot(bait,aes(reorder(orf,-coverage),coverage))+ ft_theme()+ theme( panel.background=element_rect(fill="grey97",color="white"), legend.position = "None", strip.text.x = element_text(face = "bold"), axis.text.x=element_text(angle=90,hjust=1) )+ geom_boxplot(outlier.shape = NA,color=navy)+ geom_jitter(color=navy,height = 0)+ scale_color_manual(values=c(navy))+ scale_x_discrete("ORF")+ scale_y_continuous("Median\nCoverage (+/-20bp)") sgrna=ggplot(bait,aes(reorder(orf,-sgRNA_count),sgRNA_count))+ ft_theme()+ theme( panel.background=element_rect(fill="grey97",color="white"), legend.position = "None", strip.text.x = element_text(face = "bold"), axis.text.x=element_text(angle=90,hjust=1) )+ geom_boxplot(outlier.shape = NA,color=navy)+ geom_jitter(color=navy,height = 0)+ scale_color_manual(values=c(navy))+ scale_x_discrete("ORF")+ scale_y_log10(expression(bold(paste("sgRNA Reads (log"["10"],")")))) norm_mapped=ggplot(bait,aes(reorder(orf,-sgRPHT),sgRPHT))+ ft_theme()+ geom_jitter(color=navy,height = 0)+ theme( panel.background=element_rect(fill="grey97",color="white"), legend.position = "None", strip.text.x = element_text(face = "bold"), axis.text.x=element_text(angle=90,hjust=1) )+ geom_boxplot(color=navy)+ scale_color_manual(values=c(navy))+ scale_x_discrete("ORF")+ scale_y_log10(expression(bold(paste("sgRPHT (log"["10"],")")))) norm_local=ggplot(bait,aes(reorder(orf,-sgRPTL),sgRPTL))+ ft_theme()+ theme( panel.background=element_rect(fill="grey97",color="white"), legend.position = "None", strip.text.x = element_text(face = "bold"), axis.text.x=element_text(angle=90,hjust=1) )+ geom_boxplot(outlier.shape = NA,color=navy)+ geom_jitter(color=navy,height = 0)+ scale_color_manual(values=c(navy))+ scale_x_discrete("ORF")+ scale_y_log10(expression(bold(paste("sgRPTL (log"["10"],")")))) canonical = ggarrange(coverage,sgrna,norm_mapped,norm_local,labels=c('A','B','C','D'),nrow=2,ncol=2) #============================== # Clinical Samples Illumina Bait Bases Capture - Noncanonical #============================== novel_illunima = read.csv("Supplemental_File_S11.csv",comment.char='#') novel_illunima= novel_illunima %>% separate(sample,c("sample","netflex","s"),sep="_") novel_illunima = novel_illunima %>% separate(orf,c("novel","pos"),sep="_") novel_illunima$pos=as.numeric(as.character(novel_illunima$pos)) novel_illunima$start=50 novel_illunima$y=0.05 full=ggplot(mapping=aes(x=start,y=y,xend=pos,yend=y))+ geom_curve(data=novel_illunima%>%group_by(start,y,pos)%>%summarise(n=n()),lineend = "round",curvature=-0.3,color=grey,size=0.1)+ scale_alpha_continuous("Number of Samples")+ ft_theme()+ scale_x_continuous("Position")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=200,ymin=-0.05,ymax=0.05,color="grey30",fill="grey30")+ annotate(geom="rect",xmin=200,xmax=13483,ymin=-0.05,ymax=0.05,color="grey50",fill="grey50")+ annotate(geom="text",x=7000,y=0,color="white",label="ORF1a",fontface='bold')+ annotate(geom="rect",xmin=13483,xmax=30000,ymin=-0.05,ymax=0.05,color="grey60",fill="grey60")+ annotate(geom="text",x=17000,y=0,color="white",label="ORF1b",fontface='bold')+ annotate(geom="rect",xmin=21532,xmax=30000,ymin=-0.05,ymax=0.05,color=navy,fill=navy)+ annotate(geom="text",x=23500,y=0,color="white",label="S",fontface='bold')+ annotate(geom="rect",xmin=25361,xmax=30000,ymin=-0.05,ymax=0.05,color=coral,fill=coral)+ annotate(geom="text",x=25750,y=0,color="white",label="3a",fontface='bold')+ annotate(geom="rect",xmin=26213,xmax=30000,ymin=-0.05,ymax=0.05,color=green,fill=green)+ annotate(geom="rect",xmin=26449,xmax=30000,ymin=-0.05,ymax=0.05,color=purple,fill=purple)+ annotate(geom="text",x=26700,y=0,color="white",label="M",fontface='bold')+ annotate(geom="rect",xmin=27017,xmax=30000,ymin=-0.05,ymax=0.05,color=yellow,fill=yellow)+ annotate(geom="rect",xmin=27364,xmax=30000,ymin=-0.05,ymax=0.05,color=orange,fill=orange)+ annotate(geom="rect",xmin=27864,xmax=30000,ymin=-0.05,ymax=0.05,color=grey,fill=grey)+ annotate(geom="rect",xmin=28235,xmax=30000,ymin=-0.05,ymax=0.05,color=aqua,fill=aqua)+ annotate(geom="text",x=28800,y=0,color="white",label="N",fontface='bold')+ annotate(geom="rect",xmin=29510,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,fill="white",alpha=0.3)+ geom_point(alpha=0.8,data=novel_illunima,aes(x=pos,y=0.05,size=sgRNA_count),color=navy,alpha=0.5)+ theme( axis.text.y=element_blank(), axis.title.y=element_blank(), axis.line.y = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), strip.text.x = element_text(face="bold") )+ scale_size_continuous("Number of Reads")+ scale_y_continuous(limits=c(-0.1,0.4)) count=ggplot(novel_illunima,aes(pos,sgRNA_count,color=sample))+geom_bar(stat="identity")+scale_color_manual(values=c(navy,coral,grey,yellow,purple))+ ft_theme()+ theme( legend.position = "None", panel.background=element_rect(fill="grey97",color="white") )+ scale_y_continuous("sgRNA\nCount")+ scale_x_continuous("Genome Position") sum=ggplot(novel_illunima %>% group_by(pos) %>% summarise(n=n()),aes(pos,n))+geom_bar(stat="identity",fill=navy,color=navy)+ ft_theme()+ theme( legend.position = "None", panel.background=element_rect(fill="grey97",color="white") )+ scale_y_continuous("Number\nof Samples")+ scale_x_continuous("Genome Position") non_canonical=ggarrange(full,count,sum,nrow=3,labels=c('E','F','G'),align ="v",heights=c(1,0.7,0.7)) ggarrange(canonical,non_canonical,nrow=2,heights=c(0.8,1),align="hv") ``` #### Supplementary Figure S9 - Noncanonical sub-genomic RNA detected in an vitro infection model ```{r supp_figure_s9_non_canonical_sgRNA_in_vitro , fig.width=8.25, fig.height=11.75, echo=F,warnings=F,message=F} #============================== # Cell Lines Noncanonical ONT #============================== cell_non_canonical_ont = read.csv("Supplemental_File_S13.csv",comment.char='#') cell_non_canonical_ont = cell_non_canonical_ont %>% separate(orf,c("novel","pos"),sep="_") cell_non_canonical_ont$location = "IN VITRO" cell_non_canonical_ont$class = "ONT Artic" cell_non_canonical_ont$status = "Noncanonical" cell_non_canonical_ont$y=0.05 cell_non_canonical_ont = cell_non_canonical_ont %>% separate(sample,c("line","virus","time"),sep="_") cell_non_canonical_ont$pos = as.numeric(as.character(cell_non_canonical_ont$pos)) cell_non_canonical_ont = cell_non_canonical_ont %>% filter(pos>50) %>% group_by(line,virus,time,pos,location,class,status,y) %>% summarise(raw_reads=sum((nsgRNA_HQ_count + nsgRNA_LQ_count)/(mapped_reads/100000)),n=n()) cell_non_canonical_ont$line = str_replace(cell_non_canonical_ont$line,"VAT","veroE6\nACE2\nTMPRSS2") cell_non_canonical_ont$line = str_replace(cell_non_canonical_ont$line,"VA","veroE6\nACE2") cell_non_canonical_ont$line = str_replace(cell_non_canonical_ont$line,"V","veroE6") cell_non_canonical_ont$line = str_replace(cell_non_canonical_ont$line,"vero","Vero") #SHOULD I DIVIDE BY MAPPED READS???? OTTHER WISE YOU CAN'T COMPARE!!! YES DO THIS #============================== # Cell Lines Noncanonical Illumina #============================== cell_non_canonical_illumina = read.csv("Supplemental_File_S12.csv",comment.char='#') cell_non_canonical_illumina = cell_non_canonical_illumina %>% separate(orf,c("novel","pos"),sep="_") cell_non_canonical_illumina$location = "IN VITRO" cell_non_canonical_illumina$class = "Illumina\nMetagenomic" cell_non_canonical_illumina$status = "Noncanonical" cell_non_canonical_illumina$y=-0.05 cell_non_canonical_illumina = cell_non_canonical_illumina %>% separate(sample,c("line","virus","time","s"),sep="_") cell_non_canonical_illumina$pos = as.numeric(as.character(cell_non_canonical_illumina$pos)) cell_non_canonical_illumina = cell_non_canonical_illumina %>% group_by(line,virus,time,pos,location,class,status,y) %>% summarise(raw_reads=sum(sgRNA_count/(mapped_reads/100000)),n=n()) cell_non_canonical_illumina$line = str_replace(cell_non_canonical_illumina$line,"VAT","veroE6\nACE2\nTMPRSS2") cell_non_canonical_illumina$line = str_replace(cell_non_canonical_illumina$line,"VA","veroE6\nACE2") cell_non_canonical_illumina$line = str_replace(cell_non_canonical_illumina$line,"V","veroE6") cell_non_canonical_illumina$line = str_replace(cell_non_canonical_illumina$line,"vero","Vero") phe2 = rbind(cell_non_canonical_ont,cell_non_canonical_illumina)%>% filter(virus=="PHE2") %>% filter(line!="VA") phe2$start=50 a = ggplot(mapping=aes(x=start,y=y,xend=pos,yend=y))+ ggtitle("PHE2")+ geom_curve(data=phe2%>%filter(class=="ONT Artic"),lineend = "round",curvature=-0.3,color=grey,size=0.1)+ geom_curve(data=phe2%>%filter(class=="Illumina\nMetagenomic"),lineend = "round",curvature=0.3,color=grey,size=0.1)+ # geom_curve(data=interactions,curvature=-0.4,color=green)+ scale_alpha_continuous("Number of Samples")+ ft_theme()+ scale_x_continuous("Position")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=200,ymin=-0.05,ymax=0.05,color="grey30",fill="grey30")+ annotate(geom="rect",xmin=200,xmax=13483,ymin=-0.05,ymax=0.05,color="grey50",fill="grey50")+ annotate(geom="text",x=7000,y=0,color="white",label="ORF1a",fontface='bold')+ annotate(geom="rect",xmin=13483,xmax=30000,ymin=-0.05,ymax=0.05,color="grey60",fill="grey60")+ annotate(geom="text",x=17000,y=0,color="white",label="ORF1b",fontface='bold')+ annotate(geom="rect",xmin=21532,xmax=30000,ymin=-0.05,ymax=0.05,color=navy,fill=navy)+ annotate(geom="text",x=23500,y=0,color="white",label="S",fontface='bold')+ annotate(geom="rect",xmin=25361,xmax=30000,ymin=-0.05,ymax=0.05,color=coral,fill=coral)+ annotate(geom="text",x=25750,y=0,color="white",label="3a",fontface='bold')+ annotate(geom="rect",xmin=26213,xmax=30000,ymin=-0.05,ymax=0.05,color=green,fill=green)+ annotate(geom="rect",xmin=26449,xmax=30000,ymin=-0.05,ymax=0.05,color=purple,fill=purple)+ annotate(geom="text",x=26700,y=0,color="white",label="M",fontface='bold')+ annotate(geom="rect",xmin=27017,xmax=30000,ymin=-0.05,ymax=0.05,color=yellow,fill=yellow)+ annotate(geom="rect",xmin=27364,xmax=30000,ymin=-0.05,ymax=0.05,color=orange,fill=orange)+ annotate(geom="rect",xmin=27864,xmax=30000,ymin=-0.05,ymax=0.05,color=grey,fill=grey)+ annotate(geom="rect",xmin=28235,xmax=30000,ymin=-0.05,ymax=0.05,color=aqua,fill=aqua)+ annotate(geom="text",x=28800,y=0,color="white",label="N",fontface='bold')+ annotate(geom="rect",xmin=29510,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,fill="white",alpha=0.3)+ geom_point(alpha=0.8,data=phe2%>%filter(class=="ONT Artic")%>%arrange(n),aes(x=pos,y=0.05,size=raw_reads,color=class))+ geom_point(alpha=0.8,data=phe2%>%filter(class=="Illumina\nMetagenomic"),aes(x=pos,y=-0.05,size=raw_reads,color=class))+ theme( plot.title = element_text(hjust = 0.5,colour = '#4D4845'), axis.text.y=element_blank(), axis.title.y=element_blank(), axis.line.y = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.justification = "center", panel.background=element_rect(fill="grey97",color="white"), strip.text.y = element_text(face = "bold",colour = '#4D4845'), strip.text.x = element_text(face = "bold",colour = '#4D4845') )+ scale_color_manual("Tech",values=c(orange,navy))+ scale_size_continuous("Reads per\n100000 Mapped",limits = c(0, 10))+facet_grid(line~time)+ scale_y_continuous(limits=c(-0.3,0.3)) gla1 = rbind(cell_non_canonical_ont,cell_non_canonical_illumina)%>% filter(virus=="GLA1")%>% filter(line!="VA") gla1$start=50 b = ggplot(mapping=aes(x=start,y=y,xend=pos,yend=y))+ ggtitle("GLA1")+ geom_curve(data=gla1%>%filter(class=="ONT Artic"),lineend = "round",curvature=-0.3,color=grey,size=0.1)+ geom_curve(data=gla1%>%filter(class=="Illumina\nMetagenomic"),lineend = "round",curvature=0.3,color=grey,size=0.1)+ # geom_curve(data=interactions,curvature=-0.4,color=green)+ scale_alpha_continuous("Number of Samples")+ ft_theme()+ scale_x_continuous("Position")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=200,ymin=-0.05,ymax=0.05,color="grey30",fill="grey30")+ annotate(geom="rect",xmin=200,xmax=13483,ymin=-0.05,ymax=0.05,color="grey50",fill="grey50")+ annotate(geom="text",x=7000,y=0,color="white",label="ORF1a",fontface='bold')+ annotate(geom="rect",xmin=13483,xmax=30000,ymin=-0.05,ymax=0.05,color="grey60",fill="grey60")+ annotate(geom="text",x=17000,y=0,color="white",label="ORF1b",fontface='bold')+ annotate(geom="rect",xmin=21532,xmax=30000,ymin=-0.05,ymax=0.05,color=navy,fill=navy)+ annotate(geom="text",x=23500,y=0,color="white",label="S",fontface='bold')+ annotate(geom="rect",xmin=25361,xmax=30000,ymin=-0.05,ymax=0.05,color=coral,fill=coral)+ annotate(geom="text",x=25750,y=0,color="white",label="3a",fontface='bold')+ annotate(geom="rect",xmin=26213,xmax=30000,ymin=-0.05,ymax=0.05,color=green,fill=green)+ annotate(geom="rect",xmin=26449,xmax=30000,ymin=-0.05,ymax=0.05,color=purple,fill=purple)+ annotate(geom="text",x=26700,y=0,color="white",label="M",fontface='bold')+ annotate(geom="rect",xmin=27017,xmax=30000,ymin=-0.05,ymax=0.05,color=yellow,fill=yellow)+ annotate(geom="rect",xmin=27364,xmax=30000,ymin=-0.05,ymax=0.05,color=orange,fill=orange)+ annotate(geom="rect",xmin=27864,xmax=30000,ymin=-0.05,ymax=0.05,color=grey,fill=grey)+ annotate(geom="rect",xmin=28235,xmax=30000,ymin=-0.05,ymax=0.05,color=aqua,fill=aqua)+ annotate(geom="text",x=28800,y=0,color="white",label="N",fontface='bold')+ annotate(geom="rect",xmin=29510,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,fill="white",alpha=0.3)+ geom_point(alpha=0.8,data=gla1%>%filter(class=="ONT Artic")%>%arrange(n),aes(x=pos,y=0.05,size=raw_reads,color=class))+ geom_point(alpha=0.8,data=gla1%>%filter(class=="Illumina\nMetagenomic"),aes(x=pos,y=-0.05,size=raw_reads,color=class))+ theme( plot.title = element_text(hjust = 0.5,colour = '#4D4845'), axis.text.y=element_blank(), axis.title.y=element_blank(), axis.line.y = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.justification = "center", panel.background=element_rect(fill="grey97",color="white"), strip.text.y = element_text(face = "bold",colour = '#4D4845'), strip.text.x = element_text(face = "bold",colour = '#4D4845') )+ scale_color_manual("Tech",values=c(orange,navy))+ scale_size_continuous("Reads per\n100000 Mapped",limits = c(0,10))+facet_grid(line~time)+ scale_y_continuous(limits=c(-0.3,0.3)) two = rbind(cell_non_canonical_ont,cell_non_canonical_illumina)%>% filter(virus=="GLA2")%>% filter(line!="VA") two$start=50 c = ggplot(mapping=aes(x=start,y=y,xend=pos,yend=y))+ ggtitle("GLA2")+ geom_curve(data=two%>%filter(class=="ONT Artic"),lineend = "round",curvature=-0.3,color=grey,size=0.1)+ geom_curve(data=two%>%filter(class=="Illumina\nMetagenomic"),lineend = "round",curvature=0.3,color=grey,size=0.1)+ # geom_curve(data=interactions,curvature=-0.4,color=green)+ ft_theme()+ scale_x_continuous("Position")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=200,ymin=-0.05,ymax=0.05,color="grey30",fill="grey30")+ annotate(geom="rect",xmin=200,xmax=13483,ymin=-0.05,ymax=0.05,color="grey50",fill="grey50")+ annotate(geom="text",x=7000,y=0,color="white",label="ORF1a",fontface='bold')+ annotate(geom="rect",xmin=13483,xmax=30000,ymin=-0.05,ymax=0.05,color="grey60",fill="grey60")+ annotate(geom="text",x=17000,y=0,color="white",label="ORF1b",fontface='bold')+ annotate(geom="rect",xmin=21532,xmax=30000,ymin=-0.05,ymax=0.05,color=navy,fill=navy)+ annotate(geom="text",x=23500,y=0,color="white",label="S",fontface='bold')+ annotate(geom="rect",xmin=25361,xmax=30000,ymin=-0.05,ymax=0.05,color=coral,fill=coral)+ annotate(geom="text",x=25750,y=0,color="white",label="3a",fontface='bold')+ annotate(geom="rect",xmin=26213,xmax=30000,ymin=-0.05,ymax=0.05,color=green,fill=green)+ annotate(geom="rect",xmin=26449,xmax=30000,ymin=-0.05,ymax=0.05,color=purple,fill=purple)+ annotate(geom="text",x=26700,y=0,color="white",label="M",fontface='bold')+ annotate(geom="rect",xmin=27017,xmax=30000,ymin=-0.05,ymax=0.05,color=yellow,fill=yellow)+ annotate(geom="rect",xmin=27364,xmax=30000,ymin=-0.05,ymax=0.05,color=orange,fill=orange)+ annotate(geom="rect",xmin=27864,xmax=30000,ymin=-0.05,ymax=0.05,color=grey,fill=grey)+ annotate(geom="rect",xmin=28235,xmax=30000,ymin=-0.05,ymax=0.05,color=aqua,fill=aqua)+ annotate(geom="text",x=28800,y=0,color="white",label="N",fontface='bold')+ annotate(geom="rect",xmin=29510,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,fill="white",alpha=0.3)+ geom_point(alpha=0.8,data=two%>%filter(class=="ONT Artic")%>%arrange(n),aes(x=pos,y=0.05,size=raw_reads,color=class))+ geom_point(alpha=0.8,data=two%>%filter(class=="Illumina\nMetagenomic"),aes(x=pos,y=-0.05,size=raw_reads,color=class))+ theme( plot.title = element_text(hjust = 0.5,colour = '#4D4845'), axis.text.y=element_blank(), axis.title.y=element_blank(), axis.line.y = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.justification = "center", panel.background=element_rect(fill="grey97",color="white"), strip.text.y = element_text(face = "bold",colour = '#4D4845'), strip.text.x = element_text(face = "bold",colour = '#4D4845') )+ scale_color_manual("Tech",values=c(orange,navy))+ scale_size_continuous("Reads per\n100000 Mapped",limits = c(0, 10))+facet_grid(line~time)+ scale_y_continuous(limits=c(-0.3,0.3)) ggarrange(a,b,c,labels=c("A","B","C"),nrow=3,legend="bottom",common.legend = T) ``` #### Supplementary Figure S11 - Evidence for sgRNA for ORF3b and ORF7b? ```{r supp_figure_s11_3b_7b ,fig.width=8.25, fig.height=11.75, echo=F,warnings=F,message=F} #============================== # Load some data #============================== novel_glas=read.csv("Supplemental_File_S10.csv",comment.char='#') novel_glas = novel_glas %>% separate(orf,c("novel","pos"),sep="_") novel_glas$pos=as.numeric(as.character(novel_glas$pos)) novel_glas$location="GLAS" novel_glas$y=-0.05 novel_shef=read.csv("Supplemental_File_S2.csv",comment.char='#') #novel_shef = novel_shef %>% separate(orf,c("novel","pos"),sep="_") novel_shef$pos=as.numeric(as.character(novel_shef$pos)) novel_shef$location="SHEF" novel_shef$y=0.05 novel=rbind(novel_glas,novel_shef) novel=novel %>% filter(pos > 50) %>% filter(nsgRNA_HQ_count!=0) novel$start=50 summary = novel%>%mutate(total=(as.numeric(as.character(nsgRNA_HQ_count))+ as.numeric(as.character(nsgRNA_LQ_count))))%>%group_by(start,y,location,pos)%>%summarise(n=n(),m=sum(total)) full=ggplot(mapping=aes(x=start,y=y,xend=pos,yend=y))+ geom_curve(data=summary%>%filter(location=="SHEF"),lineend = "round",curvature=-0.3,color=grey,size=0.1)+ geom_curve(data=summary%>%filter(location=="GLAS"),lineend = "round",curvature=0.3,color=grey,size=0.1)+ scale_alpha_continuous("Number of Samples")+ ft_theme()+ scale_x_continuous("Position")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=200,ymin=-0.05,ymax=0.05,color="grey30",fill="grey30")+ annotate(geom="rect",xmin=200,xmax=13483,ymin=-0.05,ymax=0.05,color="grey50",fill="grey50")+ annotate(geom="text",x=7000,y=0,color="white",label="ORF1a",fontface='bold')+ annotate(geom="rect",xmin=13483,xmax=30000,ymin=-0.05,ymax=0.05,color="grey60",fill="grey60")+ annotate(geom="text",x=17000,y=0,color="white",label="ORF1b",fontface='bold')+ annotate(geom="rect",xmin=21532,xmax=30000,ymin=-0.05,ymax=0.05,color=navy,fill=navy)+ annotate(geom="text",x=23500,y=0,color="white",label="S",fontface='bold')+ annotate(geom="rect",xmin=25361,xmax=30000,ymin=-0.05,ymax=0.05,color=coral,fill=coral)+ annotate(geom="text",x=25750,y=0,color="white",label="3a",fontface='bold')+ annotate(geom="rect",xmin=26213,xmax=30000,ymin=-0.05,ymax=0.05,color=green,fill=green)+ annotate(geom="rect",xmin=26449,xmax=30000,ymin=-0.05,ymax=0.05,color=purple,fill=purple)+ annotate(geom="text",x=26700,y=0,color="white",label="M",fontface='bold')+ annotate(geom="rect",xmin=27017,xmax=30000,ymin=-0.05,ymax=0.05,color=yellow,fill=yellow)+ annotate(geom="rect",xmin=27364,xmax=30000,ymin=-0.05,ymax=0.05,color=orange,fill=orange)+ annotate(geom="rect",xmin=27864,xmax=30000,ymin=-0.05,ymax=0.05,color=grey,fill=grey)+ annotate(geom="rect",xmin=28235,xmax=30000,ymin=-0.05,ymax=0.05,color=aqua,fill=aqua)+ annotate(geom="text",x=28800,y=0,color="white",label="N",fontface='bold')+ annotate(geom="rect",xmin=29510,xmax=30000,ymin=-0.05,ymax=0.05,color="grey80",fill="grey80")+ annotate(geom="rect",xmin=0,xmax=30000,ymin=-0.05,ymax=0.05,fill="white",alpha=0.3)+ geom_point(alpha=0.8,data=summary%>%filter(location=="SHEF")%>%arrange(n),aes(x=pos,y=0.05,size=m,color=n))+ geom_point(alpha=0.8,data=summary%>%filter(location=="GLAS"),aes(x=pos,y=-0.05,size=m,color=n))+ theme( axis.text.y=element_blank(), axis.title.y=element_blank(), axis.line.y = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "right", legend.justification = "center", legend.direction = "vertical", legend.key.size = unit(0.5, "cm"), legend.key.height = unit(0.3, "cm"), )+ scale_color_gradient("Number of\nSamples",low=navy,high=coral)+ annotate(geom="text",label="Sheffield ONT ARTIC (n>5)",fontface='bold',x=1000,y=0.5,color="#4D4845", hjust = 0)+ annotate(geom="text",label="Glasgow ONT ARTIC (n>3)",fontface='bold',x=1000,y=-0.5,color="#4D4845", hjust = 0)+ scale_size_continuous("Number of\nReads") #============================== # ORF3b #============================== orf3b = full + coord_cartesian(xlim=c(25790,25840),ylim=c(-0.2,0.2))+ theme( legend.position="none", axis.title.x = element_blank(), panel.background = element_rect(colour="#4D4845"), plot.title = element_text(size=10,colour="#4D4845") )+ annotate(geom="rect",xmin=25814,xmax=Inf,ymin=-0.035,ymax=0.035,alpha=0.7,color=grey,fill=grey)+ annotate(geom = "text",label="ORF3b?",x=25817,y=0,hjust=0,color="white",fontface=2)+ annotate(geom = "text",label="A",x=25786,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25787,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25788,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25789,y=0.17,hjust=0)+ annotate(geom = "text",label="G",x=25790,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25791,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25792,y=0.17,hjust=0)+ annotate(geom = "text",label="G",x=25793,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25794,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25795,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25796,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25797,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25798,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25799,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25800,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25801,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25802,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25803,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25804,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25805,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25806,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25807,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25808,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25809,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25810,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25811,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25812,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25813,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25814,y=0.17,hjust=0,color=green)+ annotate(geom = "text",label="T",x=25815,y=0.17,hjust=0,color=green)+ annotate(geom = "text",label="G",x=25816,y=0.17,hjust=0,color=green)+ annotate(geom = "text",label="M",x=25815,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="M",x=25818,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="P",x=25821,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="T",x=25824,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="I",x=25827,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="F",x=25830,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="F",x=25833,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="A",x=25836,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="G",x=25839,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="I",x=25842,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="L",x=25845,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="I",x=25848,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="A",x=25817,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25818,y=0.17,hjust=0)+ annotate(geom = "text",label="G",x=25819,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25820,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25821,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25822,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25823,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25824,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25825,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25826,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25827,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25828,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25829,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25830,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25831,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25832,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25833,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25834,y=0.17,hjust=0)+ annotate(geom = "text",label="G",x=25835,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25836,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25837,y=0.17,hjust=0)+ annotate(geom = "text",label="G",x=25838,y=0.17,hjust=0)+ annotate(geom = "text",label="G",x=25839,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25840,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25841,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25842,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25843,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25844,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25845,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25846,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25847,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25848,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25849,y=0.17,hjust=0)+ annotate(geom = "text",label="G",x=25850,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25851,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=25852,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=25853,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=25854,y=0.17,hjust=0) orf3b_hist = ggplot(novel_shef %>% filter(nsgRNA_HQ_count!=0),aes(pos))+ geom_bar(color=NA,fill=coral)+ ft_theme()+ scale_y_continuous("Sample\nCount")+ theme( panel.background=element_rect(fill="grey97",colour="#4D4845"), axis.title.x = element_blank() )+ coord_cartesian(xlim=c(25790,25840),ylim=c(0,35)) #============================== # ORF7b #============================== zoom_more = full + coord_cartesian(xlim=c(27740,27780),ylim=c(-0.2,0.2))+ theme( legend.position="none", axis.title.x = element_blank(), panel.background = element_rect(colour="#4D4845"), plot.title = element_text(size=10,colour="#4D4845") )+ annotate(geom = "text",label="F",x=27734,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="T",x=27737,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="L",x=27740,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="K",x=27743,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="R",x=27746,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="K",x=27749,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="T",x=27752,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="E",x=27755,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="*",x=27758,y=0.1,hjust=0,fontface=2)+ annotate(geom = "text",label="M",x=27757,y=-0.17,hjust=0,fontface=2)+ annotate(geom = "text",label="I",x=27760,y=-0.17,hjust=0,fontface=2)+ annotate(geom = "text",label="E",x=27763,y=-0.17,hjust=0,fontface=2)+ annotate(geom = "text",label="L",x=27766,y=-0.17,hjust=0,fontface=2)+ annotate(geom = "text",label="S",x=27769,y=-0.17,hjust=0,fontface=2)+ annotate(geom = "text",label="I",x=27772,y=-0.17,hjust=0,fontface=2)+ annotate(geom = "text",label="D",x=27775,y=-0.17,hjust=0,fontface=2)+ annotate(geom = "text",label="F",x=27778,y=-0.17,hjust=0,fontface=2)+ annotate(geom = "text",label="Y",x=27781,y=-0.17,hjust=0,fontface=2)+ annotate(geom = "text",label="D",x=27775,y=-0.17,hjust=0,fontface=2)+ annotate(geom = "text",label="A",x=27738,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=27739,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=27740,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=27741,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=27742,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=27743,y=0.17,hjust=0,color=navy,fontface =2)+ annotate(geom = "text",label="A",x=27744,y=0.17,hjust=0,color=navy,fontface =2)+ annotate(geom = "text",label="A",x=27745,y=0.17,hjust=0,color=navy,fontface =2)+ annotate(geom = "text",label="G",x=27746,y=0.17,hjust=0,color=navy,fontface =2)+ annotate(geom = "text",label="A",x=27747,y=0.17,hjust=0,color=navy,fontface =2)+ annotate(geom = "text",label="A",x=27748,y=0.17,hjust=0,color=navy,fontface =2)+ annotate(geom = "text",label="A",x=27749,y=0.17,hjust=0,color=navy,fontface =2)+ annotate(geom = "text",label="G",x=27750,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=27751,y=0.17,hjust=0)+ annotate(geom = "text",label="C",x=27752,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=27753,y=0.17,hjust=0)+ annotate(geom = "text",label="G",x=27754,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=27755,y=0.17,hjust=0)+ annotate(geom = "text",label="A",x=27756,y=0.17,hjust=0)+ annotate(geom = "text",label="T",x=27757,y=0.17,hjust=0,color=coral)+ annotate(geom = "text",label="G",x=27758,y=0.17,hjust=0,color=coral)+ annotate(geom = "text",label="A",x=27759,y=0.17,hjust=0,color=coral)+ annotate(geom = "text",label="7a",x=27760,y=0.125,hjust=0, fontface =2)+ annotate(geom = "text",label="7b",x=27754,y=-0.125,hjust=0, fontface =2)+ annotate(geom = "text",label="A",x=27756,y=-0.1,hjust=0,color=green)+ annotate(geom = "text",label="T",x=27757,y=-0.1,hjust=0,color=green)+ annotate(geom = "text",label="G",x=27758,y=-0.1,hjust=0,color=green)+ annotate(geom = "text",label="A",x=27759,y=-0.1,hjust=0)+ annotate(geom = "text",label="T",x=27760,y=-0.1,hjust=0)+ annotate(geom = "text",label="T",x=27761,y=-0.1,hjust=0)+ annotate(geom = "text",label="G",x=27762,y=-0.1,hjust=0)+ annotate(geom = "text",label="A",x=27763,y=-0.1,hjust=0)+ annotate(geom = "text",label="A",x=27764,y=-0.1,hjust=0)+ annotate(geom = "text",label="C",x=27765,y=-0.1,hjust=0)+ annotate(geom = "text",label="T",x=27766,y=-0.1,hjust=0)+ annotate(geom = "text",label="T",x=27767,y=-0.1,hjust=0)+ annotate(geom = "text",label="T",x=27768,y=-0.1,hjust=0)+ annotate(geom = "text",label="C",x=27769,y=-0.1,hjust=0)+ annotate(geom = "text",label="A",x=27770,y=-0.1,hjust=0)+ annotate(geom = "text",label="T",x=27771,y=-0.1,hjust=0)+ annotate(geom = "text",label="T",x=27772,y=-0.1,hjust=0)+ annotate(geom = "text",label="A",x=27773,y=-0.1,hjust=0)+ annotate(geom = "text",label="A",x=27774,y=-0.1,hjust=0)+ annotate(geom = "text",label="T",x=27775,y=-0.1,hjust=0)+ annotate(geom = "text",label="T",x=27776,y=-0.1,hjust=0)+ annotate(geom = "text",label="G",x=27777,y=-0.1,hjust=0)+ annotate(geom = "text",label="A",x=27778,y=-0.1,hjust=0)+ annotate(geom = "text",label="C",x=27779,y=-0.1,hjust=0)+ annotate(geom = "text",label="T",x=27780,y=-0.1,hjust=0)+ annotate(geom = "text",label="T",x=27781,y=-0.1,hjust=0)+ annotate(geom = "text",label="C",x=27782,y=-0.1,hjust=0)+ annotate(geom = "text",label="T",x=27783,y=-0.1,hjust=0) orf7b_hist = ggplot(novel_shef %>% filter(nsgRNA_HQ_count!=0),aes(pos))+ geom_bar(fill=orange,color=NA)+ ft_theme()+ scale_y_continuous("Sample\nCount")+ theme( panel.background=element_rect(fill="grey97",colour="#4D4845"), axis.title.x = element_blank() )+ coord_cartesian(xlim=c(27740,27780),ylim=c(0,450)) reads=readPNG("images/sup_figure_non-canonical_query_orf7b.png") read_plot=ggplot()+ background_image(reads)+ scale_x_continuous(limits=c(27738.5,27781))+ scale_y_continuous(limits=c(-0.1,5))+ ft_theme()+ annotate(geom="rect",fill=navy,alpha=0.4,xmin = -Inf, xmax = 27758,ymin=-Inf,ymax=Inf)+ annotate(geom="rect",fill=coral,alpha=0.4,xmin =27758, xmax = 27762,ymin=-Inf,ymax=Inf)+ annotate(geom = "text",label="Leader",color="white",x=27745,y=3,hjust=0,size=7)+ annotate(geom = "text",label="?",color="white",x=27759,y=3,hjust=0,size=7)+ annotate(geom="rect",fill="white",color='#4D4845',xmin = -Inf, xmax = Inf,ymin=-Inf,ymax=0.5)+ annotate(geom = "text",label="A",x=27738,y=0.1,hjust=0)+ annotate(geom = "text",label="C",x=27739,y=0.1,hjust=0)+ annotate(geom = "text",label="T",x=27740,y=0.1,hjust=0)+ annotate(geom = "text",label="C",x=27741,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27742,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27743,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27744,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27745,y=0.1,hjust=0)+ annotate(geom = "text",label="G",x=27746,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27747,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27748,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27749,y=0.1,hjust=0)+ annotate(geom = "text",label="G",x=27750,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27751,y=0.1,hjust=0)+ annotate(geom = "text",label="C",x=27752,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27753,y=0.1,hjust=0)+ annotate(geom = "text",label="G",x=27754,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27755,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27756,y=0.1,hjust=0,color=green)+ annotate(geom = "text",label="T",x=27757,y=0.1,hjust=0,color=green)+ annotate(geom = "text",label="G",x=27758,y=0.1,hjust=0,color=green)+ annotate(geom = "text",label="A",x=27759,y=0.1,hjust=0)+ annotate(geom = "text",label="T",x=27760,y=0.1,hjust=0)+ annotate(geom = "text",label="T",x=27761,y=0.1,hjust=0)+ annotate(geom = "text",label="G",x=27762,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27763,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27764,y=0.1,hjust=0)+ annotate(geom = "text",label="C",x=27765,y=0.1,hjust=0)+ annotate(geom = "text",label="T",x=27766,y=0.1,hjust=0)+ annotate(geom = "text",label="T",x=27767,y=0.1,hjust=0)+ annotate(geom = "text",label="T",x=27768,y=0.1,hjust=0)+ annotate(geom = "text",label="C",x=27769,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27770,y=0.1,hjust=0)+ annotate(geom = "text",label="T",x=27771,y=0.1,hjust=0)+ annotate(geom = "text",label="T",x=27772,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27773,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27774,y=0.1,hjust=0)+ annotate(geom = "text",label="T",x=27775,y=0.1,hjust=0)+ annotate(geom = "text",label="T",x=27776,y=0.1,hjust=0)+ annotate(geom = "text",label="G",x=27777,y=0.1,hjust=0)+ annotate(geom = "text",label="A",x=27778,y=0.1,hjust=0)+ annotate(geom = "text",label="C",x=27779,y=0.1,hjust=0)+ annotate(geom = "text",label="T",x=27780,y=0.1,hjust=0)+ annotate(geom = "text",label="C",x=27782,y=-0.1,hjust=0)+ annotate(geom = "text",label="T",x=27783,y=-0.1,hjust=0)+ theme( panel.background = element_rect(colour="#4D4845"), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.y = element_blank() ) ggarrange(orf3b,orf3b_hist,zoom_more,orf7b_hist,read_plot,nrow=5,align="hv",labels=c('A','B','C','D','E'),heights=c(1,0.7,1,0.7,0.8)) ``` #### Supplementary Figure S13 - Normalized ECT and Consensus coverage are not Correlated with the Raw amount of Sub-Genomic RNA Detected ```{r supp_figure_s13_ect_coverage , fig.width=6, fig.height=6, echo=F,warnings=F,message=F} #============================== # Normalized ECT #============================== ct = read.csv("Supplemental_File_S9.csv",comment.char='#') sg = read.csv("Supplemental_File_S1.csv",comment.char='#') all = merge(sg,ct) all=all%>%filter(orf!="ORF1a")%>%group_by(sample,E_CT_normalised)%>%summarise(sum=sum(sgRNA_HQ_count+sgRNA_LQ_count))%>%select(sample,sum,E_CT_normalised) test=cor.test(all$sum, all$E_CT_normalised, method="spearman") a = ggplot(all,aes(sum,E_CT_normalised))+ geom_point(color=navy,alpha=0.5)+ geom_smooth(method="lm",color=coral)+ annotate(geom="text",label=paste("Spearman's Rank Correlation\nrho=",round(test$estimate,3),", p=",round(test$p.value,4),sep=""),x=4000,y=1.5,color="#4D4845")+ ft_theme()+ scale_x_continuous("Total HQ Raw sgRNA Count")+ scale_y_reverse("ECT Normalized to RNASEP",limits=c(2,0))+ theme(panel.background=element_rect(fill="grey97",color="white")) #============================== # Genome Coverage #============================== coverage = read.table("Supplemental_File_S17.csv",sep=",",header=T,fill=T) sg_cov=merge(sg,coverage) s=sg_cov%>%filter(orf!="ORF1a")%>%group_by(sample,coverage)%>%summarise(sum=sum(sgRNA_HQ_count))%>%select(sample,sum,coverage) test = cor.test(s$coverage,s$sum,method="spearman") b = ggplot(s,aes(coverage,sum))+ geom_point(color=navy,alpha=0.5)+ geom_smooth(method="lm",color=coral)+ annotate(geom="text",label=paste("Spearman's Rank Correlation\nrho=",round(test$estimate,3),", p=",round(test$p.value,4),sep=""),x=0.96,y=5000,color="#4D4845")+ ft_theme()+ scale_x_continuous("Total HQ Raw sgRNA Count")+ scale_y_continuous("Coverage")+ theme(panel.background=element_rect(fill="grey97",color="white")) ggarrange(a,b,nrow=2,labels=c("A","B")) ```