# ---- PCA on log-transformed measurements (PCA_data_Dareen.csv) ----
# Read the raw table; columns 2-11 are the PCA input used below.
data <- read.csv("PCA_data_Dareen.csv", header = TRUE)
head(data)

# Log-transform the PCA input columns.
logpca1 <- log(data[, 2:11])
head(logpca1)

# The default (data-frame/matrix) method of prcomp() has no na.rm or
# na.action argument, so incomplete rows are dropped with na.omit() before
# fitting. Variables are centred and scaled to unit variance.
out <- prcomp(na.omit(logpca1), center = TRUE, scale. = TRUE)
class(out)
out

# A "prcomp" object cannot be coerced to a data frame, so
# write.table(out, ...) fails; export a matrix component instead.
# NOTE(review): the per-sample scores (out$x) are assumed to be the wanted
# export - use out$rotation if the loadings were intended.
write.table(out$x, "test_out.txt", sep = "\t")
# ---- Plotting setup: ggplot2 + ggfortify ----
# Install each plotting package only when it is missing. The original tried
# library(ggfortify) before the package had been installed and reinstalled
# both packages unconditionally.
for (pkg in c("ggplot2", "ggfortify")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(ggplot2)
library(ggfortify)

# Plot the centred, scaled PCA of the complete rows of `logpca1`
# (`logpca1` is created earlier in the script).
autoplot(prcomp(na.omit(logpca1), center = TRUE, scale. = TRUE))
# ---- PCA coloured by group (Dareen_full_with_groups.txt) ----
library(ggfortify)

# read.table() cannot parse an .xlsx workbook, so only the tab-delimited
# text file is read.
data <- read.table("Dareen_full_with_groups.txt", header = TRUE, sep = "\t")

# Columns 4-14 are the PCA input; log-transform them.
logpca1 <- log(data[, 4:14])
logpca1

# Reference example on the built-in iris data: plain PCA plot, then the same
# plot coloured by a column of the original data frame.
df <- iris[c(1, 2, 3, 4)]
autoplot(prcomp(df))
autoplot(prcomp(df), data = iris, colour = 'Species')

# Same two plots for the log-transformed data. prcomp() stops on missing
# values, so only complete rows are used, and `data` is subset with the same
# mask so that its Group column lines up row-for-row with the PCA scores.
complete_rows <- complete.cases(logpca1)
pca_fit <- prcomp(logpca1[complete_rows, ])
autoplot(pca_fit, data = data[complete_rows, ], colour = "Group")
autoplot(pca_fit)
# ---- Scaled PCA of the grouped data set, coloured by Group ----
dataD <- read.table("Dareen_full_with_groups.txt", header = TRUE, sep = "\t")

# Log-transform the PCA input columns (4-14) of the table just read; the
# original took them from the older `data` object instead of `dataD`.
logpca1 <- log(dataD[, 4:14])

# Fit on complete rows only and subset `dataD` with the same mask, so the
# data passed to autoplot() has exactly one row per PCA score.
complete_rows <- complete.cases(logpca1)
pca_scaled <- prcomp(logpca1[complete_rows, ], center = TRUE, scale. = TRUE)
autoplot(pca_scaled)
autoplot(pca_scaled, data = dataD[complete_rows, ], colour = "Group")

# Sanity check of the plotting call against the built-in iris data.
df <- iris[c(1, 2, 3, 4)]
autoplot(prcomp(df))
iris
# Base R has no len(); length() of a data frame is its number of columns.
length(df)
df
# ---- Unscaled PCA of the grouped data set with explicit NA handling ----
dataD <- read.table("Dareen_full_with_groups.txt", header = TRUE, sep = "\t")
logpca1 <- log(dataD[, 4:14])

# Missing values have to be removed from the INPUT of prcomp(): wrapping the
# fitted object in na.omit(), or passing na.rm / na.omit arguments to
# prcomp() or autoplot(), does not drop incomplete rows.
complete_rows <- complete.cases(logpca1)
pca_fit <- prcomp(logpca1[complete_rows, ])
pca_fit

autoplot(pca_fit)
# `dataD` is subset with the same mask so its rows match the PCA scores.
autoplot(pca_fit, data = dataD[complete_rows, ], colour = "Group")
# ---- PCA of the cleaned data set (Dareen_cleaned_with_group.txt) ----
dataD <- read.table("Dareen_cleaned_with_group.txt", header = TRUE, sep = '\t')

# Log-transform the PCA input columns (4-14), inspect them and save a copy.
# (The original first attempt, log(dataD, [,4:14]), was a syntax error.)
logpca1 <- log(dataD[, 4:14])
logpca1
write.table(logpca1, "logpca1.txt", sep = '\t')

# Scaled PCA of the log-transformed values, coloured by Group. The fit uses
# complete rows only and `dataD` is subset identically so rows stay aligned.
log_complete <- complete.cases(logpca1)
log_fit <- prcomp(logpca1[log_complete, ], center = TRUE, scale. = TRUE)
autoplot(log_fit, data = dataD[log_complete, ], colour = "Group")

# Same plot on the untransformed columns for comparison.
df_for_pca <- dataD[, 4:14]
raw_complete <- complete.cases(df_for_pca)
autoplot(
  prcomp(df_for_pca[raw_complete, ], center = TRUE, scale. = TRUE),
  data = dataD[raw_complete, ],
  colour = "Group"
)

# Uncoloured version of the log-transformed PCA.
autoplot(log_fit)
# ---- PCA input table with labels (Dareen_full_with_groups.txt) ----
library(ggfortify)

# Both files are read with an explicit tab separator, as everywhere else in
# this script; the original first tried the default whitespace separator.
data <- read.table("Dareen_cleaned_with_group.txt", header = TRUE, sep = "\t")
df1 <- read.table("Dareen_full_with_groups.txt", header = TRUE, sep = "\t")

# Log-transform the PCA input columns (4-14).
logpca1 <- log(df1[, 4:14])
head(logpca1)

# Complete rows only: plot their PCA, print them, and save them to disk.
test_size <- na.omit(logpca1)
autoplot(prcomp(test_size))
write.table(test_size)
write.table(test_size, "table_test.txt")

# Attach column 15 of `df1` to the log-transformed values. `+` on data frames
# is element-wise arithmetic, not column binding, so cbind() is used.
# NOTE(review): assumes the intent was to append column 15 as a label column
# - confirm which column holds the labels.
logpca_test <- log(df1[, 4:14])
logpca_labels <- cbind(logpca_test, df1[15])
# ---- Unscaled PCA on the untransformed columns ----
# `pca` must be read before it is inspected or subset; the original called
# head(pca) and pca[c(2:12)] before the object existed.
pca <- read.table("Dareen_full_with_groups.txt", header = TRUE, sep = "\t")
head(pca)

# NOTE(review): columns 4-14 are used, matching the rest of the script; the
# earlier 2:12 selection is assumed to have been a mistake.
df <- pca[c(4:14)]

# Fit on complete rows, print the fit, then plot it.
pca_raw_fit <- prcomp(na.omit(df))
pca_raw_fit
autoplot(pca_raw_fit)
# ---- Install and load SGSeq from Bioconductor ----
# The biocLite.R script / BiocInstaller package have been retired by
# Bioconductor in favour of BiocManager, which this script also uses further
# down, so the repeated source("https://bioconductor.org/biocLite.R") and
# biocLite(...) calls are replaced by a single BiocManager-based install.
version

# Remove a left-over BiocInstaller only if it is actually installed.
if ("BiocInstaller" %in% rownames(installed.packages())) {
  remove.packages("BiocInstaller")
}

if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("SGSeq", quietly = TRUE)) {
  BiocManager::install("SGSeq")
}

library("SGSeq")
browseVignettes("SGSeq")

# NOTE(review): the original called analyzeFeatures() with no arguments and
# before SGSeq was attached, which cannot succeed; call it with real inputs
# once they are available (see ?analyzeFeatures).
# ---- Install leafcutter (GitHub) and ggplot2 (CRAN) ----
# requireNamespace() only checks availability; devtools is called through
# its namespace below, so it does not need to be attached.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools", repos = 'http://cran.us.r-project.org')
}
devtools::install_github("davidaknowles/leafcutter/leafcutter")

# Install ggplot2 (with its dependencies) once, and only when it is missing,
# instead of reinstalling it on every run.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages(
    "ggplot2",
    repos = "https://cran.rstudio.com",
    dependencies = TRUE
  )
}
library(ggplot2)
# ---- Install SGSeq through BiocManager ----
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Diagnostics: R version, library search paths, and where each installed
# package lives.
version
.libPaths()
installed.packages()[, c("Package", "LibPath")]

# Install / update the core Bioconductor packages into the default library.
# NOTE(review): the original also tried lib = "C:\..." paths; single
# backslashes in an R string are escape sequences and those lines do not
# parse. If a custom library is needed, write the path with forward slashes
# or doubled backslashes.
BiocManager::install()

# Check that these packages load (the original first attempt misspelled
# "foreign").
library(foreign)
library(lattice)
library(MASS)
library(Matrix)
library(mgcv)
library(survival)

BiocManager::install("SGSeq", version = "3.8")
browseVignettes("SGSeq")
library(SGSeq)
# ---- Figure 1: MAPS per position with 95% confidence intervals ----
# NOTE(review): machine-specific working directory; every relative path
# below is resolved against it.
setwd("D:/GenomeResearch_update/CodeBase/CodeForFigures/Figure1_MAPS_Deficit/plotting")
library(ggplot2)

# Add interval bounds (estimate +/- 1.96 standard errors) to the MAPS table
# and save the extended table.
data <- read.table("MAPS_FS10_combined_to_CI.txt", header = TRUE)
ymin <- data$ps_adjusted - 1.96 * data$standard_error
ymax <- data$ps_adjusted + 1.96 * data$standard_error
data_new <- cbind(data, ymin, ymax)
write.table(data_new, "MAPS_FS10_combined_with_CI.txt", sep = '\t')

# Colour palette for the `Set` groups.
cbPalette4 <- c(
  "#D55E00", "#0072B2", "#009E73", "#56B4E9",
  "#CC79A7", "#E69F00", "#F0E442", "#999999"
)

data <- read.table("MAPS_FS10_combined_with_CI.txt", header = TRUE)

# Order of the categories along the x axis; "" entries are used as spacers
# between the groups of positions.
labelsCQ <- c(
  "Nonsense", "Missense", "Synonymous", "",
  "A-25", "A-24", "A-23", "A-22", "A-21", "A-20", "A-19", "A-18", "A-17",
  "A-16", "A-15", "A-14", "A-13", "A-12", "A-11", "A-10", "A-9", "A-8",
  "A-7", "A-6", "A-5", "A-4", "A-3", "A-2", "A-1", "A",
  "A+1", "A+2", "A+3", "A+4", "A+5", "A+6", "A+7", "A+8", "A+9", "A+10", "",
  "D-10", "D-9", "D-8", "D-7", "D-6", "D-5", "D-4", "D-3", "D-2", "D-1", "D",
  "D+1", "D+2", "D+3", "D+4", "D+5", "D+6", "D+7", "D+8", "D+9", "D+10", "",
  "don_A", "don_C", "don_G", "don_T", "",
  "PyPu", "Other"
)

# Error-bar limits refer to the ymin/ymax columns of the plotted data frame
# (bare column names instead of data$... inside aes()).
limits <- aes(ymin = ymin, ymax = ymax)

# NOTE(review): size = 0.02 sits inside aes(), so it is mapped as a constant
# aesthetic (with its own legend entry) rather than setting the point size;
# left unchanged to keep the appearance of the existing figure.
plot <-
  ggplot(data, aes(x = Order, y = ps_adjusted, colour = Set)) +
  geom_point(aes(size = 0.02)) +
  ylab("MAPS") +
  xlab("Position") +
  ylim(-0.1, 0.2) +
  geom_errorbar(limits, width = 0) +
  scale_colour_manual(values = cbPalette4) +
  scale_x_discrete(limits = labelsCQ) +
  theme(axis.text = element_text(size = 16), axis.title = element_text(size = 16)) +
  theme(legend.text = element_text(size = 16)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.9))

# Transparent, grid-free version of the plot. It is stored and passed to
# ggsave() explicitly, so the saved file does not depend on last_plot() and
# therefore on the plot having been auto-printed at the console.
maps_plot <- plot +
  theme(
    panel.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  ) +
  expand_limits(x = c(0, 71))
print(maps_plot)
ggsave(
  "MAPS_vars_full_CI_FS10.pdf",
  plot = maps_plot, bg = "transparent",
  width = 45, height = 15, units = "cm"
)
# ---- Proportion of parental variants with simulated 95% intervals ----
# The palette is defined before the plot that uses it; the original defined
# it only after the first plotting attempt and then repeated the whole
# analysis verbatim.
cbPalette <- c("#56B4E9","#009E73", "#0072B2",  "#CC79A7",  "#D55E00", "#E69F00",  "#F0E442",  "#999999")

ratios <- read.table("Deficit_data_to_CI.txt", header = TRUE)

## Work out confidence intervals
numCQtypes <- nrow(ratios)
CQsingletonnum <- ratios$prop
CQsingletonlower <- rep(0, numCQtypes) # lower limit of 95% confidence interval
CQsingletonupper <- rep(0, numCQtypes) # upper limit of 95% confidence interval

# For each row, simulate 10000 binomial counts with that row's `total` and
# `prop`, and take the 2.5% and 97.5% quantiles as proportions of `total`.
# Both bounds come from the same set of draws, and seq_len() keeps the loop
# from running when the table has no rows.
# NOTE(review): no RNG seed is set, so the bounds vary slightly between runs.
for (i in seq_len(numCQtypes)) {
  draws <- rbinom(10000, ratios$total[i], ratios$prop[i])
  bounds <- quantile(draws, probs = c(0.025, 0.975))
  CQsingletonlower[i] <- bounds[[1]] / ratios$total[i]
  CQsingletonupper[i] <- bounds[[2]] / ratios$total[i]
}

new_ratios <- cbind(ratios, CQsingletonlower, CQsingletonupper)
write.table(new_ratios, "deficit_combined_with_CI.txt", sep = '\t')

## Plot
data <- read.table("deficit_combined_with_CI.txt", header = TRUE)

# Order of the categories along the x axis; "" entries are used as spacers
# between the groups of positions.
labelsCQ <- c(
  "Nonsense", "Missense", "Synonymous", "",
  "A-25", "A-24", "A-23", "A-22", "A-21", "A-20", "A-19", "A-18", "A-17",
  "A-16", "A-15", "A-14", "A-13", "A-12", "A-11", "A-10", "A-9", "A-8",
  "A-7", "A-6", "A-5", "A-4", "A-3", "A-2", "A-1", "A",
  "A+1", "A+2", "A+3", "A+4", "A+5", "A+6", "A+7", "A+8", "A+9", "A+10", "",
  "D-10", "D-9", "D-8", "D-7", "D-6", "D-5", "D-4", "D-3", "D-2", "D-1", "D",
  "D+1", "D+2", "D+3", "D+4", "D+5", "D+6", "D+7", "D+8", "D+9", "D+10", "",
  "don_A", "don_C", "don_G", "don_T", "",
  "PyPu", "Other"
)

# Error-bar limits refer to columns of the plotted data frame (bare column
# names instead of data$... inside aes()).
limits <- aes(ymin = CQsingletonlower, ymax = CQsingletonupper)

# NOTE(review): colour = "#56B4E9" and size = 0.02 sit inside aes(), so they
# are mapped as constant aesthetics (each with a legend entry); the manual
# colour scale then assigns the first palette entry. Left unchanged to keep
# the appearance of the existing figure.
plot <-
  ggplot(data, aes(x = Order, y = prop, colour = "#56B4E9")) +
  geom_point(aes(size = 0.02)) +
  ylab("Proportion of parental \nvariants in high pLI genes") +
  xlab("Position") +
  ylim(0, 0.4) +
  geom_errorbar(limits, width = 0) +
  scale_colour_manual(values = cbPalette) +
  scale_x_discrete(limits = labelsCQ) +
  theme(axis.text = element_text(size = 16), axis.title = element_text(size = 16)) +
  theme(legend.text = element_text(size = 16)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.9))

# Transparent, grid-free version of the plot. It is stored and passed to
# ggsave() explicitly, so the saved file does not depend on last_plot() and
# therefore on the plot having been auto-printed at the console.
deficit_plot <- plot +
  theme(
    panel.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  ) +
  expand_limits(x = c(0, 71))
print(deficit_plot)
ggsave(
  "proportion_parental_vars_full_CI_noNA_FS10.pdf",
  plot = deficit_plot, bg = "transparent",
  width = 45, height = 15, units = "cm"
)
