# Scratch probability arithmetic.
# Binomial term: 0 successes in 4 trials with success probability 0.25.
choose(4, 0) * 0.25^0 * 0.75^4
1 - (0.31 + 0.42)
0.05 * 9
0.45 / 2
0.225 * 2
# Poisson help lookup, then the mass at 2 and at 3 for rate 3.
?dpois()
dpois(x = 2, lambda = 3)
dpois(x = 3, lambda = 3)
# Two sums, each followed by its share of 999, then derived products/ratios.
201 + 193 + 6
400 / 999
40 + 34 + 1 + 5
80 / 999
0.4 * 0.08 * 999
6 / 31.968
1 - 0.187
# Load the swirl package and launch it.
library(swirl)
swirl()
# Install BiocManager from CRAN (the same command was entered twice), then use
# it to install the Biostrings and BSgenome packages.
install.packages("BiocManager")
install.packages("BiocManager")
BiocManager::install("Biostrings")
# Help lookups for available.genomes: `?` looks up the exact topic, `??` runs a
# search. These lookups come before library(BSgenome) is called below.
?available.genomes
??available.genomes
BiocManager::install("BSgenome")
?available.genomes
# Attach both packages, repeat the help lookup, then call available.genomes().
library(Biostrings)
library(BSgenome)
?available.genomes
available.genomes()
# Install the BSgenome.Scerevisiae.UCSC.sacCer3 data package through
# BiocManager. The whole step is guarded by interactive(), so it is skipped
# when the file is run non-interactively.
if (interactive()) {
  # requireNamespace() only reports whether BiocManager can be loaded; unlike
  # require() it does not attach the package, which is not needed because the
  # install call below is namespace-qualified.
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("BSgenome.Scerevisiae.UCSC.sacCer3")
}
# Attach the Scerevisiae genome package and fetch the genome object by name.
library(BSgenome.Scerevisiae.UCSC.sacCer3)
genome <- getBSgenome("BSgenome.Scerevisiae.UCSC.sacCer3")
# Inspect the object and list what the attached package provides.
str(genome)
ls(genome)
ls("package:BSgenome.Scerevisiae.UCSC.sacCer3")
library(BSgenome.Scerevisiae.UCSC.sacCer3)
Scerevisiae
?matchPattern
# NOTE(review): DpnII is not assigned anywhere above this line in this file,
# and the subject here is the whole genome object; every later matchPattern()
# call passes a single sequence such as Scerevisiae[[1]] instead.
matchPattern(DpnII, Scerevisiae)
# Access individual sequences by position and by name.
Scerevisiae[[1]]
Scerevisiae[[2]]
Scerevisiae$chrI
# seqnames() is first called with no argument, then with the genome object.
seqnames()
seqnames(Scerevisiae)
?append
# Find instances of the DpnII site (GATC) on the first genome sequence.
DpnII = 'GATC'
DpnII_sites = matchPattern(DpnII, Scerevisiae[[1]])
DpnII_sites
library(tidyverse)
library(tidyverse)
# Three attempts at turning the match object into a table.
# NOTE(review): row_bind is not defined in this file; presumably bind_rows was
# meant — confirm.
temp = as_tibble(DpnII_sites)
temp = row_bind(DpnII_sites)
temp = rbind(DpnII_sites)
temp
View(temp)
DpnII_sites
# Attempts to add reverse-complement matches. At this point DpnII is a plain
# character string, `i` has not been assigned in this file, and append() is
# given a single argument.
DpnII_sites = append(matchPattern(reverseComplement(DpnII), Scerevisiae[[i]]))
DpnII_sites = append(matchPattern(reverseComplement(DpnII), Scerevisiae[[1]]))
?reverseComplement
# Find instances of the DpnII site, this time with the pattern as a DNAString.
DpnII = DNAString('GATC')
DpnII_sites = matchPattern(DpnII, Scerevisiae[[1]])
# The next lines retry the append: first with one argument, then with two
# misspelled names (DpnOII_site, DpnII_site), then with DpnII_sites itself.
DpnII_sites = append(matchPattern(reverseComplement(DpnII), Scerevisiae[[1]]))
DpnII_sites = append(DpnOII_site, matchPattern(reverseComplement(DpnII), Scerevisiae[[1]]))
DpnII_sites = append(DpnII_site, matchPattern(reverseComplement(DpnII), Scerevisiae[[1]]))
DpnII_sites = append(DpnII_sites, matchPattern(reverseComplement(DpnII), Scerevisiae[[1]]))
DpnII_sites
# Recompute the forward matches and append the reverse-complement matches once.
# NOTE(review): GATC reads the same as its own reverse complement, so the
# appended matches are the same positions as the forward ones.
DpnII_sites = matchPattern(DpnII, Scerevisiae[[1]])
DpnII_sites
DpnII_sites = append(DpnII_sites, matchPattern(reverseComplement(DpnII), Scerevisiae[[1]]))
DpnII_sites
# Three attempts to write the DpnII matches on the first sequence to
# 'DpnII_sites.txt' as tab-separated text without quotes or row names.
# NOTE(review): `append = append` passes whatever object is named `append`; no
# variable of that name is assigned in this file, so this presumably resolves
# to the base function rather than a TRUE/FALSE flag — confirm.
write.table(matchPattern(DpnII, Scerevisiae[[1]]), file = 'DpnII_sites.txt', append = append,
quote=FALSE, sep="\t", row.names=FALSE, col.names=!append)
?write.table
# Same call without col.names; this version ends with a trailing comma.
write.table(matchPattern(DpnII, Scerevisiae[[1]]), file = 'DpnII_sites.txt', append = append,
quote=FALSE, sep="\t", row.names=FALSE,)
# Same call with the trailing comma removed.
write.table(matchPattern(DpnII, Scerevisiae[[1]]), file = 'DpnII_sites.txt', append = append,
quote=FALSE, sep="\t", row.names=FALSE)
# Work out how to turn the matches on the first sequence into a data frame with
# chromosome, start, end, strand and patternID columns.
sites = matchPattern(DpnII, Scerevisiae[[1]])
# NOTE(review): the next line starts with '/'; the line after it is the same
# text starting with '?', so this looks like a mistyped help request.
/seqnames()
?seqnames()
sites = matchPattern(DpnII, Scerevisiae[[1]])
# NOTE(review): `matches` is not assigned anywhere above in this file, and no
# `strand` variable has been assigned yet; later versions use `sites` and a
# literal '+' instead.
data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(matches),end=end(matches),
strand=rep.int(strand, length(matches)),patternID=names(matches),
check.names=FALSE)
# Same call with rep() instead of rep.int() for the chromosome column.
data.frame(chromosome=rep(seqnames(Scerevisiae)[1], length(sites)),
start=start(matches),end=end(matches),
strand=rep.int(strand, length(matches)),patternID=names(matches),
check.names=FALSE)
# `matches` replaced by `sites` throughout; `strand` is still a bare name.
data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int(strand, length(sites)), patternID=names(sites),
check.names=FALSE)
# Inspect the individual pieces used as columns.
seqnames(Scerevisiae)[1]
sites
length(sites)
start(sites)
rep.int(seqnames(Scerevisiae)[1], length(sites))
start(sites)
# Retry without check.names, then vary only the strand argument:
# bare `strand`, `strand='+'` as a named argument to rep.int, then a literal '+'.
data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int(strand, length(sites)), patternID=names(sites))
data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int(strand='+', length(sites)), patternID=names(sites))
data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=names(sites))
# Evaluated on its own, this assigns a top-level variable named `strand`.
strand=rep.int('+', length(sites))
sites = matchPattern(DpnII, Scerevisiae[[1]])
data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=names(sites))
# Evaluated on its own, this assigns a top-level variable named `patternID`.
patternID=names(sites)
sites
str(sites)
# patternID now repeats DpnII_sites (the match object built earlier) instead of
# using names(sites).
data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=rep.int(DpnII_sites, length(sites)))
reverseComplement(DpnII)
# Find instances of the DpnII site and store them in `all_sites`.
DpnII = DNAString('GATC')
sites = matchPattern(DpnII, Scerevisiae[[1]])
# NOTE(review): patternID repeats DpnII_sites (a match object) rather than the
# pattern DpnII; later versions repeat DpnII itself.
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=rep.int(DpnII_sites, length(sites)))
# Same column arguments passed to as_tibble() instead of data.frame().
all_sites = as_tibble(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=rep.int(DpnII_sites, length(sites)))
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=rep.int(DpnII_sites, length(sites)))
# Append a second table built from the same `sites` and the same '+' strand.
all_sites = bind_rows(all_sites, data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)),
patternID=rep.int(DpnII_sites, length(sites))))
# NOTE(review): the next line is missing the call parentheses; the corrected
# call follows it.
tail all_sites
tail(all_sites)
# Find instances of the DpnII site; build a '+' table and bind a '-' table to it.
DpnII = DNAString('GATC')
sites = matchPattern(DpnII, Scerevisiae[[1]])
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=rep.int(DpnII_sites, length(sites)))
# NOTE(review): the '-' rows below are built from a match of the forward
# pattern, not its reverse complement.
sites = matchPattern(DpnII, Scerevisiae[[1]])
all_sites = bind_rows(all_sites, data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('-', length(sites)),
patternID=rep.int(DpnII_sites, length(sites))))
head(all_sites)
tail(all_sites)
# patternID switched from DpnII_sites to the pattern DpnII.
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=rep.int(DpnII, length(sites)))
# NOTE(review): as parenthesised, length(sites) is an argument of
# reverseComplement() and rep.int() receives a single argument.
all_sites = bind_rows(all_sites, data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('-', length(sites)),
patternID=rep.int(reverseComplement(DpnII, length(sites)))))
sites = matchPattern(DpnII, Scerevisiae[[1]])
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=rep.int(DpnII, length(sites)))
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=rep.int(DpnII, length(sites)))
# First line misspells reverseComplement; the second is the corrected call.
sites = matchPattern(reverseComlement(DpnII), Scerevisiae[[1]])
sites = matchPattern(reverseComplement(DpnII), Scerevisiae[[1]])
# Same parenthesis placement as before: length(sites) sits inside
# reverseComplement().
all_sites = bind_rows(all_sites, data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('-', length(sites)),
patternID=rep.int(reverseComplement(DpnII, length(sites)))))
# NOTE(review): `sites` holds the reverse-complement matches here, yet the rows
# are labelled '+'.
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=rep.int(DpnII, length(sites)))
sites = matchPattern(reverseComplement(DpnII), Scerevisiae[[1]])
# Now `times=length(sites)` is passed, but still inside reverseComplement().
all_sites = bind_rows(all_sites, data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('-', length(sites)),
patternID=rep.int(reverseComplement(DpnII, times=length(sites)))))
sites = matchPattern(DpnII, Scerevisiae[[1]])
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=rep.int(DpnII, length(sites)))
sites = matchPattern(reverseComplement(DpnII), Scerevisiae[[1]])
# NOTE(review): the bind_rows() call below is never closed — the start/end and
# patternID lines and the closing parentheses are absent — so the following
# lines, including the lone '.', are read as part of the same expression.
all_sites = bind_rows(all_sites, data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
strand=rep.int('-', length(sites)),
head(all_sites)
.
head(all_sites)
# Find instances of the DpnII site with the pattern held as a DNAString.
DpnII = DNAString('GATC')
sites = matchPattern(DpnII, Scerevisiae[[1]])
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=rep.int(DpnII, length(sites)))
sites = matchPattern(reverseComplement(DpnII), Scerevisiae[[1]])
# NOTE(review): this assigns all_sites afresh instead of binding onto the '+'
# table built just above, so the '+' rows are no longer in all_sites.
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('-', length(sites)),
patternID=rep.int(reverseComplement(DpnII), length(sites)))
tail(all_sites)
head(all_sites)
# Three variants of the '+' table that differ only in rep.int() versus rep()
# for the strand and patternID columns.
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('+', length(sites)), patternID=rep.int(DpnII, length(sites)))
all_sites
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep('+', length(sites)), patternID=rep.int(DpnII, length(sites)))
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep('+', length(sites)), patternID=rep(DpnII, length(sites)))
head(all_sites)
?reverse
# Find instances of the DpnII site with the pattern held as a plain character
# string, so that patternID is an ordinary character column.
DpnII = 'GATC'
sites = matchPattern(DpnII, Scerevisiae[[1]])
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep('+', length(sites)), patternID=rep(DpnII, length(sites)))
# NOTE(review): head(all()) looks like a truncated head(all_sites); the full
# call follows.
head(all())
head(all_sites)
# First line misspells DNAString as DNAstring; the second is the corrected call.
sites = matchPattern(reverseComplement(DNAstring(DpnII)), Scerevisiae[[1]])
sites = matchPattern(reverseComplement(DNAString(DpnII)), Scerevisiae[[1]])
# NOTE(review): all_sites is assigned afresh again here (the '+' rows are not
# kept), and patternID uses reverse() on the character string DpnII — confirm
# which reverse() this resolves to.
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep.int('-', length(sites)),
patternID=rep(reverse(DpnII), length(sites)))
tail(all_sites)
head(all_sites)
sites
?matchPattern
# Print the forward matches and the reverse-complement matches for comparison.
# NOTE(review): GATC reads the same as its own reverse complement.
sites = matchPattern(DpnII, Scerevisiae[[1]])
sites
sites = matchPattern(reverseComplement(DNAString(DpnII)), Scerevisiae[[1]])
sites
reverseComplement(DNAString(DpnII))
# First attempt at collecting DpnII and NlaIII sites across all sequences.
NlaIII = 'CATG'
# Seed table: DpnII matches on the first sequence, with a strand column.
sites = matchPattern(DpnII, Scerevisiae[[1]])
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep('+', length(sites)), patternID=rep(DpnII, length(sites)))
# NOTE(review): the loop variable i is never used in either loop body — every
# iteration indexes Scerevisiae[[1]] and seqnames(Scerevisiae)[1] — and each
# iteration assigns all_sites afresh rather than adding to it.
for(i in 2:length(seqnames(Scerevisiae))) {
sites = matchPattern(DpnII, Scerevisiae[[1]])
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
patternID=rep(DpnII, length(sites)))
}
for(i in 1:length(seqnames(Scerevisiae))) {
sites = matchPattern(NlaIII, Scerevisiae[[1]])
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
patternID=rep(NlaIII, length(sites)))
}
tail(all_sites)
# Find instances of the DpnII and NlaIII sites on every sequence (second
# attempt: the loop bodies now index by i).
DpnII = 'GATC'
NlaIII = 'CATG'
sites = matchPattern(DpnII, Scerevisiae[[1]])
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep('+', length(sites)), patternID=rep(DpnII, length(sites)))
# NOTE(review): each iteration still assigns all_sites afresh, so after both
# loops it holds only the table from the last NlaIII iteration.
for(i in 2:length(seqnames(Scerevisiae))) {
sites = matchPattern(DpnII, Scerevisiae[[i]])
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[i], length(sites)),
start=start(sites),end=end(sites),
patternID=rep(DpnII, length(sites)))
}
for(i in 1:length(seqnames(Scerevisiae))) {
sites = matchPattern(NlaIII, Scerevisiae[[i]])
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[i], length(sites)),
start=start(sites),end=end(sites),
patternID=rep(NlaIII, length(sites)))
}
tail(all_sites)
# Third attempt: each iteration now rbind()s its table onto all_sites.
# NOTE(review): the seed table below has five columns (it includes strand)
# while the tables bound inside the loops have four; the next version of this
# code drops strand from the seed table.
sites = matchPattern(DpnII, Scerevisiae[[1]])
all_sites = data.frame(chromosome=rep.int(seqnames(Scerevisiae)[1], length(sites)),
start=start(sites),end=end(sites),
strand=rep('+', length(sites)), patternID=rep(DpnII, length(sites)))
# DpnII matches on sequences 2..n (sequence 1 is the seed table above).
for(i in 2:length(seqnames(Scerevisiae))) {
sites = matchPattern(DpnII, Scerevisiae[[i]])
all_sites = rbind(all_sites, data.frame(chromosome=rep.int(seqnames(Scerevisiae)[i], length(sites)),
start=start(sites),end=end(sites),
patternID=rep(DpnII, length(sites))))
}
# NlaIII matches on sequences 1..n.
for(i in 1:length(seqnames(Scerevisiae))) {
sites = matchPattern(NlaIII, Scerevisiae[[i]])
all_sites = rbind(all_sites, data.frame(chromosome=rep.int(seqnames(Scerevisiae)[i], length(sites)),
start=start(sites),end=end(sites),
patternID=rep(NlaIII, length(sites))))
}
# Collect every DpnII and NlaIII match on every sequence of Scerevisiae into
# one data frame, `all_sites`, with columns chromosome, start, end and
# patternID. Rows are ordered as before: all DpnII matches sequence by
# sequence, then all NlaIII matches sequence by sequence.
# Relies on `DpnII` already holding its recognition site as a character string.
NlaIII <- "CATG"
all_sites <- local({
  chrom_names <- seqnames(Scerevisiae)

  # Matches of one pattern on sequence number `i`, as a four-column data frame.
  # Every column is built with an explicit length so that a sequence with no
  # matches yields a zero-row table instead of an error.
  site_table <- function(pattern, i) {
    hits <- matchPattern(pattern, Scerevisiae[[i]])
    data.frame(
      chromosome = rep.int(chrom_names[i], length(hits)),
      start = start(hits),
      end = end(hits),
      patternID = rep(pattern, length(hits))
    )
  }

  # seq_along() stays empty for a genome with no sequences, where 1:length()
  # would count down through index 0; the first sequence needs no special case.
  tables <- lapply(list(DpnII, NlaIII), function(pattern) {
    lapply(seq_along(chrom_names), function(i) site_table(pattern, i))
  })

  # Bind once at the end rather than growing the data frame on each iteration.
  do.call(rbind, unlist(tables, recursive = FALSE))
})
# Find sites matching the hermes motif (T, any four bases, A) on the first
# sequence of the genome.
# The motif is stored only under its own name, so the NlaIII recognition site
# is not overwritten with it.
hermes <- "TNNNNA"
# matchPattern() defaults to fixed = TRUE, under which an N in the pattern only
# matches a literal N in the subject. fixed = "subject" keeps the subject
# literal while letting each pattern N match any base.
sites <- matchPattern(hermes, Scerevisiae[[1]], fixed = "subject")
sites
head(all_sites)
# Find distance between sites: group the site table by recognition pattern and
# chromosome. Both columns go into one group_by() call, because a second
# chained group_by() replaces the earlier grouping instead of adding to it.
all_sites <- all_sites %>% group_by(patternID, chromosome)
all_sites
str(all_sites)
# Find distance between sites: keep only the GATC rows, grouped by chromosome.
all_sites_distDpnII = all_sites %>% filter(patternID == 'GATC') %>% group_by(chromosome)
all_sites_distDpnII
# Experiments with lag() on small vectors before applying it to the table.
?lag
temp = 1:5
temp - lag(temp)
# NOTE(review): `lah` is not assigned in this file (likely a typo), and the two
# retries pass the function `lag` itself to first() rather than a vector.
temp - lag(temp, default = first(lah))
temp - lag(temp, default = first(lag))
?first
temp
temp - lag(temp, default = first(lag))
temp - lag(temp)
# Same difference on a vector with uneven spacing.
temp = c(2,4,5)
temp - lag(temp)
head(all_sites_distDpnII)
# Distance between neighbouring GATC sites: within each chromosome, subtract
# the previous row's end from the current row's start.
all_sites_distDpnII <- all_sites %>%
  filter(patternID == "GATC") %>%
  group_by(chromosome) %>%
  mutate(distance = start - lag(end))
head(all_sites_distDpnII)
# Hand check of one difference.
1150 - 340
str(all_sites_distDpnII)
# Largest distance, first as-is and then with missing values removed.
max(all_sites_distDpnII$distance)
?max
max(all_sites_distDpnII$distance, na.rm = TRUE)
# Plot the DpnII distances per chromosome with geom_sina() from ggforce.
library(ggforce)
# NOTE(review): the first two attempts map `lag` as an aesthetic, but the
# column created by mutate() above is named `distance`; the third attempt uses
# that column.
ggplot(all_sites_distDpnII, aes(lag, group = chromosome)) +
geom_sina()
str(all_sites_distDpnII)
ggplot(all_sites_distDpnII, aes(chromosome, lag)) +
geom_sina()
ggplot(all_sites_distDpnII, aes(chromosome, distance)) +
geom_sina()
head(all_sites_distDpnII)
# Neighbour-distance table for the CATG sites: within each chromosome,
# subtract the previous row's end from the current row's start.
all_sites_distNlaIII <- all_sites %>%
  filter(patternID == "CATG") %>%
  group_by(chromosome) %>%
  mutate(distance = start - lag(end))

# CATG distances per chromosome as a sina plot.
ggplot(data = all_sites_distNlaIII, mapping = aes(x = chromosome, y = distance)) +
  geom_sina()

# GATC distance against site start, all chromosomes in a single panel.
ggplot(data = all_sites_distDpnII, mapping = aes(x = start, y = distance)) +
  geom_point()

# Distance against site start, one panel per chromosome, without titles.
ggplot(data = all_sites_distDpnII, mapping = aes(x = start, y = distance)) +
  geom_point() +
  facet_wrap(~ chromosome)
ggplot(data = all_sites_distNlaIII, mapping = aes(x = start, y = distance)) +
  geom_point() +
  facet_wrap(~ chromosome)

# The same faceted plots with titles.
ggplot(data = all_sites_distDpnII, mapping = aes(x = start, y = distance)) +
  geom_point() +
  facet_wrap(~ chromosome) +
  ggtitle("DpnII cut sites in S288C")
ggplot(data = all_sites_distNlaIII, mapping = aes(x = start, y = distance)) +
  geom_point() +
  facet_wrap(~ chromosome) +
  ggtitle("NlaIII cut sites in S288C")
# Basic practice with variables, sum(), is(), ls() and paste().
var1 <- 5
var2 <- 3
sum(var1, var2)
is(is)
is(sum)
# ls() is called; the bare `ls` on the next line evaluates the function object
# instead of calling it.
ls()
ls
# NOTE(review): clear() is not defined in this file.
clear()
?paste
research_program <- 'SURP'
year <- '2019'
'SURP 2019'
# Join the two strings with paste()'s default separator and store the result.
paste(research_program, year)
current_program <- paste(research_program, year)
ls()
?paste
# Same join with different separators.
current_program <- paste(research_program, year, sep = '-')
current_program
current_program <- paste(research_program, year, sep = '%')
current_program
# NOTE(review): `aste` looks like a typo for paste; the corrected call follows.
aste(research_program, year, sep = 'YAY')
paste(research_program, year, sep = 'YAY')
paste0(1:12)
paste(1:12)
?sum
sum(var1, var2)
sum(2, 5)
sum(2, 5, 7, 7, 78, 9)
?paste()
ls
ls()
# Join three pieces; the third call supplies `sep` twice.
paste(current_program, year, "surp")
paste(current_program, year, "surp", sep = '-')
paste(current_program, year, "surp", sep = '-', sep = '.')
# Build the string in two steps so each join gets its own separator.
first_var <- paste(current_program, year)
first_var
paste(first_var, 'surp', sep='.')
# Natural log of p / (1 - p) for p = 0.01, 0.5 and 0.001.
log(0.01 / 0.99)
log(0.5 / 0.5)
log(0.001 / 0.999)
# Switch to the directory holding the annotation file and load it.
# NOTE(review): setwd() to an absolute, machine-specific path; every relative
# file name read or written below depends on it.
setwd("/Volumes/GoogleDrive/My Drive/Gresham Lab_Grace/france_satay/library_prep/france_saturation_scripts/grace_adapt_forBGI/cds_lengths")
library(tidyverse)
?read_tsv
# Read the GFF as tab-separated text, skipping '#' comment lines and treating
# the first data line as data rather than as a header (col_names = F); the
# code below refers to the resulting columns as X3, X4, X5 and X9.
gff = read_tsv("GCF_000146045.2_R64_genomic_GAP1.gff", comment='#', col_names = F)
head(gff)
?split
?separate
# Look at one value of X9 and at the whole table before deciding how to split.
gff$X9[1]
View(gff)
# Attempts to split X9 into pieces and compute a per-row length (X5 - X4).
# NOTE(review): each successful attempt overwrites `gff` with the processed
# table, which is why the file is re-read at the start of every attempt.
# First attempt: empty strings for two of the three `into` names, no `sep`.
gff = read_tsv("GCF_000146045.2_R64_genomic_GAP1.gff", comment='#', col_names = F) %>%
separate(X9, into=c('','','Name')) %>%
mutate(length=X5-X4) %>% select(Name, length)
# Placeholder names for the unwanted pieces; keep only rows whose X3 is "CDS".
gff = read_tsv("GCF_000146045.2_R64_genomic_GAP1.gff", comment='#', col_names = F) %>%
separate(X9, into=c('gar','bage','Name')) %>%
filter(X3 == "CDS")
head(gff)
# The pipe at the end of the next line is commented out, so the read stands
# alone; the separate() call two lines later then has no data argument piped in.
gff = read_tsv("GCF_000146045.2_R64_genomic_GAP1.gff", comment='#', col_names = F) #%>%
gff$X9
separate(X9, into=c('gar','bage','Name'), sep=';') %>%
filter(X3 == "CDS")
# Full pipeline again, now splitting X9 on ';'.
gff = read_tsv("GCF_000146045.2_R64_genomic_GAP1.gff", comment='#', col_names = F) %>%
separate(X9, into=c('gar','bage','Name'), sep=';') %>%
filter(X3 == "CDS")
head(gff)
# NOTE(review): if X4 and X5 are inclusive start/end coordinates, X5 - X4 is
# one less than the number of positions spanned — confirm.
gff = read_tsv("GCF_000146045.2_R64_genomic_GAP1.gff", comment='#', col_names = F) %>%
separate(X9, into=c('gar','bage','Name'), sep=';') %>%
filter(X3 == "CDS") %>%
mutate(length=X5-X4) %>% select(Name, length)
head(gff)
# Keep the raw table in `gff` and put the derived table in `cds_length`.
gff = read_tsv("GCF_000146045.2_R64_genomic_GAP1.gff", comment='#', col_names = F)
head(gff)
gff$X9[1]
# Split X9 on ';' into three pieces, keep rows whose X3 is "CDS", and compute
# length as X5 - X4.
cds_length = gff %>%
separate(X9, into=c('gar','bage','Name'), sep=';') %>%
filter(X3 == "CDS") %>%
mutate(length=X5-X4) %>% select(Name, length)
head(cds_length)
cds_length$Name[1]
# Eight pieces, the last named 'gene'.
# NOTE(review): this first version still selects `Name`, which is not among
# the `into` names; the next version selects `gene`.
cds_length = gff %>%
separate(X9, into=c('a','b','c', 'd', 'e', 'f','g','gene'), sep=';') %>%
filter(X3 == "CDS") %>%
mutate(length=X5-X4) %>% select(Name, length)
cds_length = gff %>%
separate(X9, into=c('a','b','c', 'd', 'e', 'f','g','gene'), sep=';') %>%
filter(X3 == "CDS") %>%
mutate(length=X5-X4) %>% select(gene, length)
head(cds_length)
# Seven pieces instead of eight.
cds_length = gff %>%
separate(X9, into=c('a','b','c', 'd', 'e', 'f','gene'), sep=';') %>%
filter(X3 == "CDS") %>%
mutate(length=X5-X4) %>% select(gene, length)
head(cds_length)
# Split on the literal text 'gene=' instead of on ';'.
# NOTE(review): the first version is missing the comma between 'a' and 'gene';
# the corrected call follows it.
cds_length = gff %>%
separate(X9, into=c('a''gene'), sep='gene=') %>%
filter(X3 == "CDS") %>%
mutate(length=X5-X4) %>% select(gene, length)
cds_length = gff %>%
separate(X9, into=c('a','gene'), sep='gene=') %>%
filter(X3 == "CDS") %>%
mutate(length=X5-X4) %>% select(gene, length)
head(cds_length)
# Filter to CDS rows first (gff_cds), then split X9 on 'gene='.
# NOTE(review): this first pipeline uses gff_cds before the lines below that
# assign it.
cds_length = gff_cds %>%
separate(X9, into=c('a','gene'), sep='gene=') %>%
mutate(length=X5-X4) %>% select(gene, length)
View(gff)
# The first filter() uses a single '=' where a comparison is needed; the second
# line is the corrected call with '=='.
gff_cds = gff %>% filter(X3="CDS")
gff_cds = gff %>% filter(X3=="CDS")
head(gff_cds)
gff_cds$X9[1]
# Same pipeline as at the top of this section, now that gff_cds exists.
cds_length = gff_cds %>%
separate(X9, into=c('a','gene'), sep='gene=') %>%
mutate(length=X5-X4) %>% select(gene, length)
head(cds_length)
gff_cds$X9[1:2]
View(gff_cds)
# Build a table of CDS row lengths from the GFF annotation and save it as a
# headerless two-column tab-separated file (id, length).
# The file is read without a header row; the columns are referred to below by
# their default names X3 (feature type), X4, X5 and X9.
gff <- read_tsv(
  "GCF_000146045.2_R64_genomic_GAP1.gff",
  comment = "#",
  col_names = FALSE
)
gff_cds <- gff %>% filter(X3 == "CDS")
# need ID ex. ID=cds5903: keep only the first ';'-separated field of X9.
# extra = "drop" discards the remaining fields without a warning for each row.
# NOTE(review): a CDS that occupies several rows yields one length per row —
# confirm whether per-ID totals are wanted.
cds_length <- gff_cds %>%
  separate(X9, into = "id", sep = ";", extra = "drop") %>%
  # GFF start (X4) and end (X5) are 1-based and inclusive, so a feature spans
  # end - start + 1 positions.
  mutate(length = X5 - X4 + 1) %>%
  select(id, length)
head(cds_length)
tail(cds_length)
# Written once, without a header line; a header-bearing write to the same path
# would only be overwritten by this one.
write_tsv(cds_length, "cdsLength.tab", col_names = FALSE)
library(tidyverse)
# Write the id of every CDS row in the GFF annotation to 'listAllGenes.txt',
# one id per line.
# The file is read without a header row; the columns are referred to below by
# their default names X3 (feature type), X4, X5 and X9.
gff <- read_tsv(
  "GCF_000146045.2_R64_genomic_GAP1.gff",
  comment = "#",
  col_names = FALSE
)
gff_cds <- gff %>% filter(X3 == "CDS")
# need ID ex. ID=cds5903: keep only the first ';'-separated field of X9.
# extra = "drop" discards the remaining fields without a warning for each row.
cds_length <- gff_cds %>%
  separate(X9, into = "id", sep = ";", extra = "drop") %>%
  # GFF start (X4) and end (X5) are 1-based and inclusive, so a feature spans
  # end - start + 1 positions.
  mutate(length = X5 - X4 + 1) %>%
  select(id, length)
# The ids are a plain character vector, so they are written with writeLines()
# (one element per line); no col_names argument is involved.
writeLines(cds_length$id, "listAllGenes.txt")
