## Intersecting point is on the line, use the formula
ix <- x1 + u * (x2 - x1)
iy <- y1 + u * (y2 - y1)
ans <- lineMagnitude(px, py, ix, iy)
}
ans
}
## Self-check for distancePointSegment(): evaluates nine fixed point/segment
## cases in order and stops with "error <k>" at the first one whose result is
## more than 1e-4 away from the expected value. Returns TRUE if all pass.
distancePointLineTest <- function() {
  tol <- .0001
  ## One row per case: px, py, x1, y1, x2, y2, expected distance
  cases <- rbind(
    c(  5,   5,  10, 10, 20, 20, 7.07106781186548),
    c( 15,  15,  10, 10, 20, 20, 0),
    c( 15,  15,  20, 10, 20, 20, 5),
    c(  0,  15,  20, 10, 20, 20, 20),
    c(  0,  25,  20, 10, 20, 20, 20.6155281280883),
    c(-13, -25, -50, 10, 20, 20, 39.8808224589213),
    c(  0,   3,   0, -4,  5,  0, 5.466082),
    c(  0,   9,   0, -4,  0, 15, 0)
  )
  for (k in seq_len(nrow(cases))) {
    p <- cases[k, ]
    observed <- distancePointSegment(p[1], p[2], p[3], p[4], p[5], p[6])
    if (abs(observed - p[7]) > tol)
      stop(paste("error", k))
  }
  ## The last case is checked on the squared distance
  squared <- distancePointSegment(0, 0, 0, -2, 2, 0)^2
  if (abs(squared - 2) > tol)
    stop("error 9")
  TRUE
}
## ---- Estimate the number of cells from the cumulative read curve ----
## Reads one read count per barcode (column V1), converts the counts to a
## cumulative fraction, and places the cutoff at the point of the curve that
## lies farthest from the straight line running from the origin to the first
## point where the curve exceeds 0.85.
## NOTE(review): the x-axis label below implies the counts are already sorted
## in descending order - confirm against the input file.
a <- read.table('/Users/Jcazet/Downloads/D13-C1_S1_out_cell_readcounts.txt',
                header = FALSE, stringsAsFactors = FALSE)
x <- cumsum(a$V1)
x <- x / max(x)
## get slope of the reference line (through the origin, so intercept is 0)
checkVal <- min(which(x > 0.85))
inSlope <- x[checkVal] / checkVal
testDF <- data.frame(x = seq_len(checkVal), y = x[seq_len(checkVal)])
## distancePointLine() is not defined in this section; it is called with
## (x, y, slope, intercept) and is expected to return a single number.
testDF$res <- vapply(seq_len(nrow(testDF)), function(j) {
  distancePointLine(testDF[j, 'x'], testDF[j, 'y'], inSlope, 0)
}, numeric(1))
cutoff <- which.max(testDF$res)
plot(seq_along(x), x, type = 'l', col = "blue",
     xlab = "cell barcodes sorted by number of reads [descending]",
     ylab = "cumulative fraction of reads",
     xlim = c(1, checkVal * 2))
abline(v = cutoff)
text(checkVal, 0.5, paste0('estimated # Cells = ', cutoff))
## ---- Feature plots for transcription-factor genes in the atlas object ----
library(Seurat)
library(ggplot2)  # provides ggsave(); must be attached before the plotting loop

ds <- readRDS('/Users/Jcazet/Desktop/aepAtlasNonDub.rds')
## Select the assay first: the ID filter below matches against rownames(ds)
DefaultAssay(ds) <- 'SCT'

## TF IDs are read without a header. The 'HVAEP1_' prefix is stripped so the
## IDs can be matched against rownames(ds); only IDs present in `ds` are kept.
## `tfs` ends up as a plain character vector (one element per gene), which is
## what the per-gene plotting loop below iterates over.
tfs <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/AEP_Assembly/annotations/tfIDs.txt', header = FALSE)
tfs$V1 <- gsub('HVAEP1_', '', tfs$V1)
tfs <- tfs$V1[tfs$V1 %in% rownames(ds)]

## Annotation table: rewrite transcript IDs of the form 'HVAEP1_T<digits>.<n>'
## to 'G<digits>' and keep only the text before the first ';' in UniprotHit.
annots <- read.csv('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/AEP_Assembly/annotations/HVAEP1_annotation.csv')
annots$H_vulgarisAEP <- gsub('HVAEP1_T(\\d+)[.]\\d+', 'G\\1', annots$H_vulgarisAEP)
annots$UniprotHit <- gsub(';.*', '', annots$UniprotHit)

if (interactive()) {
  View(tfs)
  View(annots)
}

## Start from an empty output directory (any existing plots are deleted)
unlink('~/Desktop/plots', recursive = TRUE)
dir.create('~/Desktop/plots', showWarnings = FALSE)

## One PNG per TF, named '<gene>_<UniprotHit>.png'. The first matching
## annotation row is used so the file name is always a single string; genes
## without an annotation row get an empty suffix.
invisible(lapply(tfs, function(x) {
  hit <- annots$UniprotHit[which(annots$H_vulgarisAEP == x)]
  hit <- if (length(hit) > 0) hit[1] else ''
  p <- FeaturePlot(ds, x, order = TRUE) + NoAxes()
  ggsave(paste0('~/Desktop/plots/', x, '_', hit, '.png'),
         plot = p, width = 6, height = 6, dpi = 300)
}))

## Individual genes inspected on screen
FeaturePlot(ds, 'G017621')
FeaturePlot(ds, 'G017621', order = TRUE)
FeaturePlot(ds, 'G026882', order = TRUE)
FeaturePlot(ds, 'G017526', order = TRUE)
## ---- Positional spread of repeat families (amil.reps.gff) ----
## For every repeat family, measure how widely its copies are spread along
## each sequence (sd of column V4) and average that spread over sequences.
## NOTE(review): V1 is presumably the sequence/chromosome name and V4 the
## feature start coordinate (standard GFF layout) - confirm against the file.
repBed <- read.delim('/Users/Jcazet/Desktop/amil.reps.gff', header = FALSE, skip = 2)

## Family name = column 9 with the 'Target=' tag and everything from the first
## space onwards removed; rows whose family name starts with '(' are dropped.
repBed$fam <- gsub('Target=| .*', '', repBed$V9)
repBed <- repBed[!grepl('^\\(', repBed$fam), ]

## One data frame per family; the first group of the split is discarded.
repBed.fList <- split(repBed, repBed$fam)[-1]
## Keep families with more than 200 copies overall ...
repBed.fList <- repBed.fList[vapply(repBed.fList, nrow, integer(1)) > 200]
## ... split each family by sequence and keep sequences with more than 10 copies ...
repBed.fList <- lapply(repBed.fList, function(x) split(x, x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[vapply(x, nrow, integer(1)) > 10])
## ... and keep families still represented on more than 3 sequences.
repBed.fList <- repBed.fList[lengths(repBed.fList) > 3]

## Per-sequence sd of V4, then the mean of those sds per family (NA ignored).
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.sd <- vapply(repBed.sd, mean, numeric(1), na.rm = TRUE)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd)
## ---- Repeat-family spread for jaNemVect1.1 (exploratory session) ----
## The same pipeline is run several times while the header skip count and
## the family-name pattern are adjusted; only the last run's objects survive.
library(Seurat)
library(ggplot2)
## Attempt 1: skip 2 leading lines, family name parsed with the 'Target=' pattern
repBed <- read.delim('/Users/Jcazet/Desktop/jaNemVect1.1.chroms.fa.out.gff', header = F, skip = 2)
## Family = column 9 with 'Target=' and everything from the first space removed
repBed$fam <- gsub('Target=| .*','',repBed$V9)
## Drop rows whose family name starts with '('
repBed <- repBed[!grepl('^\\(',repBed$fam),]
## Split by family and discard the first group of the split
repBed.fList <- split(repBed,repBed$fam)[-1]
## Keep families with more than 200 rows
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 200]
## Split each family by V1 (presumably the sequence name - TODO confirm)
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
## Within each family keep V1 groups with more than 10 rows
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
## Keep families that still have more than 3 V1 groups
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
## sd of V4 per V1 group (V4 presumably the start coordinate - TODO confirm)
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
## Mean of the per-group sds for each family, ignoring NA
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd)
## Reload and inspect the parsed table
repBed <- read.delim('/Users/Jcazet/Desktop/jaNemVect1.1.chroms.fa.out.gff', header = F, skip = 2)
repBed$fam <- gsub('Target=| .*','',repBed$V9)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
View(repBed)
## Attempt 2: skip 3 leading lines instead of 2
repBed <- read.delim('/Users/Jcazet/Desktop/jaNemVect1.1.chroms.fa.out.gff', header = F, skip = 3)
repBed$fam <- gsub('Target=| .*','',repBed$V9)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
repBed$fam <- gsub('Target=| .*','',repBed$V9)
## Attempt 3: this file is parsed with a 'Target Motif:' prefix, so the tag
## and the trailing text are stripped in two separate steps
repBed <- read.delim('/Users/Jcazet/Desktop/jaNemVect1.1.chroms.fa.out.gff', header = F, skip = 3)
repBed$fam <- gsub('Target Motif:','',repBed$V9)
repBed$fam <- gsub(' .*','',repBed$fam)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
## Run with thresholds: > 200 rows per family, > 10 per V1 group, > 3 groups
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 200]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd)
## Final run with a looser family threshold: > 100 rows per family
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd)
View(repBed.sd)
## ---- Repeat-family spread for jaNemVect1.1, positions scaled by length ----
## Same family/sequence filtering as before, but V4 is first divided by the
## length of the sequence it lies on, so spreads are comparable between
## sequences of different size.
library(plyr)  # mapvalues()

repBed <- read.delim('/Users/Jcazet/Desktop/jaNemVect1.1.chroms.fa.out.gff', header = FALSE, skip = 3)
## The .fai index has no header line; V1 is matched against repBed$V1 and V2
## supplies the divisor (presumably the sequence length - TODO confirm).
chr.length <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/Nvec200/jaNemVect1.1.chroms.fa.fai', header = FALSE)

repBed$fam <- gsub('Target Motif:', '', repBed$V9)
repBed$fam <- gsub(' .*', '', repBed$fam)
repBed <- repBed[!grepl('^\\(', repBed$fam), ]

## mapvalues() returns the looked-up lengths in the type of repBed$V1, so the
## result must be converted to numeric before it can be used as a divisor.
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = FALSE))
## Scale exactly once, on freshly loaded data
repBed$V4 <- repBed$V4 / repBed$chrLength

## Thresholds: > 100 rows per family, > 10 per sequence, > 3 sequences
repBed.fList <- split(repBed, repBed$fam)[-1]
repBed.fList <- repBed.fList[vapply(repBed.fList, nrow, integer(1)) > 100]
repBed.fList <- lapply(repBed.fList, function(x) split(x, x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[vapply(x, nrow, integer(1)) > 10])
repBed.fList <- repBed.fList[lengths(repBed.fList) > 3]

## Per-sequence sd and mean of the scaled position, averaged per family
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- vapply(repBed.sd, mean, numeric(1), na.rm = TRUE)
repBed.mn <- vapply(repBed.mn, mean, numeric(1), na.rm = TRUE)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
View(repBed.sd)
## Reload of the jaNemVect inputs; both objects are overwritten by the amil
## data a few lines further down.
repBed <- read.delim('/Users/Jcazet/Desktop/jaNemVect1.1.chroms.fa.out.gff', header = F, skip = 3)
chr.length <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/Nvec200/jaNemVect1.1.chroms.fa.fai',header=F)
## ---- Repeat-family spread for amil.reps.gff, positions scaled by length ----
library(Seurat)
library(ggplot2)
library(plyr)
repBed <- read.delim('/Users/Jcazet/Desktop/amil.reps.gff', header = F, skip = 3)
## NOTE(review): read without header=F, so the first line of the .fai file is
## used as column names; the V1/V2 lookups below rely on default column names.
## This attempt is repeated with header=F further down.
chr.length <- read.delim("/Volumes/GoogleDrive/My Drive/Juliano_lab/References/amil/amil.chroms.fa.fai")
repBed$fam <- gsub('Target=| .*','',repBed$V9)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
View(chr.length)
## Second attempt: reload both inputs, this time reading the .fai with header=F
repBed <- read.delim('/Users/Jcazet/Desktop/amil.reps.gff', header = F, skip = 3)
chr.length <- read.delim("/Volumes/GoogleDrive/My Drive/Juliano_lab/References/amil/amil.chroms.fa.fai", header=F)
## Family = column 9 without 'Target=' and without anything after the first space
repBed$fam <- gsub('Target=| .*','',repBed$V9)
## No further effect: the pattern above already removed everything after a space
repBed$fam <- gsub(' .*','',repBed$fam)
## Drop rows whose family name starts with '('
repBed <- repBed[!grepl('^\\(',repBed$fam),]
## Look up chr.length$V2 for each V1 and convert to numeric
## (V2 presumably the sequence length - TODO confirm)
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
## Express V4 as a fraction of chrLength
repBed$V4 <- repBed$V4/repBed$chrLength
## Split by family (first group discarded); keep families with > 100 rows
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
View(chr.length)
## Split by V1; keep V1 groups with > 10 rows and families with > 3 groups
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
## Per-group sd and mean of scaled V4, then averaged per family (NA ignored)
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.mn <- sapply(repBed.mn, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
View(repBed.sd)
## ---- Repeat-family spread for Amil_v2.1, positions scaled by length ----
library(Seurat)
library(ggplot2)
library(plyr)
## Repeat GFF: no column header, first 3 lines skipped
repBed <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/amil/Amil_v2.1.chroms.fa.out.gff', header = F, skip = 3)
View(repBed)
## .fai index: V1 is matched against repBed$V1, V2 is used as the divisor
## (presumably sequence name and length - TODO confirm)
chr.length <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/amil/amil2.chroms.fa.fai',header=F)
## Family = column 9 without the 'Target Motif:' tag and anything after a space
repBed$fam <- gsub('Target Motif:','',repBed$V9)
repBed$fam <- gsub(' .*','',repBed$fam)
## Drop rows whose family name starts with '('
repBed <- repBed[!grepl('^\\(',repBed$fam),]
## Look up the divisor for each row and express V4 as a fraction of it
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
## Split by family (first group discarded); keep families with > 100 rows
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
## Split by V1; keep V1 groups with > 10 rows and families with > 3 groups
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
## Per-group sd and mean of scaled V4, then averaged per family (NA ignored)
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.mn <- sapply(repBed.mn, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
View(repBed.sd)
## ---- Repeat-family spread for resc_genome, positions scaled by length ----
library(Seurat)
library(ggplot2)
library(plyr)  # mapvalues()

## Both inputs live under References/resc; neither file has a column header.
repBed <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/resc/resc_genome.chroms.fa.out.gff', header = FALSE, skip = 3)
chr.length <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/resc/resc_genome.chroms.fa.fai', header = FALSE)
View(repBed)

## Family = column 9 without the 'Target Motif:' tag and anything after a space;
## rows whose family name starts with '(' are dropped.
repBed$fam <- gsub('Target Motif:', '', repBed$V9)
repBed$fam <- gsub(' .*', '', repBed$fam)
repBed <- repBed[!grepl('^\\(', repBed$fam), ]

## Scale V4 by the matching chr.length$V2 (presumably the sequence length -
## TODO confirm). This must happen exactly once per load: dividing an already
## scaled V4 again would weight each sequence by 1/length^2.
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = FALSE))
repBed$V4 <- repBed$V4 / repBed$chrLength

## Thresholds: > 100 rows per family, > 20 per sequence, > 5 sequences
repBed.fList <- split(repBed, repBed$fam)[-1]
repBed.fList <- repBed.fList[vapply(repBed.fList, nrow, integer(1)) > 100]
repBed.fList <- lapply(repBed.fList, function(x) split(x, x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[vapply(x, nrow, integer(1)) > 20])
repBed.fList <- repBed.fList[lengths(repBed.fList) > 5]

## Per-sequence sd and mean of the scaled position, averaged per family
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- vapply(repBed.sd, mean, numeric(1), na.rm = TRUE)
repBed.mn <- vapply(repBed.mn, mean, numeric(1), na.rm = TRUE)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
View(repBed.sd)
## ---- Repeat-family spread for hoct.genome, run from the hic directory ----
## Run an external script from the revision directory, echoing its statements
setwd("/Volumes/Data/genome/revision")
source("/Volumes/Data/genome/revision/syntVisVirid.R", echo=TRUE)
## Relative paths below are resolved against the hic directory
setwd("/Volumes/Data/genome/revision/hic")
library(plyr)
## Repeat GFF: no column header, first 3 lines skipped (statement issued twice)
repBed <- read.delim('hoct/hoct.genome.chroms.fa.out.gff', header = F, skip = 3)
repBed <- read.delim('hoct/hoct.genome.chroms.fa.out.gff', header = F, skip = 3)
## .fai index: V1 is matched against repBed$V1, V2 is used as the divisor
## (presumably sequence name and length - TODO confirm)
chr.length <- read.delim('hoct/hoct.genome.chroms.fa.fai',header=F)
## Family = column 9 without the 'Target Motif:' tag and anything after a space
repBed$fam <- gsub('Target Motif:','',repBed$V9)
repBed$fam <- gsub(' .*','',repBed$fam)
## Drop rows whose family name starts with '('
repBed <- repBed[!grepl('^\\(',repBed$fam),]
## Look up the divisor for each row and express V4 as a fraction of it
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
## Split by family (first group discarded); keep families with > 100 rows
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
## Split by V1; keep V1 groups with > 20 rows and families with > 5 groups
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 20])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 5]
## Per-group sd and mean of scaled V4, then averaged per family (NA ignored)
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.mn <- sapply(repBed.mn, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
View(repBed.sd)
## Sort families by ascending sd (smallest spread first)
repBed.sd <- repBed.sd[order(repBed.sd$sd),]
## ---- Run pullCentCand.sh for the first families in repBed.sd (hoct) ----
## Requires `repBed.sd` from the preceding analysis (one row per family, first
## column `name`, already sorted). Needs an RStudio session because the
## working directory is derived from the active document.
library(rstudioapi)
setwd(dirname(getActiveDocumentContext()$path))
spec <- 'hoct'
setwd(spec)

## Family names come from the summary table repBed.sd (column 1), not from
## the raw repBed table, whose first column is not a family name.
## The loop is capped at the number of available families so a short table
## never produces NA arguments.
for (i in seq_len(min(10, nrow(repBed.sd)))) {
  ## Script arguments: <family name> <repeat gff> <index>
  comm <- paste0('./../pullCentCand.sh ', shQuote(repBed.sd[i, 1]),
                 ' hoct.genome.chroms.fa.out.gff ', i)
  ## Run from the species directory; quoted so paths with spaces still work
  comm <- paste0('cd ', shQuote(getwd()), ' && ', comm)
  system(comm)
}
## ---- Repeat-family spread with inputs located via list.files() ----
library(plyr)
library(rstudioapi)
## Work relative to the directory of the document open in RStudio
setwd(dirname(getActiveDocumentContext()$path))
## Only the second assignment takes effect: the species directory is 'resc'
spec <- 'hoct'
spec <- 'resc'
setwd(spec)
## Find the repeat GFF in the species directory (statement issued twice).
## NOTE(review): `pattern` is a regular expression, not a shell glob, and
## read.delim() below needs exactly one matching file - confirm.
repBed <- list.files(pattern='*.out.gff')
repBed <- list.files(pattern='*.out.gff')
## repBed is replaced by the parsed table, so the file name is not kept
repBed <- read.delim(repBed, header = F, skip = 3)
## Same lookup-then-read pattern for the .fai index
chr.length <- list.files(pattern='*.chroms.fa.fai')
chr.length <- read.delim(chr.length,header=F)
## Family = column 9 without the 'Target Motif:' tag and anything after a space
repBed$fam <- gsub('Target Motif:','',repBed$V9)
repBed$fam <- gsub(' .*','',repBed$fam)
## Drop rows whose family name starts with '('
repBed <- repBed[!grepl('^\\(',repBed$fam),]
## Look up chr.length$V2 for each V1 (presumably the sequence length - TODO
## confirm) and express V4 as a fraction of it
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
## Split by family (first group discarded); keep families with > 100 rows
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
## Split by V1; keep V1 groups with > 20 rows and families with > 5 groups
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 20])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 5]
## Per-group sd and mean of scaled V4, then averaged per family (NA ignored)
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.mn <- sapply(repBed.mn, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
## Sort families by ascending sd
repBed.sd <- repBed.sd[order(repBed.sd$sd),]
## ---- resc: repeat-family spread, then pullCentCand.sh on the first 10 ----
library(plyr)
library(rstudioapi)
## Work relative to the directory of the document open in RStudio
setwd(dirname(getActiveDocumentContext()$path))
## Only the second assignment takes effect: the species directory is 'resc'
spec <- 'hoct'
spec <- 'resc'
setwd(spec)
## Keep the GFF file name in repBed.path; it is passed to the shell script below.
## NOTE(review): `pattern` is a regular expression, not a shell glob, and
## read.delim() needs exactly one matching file - confirm.
repBed.path <- list.files(pattern='*.out.gff')
repBed <- read.delim(repBed.path, header = F, skip = 3)
chr.length.path <- list.files(pattern='*.chroms.fa.fai')
chr.length <- read.delim(chr.length.path,header=F)
## Family = column 9 without the 'Target Motif:' tag and anything after a space
repBed$fam <- gsub('Target Motif:','',repBed$V9)
## Alternative pattern for files that use a 'Target=' tag (disabled)
#repBed$fam <- gsub('Target=| .*','',repBed$V9)
repBed$fam <- gsub(' .*','',repBed$fam)
## Drop rows whose family name starts with '('
repBed <- repBed[!grepl('^\\(',repBed$fam),]
## Look up chr.length$V2 for each V1 (presumably the sequence length - TODO
## confirm) and express V4 as a fraction of it
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
## Split by family (first group discarded); keep families with > 100 rows
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
## Split by V1; keep V1 groups with > 20 rows and families with > 5 groups
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 20])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 5]
## Per-group sd and mean of scaled V4, then averaged per family (NA ignored)
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.mn <- sapply(repBed.mn, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
## Sort families by ascending sd, so rows 1..10 are the smallest spreads
repBed.sd <- repBed.sd[order(repBed.sd$sd),]
## Call the helper script once per family: <family name> <gff file> <index>.
## NOTE(review): assumes repBed.sd has at least 10 rows - confirm.
for(i in 1:10){
comm <- paste0('./../pullCentCand.sh ',repBed.sd[i,1],' ', repBed.path,' ',i)
## Prefix with a cd into the current working directory
comm <- paste0('cd ', getwd(),' && ',comm)
system(comm)
}
## ---- jaNemVect: repeat-family spread, then pullCentCand.sh on the first 10 ----
library(plyr)
library(rstudioapi)
## Work relative to the directory of the document open in RStudio
setwd(dirname(getActiveDocumentContext()$path))
spec <- 'jaNemVect'
setwd(spec)
## Keep the GFF file name in repBed.path; it is passed to the shell script below.
## NOTE(review): `pattern` is a regular expression, not a shell glob, and
## read.delim() needs exactly one matching file - confirm.
repBed.path <- list.files(pattern='*.out.gff')
repBed <- read.delim(repBed.path, header = F, skip = 3)
chr.length.path <- list.files(pattern='*.chroms.fa.fai')
chr.length <- read.delim(chr.length.path,header=F)
## Family = column 9 without the 'Target Motif:' tag and anything after a space
repBed$fam <- gsub('Target Motif:','',repBed$V9)
repBed$fam <- gsub(' .*','',repBed$fam)
## Drop rows whose family name starts with '('
repBed <- repBed[!grepl('^\\(',repBed$fam),]
## Look up chr.length$V2 for each V1 (presumably the sequence length - TODO
## confirm) and express V4 as a fraction of it
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
## Split by family (first group discarded); keep families with > 100 rows
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
## Split by V1; keep V1 groups with > 20 rows and families with > 5 groups
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 20])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 5]
## Per-group sd and mean of scaled V4, then averaged per family (NA ignored)
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.mn <- sapply(repBed.mn, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
## Sort families by ascending sd, so rows 1..10 are the smallest spreads
repBed.sd <- repBed.sd[order(repBed.sd$sd),]
## Call the helper script once per family: <family name> <gff file> <index>.
## NOTE(review): assumes repBed.sd has at least 10 rows - confirm.
for(i in 1:10){
comm <- paste0('./../pullCentCand.sh ',repBed.sd[i,1],' ', repBed.path,' ',i)
## Prefix with a cd into the current working directory
comm <- paste0('cd ', getwd(),' && ',comm)
system(comm)
}
## ---- hoct: repeat-family spread, then pullCentCand.sh on the 10 largest sds ----
## First attempt changes directory relative to wherever the session currently
## is; the absolute setwd() that follows makes the location explicit.
## NOTE(review): the first setwd(spec) presumably fails or lands in the wrong
## place if the session is still inside another species directory - confirm.
spec <- 'hoct'
setwd(spec)
setwd("/Volumes/Data/genome/revision/hic")
spec <- 'hoct'
setwd(spec)
## Keep the GFF file name in repBed.path; it is passed to the shell script below.
## NOTE(review): `pattern` is a regular expression, not a shell glob, and
## read.delim() needs exactly one matching file - confirm.
repBed.path <- list.files(pattern='*.out.gff')
repBed <- read.delim(repBed.path, header = F, skip = 3)
chr.length.path <- list.files(pattern='*.chroms.fa.fai')
chr.length <- read.delim(chr.length.path,header=F)
## Family = column 9 without the 'Target Motif:' tag and anything after a space
repBed$fam <- gsub('Target Motif:','',repBed$V9)
## Alternative pattern for files that use a 'Target=' tag (disabled)
#repBed$fam <- gsub('Target=| .*','',repBed$V9)
repBed$fam <- gsub(' .*','',repBed$fam)
## Drop rows whose family name starts with '('
repBed <- repBed[!grepl('^\\(',repBed$fam),]
## Look up chr.length$V2 for each V1 (presumably the sequence length - TODO
## confirm) and express V4 as a fraction of it
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
## Split by family (first group discarded); keep families with > 100 rows
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
## Split by V1; keep V1 groups with > 20 rows and families with > 5 groups
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 20])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 5]
## Per-group sd and mean of scaled V4, then averaged per family (NA ignored)
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.mn <- sapply(repBed.mn, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
## Ascending sort for inspection ...
repBed.sd <- repBed.sd[order(repBed.sd$sd),]
View(repBed.sd)
## ... then re-sorted by descending sd, so rows 1..10 below are the families
## with the LARGEST spread
repBed.sd <- repBed.sd[order(-repBed.sd$sd),]
## Call the helper script once per family: <family name> <gff file> <index>.
## NOTE(review): assumes repBed.sd has at least 10 rows - confirm.
for(i in 1:10){
comm <- paste0('./../pullCentCand.sh ',repBed.sd[i,1],' ', repBed.path,' ',i)
## Prefix with a cd into the current working directory
comm <- paste0('cd ', getwd(),' && ',comm)
system(comm)
}
View(repBed)
## Reload the raw GFF (discards the fam/chrLength columns and the V4 scaling)
repBed.path <- list.files(pattern='*.out.gff')
repBed <- read.delim(repBed.path, header = F, skip = 3)
