# Exploratory session: plot individual features of `ds` one at a time.
# FeaturePlot, CellSelector and FindMarkers are not defined in this file;
# presumably they come from Seurat, which is attached further down - confirm.
# `ds`, `annots` and `gg` must already exist in the workspace at this point.
FeaturePlot(ds,'G018201', order=T)
View(annots)
FeaturePlot(ds,'G002332', order=T)
FeaturePlot(ds,'G001060', order=T)
FeaturePlot(ds,'G010914', order=T)
FeaturePlot(ds,'G002340', order=T)
FeaturePlot(ds,'G015632', order=T)
FeaturePlot(ds,'G020495', order=T)
FeaturePlot(ds,'G024963', order=T)
FeaturePlot(ds,'G001215', order=T)
FeaturePlot(ds,'G001685', order=T)
FeaturePlot(ds,'G020223', order=T)
FeaturePlot(ds,'G011095', order=T)
FeaturePlot(ds,'G015273', order=T)
FeaturePlot(ds,'G011095', order=T)
FeaturePlot(ds,'G001692', order=T)
FeaturePlot(ds,'G006825', order=T)
# Pick two cell sets interactively from the plot object `gg`, then compare
# them: c1 is passed as ident.1 and c2 as ident.2, with only.pos = T.
c1 <- CellSelector(gg)
c2 <- CellSelector(gg)
iMark <- FindMarkers(ds, ident.1 = c1, ident.2 = c2, only.pos = T, recorrect_umi=F)
View(iMark)
FeaturePlot(ds,'G011799', order=T)
FeaturePlot(ds,'G013843', order=T)
FeaturePlot(ds,'G024984', order=T)
FeaturePlot(ds,'G011799', order=T)
FeaturePlot(ds,'G013843', order=T)
FeaturePlot(ds,'G024984', order=T)
FeaturePlot(ds,'G001060', order=T)
FeaturePlot(ds,'G007201', order=T)
FeaturePlot(ds,'G028862', order=T)
FeaturePlot(ds,'G009854', order=T)
FeaturePlot(ds,'G013048', order=T)
FeaturePlot(ds,'G003797', order=T)
FeaturePlot(ds,'G022128', order=T)
FeaturePlot(ds,'G007619', order=T)
FeaturePlot(ds,'G011350', order=T)
FeaturePlot(ds,'G010196', order=T)
FeaturePlot(ds,'G005618', order=T)
# Load the gene spectra score table. The first read keeps every column as
# data so the layout can be inspected; the second read uses the first column
# as row names.
gScores <- read.delim('/Volumes/Data/genome/ds/nmf/final/whole_unfilt_fine_narrow.gene_spectra_score.k_56.dt_0_13.txt')
gScores[1:5,1:5]
gScores <- read.delim('/Volumes/Data/genome/ds/nmf/final/whole_unfilt_fine_narrow.gene_spectra_score.k_56.dt_0_13.txt', row.names = 1)
gScores[1:5,1:5]
# Transpose so that the former columns become rows (t() returns a matrix).
gScores <- t(gScores)
View(gScores)
FeaturePlot(ds,'G002340', order=T)
FeaturePlot(ds,'G011095', order=T)
FeaturePlot(ds,'G021326', order=T)
FeaturePlot(ds,'G021326', order=F)
FeaturePlot(ds,'G006103', order=F)
FeaturePlot(ds,'G006103', order=T)
FeaturePlot(ds,'G018201', order=T)
FeaturePlot(ds,'G007699', order=T)
FeaturePlot(ds,'G008423', order=T)
FeaturePlot(ds,'G017543', order=T)
FeaturePlot(ds,'G023348', order=T)
FeaturePlot(ds,'G001215', order=T)
FeaturePlot(ds,'G005514', order=T)
FeaturePlot(ds,'G021465', order=T)
FeaturePlot(ds,'G025687', order=T)
FeaturePlot(ds,'G022797', order=T)
FeaturePlot(ds,'G022496', order=T)
FeaturePlot(ds,'G019766', order=T)
FeaturePlot(ds,'G003313', order=T)
FeaturePlot(ds,'G006972', order=T)
FeaturePlot(ds,'G009542', order=T)
FeaturePlot(ds,'G008761', order=T)
# Find the column holding the largest value for row 'HVAEP1.G002332'.
# The first call uses single-index form (no comma); the second selects the
# whole row.
which.max(gScores['HVAEP1.G002332'])
which.max(gScores['HVAEP1.G002332',])
# Reorder rows by decreasing value in column 9 of the transposed matrix.
gScores <- gScores[order(-gScores[,9]),]
# NOTE(review): the row name is passed unmodified here, whereas the calls
# that follow strip everything up to the last '.' first - presumably this
# attempt did not match a feature name in ds; confirm.
FeaturePlot(ds,rownames(gScores)[1], order=T)
# Plot the 50 top-ranked rows of gScores, in rank order, one FeaturePlot each.
# Every row name is reduced to the part after its last '.' before it is passed
# to FeaturePlot. print() is needed because nothing auto-prints inside a
# function body.
invisible(lapply(seq_len(50), function(rank) {
  print(FeaturePlot(ds, gsub('.*[.]','',rownames(gScores)[rank]), order=T))
}))
#distance functions
distancePointLine <- function(x, y, slope, intercept) {
  ## Perpendicular distance from the point (x, y) to the infinite line
  ## y = slope * x + intercept.
  ##
  ## x, y              coordinates of the point to test; vectors are handled
  ##                   element-wise.
  ## slope, intercept  define the line to measure against.
  ##
  ## Returns the non-negative distance from the line.
  ##
  ## The closed form |slope*x - y + intercept| / sqrt(slope^2 + 1) is used
  ## rather than measuring against a finite segment spanning x-10 .. x+10:
  ## a finite window yields the distance to a segment endpoint whenever the
  ## foot of the perpendicular falls outside it (points far from a sloped
  ## line). The denominator is always >= 1, so no 0-denominator sentinel is
  ## needed.
  abs(slope * x - y + intercept) / sqrt(slope^2 + 1)
}
distancePointSegment <- function(px, py, x1, y1, x2, y2) {
  ## Distance from the point (px, py) to the segment (x1, y1)-(x2, y2).
  ##
  ## If the perpendicular projection of the point lands on the segment, the
  ## perpendicular distance is returned; otherwise the distance to whichever
  ## endpoint is closer.
  ##
  ## A segment shorter than 1e-8 triggers a warning and the sentinel 9999.
  euclid <- function(ax, ay, bx, by) sqrt((bx - ax)^2 + (by - ay)^2)

  segLen <- euclid(x1, y1, x2, y2)
  if (segLen < 0.00000001) {
    warning("short segment")
    return(9999)
  }

  ## Relative position of the projection along the segment:
  ## 0 at (x1, y1), 1 at (x2, y2).
  u <- (((px - x1) * (x2 - x1)) + ((py - y1) * (y2 - y1)))
  u <- u / (segLen * segLen)

  if (u >= 0.00001 && u <= 1) {
    ## Projection is on the segment: measure to the projected point.
    footX <- x1 + u * (x2 - x1)
    footY <- y1 + u * (y2 - y1)
    return(euclid(px, py, footX, footY))
  }

  ## Projection is off the segment: take the nearer endpoint.
  min(euclid(px, py, x1, y1), euclid(px, py, x2, y2))
}
distancePointLineTest <- function() {
  ## Self-check for distancePointSegment().
  ##
  ## Each row of `cases` holds: px, py, x1, y1, x2, y2, the expected value,
  ## and a flag (1 = compare the squared distance, 0 = compare the distance).
  ## Cases run in order; the first one that is off by more than 1e-4 stops
  ## with "error <case number>". Returns TRUE when every case passes.
  cases <- rbind(
    c(  5,   5,  10, 10, 20, 20, 7.07106781186548, 0),
    c( 15,  15,  10, 10, 20, 20, 0,                0),
    c( 15,  15,  20, 10, 20, 20, 5,                0),
    c(  0,  15,  20, 10, 20, 20, 20,               0),
    c(  0,  25,  20, 10, 20, 20, 20.6155281280883, 0),
    c(-13, -25, -50, 10, 20, 20, 39.8808224589213, 0),
    c(  0,   3,   0, -4,  5,  0, 5.466082,         0),
    c(  0,   9,   0, -4,  0, 15, 0,                0),
    c(  0,   0,   0, -2,  2,  0, 2,                1)
  )
  for (k in seq_len(nrow(cases))) {
    p <- cases[k, ]
    d <- distancePointSegment(p[1], p[2], p[3], p[4], p[5], p[6])
    if (p[8] == 1) {
      d <- d^2
    }
    if (abs(d - p[7]) > .0001) {
      stop(paste0("error ", k))
    }
  }
  TRUE
}
# Estimate the number of cells from a per-barcode read-count table: locate
# the point on the cumulative read-fraction curve that lies furthest from the
# straight line joining the origin to the first point above 0.85.
a <- read.table(
  '/Users/Jcazet/Downloads/D13-C1_S1_out_cell_readcounts.txt',
  header = FALSE,
  stringsAsFactors = FALSE
)

# Cumulative sum of column V1, rescaled so that the last value is 1.
x <- cumsum(a$V1)
x <- x / max(x)

#get slope
# checkVal is the first index whose cumulative fraction exceeds 0.85; the
# reference line runs through the origin and (checkVal, x[checkVal]).
checkVal <- min(which(x > 0.85))
inSlope <- x[checkVal] / checkVal

# Distance of every curve point up to checkVal from the reference line.
testDF <- data.frame(x = seq_len(checkVal), y = head(x, checkVal))
testDF$res <- vapply(
  seq_len(nrow(testDF)),
  function(j) distancePointLine(testDF[j, 'x'], testDF[j, 'y'], inSlope, 0),
  numeric(1)
)

# The index with the largest distance is reported as the cell-count estimate.
cutoff <- which.max(testDF$res)

plot(
  seq_along(x), x,
  type = 'l',
  col = "blue",
  xlab = "cell barcodes sorted by number of reads [descending]",
  ylab = "cumulative fraction of reads",
  xlim = c(1, checkVal * 2)
)
abline(v = cutoff)
text(checkVal, 0.5, paste0('estimated # Cells = ', cutoff))
library(Seurat)
# Load an ID list (`tfs`; presumably transcription factors, judging by the
# file name) and the atlas object `ds`, match the IDs against rownames(ds),
# then save one FeaturePlot per ID. Several steps are repeated with
# corrections, as in an interactive session.
tfs <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/AEP_Assembly/annotations/tfIDs.txt')
ds <- readRDS('/Users/Jcazet/Desktop/aepAtlasNonDub.rds')
View(tfs)
# Re-read without a header row; the ID column is then named V1.
tfs <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/AEP_Assembly/annotations/tfIDs.txt', header = F)
# First attempt at matching: replace every '_' with '-' in the IDs and keep
# only the IDs present in rownames(ds).
tfs$V1 <- gsub('_','-', tfs$V1)
tfs <- tfs[tfs$V1 %in% rownames(ds),]
tfs <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/AEP_Assembly/annotations/tfIDs.txt', header = F)
tfs$V1 <- gsub('_','-', tfs$V1)
View(tfs)
# NOTE(review): every '_' was already replaced by '-' two statements above,
# so the pattern 'HVAEP1_' can no longer match at this point.
tfs$V1 <- gsub('HVAEP1_','-', tfs$V1)
tfs <- tfs[tfs$V1 %in% rownames(ds),]
# NOTE(review): `Ds` differs in case from `ds` and is not defined in the
# visible code; the next line repeats the call with `ds`.
head(rownames(Ds))
head(rownames(ds))
# Second attempt: reload and remove the 'HVAEP1_' prefix from the IDs instead.
tfs <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/AEP_Assembly/annotations/tfIDs.txt', header = F)
tfs$V1 <- gsub('HVAEP1_','', tfs$V1)
tfs <- tfs[tfs$V1 %in% rownames(ds),]
length(rownames(ds))
# Switch the default assay of ds to 'SCT', then redo the ID matching.
DefaultAssay(ds) <- 'SCT'
tfs <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/AEP_Assembly/annotations/tfIDs.txt', header = F)
tfs$V1 <- gsub('HVAEP1_','', tfs$V1)
tfs <- tfs[tfs$V1 %in% rownames(ds),]
# Annotation table used further down to label the output files.
annots <- read.csv('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/AEP_Assembly/annotations/HVAEP1_annotation.csv')
View(annots)
# Rewrite IDs of the form 'HVAEP1_T<digits>.<digits>' to 'G<digits>'.
# NOTE(review): three attempts at the same substitution follow. The first two
# escape the opening parenthesis, so they define no capture group for the \\1
# backreference; only the last pattern has an unescaped group. The table is
# re-read in between.
annots$H_vulgarisAEP <- gsub('HVAEP1_T\\(d+)[.]\\d+','G\\1',annots$H_vulgarisAEP)
View(annots)
annots <- read.csv('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/AEP_Assembly/annotations/HVAEP1_annotation.csv')
annots$H_vulgarisAEP <- gsub('HVAEP1_T\\(\\d+)[.]\\d+','G\\1',annots$H_vulgarisAEP)
View(annots)
annots$H_vulgarisAEP <- gsub('HVAEP1_T(\\d+)[.]\\d+','G\\1',annots$H_vulgarisAEP)
# Keep only the text before the first ';' in UniprotHit.
annots$UniprotHit <- gsub(';.*','',annots$UniprotHit)
# Start from an empty output directory.
unlink('~/Desktop/plots', recursive=T)
dir.create('~/Desktop/plots', showWarnings = F)
# For each element of tfs: build a FeaturePlot without axes and save it as
# '<id>_<UniprotHit>.png' (6 x 6 at 300 dpi).
# NOTE(review): ggsave is called without a plot argument - presumably it
# relies on ggplot2's last-plot default; confirm. This first attempt also
# runs before library(ggplot2) is attached; the identical call is repeated
# right after attaching it.
lapply(tfs,function(x) {
FeaturePlot(ds, x, order=T) + NoAxes()
ggsave(paste('~/Desktop/plots/',x,'_',annots[annots$H_vulgarisAEP == x,'UniprotHit'],'.png',sep=''), width = 6, height = 6, dpi = 300)
})
library(ggplot2)
lapply(tfs,function(x) {
FeaturePlot(ds, x, order=T) + NoAxes()
ggsave(paste('~/Desktop/plots/',x,'_',annots[annots$H_vulgarisAEP == x,'UniprotHit'],'.png',sep=''), width = 6, height = 6, dpi = 300)
})
# A few more single-feature plots.
FeaturePlot(ds,'G017621')
FeaturePlot(ds,'G017621', order=T)
FeaturePlot(ds,'G026882', order=T)
FeaturePlot(ds,'G017526', order=T)
# Repeat-annotation exploration on amil.reps.gff: group rows by a family
# name parsed from V9, split each family by V1, and summarise the spread of
# V4 within each family. The same pipeline is re-run several times with
# different size thresholds.
# (In standard GFF, V1 is the sequence name and V4 the start coordinate -
# TODO confirm for this file.)
repBed <- read.delim('/Users/Jcazet/Desktop/amil.reps.gff', header = F)
View(repBed)
# Re-read, skipping the first two lines of the file.
repBed <- read.delim('/Users/Jcazet/Desktop/amil.reps.gff', header = F, skip = 2)
View(repBed)
# Family name from V9: the first attempt removes only 'Target='; the second
# also removes everything from the first space onward.
repBed$fam <- gsub('Target=','',repBed$V9)
repBed$fam <- gsub('Target=| .*','',repBed$V9)
# NOTE(review): split() is called without a grouping argument here; the next
# line supplies it.
repBed.fList <- split(repBed$fam)
repBed.fList <- split(repBed,repBed$fam)
View(repBed.fList)
# [-1] discards the first group returned by split().
repBed.fList <- split(repBed,repBed$fam)[-1]
# length() of a data frame counts its columns, so the first filter does not
# test group size; the following lines switch to nrow.
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 3]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 20]
# Reload, and this time also drop rows whose family name starts with '('.
repBed <- read.delim('/Users/Jcazet/Desktop/amil.reps.gff', header = F, skip = 2)
repBed$fam <- gsub('Target=| .*','',repBed$V9)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
repBed.fList <- split(repBed,repBed$fam)[-1]
View(repBed.fList)
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 20]
# Second level of grouping: within each family, split the rows by V1.
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
# Rebuild with a stricter family-size threshold (> 100 rows).
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
# Per family: sd of V4 inside each V1 group, then the mean of those sds.
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
View(repBed.sd)
repBed.sd <- sapply(repBed.sd, mean)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd)
View(repBed.sd)
?mean
# Same summary again, now ignoring NA sds when averaging (na.rm = T).
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd)
repBed.fList <- split(repBed,repBed$fam)[-1]
# Reload and repeat, additionally dropping V1 groups with 10 rows or fewer.
repBed <- read.delim('/Users/Jcazet/Desktop/amil.reps.gff', header = F, skip = 2)
repBed$fam <- gsub('Target=| .*','',repBed$V9)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
View(repBed.fList)
# Run: families > 100 rows, V1 groups > 10 rows, more than 2 V1 groups left.
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 2]
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd)
View(repBed.sd)
# Run: families > 400 rows, V1 groups > 100 rows, more than 3 V1 groups left.
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 400]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 100])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd)
# Run: families > 200 rows, V1 groups > 10 rows, more than 3 V1 groups left.
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 200]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd)
library(Seurat)
library(ggplot2)
# Same repeat-family summary, now on jaNemVect1.1.chroms.fa.out.gff.
# Thresholds for this run: families > 200 rows, V1 groups > 10 rows, more
# than 3 V1 groups left.
repBed <- read.delim('/Users/Jcazet/Desktop/jaNemVect1.1.chroms.fa.out.gff', header = F, skip = 2)
repBed$fam <- gsub('Target=| .*','',repBed$V9)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 200]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd)
# Reload and inspect the parsed table.
repBed <- read.delim('/Users/Jcazet/Desktop/jaNemVect1.1.chroms.fa.out.gff', header = F, skip = 2)
repBed$fam <- gsub('Target=| .*','',repBed$V9)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
View(repBed)
# Re-read skipping three leading lines instead of two.
repBed <- read.delim('/Users/Jcazet/Desktop/jaNemVect1.1.chroms.fa.out.gff', header = F, skip = 3)
repBed$fam <- gsub('Target=| .*','',repBed$V9)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
repBed$fam <- gsub('Target=| .*','',repBed$V9)
# Family parsing changes here: remove the literal 'Target Motif:' from V9
# first, then cut at the first space.
repBed <- read.delim('/Users/Jcazet/Desktop/jaNemVect1.1.chroms.fa.out.gff', header = F, skip = 3)
repBed$fam <- gsub('Target Motif:','',repBed$V9)
repBed$fam <- gsub(' .*','',repBed$fam)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 200]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd)
# Same run with the family-size threshold lowered to > 100 rows.
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd)
View(repBed.sd)
# Load the .fai index; it is read with a header first, then re-read with
# header=F so that its columns are V1, V2, ...
chr.length <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/Nvec200/jaNemVect1.1.chroms.fa.fai')
View(chr.length)
chr.length <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/Nvec200/jaNemVect1.1.chroms.fa.fai',header=F)
repBed <- read.delim('/Users/Jcazet/Desktop/jaNemVect1.1.chroms.fa.out.gff', header = F, skip = 3)
chr.length <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/Nvec200/jaNemVect1.1.chroms.fa.fai',header=F)
repBed$fam <- gsub('Target Motif:','',repBed$V9)
repBed$fam <- gsub(' .*','',repBed$fam)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
# mapvalues is not defined in this file; presumably plyr's - confirm.
library(plyr)
# Normalise V4 by sequence length: look up each row's V1 in chr.length$V1 and
# take the matching chr.length$V2, then divide.
# NOTE(review): the first attempt stores the mapped values without numeric
# conversion; the second wraps the lookup in as.numeric(). V4 is divided in
# both attempts without a reload in between - repBed is re-read from disk
# right after View(repBed).
repBed$chrLength <- mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F)
repBed$V4 <- repBed$V4/repBed$chrLength
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
View(repBed)
# Clean run from freshly loaded data: parse families, normalise V4 once, then
# compute both the mean per-group sd and the mean per-group mean of V4 for
# every family.
repBed <- read.delim('/Users/Jcazet/Desktop/jaNemVect1.1.chroms.fa.out.gff', header = F, skip = 3)
chr.length <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/Nvec200/jaNemVect1.1.chroms.fa.fai',header=F)
repBed$fam <- gsub('Target Motif:','',repBed$V9)
repBed$fam <- gsub(' .*','',repBed$fam)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.mn <- sapply(repBed.mn, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
View(repBed.sd)
# Reload both inputs; nothing further is computed from them in this section.
repBed <- read.delim('/Users/Jcazet/Desktop/jaNemVect1.1.chroms.fa.out.gff', header = F, skip = 3)
chr.length <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/Nvec200/jaNemVect1.1.chroms.fa.fai',header=F)
library(Seurat)
library(ggplot2)
library(plyr)
# Length-normalised repeat-family summary for amil.reps.gff.
repBed <- read.delim('/Users/Jcazet/Desktop/amil.reps.gff', header = F, skip = 3)
# NOTE(review): this first read of the .fai omits header=F, so its columns
# are not named V1/V2 as the lookup below expects; it is re-read with
# header=F after View(chr.length).
chr.length <- read.delim("/Volumes/GoogleDrive/My Drive/Juliano_lab/References/amil/amil.chroms.fa.fai")
repBed$fam <- gsub('Target=| .*','',repBed$V9)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
View(chr.length)
# Second attempt from freshly loaded data.
repBed <- read.delim('/Users/Jcazet/Desktop/amil.reps.gff', header = F, skip = 3)
chr.length <- read.delim("/Volumes/GoogleDrive/My Drive/Juliano_lab/References/amil/amil.chroms.fa.fai", header=F)
# Family name: remove 'Target=' and anything from the first space onward.
repBed$fam <- gsub('Target=| .*','',repBed$V9)
repBed$fam <- gsub(' .*','',repBed$fam)
# Drop rows whose family name starts with '('.
repBed <- repBed[!grepl('^\\(',repBed$fam),]
# Divide V4 by the chr.length$V2 value matched on V1.
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
# Thresholds: families > 100 rows, V1 groups > 10 rows, more than 3 groups.
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
View(chr.length)
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
# Per family: mean over V1 groups of the within-group sd and mean of V4.
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.mn <- sapply(repBed.mn, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
View(repBed.sd)
library(Seurat)
library(ggplot2)
library(plyr)
# Length-normalised repeat-family summary for Amil_v2.1.chroms.fa.out.gff,
# with sequence lengths taken from amil2.chroms.fa.fai.
repBed <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/amil/Amil_v2.1.chroms.fa.out.gff', header = F, skip = 3)
View(repBed)
chr.length <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/amil/amil2.chroms.fa.fai',header=F)
# Family name: V9 with 'Target Motif:' removed, cut at the first space.
repBed$fam <- gsub('Target Motif:','',repBed$V9)
repBed$fam <- gsub(' .*','',repBed$fam)
# Drop rows whose family name starts with '('.
repBed <- repBed[!grepl('^\\(',repBed$fam),]
# Divide V4 by the chr.length$V2 value matched on V1.
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
# Thresholds: families > 100 rows, V1 groups > 10 rows, more than 3 groups.
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
# Per family: mean over V1 groups of the within-group sd and mean of V4.
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.mn <- sapply(repBed.mn, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
View(repBed.sd)
library(Seurat)
library(ggplot2)
library(plyr)
# Length-normalised repeat-family summary for resc_genome.chroms.fa.out.gff.
# NOTE(review): the first read points at the amil/ directory; the next line
# reads the same file name from resc/ instead.
repBed <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/amil/resc_genome.chroms.fa.out.gff', header = F, skip = 3)
repBed <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/resc/resc_genome.chroms.fa.out.gff', header = F, skip = 3)
chr.length <- read.delim('/Volumes/GoogleDrive/My Drive/Juliano_lab/References/resc/resc_genome.chroms.fa.fai',header=F)
View(repBed)
# Family name: V9 with 'Target Motif:' removed, cut at the first space.
repBed$fam <- gsub('Target Motif:','',repBed$V9)
repBed$fam <- gsub(' .*','',repBed$fam)
# Drop rows whose family name starts with '('.
repBed <- repBed[!grepl('^\\(',repBed$fam),]
# Divide V4 by the chr.length$V2 value matched on V1.
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
# Thresholds: families > 100 rows, V1 groups > 10 rows, more than 3 groups.
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 10])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 3]
# Per family: mean over V1 groups of the within-group sd and mean of V4.
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.mn <- sapply(repBed.mn, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
View(repBed.sd)
# Re-run with stricter thresholds: V1 groups > 20 rows, more than 5 groups.
# NOTE(review): repBed is not re-read before this re-run, so V4 - already
# divided by chrLength above - is divided by chrLength a second time here.
# Confirm whether the statistics below were meant to use doubly-normalised
# values.
repBed$fam <- gsub(' .*','',repBed$fam)
repBed <- repBed[!grepl('^\\(',repBed$fam),]
repBed$chrLength <- as.numeric(mapvalues(repBed$V1, from = chr.length$V1, to = chr.length$V2, warn_missing = F))
repBed$V4 <- repBed$V4/repBed$chrLength
repBed.fList <- split(repBed,repBed$fam)[-1]
repBed.fList <- repBed.fList[sapply(repBed.fList,nrow) > 100]
repBed.fList <- lapply(repBed.fList, function(x) split(x,x$V1))
repBed.fList <- lapply(repBed.fList, function(x) x[sapply(x, nrow) > 20])
repBed.fList <- repBed.fList[sapply(repBed.fList,length) > 5]
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- sapply(repBed.sd, mean, na.rm = T)
repBed.mn <- sapply(repBed.mn, mean, na.rm = T)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
View(repBed.sd)
library(plyr)
library(rstudioapi)
# Rank repeat families for one species directory by how tightly their
# length-normalised V4 values cluster within each sequence, then pass the
# top-ranked families to pullCentCand.sh.
#
# The working directory becomes <directory of the active RStudio document>/<spec>.
# That directory must hold exactly one '*.out.gff' file and one
# '*.chroms.fa.fai' file; pullCentCand.sh is expected one level above it.
setwd(dirname(getActiveDocumentContext()$path))
spec <- 'dili'
setwd(spec)

# list.files() interprets `pattern` as a regular expression, not a glob, so
# the suffixes are spelled out with escaped dots and an end anchor.
repBed.path <- list.files(pattern = '\\.out\\.gff$')
chr.length.path <- list.files(pattern = '\\.chroms\\.fa\\.fai$')
# Fail early with a clear message instead of letting read.delim() choke on
# zero or several matches.
stopifnot(length(repBed.path) == 1, length(chr.length.path) == 1)

repBed <- read.delim(repBed.path, header = FALSE, skip = 3)
chr.length <- read.delim(chr.length.path, header = FALSE)

# Family name: V9 with 'Target Motif:' removed, cut at the first space.
# Rows whose family name starts with '(' are dropped.
repBed$fam <- gsub('Target Motif:', '', repBed$V9)
repBed$fam <- gsub(' .*', '', repBed$fam)
repBed <- repBed[!grepl('^\\(', repBed$fam), ]

# Divide V4 by the length (chr.length$V2) of the sequence named in V1.
# match() returns NA for sequences missing from the index and works whether
# V1 was read as character or as factor.
repBed$chrLength <- as.numeric(chr.length$V2[match(repBed$V1, chr.length$V1)])
repBed$V4 <- repBed$V4 / repBed$chrLength

# Nested grouping: family -> sequence (V1) -> rows.
# [-1] discards the first group returned by split()
# (presumably the empty family name - TODO confirm).
repBed.fList <- split(repBed, repBed$fam)[-1]
# Keep families with more than 100 rows overall ...
repBed.fList <- repBed.fList[vapply(repBed.fList, nrow, integer(1)) > 100]
repBed.fList <- lapply(repBed.fList, function(x) split(x, x$V1))
# ... within them only sequences carrying more than 20 rows ...
repBed.fList <- lapply(repBed.fList, function(x) x[vapply(x, nrow, integer(1)) > 20])
# ... and only families still present on more than 5 sequences.
repBed.fList <- repBed.fList[vapply(repBed.fList, length, integer(1)) > 5]

# Per family: average over sequences of the within-sequence sd and mean of V4.
repBed.sd <- lapply(repBed.fList, function(x) vapply(x, function(y) sd(y$V4), numeric(1)))
repBed.mn <- lapply(repBed.fList, function(x) vapply(x, function(y) mean(y$V4), numeric(1)))
repBed.sd <- vapply(repBed.sd, mean, numeric(1), na.rm = TRUE)
repBed.mn <- vapply(repBed.mn, mean, numeric(1), na.rm = TRUE)
repBed.sd <- data.frame(name = names(repBed.fList), sd = repBed.sd, mean = repBed.mn)
# Smallest sd first.
repBed.sd <- repBed.sd[order(repBed.sd$sd), ]
View(repBed.sd)

# Hand the (up to) ten lowest-sd families to the helper script. Arguments:
# family name, gff file name, rank. Every path and name is shell-quoted so
# that spaces or metacharacters cannot split or alter the command, and the
# loop stops at the number of families actually available.
for (i in seq_len(min(10, nrow(repBed.sd)))) {
  comm <- paste(
    './../pullCentCand.sh',
    shQuote(as.character(repBed.sd[i, 1])),
    shQuote(repBed.path),
    i
  )
  comm <- paste0('cd ', shQuote(getwd()), ' && ', comm)
  status <- system(comm)
  if (status != 0) {
    warning(
      'pullCentCand.sh exited with status ', status,
      ' for family ', as.character(repBed.sd[i, 1]),
      call. = FALSE
    )
  }
}
