Differential Expression analysis
define functions
# Default absolute logFC cutoff shared by the helper functions in this file.
fc_threshold <- 1

#' Summarise one differential-expression test.
#'
#' Calls `decideTests()` and `topTags()` on `qlf`, counts the genes called
#' up / down / unchanged, and extracts the rows of the top-tags table that
#' pass both the FDR and the absolute logFC cutoffs.
#'
#' @param contrast Label of the comparison; stored unchanged in the result.
#' @param qlf Test result object handed to `decideTests()` and `topTags()`
#'   (presumably an edgeR quasi-likelihood test result -- confirm with caller).
#' @param p_value Cutoff passed to `decideTests(p.value = )` and also used as
#'   the FDR cutoff for `thresholded_results`, so both views of the data agree.
#' @param n_top Number of rows requested from `topTags()`.
#' @param lfc_cutoff Minimum absolute logFC for `thresholded_results`.
#'   Defaults to the file-level `fc_threshold`.
#' @return A list with elements `contrast`, `qlf`, `is_de`, `summary_de`,
#'   `top_tags`, `upregulated`, `downregulated`, `no_change` and
#'   `thresholded_results`.
get_results <- function(contrast, qlf, p_value = 0.05, n_top = Inf,
                        lfc_cutoff = fc_threshold) {
  # Per-gene calls: 1 = up, -1 = down, 0 = not significant.
  is_de <- decideTests(qlf, p.value = p_value)
  top_tags <- topTags(qlf, n = n_top)

  # Apply the same significance cutoff the caller asked for, plus the
  # fold-change cutoff, to the ranked table.
  tag_table <- top_tags$table
  passes <- tag_table$FDR <= p_value & abs(tag_table$logFC) >= lfc_cutoff

  list(
    contrast = contrast,
    qlf = qlf,
    is_de = is_de,
    summary_de = summary(is_de),
    top_tags = top_tags,
    upregulated = sum(is_de == 1),
    downregulated = sum(is_de == -1),
    no_change = sum(is_de == 0),
    thresholded_results = tag_table[passes, , drop = FALSE]
  )
}
#' Bar chart of DE gene counts for several comparisons.
#'
#' Builds one row per (comparison, category) from the `upregulated`,
#' `downregulated`, `no_change` and `contrast` elements of each result,
#' then prints a dodged bar chart with the count written above each bar.
#'
#' @param results_list List of result lists, each providing the four
#'   elements named above.
#' @return The value of `print()` on the plot (the chart is drawn as a side
#'   effect).
plot_all_results <- function(results_list) {
  # Long-format counts for a single comparison.
  counts_for <- function(res) {
    data.frame(
      Category = c("Upregulated", "Downregulated", "No Change"),
      Count = c(res$upregulated, res$downregulated, res$no_change),
      Comparison = res$contrast
    )
  }
  per_comparison <- lapply(results_list, counts_for)
  counts_long <- do.call(rbind, per_comparison)

  fill_colours <- c(
    "Upregulated" = "green",
    "Downregulated" = "red",
    "No Change" = "gray"
  )
  # Numbers sit just above the bars, aligned with the dodged positions.
  count_labels <- geom_text(
    aes(label = Count),
    position = position_dodge(width = 0.9),
    vjust = -0.5
  )

  chart <- ggplot(counts_long, aes(x = Comparison, y = Count, fill = Category)) +
    geom_bar(stat = "identity", position = "dodge") +
    count_labels +
    labs(
      title = "Differential Expression Across Comparisons",
      x = "Comparison",
      y = "Number of Genes"
    ) +
    theme_minimal() +
    scale_fill_manual(values = fill_colours)
  print(chart)
}
#' Bar chart of thresholded DE gene counts for several comparisons.
#'
#' For each result, counts the rows of `thresholded_results` that pass the
#' FDR cutoff and have logFC at or above `lfc_cutoff` (up) or at or below
#' `-lfc_cutoff` (down), plus their total, and prints a dodged bar chart.
#' The plot title is built from the cutoffs actually applied.
#'
#' @param results_list List of result lists, each with a `contrast` label and
#'   a `thresholded_results` data frame holding `logFC` and `FDR` columns.
#' @param fdr_cutoff Maximum FDR for a gene to be counted.
#' @param lfc_cutoff Minimum absolute logFC for a gene to be counted.
#'   Defaults to the file-level `fc_threshold`.
#' @return Data frame with columns `Category`, `Count` and `Comparison`
#'   (three rows per comparison). The chart is printed as a side effect.
plot_thresholded_results <- function(results_list, fdr_cutoff = 0.05,
                                     lfc_cutoff = fc_threshold) {
  count_one <- function(result) {
    tab <- result$thresholded_results
    significant <- tab$FDR <= fdr_cutoff
    # sum(..., na.rm = TRUE) so rows with missing FDR/logFC are not counted
    # as genes.
    num_upregulated <- sum(significant & tab$logFC >= lfc_cutoff, na.rm = TRUE)
    num_downregulated <- sum(significant & tab$logFC <= -lfc_cutoff,
                             na.rm = TRUE)
    data.frame(
      Category = c("Upregulated", "Downregulated", "ALL"),
      Count = c(num_upregulated, num_downregulated,
                num_upregulated + num_downregulated),
      Comparison = result$contrast
    )
  }
  plot_data <- do.call(rbind, lapply(results_list, count_one))

  # Title reports the real filter: both conditions must hold.
  plot_title <- sprintf(
    "Differential Expression Across Comparisons (padj <= %s and |logFC| >= %s)",
    format(fdr_cutoff), format(lfc_cutoff)
  )

  plot <- ggplot(plot_data, aes(x = Comparison, y = Count, fill = Category)) +
    geom_bar(stat = "identity", position = "dodge") +
    # Adds numbers on top of bars.
    geom_text(aes(label = Count),
              position = position_dodge(width = 0.9),
              vjust = -0.5) +
    labs(title = plot_title,
         x = "Comparison",
         y = "Number of Genes") +
    theme_minimal() +
    scale_fill_manual(values = c("Upregulated" = "green",
                                 "Downregulated" = "red",
                                 "ALL" = "gray"))
  print(plot)
  plot_data
}
#' Centred rolling mean with shrinking windows at the edges.
#'
#' For position `i` the mean is taken over
#' `x[max(1, i - h):min(n, i + h)]` with `h = k %/% 2`, ignoring `NA`s, so
#' interior positions use `2 * h + 1` values and positions near either end
#' use however many values are available.
#'
#' Works for inputs shorter than `k` and for odd `k` (the half-window is
#' floored so indices stay whole numbers).
#'
#' @param x Numeric vector.
#' @param k Nominal window width; a single non-negative number.
#' @return Numeric vector of the same length as `x`; an element is `NaN`
#'   when every value in its window is `NA`.
custom_rollmean <- function(x, k = 40) {
  stopifnot(is.numeric(k), length(k) == 1L, !is.na(k), k >= 0)
  n <- length(x)
  half_window <- k %/% 2

  # seq_len() keeps the empty-input case safe; clamping the bounds replaces
  # the three separate start / middle / end loops.
  vapply(seq_len(n), function(i) {
    lo <- max(1, i - half_window)
    hi <- min(n, i + half_window)
    mean(x[lo:hi], na.rm = TRUE)
  }, numeric(1))
}
# Connect to the Ensembl "genes" mart, human dataset.
ensembl <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl")

# Regions of interest on chromosome 8.
chromosome_of_interest <- "8"
deletion_start <- 0
deletion_end <- 7247573
duplication_start <- 11828865
duplication_end <- 40361770

# Annotation columns requested for every gene.
attributes <- c(
  "ensembl_gene_id", "external_gene_name",
  "chromosome_name", "start_position", "end_position", "strand",
  "gene_biotype", "description"
)

# ---- Deletion region: query, keep protein-coding genes, count, label ----
filters_del <- c("chromosome_name", "start", "end")
values_del <- list(chromosome_of_interest, deletion_start, deletion_end)
genes_in_deletion <- getBM(
  attributes = attributes, filters = filters_del,
  values = values_del, mart = ensembl
)
is_coding_del <- genes_in_deletion$gene_biotype == "protein_coding"
protein_coding_genes_del <- genes_in_deletion[is_coding_del, ]
number_of_genes_del <- length(unique(protein_coding_genes_del$ensembl_gene_id))
cat("Number of protein-coding genes in the deletion region:", number_of_genes_del, "\n")
## Number of protein-coding genes in the deletion region: 23
protein_coding_genes_del[["region"]] <- "Deletion"

# ---- Duplication region: same steps with the duplication coordinates ----
filters_dup <- c("chromosome_name", "start", "end")
values_dup <- list(chromosome_of_interest, duplication_start, duplication_end)
genes_in_duplication <- getBM(
  attributes = attributes, filters = filters_dup,
  values = values_dup, mart = ensembl
)
is_coding_dup <- genes_in_duplication$gene_biotype == "protein_coding"
protein_coding_genes_dup <- genes_in_duplication[is_coding_dup, ]
number_of_genes_dup <- length(unique(protein_coding_genes_dup$ensembl_gene_id))
cat("Number of protein-coding genes in the duplication region:", number_of_genes_dup, "\n")
## Number of protein-coding genes in the duplication region: 172
protein_coding_genes_dup[["region"]] <- "Duplication"

# Stack both labelled tables (deletion rows first) and print the result.
combined_genes <- rbind(protein_coding_genes_del, protein_coding_genes_dup)
combined_genes
| 6 |
ENSG00000176269 |
OR4F21 |
8 |
166086 |
167024 |
-1 |
protein_coding |
olfactory receptor family 4 subfamily F member 21
[Source:HGNC Symbol;Acc:HGNC:19583] |
Deletion |
| 10 |
ENSG00000172748 |
ZNF596 |
8 |
232137 |
264703 |
1 |
protein_coding |
zinc finger protein 596 [Source:HGNC
Symbol;Acc:HGNC:27268] |
Deletion |
| 21 |
ENSG00000147364 |
FBXO25 |
8 |
406428 |
477967 |
1 |
protein_coding |
F-box protein 25 [Source:HGNC
Symbol;Acc:HGNC:13596] |
Deletion |
| 24 |
ENSG00000180190 |
TDRP |
8 |
489803 |
545781 |
-1 |
protein_coding |
testis development related protein [Source:HGNC
Symbol;Acc:HGNC:26951] |
Deletion |
| 26 |
ENSG00000104714 |
ERICH1 |
8 |
614746 |
738106 |
-1 |
protein_coding |
glutamate rich 1 [Source:HGNC
Symbol;Acc:HGNC:27234] |
Deletion |
| 31 |
ENSG00000198010 |
DLGAP2 |
8 |
737628 |
1708476 |
1 |
protein_coding |
DLG associated protein 2 [Source:HGNC
Symbol;Acc:HGNC:2906] |
Deletion |
| 48 |
ENSG00000182372 |
CLN8 |
8 |
1755778 |
1801711 |
1 |
protein_coding |
CLN8 transmembrane ER and ERGIC protein [Source:HGNC
Symbol;Acc:HGNC:2079] |
Deletion |
| 50 |
ENSG00000283239 |
KBTBD11-OT1 |
8 |
1763888 |
1958627 |
1 |
protein_coding |
KBTBD11 overlapping transcript 1 [Source:NCBI gene
(formerly Entrezgene);Acc:104266957] |
Deletion |
| 54 |
ENSG00000104728 |
ARHGEF10 |
8 |
1823926 |
1958641 |
1 |
protein_coding |
Rho guanine nucleotide exchange factor 10 [Source:HGNC
Symbol;Acc:HGNC:14103] |
Deletion |
| 60 |
ENSG00000176595 |
KBTBD11 |
8 |
1973677 |
2006936 |
1 |
protein_coding |
kelch repeat and BTB domain containing 11 [Source:HGNC
Symbol;Acc:HGNC:29104] |
Deletion |
| 64 |
ENSG00000036448 |
MYOM2 |
8 |
2045046 |
2165552 |
1 |
protein_coding |
myomesin 2 [Source:HGNC Symbol;Acc:HGNC:7614] |
Deletion |
| 85 |
ENSG00000183117 |
CSMD1 |
8 |
2935353 |
4994972 |
-1 |
protein_coding |
CUB and Sushi multiple domains 1 [Source:HGNC
Symbol;Acc:HGNC:14026] |
Deletion |
| 121 |
ENSG00000147316 |
MCPH1 |
8 |
6406592 |
6648508 |
1 |
protein_coding |
microcephalin 1 [Source:HGNC Symbol;Acc:HGNC:6954] |
Deletion |
| 123 |
ENSG00000091879 |
ANGPT2 |
8 |
6499632 |
6563409 |
-1 |
protein_coding |
angiopoietin 2 [Source:HGNC Symbol;Acc:HGNC:485] |
Deletion |
| 130 |
ENSG00000155189 |
AGPAT5 |
8 |
6708642 |
6761503 |
1 |
protein_coding |
1-acylglycerol-3-phosphate O-acyltransferase 5
[Source:HGNC Symbol;Acc:HGNC:20886] |
Deletion |
| 137 |
ENSG00000275591 |
XKR5 |
8 |
6808517 |
6835524 |
-1 |
protein_coding |
XK related 5 [Source:HGNC Symbol;Acc:HGNC:20782] |
Deletion |
| 141 |
ENSG00000164825 |
DEFB1 |
8 |
6870592 |
6877936 |
-1 |
protein_coding |
defensin beta 1 [Source:HGNC Symbol;Acc:HGNC:2766] |
Deletion |
| 144 |
ENSG00000164822 |
DEFA6 |
8 |
6924697 |
6926076 |
-1 |
protein_coding |
defensin alpha 6 [Source:HGNC
Symbol;Acc:HGNC:2765] |
Deletion |
| 147 |
ENSG00000164821 |
DEFA4 |
8 |
6935820 |
6938306 |
-1 |
protein_coding |
defensin alpha 4 [Source:HGNC
Symbol;Acc:HGNC:2763] |
Deletion |
| 152 |
ENSG00000206047 |
DEFA1 |
8 |
6977649 |
6980092 |
-1 |
protein_coding |
defensin alpha 1 [Source:HGNC
Symbol;Acc:HGNC:2761] |
Deletion |
| 155 |
ENSG00000240247 |
DEFA1B |
8 |
6996766 |
6999198 |
-1 |
protein_coding |
defensin alpha 1B [Source:HGNC
Symbol;Acc:HGNC:33596] |
Deletion |
| 158 |
ENSG00000239839 |
DEFA3 |
8 |
7015869 |
7018297 |
-1 |
protein_coding |
defensin alpha 3 [Source:HGNC
Symbol;Acc:HGNC:2762] |
Deletion |
| 161 |
ENSG00000164816 |
DEFA5 |
8 |
7055304 |
7056739 |
-1 |
protein_coding |
defensin alpha 5 [Source:HGNC
Symbol;Acc:HGNC:2764] |
Deletion |
| 1 |
ENSG00000079459 |
FDFT1 |
8 |
11795573 |
11839395 |
1 |
protein_coding |
farnesyl-diphosphate farnesyltransferase 1 [Source:HGNC
Symbol;Acc:HGNC:3629] |
Duplication |
| 2 |
ENSG00000164733 |
CTSB |
8 |
11842524 |
11869533 |
-1 |
protein_coding |
cathepsin B [Source:HGNC Symbol;Acc:HGNC:2527] |
Duplication |
| 101 |
ENSG00000205884 |
DEFB136 |
8 |
11973937 |
11974599 |
-1 |
protein_coding |
defensin beta 136 [Source:HGNC
Symbol;Acc:HGNC:34433] |
Duplication |
| 11 |
ENSG00000205883 |
DEFB135 |
8 |
11982256 |
11984590 |
1 |
protein_coding |
defensin beta 135 [Source:HGNC
Symbol;Acc:HGNC:32400] |
Duplication |
| 12 |
ENSG00000205882 |
DEFB134 |
8 |
11993174 |
12000752 |
-1 |
protein_coding |
defensin beta 134 [Source:HGNC
Symbol;Acc:HGNC:32399] |
Duplication |
| 20 |
ENSG00000233050 |
DEFB130B |
8 |
12064389 |
12071747 |
-1 |
protein_coding |
defensin beta 130B [Source:HGNC
Symbol;Acc:HGNC:39814] |
Duplication |
| 22 |
ENSG00000215343 |
ZNF705D |
8 |
12089338 |
12115516 |
1 |
protein_coding |
zinc finger protein 705D [Source:HGNC
Symbol;Acc:HGNC:33202] |
Duplication |
| 262 |
ENSG00000226430 |
USP17L7 |
8 |
12132417 |
12134099 |
-1 |
protein_coding |
ubiquitin specific peptidase 17 like family member 7
[Source:HGNC Symbol;Acc:HGNC:37180] |
Duplication |
| 27 |
ENSG00000223443 |
USP17L2 |
8 |
12136435 |
12138849 |
-1 |
protein_coding |
ubiquitin specific peptidase 17 like family member 2
[Source:HGNC Symbol;Acc:HGNC:34434] |
Duplication |
| 28 |
ENSG00000254866 |
DEFB109D |
8 |
12150888 |
12158033 |
-1 |
protein_coding |
defensin beta 109D (pseudogene) [Source:HGNC
Symbol;Acc:HGNC:30838] |
Duplication |
| 311 |
ENSG00000186523 |
FAM86B1 |
8 |
12182096 |
12194133 |
-1 |
protein_coding |
family with sequence similarity 86 member B1
[Source:HGNC Symbol;Acc:HGNC:28268] |
Duplication |
| 35 |
ENSG00000232948 |
DEFB130A |
8 |
12310962 |
12318316 |
-1 |
protein_coding |
defensin beta 130A [Source:HGNC
Symbol;Acc:HGNC:18107] |
Duplication |
| 47 |
ENSG00000145002 |
FAM86B2 |
8 |
12424411 |
12436406 |
-1 |
protein_coding |
family with sequence similarity 86 member B2
[Source:HGNC Symbol;Acc:HGNC:32222] |
Duplication |
| 65 |
ENSG00000154359 |
LONRF1 |
8 |
12721906 |
12756073 |
-1 |
protein_coding |
LON peptidase N-terminal domain and ring finger 1
[Source:HGNC Symbol;Acc:HGNC:26302] |
Duplication |
| 73 |
ENSG00000250305 |
TRMT9B |
8 |
12945642 |
13031503 |
1 |
protein_coding |
tRNA methyltransferase 9B (putative) [Source:HGNC
Symbol;Acc:HGNC:26725] |
Duplication |
| 78 |
ENSG00000164741 |
DLC1 |
8 |
13083361 |
13604610 |
-1 |
protein_coding |
DLC1 Rho GTPase activating protein [Source:HGNC
Symbol;Acc:HGNC:2897] |
Duplication |
| 84 |
ENSG00000164743 |
C8orf48 |
8 |
13566869 |
13568288 |
1 |
protein_coding |
chromosome 8 open reading frame 48 [Source:HGNC
Symbol;Acc:HGNC:26345] |
Duplication |
| 95 |
ENSG00000185053 |
SGCZ |
8 |
14084845 |
15238431 |
-1 |
protein_coding |
sarcoglycan zeta [Source:HGNC
Symbol;Acc:HGNC:14075] |
Duplication |
| 110 |
ENSG00000104723 |
TUSC3 |
8 |
15417215 |
15766649 |
1 |
protein_coding |
tumor suppressor candidate 3 [Source:HGNC
Symbol;Acc:HGNC:30242] |
Duplication |
| 117 |
ENSG00000038945 |
MSR1 |
8 |
16107878 |
16567490 |
-1 |
protein_coding |
macrophage scavenger receptor 1 [Source:HGNC
Symbol;Acc:HGNC:7376] |
Duplication |
| 125 |
ENSG00000078579 |
FGF20 |
8 |
16992181 |
17002345 |
-1 |
protein_coding |
fibroblast growth factor 20 [Source:HGNC
Symbol;Acc:HGNC:3677] |
Duplication |
| 127 |
ENSG00000155970 |
MICU3 |
8 |
17027238 |
17125880 |
1 |
protein_coding |
mitochondrial calcium uptake family member 3
[Source:HGNC Symbol;Acc:HGNC:27820] |
Duplication |
| 131 |
ENSG00000104219 |
ZDHHC2 |
8 |
17156482 |
17224799 |
1 |
protein_coding |
zinc finger DHHC-type palmitoyltransferase 2
[Source:HGNC Symbol;Acc:HGNC:18469] |
Duplication |
| 132 |
ENSG00000198791 |
CNOT7 |
8 |
17224966 |
17246878 |
-1 |
protein_coding |
CCR4-NOT transcription complex subunit 7 [Source:HGNC
Symbol;Acc:HGNC:14101] |
Duplication |
| 134 |
ENSG00000155975 |
VPS37A |
8 |
17246931 |
17302427 |
1 |
protein_coding |
VPS37A subunit of ESCRT-I [Source:HGNC
Symbol;Acc:HGNC:24928] |
Duplication |
| 136 |
ENSG00000003987 |
MTMR7 |
8 |
17296794 |
17413528 |
-1 |
protein_coding |
myotubularin related protein 7 [Source:HGNC
Symbol;Acc:HGNC:7454] |
Duplication |
| 1411 |
ENSG00000003989 |
SLC7A2 |
8 |
17497088 |
17570573 |
1 |
protein_coding |
solute carrier family 7 member 2 [Source:HGNC
Symbol;Acc:HGNC:11060] |
Duplication |
| 146 |
ENSG00000104213 |
PDGFRL |
8 |
17576433 |
17644071 |
1 |
protein_coding |
platelet derived growth factor receptor like
[Source:HGNC Symbol;Acc:HGNC:8805] |
Duplication |
| 149 |
ENSG00000129422 |
MTUS1 |
8 |
17643795 |
17801094 |
-1 |
protein_coding |
microtubule associated scaffold protein 1 [Source:HGNC
Symbol;Acc:HGNC:29789] |
Duplication |
| 156 |
ENSG00000104760 |
FGL1 |
8 |
17864380 |
17910365 |
-1 |
protein_coding |
fibrinogen like 1 [Source:HGNC
Symbol;Acc:HGNC:3695] |
Duplication |
| 159 |
ENSG00000078674 |
PCM1 |
8 |
17922842 |
18029948 |
1 |
protein_coding |
pericentriolar material 1 [Source:HGNC
Symbol;Acc:HGNC:8727] |
Duplication |
| 1611 |
ENSG00000104763 |
ASAH1 |
8 |
18055992 |
18084998 |
-1 |
protein_coding |
N-acylsphingosine amidohydrolase 1 [Source:HGNC
Symbol;Acc:HGNC:735] |
Duplication |
| 164 |
ENSG00000171428 |
NAT1 |
8 |
18170477 |
18223689 |
1 |
protein_coding |
N-acetyltransferase 1 [Source:HGNC
Symbol;Acc:HGNC:7645] |
Duplication |
| 171 |
ENSG00000156006 |
NAT2 |
8 |
18391282 |
18401218 |
1 |
protein_coding |
N-acetyltransferase 2 [Source:HGNC
Symbol;Acc:HGNC:7646] |
Duplication |
| 172 |
ENSG00000156011 |
PSD3 |
8 |
18527303 |
19084730 |
-1 |
protein_coding |
pleckstrin and Sec7 domain containing 3 [Source:HGNC
Symbol;Acc:HGNC:19093] |
Duplication |
| 190 |
ENSG00000104611 |
SH2D4A |
8 |
19313693 |
19396218 |
1 |
protein_coding |
SH2 domain containing 4A [Source:HGNC
Symbol;Acc:HGNC:26102] |
Duplication |
| 192 |
ENSG00000147408 |
CSGALNACT1 |
8 |
19404161 |
19758029 |
-1 |
protein_coding |
chondroitin sulfate N-acetylgalactosaminyltransferase 1
[Source:HGNC Symbol;Acc:HGNC:24290] |
Duplication |
| 198 |
ENSG00000104613 |
INTS10 |
8 |
19817391 |
19852083 |
1 |
protein_coding |
integrator complex subunit 10 [Source:HGNC
Symbol;Acc:HGNC:25548] |
Duplication |
| 199 |
ENSG00000175445 |
LPL |
8 |
19901717 |
19967259 |
1 |
protein_coding |
lipoprotein lipase [Source:HGNC
Symbol;Acc:HGNC:6677] |
Duplication |
| 204 |
ENSG00000036565 |
SLC18A1 |
8 |
20144855 |
20183206 |
-1 |
protein_coding |
solute carrier family 18 member A1 [Source:HGNC
Symbol;Acc:HGNC:10934] |
Duplication |
| 206 |
ENSG00000147416 |
ATP6V1B2 |
8 |
20197381 |
20230399 |
1 |
protein_coding |
ATPase H+ transporting V1 subunit B2 [Source:HGNC
Symbol;Acc:HGNC:854] |
Duplication |
| 209 |
ENSG00000061337 |
LZTS1 |
8 |
20246165 |
20303963 |
-1 |
protein_coding |
leucine zipper tumor suppressor 1 [Source:HGNC
Symbol;Acc:HGNC:13861] |
Duplication |
| 235 |
ENSG00000168546 |
GFRA2 |
8 |
21690398 |
21812357 |
-1 |
protein_coding |
GDNF family receptor alpha 2 [Source:HGNC
Symbol;Acc:HGNC:4244] |
Duplication |
| 237 |
ENSG00000147443 |
DOK2 |
8 |
21908873 |
21913690 |
-1 |
protein_coding |
docking protein 2 [Source:HGNC
Symbol;Acc:HGNC:2991] |
Duplication |
| 238 |
ENSG00000130227 |
XPO7 |
8 |
21919662 |
22006585 |
1 |
protein_coding |
exportin 7 [Source:HGNC Symbol;Acc:HGNC:14108] |
Duplication |
| 241 |
ENSG00000158806 |
NPM2 |
8 |
22024125 |
22036897 |
1 |
protein_coding |
nucleophosmin/nucleoplasmin 2 [Source:HGNC
Symbol;Acc:HGNC:7930] |
Duplication |
| 242 |
ENSG00000158815 |
FGF17 |
8 |
22042398 |
22048809 |
1 |
protein_coding |
fibroblast growth factor 17 [Source:HGNC
Symbol;Acc:HGNC:3673] |
Duplication |
| 243 |
ENSG00000158856 |
DMTN |
8 |
22048995 |
22082527 |
1 |
protein_coding |
dematin actin binding protein [Source:HGNC
Symbol;Acc:HGNC:3382] |
Duplication |
| 245 |
ENSG00000158863 |
FHIP2B |
8 |
22089150 |
22104911 |
1 |
protein_coding |
FHF complex subunit HOOK interacting protein 2B
[Source:HGNC Symbol;Acc:HGNC:16492] |
Duplication |
| 246 |
ENSG00000275074 |
NUDT18 |
8 |
22105748 |
22109419 |
-1 |
protein_coding |
nudix hydrolase 18 [Source:HGNC
Symbol;Acc:HGNC:26194] |
Duplication |
| 247 |
ENSG00000168453 |
HR |
8 |
22114419 |
22133384 |
-1 |
protein_coding |
HR lysine demethylase and nuclear receptor corepressor
[Source:HGNC Symbol;Acc:HGNC:5172] |
Duplication |
| 248 |
ENSG00000288677 |
HRURF |
8 |
22130458 |
22131010 |
-1 |
protein_coding |
HR upstream open reading frame [Source:HGNC
Symbol;Acc:HGNC:55085] |
Duplication |
| 249 |
ENSG00000168476 |
REEP4 |
8 |
22138020 |
22141951 |
-1 |
protein_coding |
receptor accessory protein 4 [Source:HGNC
Symbol;Acc:HGNC:26176] |
Duplication |
| 251 |
ENSG00000168481 |
LGI3 |
8 |
22146830 |
22157084 |
-1 |
protein_coding |
leucine rich repeat LGI family member 3 [Source:HGNC
Symbol;Acc:HGNC:18711] |
Duplication |
| 252 |
ENSG00000168484 |
SFTPC |
8 |
22156913 |
22164479 |
1 |
protein_coding |
surfactant protein C [Source:HGNC
Symbol;Acc:HGNC:10802] |
Duplication |
| 253 |
ENSG00000168487 |
BMP1 |
8 |
22165140 |
22212326 |
1 |
protein_coding |
bone morphogenetic protein 1 [Source:HGNC
Symbol;Acc:HGNC:1067] |
Duplication |
| 256 |
ENSG00000168490 |
PHYHIP |
8 |
22219703 |
22232101 |
-1 |
protein_coding |
phytanoyl-CoA 2-hydroxylase interacting protein
[Source:HGNC Symbol;Acc:HGNC:16865] |
Duplication |
| 259 |
ENSG00000168495 |
POLR3D |
8 |
22245133 |
22254601 |
1 |
protein_coding |
RNA polymerase III subunit D [Source:HGNC
Symbol;Acc:HGNC:1080] |
Duplication |
| 261 |
ENSG00000197181 |
PIWIL2 |
8 |
22275316 |
22357568 |
1 |
protein_coding |
piwi like RNA-mediated gene silencing 2 [Source:HGNC
Symbol;Acc:HGNC:17644] |
Duplication |
| 263 |
ENSG00000104635 |
SLC39A14 |
8 |
22367278 |
22434129 |
1 |
protein_coding |
solute carrier family 39 member 14 [Source:HGNC
Symbol;Acc:HGNC:20858] |
Duplication |
| 266 |
ENSG00000120910 |
PPP3CC |
8 |
22440819 |
22541142 |
1 |
protein_coding |
protein phosphatase 3 catalytic subunit gamma
[Source:HGNC Symbol;Acc:HGNC:9316] |
Duplication |
| 269 |
ENSG00000120896 |
SORBS3 |
8 |
22544986 |
22575788 |
1 |
protein_coding |
sorbin and SH3 domain containing 3 [Source:HGNC
Symbol;Acc:HGNC:30907] |
Duplication |
| 273 |
ENSG00000120913 |
PDLIM2 |
8 |
22578279 |
22598025 |
1 |
protein_coding |
PDZ and LIM domain 2 [Source:HGNC
Symbol;Acc:HGNC:13992] |
Duplication |
| 274 |
ENSG00000248235 |
|
8 |
22589274 |
22602084 |
1 |
protein_coding |
novel protein |
Duplication |
| 275 |
ENSG00000241852 |
C8orf58 |
8 |
22599599 |
22604150 |
1 |
protein_coding |
chromosome 8 open reading frame 58 [Source:HGNC
Symbol;Acc:HGNC:32233] |
Duplication |
| 277 |
ENSG00000158941 |
CCAR2 |
8 |
22604757 |
22620964 |
1 |
protein_coding |
cell cycle and apoptosis regulator 2 [Source:HGNC
Symbol;Acc:HGNC:23360] |
Duplication |
| 279 |
ENSG00000147439 |
BIN3 |
8 |
22620418 |
22669148 |
-1 |
protein_coding |
bridging integrator 3 [Source:HGNC
Symbol;Acc:HGNC:1054] |
Duplication |
| 283 |
ENSG00000179388 |
EGR3 |
8 |
22687659 |
22693480 |
-1 |
protein_coding |
early growth response 3 [Source:HGNC
Symbol;Acc:HGNC:3240] |
Duplication |
| 288 |
ENSG00000134020 |
PEBP4 |
8 |
22713251 |
23000000 |
-1 |
protein_coding |
phosphatidylethanolamine binding protein 4 [Source:HGNC
Symbol;Acc:HGNC:28319] |
Duplication |
| 297 |
ENSG00000008853 |
RHOBTB2 |
8 |
22987417 |
23020509 |
1 |
protein_coding |
Rho related BTB domain containing 2 [Source:HGNC
Symbol;Acc:HGNC:18756] |
Duplication |
| 298 |
ENSG00000120889 |
TNFRSF10B |
8 |
23020133 |
23069031 |
-1 |
protein_coding |
TNF receptor superfamily member 10b [Source:HGNC
Symbol;Acc:HGNC:11905] |
Duplication |
| 302 |
ENSG00000284956 |
|
8 |
23084403 |
23115536 |
1 |
protein_coding |
novel protein |
Duplication |
| 303 |
ENSG00000173535 |
TNFRSF10C |
8 |
23102921 |
23117445 |
1 |
protein_coding |
TNF receptor superfamily member 10c [Source:HGNC
Symbol;Acc:HGNC:11906] |
Duplication |
| 304 |
ENSG00000173530 |
TNFRSF10D |
8 |
23135588 |
23164027 |
-1 |
protein_coding |
TNF receptor superfamily member 10d [Source:HGNC
Symbol;Acc:HGNC:11907] |
Duplication |
| 307 |
ENSG00000104689 |
TNFRSF10A |
8 |
23190452 |
23225102 |
-1 |
protein_coding |
TNF receptor superfamily member 10a [Source:HGNC
Symbol;Acc:HGNC:11904] |
Duplication |
| 312 |
ENSG00000147457 |
CHMP7 |
8 |
23243637 |
23262000 |
1 |
protein_coding |
charged multivesicular body protein 7 [Source:HGNC
Symbol;Acc:HGNC:28439] |
Duplication |
| 314 |
ENSG00000104679 |
R3HCC1 |
8 |
23270120 |
23296279 |
1 |
protein_coding |
R3H domain and coiled-coil containing 1 [Source:HGNC
Symbol;Acc:HGNC:27329] |
Duplication |
| 316 |
ENSG00000134013 |
LOXL2 |
8 |
23296897 |
23425328 |
-1 |
protein_coding |
lysyl oxidase like 2 [Source:HGNC
Symbol;Acc:HGNC:6666] |
Duplication |
| 318 |
ENSG00000197217 |
ENTPD4 |
8 |
23385783 |
23457695 |
-1 |
protein_coding |
ectonucleoside triphosphate diphosphohydrolase 4
[Source:HGNC Symbol;Acc:HGNC:14573] |
Duplication |
| 326 |
ENSG00000147454 |
SLC25A37 |
8 |
23528956 |
23575463 |
1 |
protein_coding |
solute carrier family 25 member 37 [Source:HGNC
Symbol;Acc:HGNC:29786] |
Duplication |
| 330 |
ENSG00000167034 |
NKX3-1 |
8 |
23678697 |
23682938 |
-1 |
protein_coding |
NK3 homeobox 1 [Source:HGNC Symbol;Acc:HGNC:7838] |
Duplication |
| 331 |
ENSG00000180053 |
NKX2-6 |
8 |
23701740 |
23706756 |
-1 |
protein_coding |
NK2 homeobox 6 [Source:HGNC Symbol;Acc:HGNC:32940] |
Duplication |
| 338 |
ENSG00000159167 |
STC1 |
8 |
23841929 |
23854806 |
-1 |
protein_coding |
stanniocalcin 1 [Source:HGNC
Symbol;Acc:HGNC:11373] |
Duplication |
| 343 |
ENSG00000042980 |
ADAM28 |
8 |
24294069 |
24359014 |
1 |
protein_coding |
ADAM metallopeptidase domain 28 [Source:HGNC
Symbol;Acc:HGNC:206] |
Duplication |
| 344 |
ENSG00000134028 |
ADAMDEC1 |
8 |
24384285 |
24406013 |
1 |
protein_coding |
ADAM like decysin 1 [Source:HGNC
Symbol;Acc:HGNC:16299] |
Duplication |
| 345 |
ENSG00000069206 |
ADAM7 |
8 |
24440930 |
24526970 |
1 |
protein_coding |
ADAM metallopeptidase domain 7 [Source:HGNC
Symbol;Acc:HGNC:214] |
Duplication |
| 349 |
ENSG00000104722 |
NEFM |
8 |
24913758 |
24919098 |
1 |
protein_coding |
neurofilament medium chain [Source:HGNC
Symbol;Acc:HGNC:7734] |
Duplication |
| 351 |
ENSG00000277586 |
NEFL |
8 |
24950955 |
24956721 |
-1 |
protein_coding |
neurofilament light chain [Source:HGNC
Symbol;Acc:HGNC:7739] |
Duplication |
| 362 |
ENSG00000147459 |
DOCK5 |
8 |
25184689 |
25418082 |
1 |
protein_coding |
dedicator of cytokinesis 5 [Source:HGNC
Symbol;Acc:HGNC:23476] |
Duplication |
| 366 |
ENSG00000147437 |
GNRH1 |
8 |
25419258 |
25424654 |
-1 |
protein_coding |
gonadotropin releasing hormone 1 [Source:HGNC
Symbol;Acc:HGNC:4419] |
Duplication |
| 368 |
ENSG00000104756 |
KCTD9 |
8 |
25427847 |
25458476 |
-1 |
protein_coding |
potassium channel tetramerization domain containing 9
[Source:HGNC Symbol;Acc:HGNC:22401] |
Duplication |
| 369 |
ENSG00000184661 |
CDCA2 |
8 |
25459199 |
25507911 |
1 |
protein_coding |
cell division cycle associated 2 [Source:HGNC
Symbol;Acc:HGNC:14623] |
Duplication |
| 377 |
ENSG00000221818 |
EBF2 |
8 |
25841725 |
26045413 |
-1 |
protein_coding |
EBF transcription factor 2 [Source:HGNC
Symbol;Acc:HGNC:19090] |
Duplication |
| 386 |
ENSG00000221914 |
PPP2R2A |
8 |
26291508 |
26372680 |
1 |
protein_coding |
protein phosphatase 2 regulatory subunit Balpha
[Source:HGNC Symbol;Acc:HGNC:9304] |
Duplication |
| 389 |
ENSG00000104765 |
BNIP3L |
8 |
26383054 |
26505636 |
1 |
protein_coding |
BCL2 interacting protein 3 like [Source:HGNC
Symbol;Acc:HGNC:1085] |
Duplication |
| 394 |
ENSG00000240694 |
PNMA2 |
8 |
26504701 |
26514092 |
-1 |
protein_coding |
PNMA family member 2 [Source:HGNC
Symbol;Acc:HGNC:9159] |
Duplication |
| 395 |
ENSG00000092964 |
DPYSL2 |
8 |
26514031 |
26658178 |
1 |
protein_coding |
dihydropyrimidinase like 2 [Source:HGNC
Symbol;Acc:HGNC:3014] |
Duplication |
| 399 |
ENSG00000120907 |
ADRA1A |
8 |
26748150 |
26867278 |
-1 |
protein_coding |
adrenoceptor alpha 1A [Source:HGNC
Symbol;Acc:HGNC:277] |
Duplication |
| 411 |
ENSG00000015592 |
STMN4 |
8 |
27235308 |
27258420 |
-1 |
protein_coding |
stathmin 4 [Source:HGNC Symbol;Acc:HGNC:16078] |
Duplication |
| 412 |
ENSG00000104228 |
TRIM35 |
8 |
27284886 |
27311272 |
-1 |
protein_coding |
tripartite motif containing 35 [Source:HGNC
Symbol;Acc:HGNC:16285] |
Duplication |
| 413 |
ENSG00000120899 |
PTK2B |
8 |
27311482 |
27459391 |
1 |
protein_coding |
protein tyrosine kinase 2 beta [Source:HGNC
Symbol;Acc:HGNC:9612] |
Duplication |
| 415 |
ENSG00000120903 |
CHRNA2 |
8 |
27459756 |
27479883 |
-1 |
protein_coding |
cholinergic receptor nicotinic alpha 2 subunit
[Source:HGNC Symbol;Acc:HGNC:1956] |
Duplication |
| 416 |
ENSG00000120915 |
EPHX2 |
8 |
27490781 |
27548615 |
1 |
protein_coding |
epoxide hydrolase 2 [Source:HGNC
Symbol;Acc:HGNC:3402] |
Duplication |
| 420 |
ENSG00000120885 |
CLU |
8 |
27596917 |
27614700 |
-1 |
protein_coding |
clusterin [Source:HGNC Symbol;Acc:HGNC:2095] |
Duplication |
| 422 |
ENSG00000168077 |
SCARA3 |
8 |
27633868 |
27676776 |
1 |
protein_coding |
scavenger receptor class A member 3 [Source:HGNC
Symbol;Acc:HGNC:19000] |
Duplication |
| 430 |
ENSG00000147419 |
CCDC25 |
8 |
27733316 |
27772653 |
-1 |
protein_coding |
coiled-coil domain containing 25 [Source:HGNC
Symbol;Acc:HGNC:25591] |
Duplication |
| 431 |
ENSG00000171320 |
ESCO2 |
8 |
27771949 |
27812640 |
1 |
protein_coding |
establishment of sister chromatid cohesion
N-acetyltransferase 2 [Source:HGNC Symbol;Acc:HGNC:27230] |
Duplication |
| 433 |
ENSG00000168078 |
PBK |
8 |
27809624 |
27838082 |
-1 |
protein_coding |
PDZ binding kinase [Source:HGNC
Symbol;Acc:HGNC:18282] |
Duplication |
| 436 |
ENSG00000168079 |
SCARA5 |
8 |
27869883 |
27992673 |
-1 |
protein_coding |
scavenger receptor class A member 5 [Source:HGNC
Symbol;Acc:HGNC:28701] |
Duplication |
| 441 |
ENSG00000189233 |
NUGGC |
8 |
28021964 |
28083936 |
-1 |
protein_coding |
nuclear GTPase, germinal center associated [Source:HGNC
Symbol;Acc:HGNC:33550] |
Duplication |
| 442 |
ENSG00000134014 |
ELP3 |
8 |
28089673 |
28191156 |
1 |
protein_coding |
elongator acetyltransferase complex subunit 3
[Source:HGNC Symbol;Acc:HGNC:20696] |
Duplication |
| 449 |
ENSG00000168081 |
PNOC |
8 |
28316986 |
28343355 |
1 |
protein_coding |
prepronociceptin [Source:HGNC
Symbol;Acc:HGNC:9163] |
Duplication |
| 450 |
ENSG00000186918 |
ZNF395 |
8 |
28345590 |
28402701 |
-1 |
protein_coding |
zinc finger protein 395 [Source:HGNC
Symbol;Acc:HGNC:18737] |
Duplication |
| 451 |
ENSG00000214050 |
FBXO16 |
8 |
28348287 |
28490278 |
-1 |
protein_coding |
F-box protein 16 [Source:HGNC
Symbol;Acc:HGNC:13618] |
Duplication |
| 457 |
ENSG00000104290 |
FZD3 |
8 |
28494205 |
28574267 |
1 |
protein_coding |
frizzled class receptor 3 [Source:HGNC
Symbol;Acc:HGNC:4041] |
Duplication |
| 461 |
ENSG00000012232 |
EXTL3 |
8 |
28600469 |
28756561 |
1 |
protein_coding |
exostosin like glycosyltransferase 3 [Source:HGNC
Symbol;Acc:HGNC:3518] |
Duplication |
| 465 |
ENSG00000104299 |
INTS9 |
8 |
28767661 |
28890242 |
-1 |
protein_coding |
integrator complex subunit 9 [Source:HGNC
Symbol;Acc:HGNC:25592] |
Duplication |
| 468 |
ENSG00000147421 |
HMBOX1 |
8 |
28890395 |
29064764 |
1 |
protein_coding |
homeobox containing 1 [Source:HGNC
Symbol;Acc:HGNC:26137] |
Duplication |
| 475 |
ENSG00000197892 |
KIF13B |
8 |
29067278 |
29263124 |
-1 |
protein_coding |
kinesin family member 13B [Source:HGNC
Symbol;Acc:HGNC:14405] |
Duplication |
| 481 |
ENSG00000120875 |
DUSP4 |
8 |
29333064 |
29350684 |
-1 |
protein_coding |
dual specificity phosphatase 4 [Source:HGNC
Symbol;Acc:HGNC:3070] |
Duplication |
| 512 |
ENSG00000133872 |
SARAF |
8 |
30063003 |
30083208 |
-1 |
protein_coding |
store-operated calcium entry associated regulatory
factor [Source:HGNC Symbol;Acc:HGNC:28789] |
Duplication |
| 515 |
ENSG00000104660 |
LEPROTL1 |
8 |
30095408 |
30177208 |
1 |
protein_coding |
leptin receptor overlapping transcript like 1
[Source:HGNC Symbol;Acc:HGNC:6555] |
Duplication |
| 517 |
ENSG00000177669 |
MBOAT4 |
8 |
30131671 |
30144665 |
-1 |
protein_coding |
membrane bound O-acyltransferase domain containing 4
[Source:HGNC Symbol;Acc:HGNC:32311] |
Duplication |
| 519 |
ENSG00000104671 |
DCTN6 |
8 |
30156319 |
30183639 |
1 |
protein_coding |
dynactin subunit 6 [Source:HGNC
Symbol;Acc:HGNC:16964] |
Duplication |
| 535 |
ENSG00000157110 |
RBPMS |
8 |
30384511 |
30572256 |
1 |
protein_coding |
RNA binding protein, mRNA processing factor
[Source:HGNC Symbol;Acc:HGNC:19097] |
Duplication |
| 539 |
ENSG00000197265 |
GTF2E2 |
8 |
30578318 |
30658236 |
-1 |
protein_coding |
general transcription factor IIE subunit 2 [Source:HGNC
Symbol;Acc:HGNC:4651] |
Duplication |
| 541 |
ENSG00000253457 |
SMIM18 |
8 |
30638580 |
30646064 |
1 |
protein_coding |
small integral membrane protein 18 [Source:HGNC
Symbol;Acc:HGNC:42973] |
Duplication |
| 543 |
ENSG00000104687 |
GSR |
8 |
30678066 |
30727846 |
-1 |
protein_coding |
glutathione-disulfide reductase [Source:HGNC
Symbol;Acc:HGNC:4623] |
Duplication |
| 544 |
ENSG00000104691 |
UBXN8 |
8 |
30729131 |
30767006 |
1 |
protein_coding |
UBX domain protein 8 [Source:HGNC
Symbol;Acc:HGNC:30307] |
Duplication |
| 546 |
ENSG00000104695 |
PPP2CB |
8 |
30774457 |
30814314 |
-1 |
protein_coding |
protein phosphatase 2 catalytic subunit beta
[Source:HGNC Symbol;Acc:HGNC:9300] |
Duplication |
| 547 |
ENSG00000133863 |
TEX15 |
8 |
30831544 |
30913008 |
-1 |
protein_coding |
testis expressed 15, meiosis and synapsis associated
[Source:HGNC Symbol;Acc:HGNC:11738] |
Duplication |
| 551 |
ENSG00000172733 |
PURG |
8 |
30995802 |
31033715 |
-1 |
protein_coding |
purine rich element binding protein G [Source:HGNC
Symbol;Acc:HGNC:17930] |
Duplication |
| 552 |
ENSG00000165392 |
WRN |
8 |
31033788 |
31176138 |
1 |
protein_coding |
WRN RecQ like helicase [Source:HGNC
Symbol;Acc:HGNC:12791] |
Duplication |
| 563 |
ENSG00000157168 |
NRG1 |
8 |
31639222 |
32855666 |
1 |
protein_coding |
neuregulin 1 [Source:HGNC Symbol;Acc:HGNC:7997] |
Duplication |
| 571 |
ENSG00000286131 |
|
8 |
32647202 |
32647390 |
1 |
protein_coding |
novel protein |
Duplication |
| 582 |
ENSG00000172728 |
FUT10 |
8 |
33370824 |
33473146 |
-1 |
protein_coding |
fucosyltransferase 10 [Source:HGNC
Symbol;Acc:HGNC:19234] |
Duplication |
| 585 |
ENSG00000129696 |
TTI2 |
8 |
33473386 |
33513185 |
-1 |
protein_coding |
TELO2 interacting protein 2 [Source:HGNC
Symbol;Acc:HGNC:26262] |
Duplication |
| 586 |
ENSG00000198042 |
MAK16 |
8 |
33485182 |
33501262 |
1 |
protein_coding |
MAK16 homolog [Source:HGNC Symbol;Acc:HGNC:13703] |
Duplication |
| 592 |
ENSG00000133874 |
RNF122 |
8 |
33547754 |
33567128 |
-1 |
protein_coding |
ring finger protein 122 [Source:HGNC
Symbol;Acc:HGNC:21147] |
Duplication |
| 594 |
ENSG00000133878 |
DUSP26 |
8 |
33591330 |
33600023 |
-1 |
protein_coding |
dual specificity phosphatase 26 [Source:HGNC
Symbol;Acc:HGNC:28161] |
Duplication |
| 615 |
ENSG00000156687 |
UNC5D |
8 |
35235475 |
35796550 |
1 |
protein_coding |
unc-5 netrin receptor D [Source:HGNC
Symbol;Acc:HGNC:18634] |
Duplication |
| 633 |
ENSG00000215262 |
KCNU1 |
8 |
36784324 |
36936125 |
1 |
protein_coding |
potassium calcium-activated channel subfamily U member
1 [Source:HGNC Symbol;Acc:HGNC:18867] |
Duplication |
| 659 |
ENSG00000183779 |
ZNF703 |
8 |
37695782 |
37700019 |
1 |
protein_coding |
zinc finger protein 703 [Source:HGNC
Symbol;Acc:HGNC:25883] |
Duplication |
| 663 |
ENSG00000147475 |
ERLIN2 |
8 |
37736601 |
37758422 |
1 |
protein_coding |
ER lipid raft associated 2 [Source:HGNC
Symbol;Acc:HGNC:1356] |
Duplication |
| 666 |
ENSG00000147471 |
PLPBP |
8 |
37762595 |
37779768 |
1 |
protein_coding |
pyridoxal phosphate binding protein [Source:HGNC
Symbol;Acc:HGNC:9457] |
Duplication |
| 668 |
ENSG00000020181 |
ADGRA2 |
8 |
37784191 |
37844896 |
1 |
protein_coding |
adhesion G protein-coupled receptor A2 [Source:HGNC
Symbol;Acc:HGNC:17849] |
Duplication |
| 670 |
ENSG00000104221 |
BRF2 |
8 |
37843268 |
37849861 |
-1 |
protein_coding |
BRF2 RNA polymerase III transcription initiation factor
subunit [Source:HGNC Symbol;Acc:HGNC:17298] |
Duplication |
| 671 |
ENSG00000156675 |
RAB11FIP1 |
8 |
37858618 |
37899497 |
-1 |
protein_coding |
RAB11 family interacting protein 1 [Source:HGNC
Symbol;Acc:HGNC:30265] |
Duplication |
| 674 |
ENSG00000169154 |
GOT1L1 |
8 |
37934281 |
37940124 |
-1 |
protein_coding |
glutamic-oxaloacetic transaminase 1 like 1 [Source:HGNC
Symbol;Acc:HGNC:28487] |
Duplication |
| 675 |
ENSG00000285880 |
|
8 |
37934340 |
37965953 |
-1 |
protein_coding |
ADRB3-GOT1L1 readthrough |
Duplication |
| 677 |
ENSG00000188778 |
ADRB3 |
8 |
37962990 |
37966599 |
-1 |
protein_coding |
adrenoceptor beta 3 [Source:HGNC
Symbol;Acc:HGNC:288] |
Duplication |
| 679 |
ENSG00000187840 |
EIF4EBP1 |
8 |
38030534 |
38060365 |
1 |
protein_coding |
eukaryotic translation initiation factor 4E binding
protein 1 [Source:HGNC Symbol;Acc:HGNC:3288] |
Duplication |
| 684 |
ENSG00000129691 |
ASH2L |
8 |
38105493 |
38144076 |
1 |
protein_coding |
ASH2 like, histone lysine methyltransferase complex
subunit [Source:HGNC Symbol;Acc:HGNC:744] |
Duplication |
| 687 |
ENSG00000147465 |
STAR |
8 |
38142700 |
38150992 |
-1 |
protein_coding |
steroidogenic acute regulatory protein [Source:HGNC
Symbol;Acc:HGNC:11359] |
Duplication |
| 689 |
ENSG00000175324 |
LSM1 |
8 |
38163335 |
38176730 |
-1 |
protein_coding |
LSM1 homolog, mRNA degradation associated [Source:HGNC
Symbol;Acc:HGNC:20472] |
Duplication |
| 691 |
ENSG00000156735 |
BAG4 |
8 |
38176533 |
38213301 |
1 |
protein_coding |
BAG cochaperone 4 [Source:HGNC
Symbol;Acc:HGNC:940] |
Duplication |
| 694 |
ENSG00000085788 |
DDHD2 |
8 |
38225218 |
38275558 |
1 |
protein_coding |
DDHD domain containing 2 [Source:HGNC
Symbol;Acc:HGNC:29106] |
Duplication |
| 695 |
ENSG00000147535 |
PLPP5 |
8 |
38263130 |
38269243 |
-1 |
protein_coding |
phospholipid phosphatase 5 [Source:HGNC
Symbol;Acc:HGNC:25026] |
Duplication |
| 696 |
ENSG00000147548 |
NSD3 |
8 |
38269704 |
38382272 |
-1 |
protein_coding |
nuclear receptor binding SET domain protein 3
[Source:HGNC Symbol;Acc:HGNC:12767] |
Duplication |
| 701 |
ENSG00000165046 |
LETM2 |
8 |
38386207 |
38409527 |
1 |
protein_coding |
leucine zipper and EF-hand containing transmembrane
protein 2 [Source:HGNC Symbol;Acc:HGNC:14648] |
Duplication |
| 702 |
ENSG00000077782 |
FGFR1 |
8 |
38400215 |
38468834 |
-1 |
protein_coding |
fibroblast growth factor receptor 1 [Source:HGNC
Symbol;Acc:HGNC:3688] |
Duplication |
| 718 |
ENSG00000147526 |
TACC1 |
8 |
38728186 |
38853028 |
1 |
protein_coding |
transforming acidic coiled-coil containing protein 1
[Source:HGNC Symbol;Acc:HGNC:11522] |
Duplication |
| 723 |
ENSG00000169499 |
PLEKHA2 |
8 |
38901235 |
38973912 |
1 |
protein_coding |
pleckstrin homology domain containing A2 [Source:HGNC
Symbol;Acc:HGNC:14336] |
Duplication |
| 725 |
ENSG00000169495 |
HTRA4 |
8 |
38974228 |
38988663 |
1 |
protein_coding |
HtrA serine peptidase 4 [Source:HGNC
Symbol;Acc:HGNC:26909] |
Duplication |
| 726 |
ENSG00000169490 |
TM2D2 |
8 |
38988808 |
38996824 |
-1 |
protein_coding |
TM2 domain containing 2 [Source:HGNC
Symbol;Acc:HGNC:24127] |
Duplication |
| 727 |
ENSG00000168615 |
ADAM9 |
8 |
38996754 |
39105445 |
1 |
protein_coding |
ADAM metallopeptidase domain 9 [Source:HGNC
Symbol;Acc:HGNC:216] |
Duplication |
| 729 |
ENSG00000197140 |
ADAM32 |
8 |
39106990 |
39284917 |
1 |
protein_coding |
ADAM metallopeptidase domain 32 [Source:HGNC
Symbol;Acc:HGNC:15479] |
Duplication |
| 739 |
ENSG00000168619 |
ADAM18 |
8 |
39584489 |
39730065 |
1 |
protein_coding |
ADAM metallopeptidase domain 18 [Source:HGNC
Symbol;Acc:HGNC:196] |
Duplication |
| 740 |
ENSG00000104755 |
ADAM2 |
8 |
39743735 |
39838227 |
-1 |
protein_coding |
ADAM metallopeptidase domain 2 [Source:HGNC
Symbol;Acc:HGNC:198] |
Duplication |
| 743 |
ENSG00000131203 |
IDO1 |
8 |
39902275 |
39928790 |
1 |
protein_coding |
indoleamine 2,3-dioxygenase 1 [Source:HGNC
Symbol;Acc:HGNC:6059] |
Duplication |
| 747 |
ENSG00000188676 |
IDO2 |
8 |
39934614 |
40016392 |
1 |
protein_coding |
indoleamine 2,3-dioxygenase 2 [Source:HGNC
Symbol;Acc:HGNC:27269] |
Duplication |
| 753 |
ENSG00000176907 |
TCIM |
8 |
40153482 |
40155310 |
1 |
protein_coding |
transcriptional and immune response regulator
[Source:HGNC Symbol;Acc:HGNC:1357] |
Duplication |
# Ensembl IDs that occur in both the deletion and the duplication gene tables.
overlapping_genes <- intersect(
  protein_coding_genes_del$ensembl_gene_id,
  protein_coding_genes_dup$ensembl_gene_id
)
# Print the shared IDs, or say explicitly that there are none.
if (length(overlapping_genes) == 0) {
  cat("No genes are present in both the deletion and duplication regions.\n")
} else {
  cat("Genes present in both regions:\n")
  print(overlapping_genes)
}
## No genes are present in both the deletion and duplication regions.
8p analysis
# Load the gene-level count table and the sample sheet.
counts <- read.csv("data/gene_count.csv")
samples <- read_xlsx("data/Sample List.xlsx")
# Split the count table: gene_id plus one column per listed sample goes to
# raw.counts, every remaining column is kept as per-gene annotation.
raw.counts <- dplyr::select(counts, gene_id, all_of(samples$`Sample Name`))
annotation <- dplyr::select(counts, -all_of(samples$`Sample Name`))
# Sample sheet variant in which MOM and DAD are pooled into one "Parents" group.
samples.parents <- mutate(
  samples,
  `Group Name` = ifelse(`Group Name` %in% c("MOM", "DAD"), "Parents", `Group Name`)
)
print(samples.parents)
## # A tibble: 12 × 7
## `Sample Name` `Cell line` Concentration (ng/uL…¹ `Volume (uL)` `A260/A280`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 SL_1 31.3 (Sample … 1194. 30 2.09
## 2 SL_2 31.3 (Sample … 1068. 30 2.08
## 3 SL_3 31.3 (Sample … 1097. 30 2.08
## 4 SL_4 JE01 214 (Sam… 523. 30 2.00
## 5 SL_5 JE01 214 (Sam… 762. 30 2.04
## 6 SL_6 JE01 214 (Sam… 410. 30 1.96
## 7 SL_7 255-1 (p13) 244. 30 2.08
## 8 SL_8 255-4 (p13) 606. 30 2.09
## 9 SL_9 255-3 (p13) 495. 30 2.06
## 10 SL_10 294-1 1128. 30 2.07
## 11 SL_11 294-2 750. 30 2.07
## 12 SL_12 294-3 382. 30 2.09
## # ℹ abbreviated name: ¹`Concentration (ng/uL)`
## # ℹ 2 more variables: `A260/A230` <dbl>, `Group Name` <chr>
# Build the edgeR object for the 8p cohort, grouped by the sample sheet in which
# MOM and DAD are pooled as "Parents".
y.parents <- DGEList(
  counts = raw.counts,
  samples = samples.parents,
  group = samples.parents$`Group Name`
)
y.parents$genes <- annotation
# Filter lowly expressed transcripts.
keep <- filterByExpr(
  y.parents,
  min.count = 30,
  min.total.count = 50,
  large.n = 10,
  min.prop = 0.75
)
table(keep)
## keep
## FALSE TRUE
## 43987 14748
# keep.lib.sizes = FALSE: library sizes are recomputed from the retained genes.
y.parents <- y.parents[keep, , keep.lib.sizes = FALSE]
y.parents <- normLibSizes(y.parents) # TMM normalization
# calculate CPM
# TRUE is spelled out: the shorthand T is an ordinary variable that can be reassigned.
log2_cpm <- cpm(y.parents, log = TRUE, prior.count = 1, normalized.lib.sizes = TRUE)
# Annotation columns followed by one log2-CPM column per sample.
log2_tmm_data_with_annotations <- cbind(y.parents$genes, log2_cpm)
# DE analysis
plotMDS(y.parents)

# Design without intercept: one column per group. The "group" prefix is
# stripped so the contrast below can refer to the bare group names.
samples.design.parents <- model.matrix(~ 0 + group, data = y.parents$samples)
colnames(samples.design.parents) <- gsub(
  "group", "", colnames(samples.design.parents)
)
# Dispersion estimation; the common dispersion is printed for reference.
y.parents <- estimateDisp(y.parents, design = samples.design.parents, robust = TRUE)
print(y.parents$common.dispersion)
## [1] 0.03257095
plotBCV(y.parents)

# Quasi-likelihood GLM fit.
fit.parents <- glmQLFit(y.parents, design = samples.design.parents, robust = TRUE)
plotQLDisp(fit.parents)

# Contrast of interest: proband (PRO) minus revertant (REV).
rev.contrast <- makeContrasts(
  PROvsREV = PRO - REV,
  levels = samples.design.parents
)
qlf.PROvsREV <- glmQLFTest(fit.parents, contrast = rev.contrast[, "PROvsREV"])
# Collect the result bundle and draw the two summary plots.
contr.rev <- get_results(contrast = "PRO - REV", qlf = qlf.PROvsREV)
parents_results_list <- list(contr.rev)
plot_all_results(parents_results_list)

table.results <- plot_thresholded_results(parents_results_list)

Supplementary Figure S3BC
# Collapse the UCSC centromere records to a single interval per chromosome.
centromeres <- read_tsv("data/centromeres-UCSC.bed", comment = "#")
centromeres_summary <- centromeres %>%
  group_by(chrom) %>%
  summarize(start = min(chromStart), end = max(chromEnd))
# Sample -> group lookup restricted to the REV and PRO samples.
sample_to_group <- samples %>%
  dplyr::select(`Sample Name`, `Group Name`) %>%
  dplyr::filter(`Group Name` %in% c("REV", "PRO"))
# logFC of every gene in the PRO - REV result table, with its chromosome.
gene_logfc <- dplyr::select(contr.rev$top_tags$table, gene_id, logFC, gene_chr)
# One summary row (mean and median logFC) for each autosome 1..22.
summary_list <- lapply(1:22, function(chrom_number) {
  chrom_logfc <- gene_logfc %>%
    filter(gene_chr == as.character(chrom_number)) %>%
    pull(logFC)
  tibble(
    CHR = chrom_number,
    mean_logFC = mean(chrom_logfc, na.rm = TRUE),
    median_logFC = median(chrom_logfc, na.rm = TRUE)
  )
})
final_summary <- bind_rows(summary_list)
final_summary %>% gt()
| CHR |
mean_logFC |
median_logFC |
| 1 |
0.03530399 |
0.002479699 |
| 2 |
0.07225343 |
0.001113815 |
| 3 |
0.04830308 |
0.014212023 |
| 4 |
0.07049886 |
0.027990760 |
| 5 |
0.07644282 |
0.011892850 |
| 6 |
0.01334637 |
-0.009420676 |
| 7 |
0.04393906 |
0.002151135 |
| 8 |
0.19836010 |
0.121154181 |
| 9 |
0.03652699 |
-0.010908634 |
| 10 |
0.07900521 |
-0.009744297 |
| 11 |
0.02607693 |
-0.019861725 |
| 12 |
0.09566353 |
0.033873698 |
| 13 |
0.12699998 |
0.020871230 |
| 14 |
0.05985903 |
0.020982288 |
| 15 |
0.07295819 |
0.038859233 |
| 16 |
0.02208292 |
-0.034985374 |
| 17 |
0.02113778 |
-0.019461390 |
| 18 |
0.13336673 |
0.018704282 |
| 19 |
0.09589128 |
0.035091883 |
| 20 |
0.06615149 |
0.002520803 |
| 21 |
0.05502288 |
0.008762923 |
| 22 |
0.04180247 |
-0.027330879 |
library(grid)
# chr 7
# Centromere interval of chromosome 7, taken from the per-chromosome summary.
chr <- paste0("chr", 7)
centromere_start <- centromeres_summary %>%
  filter(chrom == chr) %>%
  pull(start)
centromere_end <- centromeres_summary %>%
  filter(chrom == chr) %>%
  pull(end)
# Mean log2-CPM per gene and group on chromosome 7. Genes whose start lies
# inside the centromere interval are dropped; samples missing from
# sample_to_group receive an NA group from the left join.
gene.deldup.tmm <- log2_tmm_data_with_annotations %>%
  filter(gene_chr == as.character(7)) %>%
  filter(!(gene_start >= centromere_start & gene_start < centromere_end)) %>%
  dplyr::select(gene_id, gene_start, gene_end, all_of(samples$`Sample Name`)) %>%
  pivot_longer(
    cols = -c(gene_id, gene_start, gene_end),
    names_to = "Sample",
    values_to = "Expression"
  ) %>%
  left_join(sample_to_group, by = c("Sample" = "Sample Name")) %>% # Map samples to their groups
  group_by(gene_id, gene_start, gene_end, `Group Name`) %>%
  summarise(Average_Expression = mean(Expression, na.rm = TRUE), .groups = "drop") %>%
  arrange(gene_start, desc(Average_Expression))
# Per-gene PRO - REV difference; rolling_avg is produced by custom_rollmean
# (defined elsewhere in this file) with k = 14, separately per arm (p / q).
gene.deldup.tmm.plot.relative.PRO <- gene.deldup.tmm %>%
  mutate(region = case_when(
    gene_start >= 0 & gene_start < centromere_start ~ "p",
    gene_start >= centromere_end ~ "q"
  )) %>%
  filter(`Group Name` %in% c('PRO', 'REV')) %>% # Filter for PRO and REV
  tidyr::pivot_wider(names_from = `Group Name`, values_from = Average_Expression) %>%
  # Calculate relative expression as PRO average minus REV average
  mutate(relative_expression = PRO - REV) %>%
  group_by(region) %>%
  mutate(rolling_avg = custom_rollmean(relative_expression, k = 14)) %>%
  ungroup()
# Label drawn below the panel (clipping is switched off further down).
text_cen <- textGrob("Centromere", gp = gpar(fontsize = 18, fontface = "bold"))
plot_object <- ggplot(
  gene.deldup.tmm.plot.relative.PRO,
  aes(x = (gene_start + gene_end) / 2, y = rolling_avg)
) +
  # Grey band marking the centromere interval.
  geom_rect(
    aes(xmin = centromere_start, xmax = centromere_end, ymin = -Inf, ymax = Inf),
    fill = "grey", color = "grey", alpha = 0.6
  ) +
  annotation_custom(
    text_cen,
    xmin = (centromere_start + centromere_end) / 2,
    xmax = (centromere_start + centromere_end) / 2,
    ymin = -1.35,
    ymax = -1.35
  ) +
  # linewidth replaces the deprecated size argument for lines (ggplot2 >= 3.4.0).
  geom_hline(yintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5, alpha = 0.8) +
  geom_point(size = 1.5, alpha = 0.9, color = "#727572") +
  labs(
    title = "Gene Expression on Chromosome 7 - Proband relative to Revertant",
    x = "Chromosome Position (bp)",
    y = "Relative Expression Level (log2)",
    color = NULL
  ) +
  scale_y_continuous(limits = c(-1, 1.5), breaks = seq(-1.5, 1.5, by = 0.5)) +
  # Labels are derived from the breaks, so their count always matches the
  # data-dependent break sequence (a fixed label vector errors on a mismatch).
  scale_x_continuous(
    expand = c(0, 0),
    limits = c(0, max(gene.deldup.tmm.plot.relative.PRO$gene_end) + 2e6),
    breaks = seq(from = 0, to = max(gene.deldup.tmm.plot.relative.PRO$gene_end), by = 20000000),
    labels = scales::comma
  ) +
  expand_limits(x = 0, y = -1.5) +
  theme_classic() +
  coord_cartesian(clip = "off") +
  theme(
    # Must come after theme_classic(): adding a complete theme replaces any
    # theme() settings added before it, so the margin would otherwise be lost.
    plot.margin = unit(c(1, 1, 2, 1), "lines"),
    text = element_text(family = "Arial", face = "bold"),
    plot.title = element_text(size = 28, face = "bold", hjust = 0.5, family = "Arial"),
    axis.title = element_text(size = 20, family = "Arial"),
    axis.text = element_text(size = 18, family = "Arial"),
    axis.title.x = element_text(margin = margin(t = 20)),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Draw the plot currently stored in plot_object.
print(plot_object)

# Optional PNG export of the same plot; kept disabled.
# ggsave(
# filename = paste("CHR7_gene_expression_plot_log2_PROvsREV.png"),
# plot = plot_object,
# width = 18, # Adjust the width to make the canvas longer
# height = 6, # Adjust the height to control the aspect ratio
# dpi = 300, # Set the resolution to 300 dpi
# bg = "transparent"
# )
# chr 9
# Centromere interval of chromosome 9, taken from the per-chromosome summary.
chr <- paste0("chr", 9)
centromere_start <- centromeres_summary %>%
  filter(chrom == chr) %>%
  pull(start)
centromere_end <- centromeres_summary %>%
  filter(chrom == chr) %>%
  pull(end)
# Mean log2-CPM per gene and group on chromosome 9. Genes whose start lies
# inside the centromere interval are dropped; samples missing from
# sample_to_group receive an NA group from the left join.
gene.deldup.tmm <- log2_tmm_data_with_annotations %>%
  filter(gene_chr == as.character(9)) %>%
  filter(!(gene_start >= centromere_start & gene_start < centromere_end)) %>%
  dplyr::select(gene_id, gene_start, gene_end, all_of(samples$`Sample Name`)) %>%
  pivot_longer(
    cols = -c(gene_id, gene_start, gene_end),
    names_to = "Sample",
    values_to = "Expression"
  ) %>%
  left_join(sample_to_group, by = c("Sample" = "Sample Name")) %>% # Map samples to their groups
  group_by(gene_id, gene_start, gene_end, `Group Name`) %>%
  summarise(Average_Expression = mean(Expression, na.rm = TRUE), .groups = "drop") %>%
  arrange(gene_start, desc(Average_Expression))
# Per-gene PRO - REV difference; rolling_avg is produced by custom_rollmean
# (defined elsewhere in this file) with k = 14, separately per arm (p / q).
gene.deldup.tmm.plot.relative.PRO <- gene.deldup.tmm %>%
  mutate(region = case_when(
    gene_start >= 0 & gene_start < centromere_start ~ "p",
    gene_start >= centromere_end ~ "q"
  )) %>%
  # Keep PRO/REV rows that were assigned to an arm. region is only ever "p",
  # "q" or NA, so !is.na(region) keeps exactly the rows that the earlier
  # `region != "c"` comparison kept.
  filter(`Group Name` %in% c('PRO', 'REV'), !is.na(region)) %>%
  tidyr::pivot_wider(names_from = `Group Name`, values_from = Average_Expression) %>%
  # Calculate relative expression as PRO average minus REV average
  mutate(relative_expression = PRO - REV) %>%
  group_by(region) %>%
  mutate(rolling_avg = custom_rollmean(relative_expression, k = 14)) %>%
  ungroup()
# Label drawn below the panel (clipping is switched off further down).
text_cen <- textGrob("Centromere", gp = gpar(fontsize = 18, fontface = "bold"))
plot_object <- ggplot(
  gene.deldup.tmm.plot.relative.PRO,
  aes(x = (gene_start + gene_end) / 2, y = rolling_avg)
) +
  # Grey band marking the centromere interval.
  geom_rect(
    aes(xmin = centromere_start, xmax = centromere_end, ymin = -Inf, ymax = Inf),
    fill = "grey", color = "grey", alpha = 0.6
  ) +
  annotation_custom(
    text_cen,
    xmin = (centromere_start + centromere_end) / 2,
    xmax = (centromere_start + centromere_end) / 2,
    ymin = -1.35,
    ymax = -1.35
  ) +
  # linewidth replaces the deprecated size argument for lines (ggplot2 >= 3.4.0).
  geom_hline(yintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5, alpha = 0.8) +
  geom_point(size = 1.5, alpha = 0.9, color = "#727572") +
  labs(
    title = "Gene Expression on Chromosome 9 - Proband relative to Revertant",
    x = "Chromosome Position (bp)",
    y = "Relative Expression Level (log2)",
    color = NULL
  ) +
  scale_y_continuous(limits = c(-1, 1.5), breaks = seq(-1.5, 1.5, by = 0.5)) +
  # Labels are derived from the breaks, so their count always matches the
  # data-dependent break sequence (a fixed label vector errors on a mismatch).
  scale_x_continuous(
    expand = c(0, 0),
    limits = c(0, max(gene.deldup.tmm.plot.relative.PRO$gene_end) + 2e6),
    breaks = seq(from = 0, to = max(gene.deldup.tmm.plot.relative.PRO$gene_end), by = 20000000),
    labels = scales::comma
  ) +
  expand_limits(x = 0, y = -1.5) +
  theme_classic() +
  coord_cartesian(clip = "off") +
  theme(
    # Must come after theme_classic(): adding a complete theme replaces any
    # theme() settings added before it, so the margin would otherwise be lost.
    plot.margin = unit(c(1, 1, 2, 1), "lines"),
    text = element_text(family = "Arial", face = "bold"),
    plot.title = element_text(size = 28, face = "bold", hjust = 0.5, family = "Arial"),
    axis.title = element_text(size = 20, family = "Arial"),
    axis.text = element_text(size = 18, family = "Arial"),
    axis.title.x = element_text(margin = margin(t = 20)),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
print(plot_object)

# ggsave(
# filename = paste("CHR9_gene_expression_plot_log2_PROvsREV.png"),
# plot = plot_object,
# width = 18, # Adjust the width to make the canvas longer
# height = 6, # Adjust the height to control the aspect ratio
# dpi = 300, # Set the resolution to 300 dpi
# bg = "transparent"
# )
GSE185192 DE analysis
# Load the GSE185192 count table; lines starting with "#" are skipped.
new_counts <- read.csv("data/gene_counts_GSE185192.csv", comment.char = "#")
# Check that every GSE185192 gene ID also occurs in the 8p count table, whose
# annotation columns are joined on below.
in_both <- new_counts$Geneid %in% counts$gene_id
all(in_both)
## [1] TRUE
# Sample sheet: the three IsoE replicates are labelled DS, the three IsoT
# replicates TS.
sample_names <- c("IsoE_Rep1", "IsoE_Rep2", "IsoE_Rep3", "IsoT_Rep1", "IsoT_Rep2", "IsoT_Rep3")
GSE185192.samples <- data.frame(
  sample_name = sample_names,
  group = c("DS", "DS", "DS", "TS", "TS", "TS")
)
# Keep the gene ID plus the per-BAM count columns and give them sample names.
# starts_with() is itself a selection helper; all_of() is meant for character
# vectors, so it is not wrapped around it.
GSE185192 <- new_counts %>%
  dplyr::select(Geneid, starts_with("aligned")) %>%
  dplyr::rename(
    gene_id = Geneid,
    "IsoE_Rep1" = "aligned.SRR16242104.sorted.bam",
    "IsoE_Rep2" = "aligned.SRR16242105.sorted.bam",
    "IsoE_Rep3" = "aligned.SRR16242106.sorted.bam",
    "IsoT_Rep1" = "aligned.SRR16242107.sorted.bam",
    "IsoT_Rep2" = "aligned.SRR16242108.sorted.bam",
    "IsoT_Rep3" = "aligned.SRR16242109.sorted.bam"
  )
# Attach the gene annotation taken from the 8p count table.
GSE185192.full <- GSE185192 %>%
  dplyr::left_join(annotation, by = "gene_id")
# get counts and annotation
GSE185192.annotation <- GSE185192.full %>%
  dplyr::select(-all_of(GSE185192.samples$sample_name))
GSE185192.counts <- GSE185192.full %>%
  dplyr::select(gene_id, all_of(GSE185192.samples$sample_name))
y.GSE185192 <- DGEList(
  counts = GSE185192.counts,
  samples = GSE185192.samples,
  group = GSE185192.samples$group
)
y.GSE185192$genes <- GSE185192.annotation
# Filter lowly expressed transcripts (same thresholds as for the 8p data).
keep <- filterByExpr(
  y.GSE185192,
  min.count = 30,
  min.total.count = 50,
  large.n = 10,
  min.prop = 0.75
)
table(keep)
## keep
## FALSE TRUE
## 42304 16431
# keep.lib.sizes = FALSE: library sizes are recomputed from the retained genes.
y.GSE185192 <- y.GSE185192[keep, , keep.lib.sizes = FALSE]
y.GSE185192 <- normLibSizes(y.GSE185192) # TMM normalization
biotypes <- y.GSE185192$genes$gene_biotype
# Count the occurrences of each gene biotype
biotype_table <- table(biotypes)
# Calculate the fraction of each gene biotype
biotype_fraction <- prop.table(biotype_table)
# One row per biotype. The rounded fractions are converted to a plain numeric
# vector: passing the table object itself to data.frame() expands it into two
# columns (category and Freq) instead of a single Fraction column.
biotype_df <- data.frame(
  Biotype = names(biotype_fraction),
  Count = as.numeric(biotype_table),
  Fraction = as.numeric(round(biotype_fraction, 3))
)
# calculate CPM
# TRUE is spelled out: the shorthand T is an ordinary variable that can be reassigned.
log2_cpm.GSE185192 <- cpm(y.GSE185192, log = TRUE, prior.count = 1, normalized.lib.sizes = TRUE)
# Name the CPM columns before binding, so the combined table carries the
# sample names that later column selections rely on.
colnames(log2_cpm.GSE185192) <- GSE185192.samples$sample_name
log2_tmm_data_with_annotations.GSE185192 <- cbind(y.GSE185192$genes, log2_cpm.GSE185192)
# DE analysis
plotMDS(y.GSE185192)

# Design without intercept: one column per group. The "group" prefix is
# stripped so the contrast below can refer to the bare group names.
samples.design <- model.matrix(~ 0 + group, data = y.GSE185192$samples)
colnames(samples.design) <- gsub("group", "", colnames(samples.design))
# Dispersion estimation; the common dispersion is printed for reference.
y.GSE185192 <- estimateDisp(y.GSE185192, design = samples.design, robust = TRUE)
print(y.GSE185192$common.dispersion)
## [1] 0.004212285
plotBCV(y.GSE185192)

# Quasi-likelihood GLM fit.
fit <- glmQLFit(y.GSE185192, design = samples.design, robust = TRUE)
plotQLDisp(fit)

# Contrast of interest: TS minus DS.
contrast <- makeContrasts(TSvsDS = TS - DS, levels = samples.design)
qlf.TSvsDS <- glmQLFTest(fit, contrast = contrast[, "TSvsDS"])
# Collect the result bundle and draw the two summary plots.
contr <- get_results(contrast = "TS - DS", qlf = qlf.TSvsDS)
results_list <- list(contr)
plot_all_results(results_list)

table.results <- plot_thresholded_results(results_list)
### Figure 5
# Centromere interval of chromosome 21, taken from the per-chromosome summary.
chr <- paste0("chr", 21)
centromere_start <- centromeres_summary %>%
  filter(chrom == chr) %>%
  pull(start)
centromere_end <- centromeres_summary %>%
  filter(chrom == chr) %>%
  pull(end)

## 8p cohort: mean log2-CPM per gene and group on chromosome 21, leaving out
## genes whose start lies inside the centromere interval.
gene.deldup.tmm <- log2_tmm_data_with_annotations %>%
  filter(
    gene_chr == "21",
    gene_start < centromere_start | gene_start >= centromere_end
  ) %>%
  dplyr::select(gene_id, gene_start, gene_end, all_of(samples$`Sample Name`)) %>%
  pivot_longer(
    cols = -c(gene_id, gene_start, gene_end),
    names_to = "Sample",
    values_to = "Expression"
  ) %>%
  # Attach each sample's group label.
  left_join(sample_to_group, by = c("Sample" = "Sample Name")) %>%
  group_by(gene_id, gene_start, gene_end, `Group Name`) %>%
  summarise(Average_Expression = mean(Expression, na.rm = TRUE), .groups = "drop") %>%
  arrange(gene_start, desc(Average_Expression))
# PRO - REV per gene; rolling_avg comes from custom_rollmean (defined elsewhere
# in this file) with k = 8, computed separately per arm (p / q).
gene.deldup.tmm.plot.relative.PRO <- gene.deldup.tmm %>%
  mutate(region = case_when(
    gene_start >= 0 & gene_start < centromere_start ~ "p",
    gene_start >= centromere_end ~ "q"
  )) %>%
  # Only the PRO and REV group means are needed.
  filter(`Group Name` %in% c('PRO', 'REV')) %>%
  tidyr::pivot_wider(names_from = `Group Name`, values_from = Average_Expression) %>%
  mutate(relative_expression = PRO - REV) %>%
  group_by(region) %>%
  mutate(rolling_avg = custom_rollmean(relative_expression, k = 8)) %>%
  ungroup()

## GSE185192 (TS vs DS): the same per-gene group means on chromosome 21.
gene.deldup.tmm.GSE185192 <- log2_tmm_data_with_annotations.GSE185192 %>%
  filter(
    gene_chr == "21",
    gene_start < centromere_start | gene_start >= centromere_end
  ) %>%
  dplyr::select(gene_id, gene_start, gene_end, all_of(GSE185192.samples$sample_name)) %>%
  pivot_longer(
    cols = -c(gene_id, gene_start, gene_end),
    names_to = "Sample",
    values_to = "Expression"
  ) %>%
  # Attach each sample's group label.
  left_join(GSE185192.samples, by = c("Sample" = "sample_name")) %>%
  group_by(gene_id, gene_start, gene_end, group) %>%
  summarise(Average_Expression = mean(Expression, na.rm = TRUE), .groups = "drop") %>%
  arrange(gene_start, desc(Average_Expression))
# TS - DS per gene, with the same per-arm custom_rollmean call (k = 8).
gene.deldup.tmm.plot.relative.TS.GSE185192 <- gene.deldup.tmm.GSE185192 %>%
  mutate(region = case_when(
    gene_start >= 0 & gene_start < centromere_start ~ "p",
    gene_start >= centromere_end ~ "q"
  )) %>%
  # Only the TS and DS group means are needed.
  filter(group %in% c('TS', 'DS')) %>%
  tidyr::pivot_wider(names_from = group, values_from = Average_Expression) %>%
  mutate(relative_expression = TS - DS) %>%
  group_by(region) %>%
  mutate(rolling_avg = custom_rollmean(relative_expression, k = 8)) %>%
  ungroup()

# Genes present in both datasets, with one rolling average per dataset.
plot_combined_data <- gene.deldup.tmm.plot.relative.PRO %>%
  dplyr::select(gene_id, gene_start, gene_end, rolling_avg) %>%
  inner_join(
    gene.deldup.tmm.plot.relative.TS.GSE185192 %>% dplyr::select(gene_id, rolling_avg),
    by = "gene_id",
    suffix = c("_8p", "_21")
  )
# Long format: one row per gene and dataset, with readable dataset labels.
plot_combined_data_long <- plot_combined_data %>%
  pivot_longer(
    cols = c(rolling_avg_8p, rolling_avg_21),
    names_to = "Dataset",
    names_prefix = "rolling_avg_",
    values_to = "rolling_avg"
  ) %>%
  mutate(Dataset = recode(Dataset, "8p" = "8p Proband", "21" = "Trisomy 21"))
# Label drawn below the panel (clipping is switched off further down).
text_cen <- textGrob("Centromere", gp = gpar(fontsize = 18, fontface = "bold"))
# Rolling-average relative expression along chromosome 21, coloured by dataset.
plot_object <- ggplot(
  plot_combined_data_long,
  aes(x = (gene_start + gene_end) / 2, y = rolling_avg, color = Dataset)
) +
  # Grey band marking the centromere interval.
  geom_rect(
    aes(xmin = centromere_start, xmax = centromere_end, ymin = -Inf, ymax = Inf),
    fill = "grey", color = "grey", alpha = 0.6
  ) +
  annotation_custom(
    text_cen,
    xmin = (centromere_start + centromere_end) / 2,
    xmax = (centromere_start + centromere_end) / 2,
    ymin = -1.35,
    ymax = -1.35
  ) +
  # linewidth replaces the deprecated size argument for lines (ggplot2 >= 3.4.0).
  geom_hline(yintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5, alpha = 0.8) +
  geom_point(size = 1.5, alpha = 0.9) +
  labs(
    title = "Gene Expression on Chromosome 21",
    x = "Chromosome Position (bp)",
    y = "Relative Expression Level (log2)",
    color = NULL
  ) +
  scale_y_continuous(limits = c(-1, 1.5), breaks = seq(-1.5, 1.5, by = 0.5)) +
  # Labels are derived from the breaks, so their count always matches the
  # data-dependent break sequence (a fixed label vector errors on a mismatch).
  scale_x_continuous(
    expand = c(0, 0),
    limits = c(0, max(plot_combined_data_long$gene_end) + 1e7),
    breaks = seq(from = 0, to = max(plot_combined_data_long$gene_end) + 1e7, by = 10000000),
    labels = scales::comma
  ) +
  scale_color_manual(values = c("8p Proband" = "steelblue", "Trisomy 21" = "firebrick")) +
  expand_limits(x = 0, y = -1.5) +
  theme_classic() +
  coord_cartesian(clip = "off") +
  theme(
    # Must come after theme_classic(): adding a complete theme replaces any
    # theme() settings added before it, so the margin would otherwise be lost.
    plot.margin = unit(c(1, 1, 2, 1), "lines"),
    text = element_text(family = "Arial", face = "bold"),
    plot.title = element_text(size = 28, face = "bold", hjust = 0.5, family = "Arial"),
    axis.title = element_text(size = 20, family = "Arial"),
    axis.text = element_text(size = 18, family = "Arial"),
    axis.title.x = element_text(margin = margin(t = 20)),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.text = element_text(size = 16, family = "Arial")
  )
print(plot_object)

# ggsave(
# filename = paste("CHR21_gene_expression_plot_log2_TS21vsDS_PROvsREV.png"),
# plot = plot_object,
# width = 18, # Adjust the width to make the canvas longer
# height = 6, # Adjust the height to control the aspect ratio
# dpi = 300, # Set the resolution to 300 dpi
# bg = "transparent"
# )
# Mean log2 expression of the chromosome 21 genes in DS and TS, and the fold
# change implied by the difference of those two means.
mean_values_GSE185192 <- summarize(
  gene.deldup.tmm.plot.relative.TS.GSE185192,
  DS_mean_log2 = mean(DS, na.rm = TRUE),
  TS_mean_log2 = mean(TS, na.rm = TRUE),
  abs_fold_change = 2^(TS_mean_log2 - DS_mean_log2)
)
mean_values_GSE185192
| 3.411458 |
4.034069 |
1.539659 |
Filtering out 8p and chr21 genes
# PRO vs REV (8p cohort): protein-coding genes on chromosomes 1-22, X and Y.
PRO_REV_8P_table <- contr.rev$top_tags$table %>%
  filter(gene_biotype == "protein_coding" & gene_chr %in% c(as.character(1:22), "X", "Y"))
# TS vs DS (GSE185192): the same restriction.
TS_DS_21_table <- contr$top_tags$table %>%
  filter(gene_biotype == "protein_coding" & gene_chr %in% c(as.character(1:22), "X", "Y"))
# write_csv(contr$top_tags$table %>% dplyr::filter(FDR <= 0.05), "DEgenes_Trisomy21.csv")
dim(TS_DS_21_table)
## [1] 14038 15
dim(PRO_REV_8P_table)
## [1] 13212 15
# Per chromosome: number of tested genes, number with FDR <= 0.05, and their
# ratio (TS vs DS). Chromosomes are ordered 1..22, X, Y for plotting.
region_counts21 <- TS_DS_21_table %>%
  group_by(region = gene_chr) %>%
  summarise(
    total_genes = n(),
    de_count = sum(FDR <= 0.05),
    .groups = "drop"
  ) %>%
  mutate(
    fraction = de_count / total_genes,
    region = factor(region, levels = c(as.character(1:22), "X", "Y"))
  )
# Bar chart of the DE fraction per chromosome with percentage labels.
ggplot(region_counts21, aes(x = region, y = fraction)) +
  geom_col(width = 0.7) +
  geom_text(
    aes(label = scales::percent(fraction, accuracy = 0.1)),
    vjust = -0.5,
    size = 3
  ) +
  labs(
    title = "DE Genes per Chromosome",
    subtitle = "TS vs DS (FDR <= 0.05, protein-coding)",
    x = "Chromosome",
    y = "Fraction DE"
  ) +
  scale_y_continuous(
    labels = scales::percent,
    expand = expansion(mult = c(0, 0.1))
  ) +
  theme_classic(base_family = "Arial") +
  theme(
    panel.grid = element_blank(),
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5),
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 14),
    axis.text.x = element_text(hjust = 1)
  )

## 8p
# Per chromosome: number of tested genes, number with FDR <= 0.05, and their
# ratio (PRO vs REV). Chromosomes are ordered 1..22, X, Y for plotting.
region_counts8p <- PRO_REV_8P_table %>%
  group_by(region = gene_chr) %>%
  summarise(
    total_genes = n(),
    de_count = sum(FDR <= 0.05),
    .groups = "drop"
  ) %>%
  mutate(
    fraction = de_count / total_genes,
    region = factor(region, levels = c(as.character(1:22), "X", "Y"))
  )
# Bar chart of the DE fraction per chromosome with percentage labels.
ggplot(region_counts8p, aes(x = region, y = fraction)) +
  geom_col(width = 0.7) +
  geom_text(
    aes(label = scales::percent(fraction, accuracy = 0.1)),
    vjust = -0.5,
    size = 3
  ) +
  labs(
    title = "DE Genes per Chromosome - 8p",
    subtitle = "PRO vs REV (FDR <= 0.05, protein-coding)",
    x = "Chromosome",
    y = "Fraction DE"
  ) +
  scale_y_continuous(
    labels = scales::percent,
    expand = expansion(mult = c(0, 0.1))
  ) +
  theme_classic(base_family = "Arial") +
  theme(
    panel.grid = element_blank(),
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5),
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 14),
    axis.text.x = element_text(hjust = 1)
  )

# Genes tested in both datasets. The annotation columns are taken from the 8p
# table only; the remaining columns of the two tables are told apart by the
# "_21" / "_8p" suffixes.
merged_FC <- TS_DS_21_table %>%
  dplyr::select(
    -c(
      gene_name, gene_chr, gene_start, gene_end, gene_strand, gene_length,
      gene_description, gene_biotype, tf_family
    )
  ) %>%
  inner_join(PRO_REV_8P_table, by = "gene_id", suffix = c("_21", "_8p"))
# # export as background for enrichr
# write.table(
# merged_FC$gene_name,
# file = "co-detected.txt",
# quote = FALSE,
# row.names = FALSE,
# col.names = FALSE
# )
Figure 5D
# Apply one exclusion rule to all three tables: drop chromosome 21 genes and
# every gene listed in combined_genes (built earlier in this file; presumably
# the 8p deletion/duplication region genes - confirm against its definition).
total21.filtered <- dplyr::filter(
  TS_DS_21_table,
  gene_chr != "21" & !(gene_id %in% combined_genes$ensembl_gene_id)
)
total8p.filtered <- dplyr::filter(
  PRO_REV_8P_table,
  gene_chr != "21" & !(gene_id %in% combined_genes$ensembl_gene_id)
)
merged_FC_filtered <- dplyr::filter(
  merged_FC,
  gene_chr != "21" & !(gene_id %in% combined_genes$ensembl_gene_id)
)
## Significant (FDR <= 0.05) protein-coding genes, split by direction of change
# Trisomy 21 dataset: gene names with positive / negative logFC.
up_21 <- total21.filtered %>%
  dplyr::filter(FDR <= 0.05, logFC > 0) %>%
  pull(gene_name)
down_21 <- total21.filtered %>%
  dplyr::filter(FDR <= 0.05, logFC < 0) %>%
  pull(gene_name)
# 8p dataset: gene names with positive / negative logFC.
up_8p <- total8p.filtered %>%
  dplyr::filter(FDR <= 0.05, logFC > 0) %>%
  pull(gene_name)
down_8p <- total8p.filtered %>%
  dplyr::filter(FDR <= 0.05, logFC < 0) %>%
  pull(gene_name)
# Gene names changed in the same direction in both datasets.
upregulated_intersected_genes <- intersect(up_21, up_8p)
downregulated_intersected_genes <- intersect(down_21, down_8p)
cat("exclude 21 and 8p genes:")
## exclude 21 and 8p genes:
cat(
  "Upregulated:", length(upregulated_intersected_genes),
  "\nDownregulated:", length(downregulated_intersected_genes), "\n"
)
## Upregulated: 263
## Downregulated: 269
# write.table(
# upregulated_intersected_genes,
# file = "enrichr_upregulated_both_filtered.txt",
# quote = FALSE,
# row.names = FALSE,
# col.names = FALSE
# )
#
# write.table(
# downregulated_intersected_genes,
# file = "enrichr_downregulated_both_filtered.txt",
# quote = FALSE,
# row.names = FALSE,
# col.names = FALSE
# )
#
# write.table(
# merged_FC_filtered$gene_name,
# file = "co-detected_filtered.txt",
# quote = FALSE,
# row.names = FALSE,
# col.names = FALSE
# )
# hypergeometric test
# Tests whether the overlap of the two upregulated gene sets is larger than
# expected when drawing from the genes retained in merged_FC_filtered.
# NOTE(review): the sets are matched on gene_name; rows with a blank or
# repeated symbol are collapsed by intersect() but counted one by one by
# sum(... %in% ...) - consider matching on gene_id; confirm.
N <- nrow(merged_FC_filtered) # total number of genes in merged dataset
# Upregulated in 21 that are in merged_FC
K <- sum(up_21 %in% merged_FC_filtered$gene_name)
# Upregulated in 8p that are in merged_FC
n <- sum(up_8p %in% merged_FC_filtered$gene_name)
# Shared upregulated genes that are also in merged_FC
x <- sum(upregulated_intersected_genes %in% merged_FC_filtered$gene_name)
# Hypergeometric test (greater tail = enrichment)
# q = x - 1 with lower.tail = FALSE gives P(X >= x). The test is computed once;
# an identical second phyper() call was removed.
p_value <- phyper(q = x - 1, m = K, n = N - K, k = n, lower.tail = FALSE)
cat("N (total tested genes):", N, "\n",
"K (DE up in dataset 21):", K, "\n",
"n (DE up in dataset 8p):", n, "\n",
"x (co-upregulated):", x, "\n")
## N (total tested genes): 12869
## K (DE up in dataset 21): 1623
## n (DE up in dataset 8p): 972
## x (co-upregulated): 263
# "N (total tested genes): 12869
# K (DE up in dataset 21): 1623
# n (DE up in dataset 8p): 972
# x (co-upregulated): 263
# "
print(p_value)
## [1] 5.901803e-37
# Euler diagram input: genes exclusive to each set, plus the shared genes.
venn_data <- c(
  "Invdupdel(8p)" = n - x,
  "Trisomy 21" = K - x,
  "Invdupdel(8p)&Trisomy 21" = x
)
venn_euler <- euler(venn_data)
# Fill the two sets with colours 3 and 7 of the 10-colour "Hiroshige" palette;
# cex = 0 is passed for the labels.
p <- plot(
  venn_euler,
  fills = met.brewer("Hiroshige", n = 10)[c(3, 7)],
  labels = list(font = 1, cex = 0),
  alpha = 0.7
)
# png("euler_plot_up.png", width = 1500, height = 1200, res = 300)
# grid::grid.newpage()
# # Draw the plot
# grid::grid.draw(p)
#
# # Close the file
# dev.off()
# hypergeometric test
N <- nrow(merged_FC_filtered) # total genes (universe)
# Upregulated in 21 that are in merged_FC
K <- sum(down_21 %in% merged_FC_filtered$gene_name)
# Upregulated in 8p that are in merged_FC
n <- sum(down_8p%in% merged_FC_filtered$gene_name)
# Shared upregulated genes that are also in merged_FC
x <- sum(downregulated_intersected_genes %in% merged_FC_filtered$gene_name)
# Hypergeometric test (greater tail = enrichment)
p_value <- phyper(q = x - 1, m = K, n = N - K, k = n, lower.tail = FALSE)
cat("N (total tested genes):", N, "\n",
"K (DE down in dataset 21):", K, "\n",
"n (DE down in dataset 8p):", n, "\n",
"x (co-downregulated):", x, "\n")
## N (total tested genes): 12869
## K (DE down in dataset 21): 1783
## n (DE down in dataset 8p): 1153
## x (co-downregulated): 269
# "N (total tested genes): 12869
# K (DE down in dataset 21): 1783
# n (DE down in dataset 8p): 1153
# x (co-downregulated): 269
# "
print(p_value)
## [1] 5.388227e-20
# Region sizes for the Euler diagram: genes specific to each dataset plus
# the shared set (the "A&B" name marks the intersection for eulerr).
venn_data <- setNames(
  c(n - x, K - x, x),
  c("Invdupdel(8p)", "Trisomy 21", "Invdupdel(8p)&Trisomy 21")
)
venn_euler <- euler(venn_data)
# Fill the two sets with colours 3 and 7 of the 10-colour "Hiroshige" palette.
# NOTE(review): cex = 0 on the labels presumably hides the label text - confirm.
p <- plot(
  venn_euler,
  fills = met.brewer("Hiroshige", n = 10)[c(3, 7)],
  labels = list(font = 1, cex = 0),
  alpha = 0.7
)
# png("euler_plot_down.png", width = 1500, height = 1200, res = 300)
# grid::grid.newpage()
# # Draw the plot
# grid::grid.draw(p)
# # Close the file
# dev.off()
# Genes upregulated in 8p but not in 21, restricted to genes present in
# merged_FC_filtered. local() keeps the intermediate set out of the workspace.
upregulated_8p_not_21 <- local({
  only_8p <- setdiff(up_8p, up_21)
  only_8p[only_8p %in% merged_FC_filtered$gene_name]
})
cat(
  "Upregulated in 8p but not in 21:", length(upregulated_8p_not_21),
  " (FDR <= 0.05) \n"
)
## Upregulated in 8p but not in 21: 709 (FDR <= 0.05)
# Same selection for the downregulated gene sets
downregulated_8p_not_21 <- local({
  only_8p <- setdiff(down_8p, down_21)
  only_8p[only_8p %in% merged_FC_filtered$gene_name]
})
cat(
  "Downregulated in 8p but not in 21:", length(downregulated_8p_not_21),
  " (FDR <= 0.05) \n"
)
## Downregulated in 8p but not in 21: 884 (FDR <= 0.05)
# write.table(
# upregulated_8p_not_21,
# file = "enrichr_upregulated_8p_not_21_filtered.txt",
# quote = FALSE,
# row.names = FALSE,
# col.names = FALSE
# )
#
# write.table(
# downregulated_8p_not_21,
# file = "enrichr_downregulated_8p_not_21_filtered.txt",
# quote = FALSE,
# row.names = FALSE,
# col.names = FALSE
# )
# Genes upregulated in 21 but not in 8p, restricted to genes present in
# merged_FC_filtered. local() keeps the intermediate set out of the workspace.
upregulated_21_not_8p <- local({
  only_21 <- setdiff(up_21, up_8p)
  only_21[only_21 %in% merged_FC_filtered$gene_name]
})
cat(
  "Upregulated in 21 but not in 8p:", length(upregulated_21_not_8p),
  " (FDR <= 0.05) \n"
)
## Upregulated in 21 but not in 8p: 1360 (FDR <= 0.05)
# Same selection for the downregulated gene sets
downregulated_21_not_8p <- local({
  only_21 <- setdiff(down_21, down_8p)
  only_21[only_21 %in% merged_FC_filtered$gene_name]
})
cat(
  "Downregulated in 21 but not in 8p:", length(downregulated_21_not_8p),
  " (FDR <= 0.05) \n"
)
## Downregulated in 21 but not in 8p: 1514 (FDR <= 0.05)
# write.table(
# upregulated_21_not_8p,
# file = "enrichr_upregulated_21_not_8p_filtered.txt",
# quote = FALSE,
# row.names = FALSE,
# col.names = FALSE
# )
#
# write.table(
# downregulated_21_not_8p,
# file = "enrichr_downregulated_21_not_8p_filtered.txt",
# quote = FALSE,
# row.names = FALSE,
# col.names = FALSE
# )
# Ensembl gene IDs that appear in both the deletion and duplication gene tables
overlapping_genes <- intersect(
  protein_coding_genes_del$ensembl_gene_id,
  protein_coding_genes_dup$ensembl_gene_id
)
# Report the shared IDs, or state explicitly that there are none
if (length(overlapping_genes) == 0) {
  cat("No genes are present in both the deletion and duplication regions.\n")
} else {
  cat("Genes present in both regions:\n")
  print(overlapping_genes)
}
## No genes are present in both the deletion and duplication regions.
Session Info and Citations
# Print the R version, platform, and attached/loaded package versions
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Los_Angeles
## tzcode source: internal
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggthemes_5.1.0 eulerr_7.0.2 MetBrewer_0.2.0
## [4] gt_0.11.1 gtsummary_2.0.4 ggrepel_0.9.6
## [7] scales_1.3.0 biomaRt_2.60.1 msigdbr_7.5.1
## [10] enrichplot_1.24.4 org.Hs.eg.db_3.19.1 AnnotationDbi_1.66.0
## [13] IRanges_2.38.1 S4Vectors_0.42.1 Biobase_2.64.0
## [16] BiocGenerics_0.50.0 clusterProfiler_4.12.6 edgeR_4.2.2
## [19] limma_3.60.6 lubridate_1.9.3 forcats_1.0.0
## [22] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2
## [25] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [28] ggplot2_3.5.1 tidyverse_2.0.0 readxl_1.4.3
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.17.1 jsonlite_1.8.9
## [4] magrittr_2.0.3 farver_2.1.2 rmarkdown_2.28
## [7] fs_1.6.5 zlibbioc_1.50.0 vctrs_0.6.5
## [10] memoise_2.0.1 ggtree_3.12.0 progress_1.2.3
## [13] htmltools_0.5.8.1 curl_5.2.3 cellranger_1.1.0
## [16] gridGraphics_0.5-1 sass_0.4.9 bslib_0.8.0
## [19] plyr_1.8.9 httr2_1.0.5 cachem_1.1.0
## [22] igraph_2.1.1 lifecycle_1.0.4 pkgconfig_2.0.3
## [25] Matrix_1.7-1 R6_2.5.1 fastmap_1.2.0
## [28] gson_0.1.0 GenomeInfoDbData_1.2.12 digest_0.6.37
## [31] aplot_0.2.3 colorspace_2.1-1 patchwork_1.3.0
## [34] RSQLite_2.3.7 labeling_0.4.3 filelock_1.0.3
## [37] fansi_1.0.6 timechange_0.3.0 httr_1.4.7
## [40] polyclip_1.10-7 compiler_4.4.1 bit64_4.5.2
## [43] withr_3.0.2 BiocParallel_1.38.0 viridis_0.6.5
## [46] DBI_1.2.3 highr_0.11 ggforce_0.4.2
## [49] R.utils_2.12.3 MASS_7.3-61 rappdirs_0.3.3
## [52] tools_4.4.1 ape_5.8 scatterpie_0.2.4
## [55] R.oo_1.26.0 glue_1.8.0 nlme_3.1-166
## [58] GOSemSim_2.30.2 polylabelr_0.3.0 shadowtext_0.1.4
## [61] reshape2_1.4.4 fgsea_1.30.0 generics_0.1.3
## [64] gtable_0.3.6 tzdb_0.4.0 R.methodsS3_1.8.2
## [67] data.table_1.16.2 hms_1.1.3 xml2_1.3.6
## [70] tidygraph_1.3.1 utf8_1.2.4 XVector_0.44.0
## [73] pillar_1.9.0 vroom_1.6.5 babelgene_22.9
## [76] yulab.utils_0.1.7 splines_4.4.1 tweenr_2.0.3
## [79] BiocFileCache_2.12.0 treeio_1.28.0 lattice_0.22-6
## [82] bit_4.5.0 tidyselect_1.2.1 GO.db_3.19.1
## [85] locfit_1.5-9.10 Biostrings_2.72.1 knitr_1.48
## [88] gridExtra_2.3 xfun_0.48 graphlayouts_1.2.0
## [91] statmod_1.5.0 stringi_1.8.4 UCSC.utils_1.0.0
## [94] lazyeval_0.2.2 ggfun_0.1.7 yaml_2.3.10
## [97] evaluate_1.0.1 codetools_0.2-20 ggraph_2.2.1
## [100] qvalue_2.36.0 ggplotify_0.1.2 cli_3.6.3
## [103] munsell_0.5.1 jquerylib_0.1.4 Rcpp_1.0.13
## [106] GenomeInfoDb_1.40.1 dbplyr_2.5.0 png_0.1-8
## [109] parallel_4.4.1 blob_1.2.4 prettyunits_1.2.0
## [112] DOSE_3.30.5 viridisLite_0.4.2 tidytree_0.4.6
## [115] crayon_1.5.3 rlang_1.1.4 cowplot_1.1.3
## [118] fastmatch_1.1-4 KEGGREST_1.44.1
# Names of the attached non-base packages reported by sessionInfo()
packages_in_use <- names(sessionInfo()$otherPkgs)
# One citation() entry per attached package, in the same order
the_citations_list <- lapply(packages_in_use, citation)
# Auto-print the collected citations
the_citations_list
## [[1]]
## To cite package 'ggthemes' in publications use:
##
## Arnold J (2024). _ggthemes: Extra Themes, Scales and Geoms for
## 'ggplot2'_. R package version 5.1.0,
## <https://CRAN.R-project.org/package=ggthemes>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {ggthemes: Extra Themes, Scales and Geoms for 'ggplot2'},
## author = {Jeffrey B. Arnold},
## year = {2024},
## note = {R package version 5.1.0},
## url = {https://CRAN.R-project.org/package=ggthemes},
## }
##
## [[2]]
## To cite use of the eulerr R package in publications, please use:
##
## Larsson J (2024). _eulerr: Area-Proportional Euler and Venn Diagrams
## with Ellipses_. R package version 7.0.2,
## <https://CRAN.R-project.org/package=eulerr>.
##
## To cite the methodology behind eulerr in publications, please use:
##
## Larsson J, Gustafsson P (2018). "A Case Study in Fitting
## Area-Proportional Euler Diagrams with Ellipses Using eulerr." In
## _Proceedings of International Workshop on Set Visualization and
## Reasoning_, volume 2116, 84-91.
## <https://cran.r-project.org/package=eulerr>.
##
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
##
## [[3]]
## To cite package 'MetBrewer' in publications use:
##
## Mills BR (2022). _MetBrewer: Color Palettes Inspired by Works at the
## Metropolitan Museum of Art_. R package version 0.2.0,
## <https://CRAN.R-project.org/package=MetBrewer>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {MetBrewer: Color Palettes Inspired by Works at the Metropolitan Museum of
## Art},
## author = {Blake Robert Mills},
## year = {2022},
## note = {R package version 0.2.0},
## url = {https://CRAN.R-project.org/package=MetBrewer},
## }
##
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
##
## [[4]]
## To cite package 'gt' in publications use:
##
## Iannone R, Cheng J, Schloerke B, Hughes E, Lauer A, Seo J, Brevoort
## K, Roy O (2024). _gt: Easily Create Presentation-Ready Display
## Tables_. R package version 0.11.1,
## <https://CRAN.R-project.org/package=gt>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {gt: Easily Create Presentation-Ready Display Tables},
## author = {Richard Iannone and Joe Cheng and Barret Schloerke and Ellis Hughes and Alexandra Lauer and JooYoung Seo and Ken Brevoort and Olivier Roy},
## year = {2024},
## note = {R package version 0.11.1},
## url = {https://CRAN.R-project.org/package=gt},
## }
##
## [[5]]
## To cite gtsummary in publications use:
##
## Sjoberg DD, Whiting K, Curry M, Lavery JA, Larmarange J. Reproducible
## summary tables with the gtsummary package. The R Journal
## 2021;13:570–80. https://doi.org/10.32614/RJ-2021-053.
##
## A BibTeX entry for LaTeX users is
##
## @Article{gtsummary,
## author = {Daniel D. Sjoberg and Karissa Whiting and Michael Curry and Jessica A. Lavery and Joseph Larmarange},
## title = {Reproducible Summary Tables with the gtsummary Package},
## journal = {{The R Journal}},
## year = {2021},
## url = {https://doi.org/10.32614/RJ-2021-053},
## doi = {10.32614/RJ-2021-053},
## volume = {13},
## issue = {1},
## pages = {570-580},
## }
##
## [[6]]
## To cite package 'ggrepel' in publications use:
##
## Slowikowski K (2024). _ggrepel: Automatically Position
## Non-Overlapping Text Labels with 'ggplot2'_. R package version 0.9.6,
## <https://CRAN.R-project.org/package=ggrepel>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {ggrepel: Automatically Position Non-Overlapping Text Labels with
## 'ggplot2'},
## author = {Kamil Slowikowski},
## year = {2024},
## note = {R package version 0.9.6},
## url = {https://CRAN.R-project.org/package=ggrepel},
## }
##
## [[7]]
## To cite package 'scales' in publications use:
##
## Wickham H, Pedersen T, Seidel D (2023). _scales: Scale Functions for
## Visualization_. R package version 1.3.0,
## <https://CRAN.R-project.org/package=scales>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {scales: Scale Functions for Visualization},
## author = {Hadley Wickham and Thomas Lin Pedersen and Dana Seidel},
## year = {2023},
## note = {R package version 1.3.0},
## url = {https://CRAN.R-project.org/package=scales},
## }
##
## [[8]]
## To cite the biomaRt package in publications use:
##
## Mapping identifiers for the integration of genomic datasets with the
## R/Bioconductor package biomaRt. Steffen Durinck, Paul T. Spellman,
## Ewan Birney and Wolfgang Huber, Nature Protocols 4, 1184-1191 (2009).
##
## BioMart and Bioconductor: a powerful link between biological
## databases and microarray data analysis. Steffen Durinck, Yves Moreau,
## Arek Kasprzyk, Sean Davis, Bart De Moor, Alvis Brazma and Wolfgang
## Huber, Bioinformatics 21, 3439-3440 (2005).
##
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
##
## [[9]]
## To cite package 'msigdbr' in publications use:
##
## Dolgalev I (2022). _msigdbr: MSigDB Gene Sets for Multiple Organisms
## in a Tidy Data Format_. R package version 7.5.1,
## <https://CRAN.R-project.org/package=msigdbr>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {msigdbr: MSigDB Gene Sets for Multiple Organisms in a Tidy Data Format},
## author = {Igor Dolgalev},
## year = {2022},
## note = {R package version 7.5.1},
## url = {https://CRAN.R-project.org/package=msigdbr},
## }
##
## [[10]]
## To cite package 'enrichplot' in publications use:
##
## Yu G (2024). _enrichplot: Visualization of Functional Enrichment
## Result_. doi:10.18129/B9.bioc.enrichplot
## <https://doi.org/10.18129/B9.bioc.enrichplot>, R package version
## 1.24.4, <https://bioconductor.org/packages/enrichplot>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {enrichplot: Visualization of Functional Enrichment Result},
## author = {Guangchuang Yu},
## year = {2024},
## note = {R package version 1.24.4},
## url = {https://bioconductor.org/packages/enrichplot},
## doi = {10.18129/B9.bioc.enrichplot},
## }
##
## [[11]]
## To cite package 'org.Hs.eg.db' in publications use:
##
## Carlson M (2024). _org.Hs.eg.db: Genome wide annotation for Human_. R
## package version 3.19.1.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {org.Hs.eg.db: Genome wide annotation for Human},
## author = {Marc Carlson},
## year = {2024},
## note = {R package version 3.19.1},
## }
##
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
##
## [[12]]
## To cite package 'AnnotationDbi' in publications use:
##
## Pagès H, Carlson M, Falcon S, Li N (2024). _AnnotationDbi:
## Manipulation of SQLite-based annotations in Bioconductor_.
## doi:10.18129/B9.bioc.AnnotationDbi
## <https://doi.org/10.18129/B9.bioc.AnnotationDbi>, R package version
## 1.66.0, <https://bioconductor.org/packages/AnnotationDbi>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {AnnotationDbi: Manipulation of SQLite-based annotations in Bioconductor},
## author = {Hervé Pagès and Marc Carlson and Seth Falcon and Nianhua Li},
## year = {2024},
## note = {R package version 1.66.0},
## url = {https://bioconductor.org/packages/AnnotationDbi},
## doi = {10.18129/B9.bioc.AnnotationDbi},
## }
##
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
##
## [[13]]
## To cite package 'IRanges' in publications use:
##
## Lawrence M, Huber W, Pag\`es H, Aboyoun P, Carlson M, et al. (2013)
## Software for Computing and Annotating Genomic Ranges. PLoS Comput
## Biol 9(8): e1003118. doi:10.1371/journal.pcbi.1003118
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {Software for Computing and Annotating Genomic Ranges},
## author = {Michael Lawrence and Wolfgang Huber and Herv\'e Pag\`es and Patrick Aboyoun and Marc Carlson and Robert Gentleman and Martin Morgan and Vincent Carey},
## year = {2013},
## journal = {{PLoS} Computational Biology},
## volume = {9},
## issue = {8},
## doi = {10.1371/journal.pcbi.1003118},
## url = {http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003118},
## }
##
## [[14]]
## To cite package 'S4Vectors' in publications use:
##
## Pagès H, Lawrence M, Aboyoun P (2024). _S4Vectors: Foundation of
## vector-like and list-like containers in Bioconductor_.
## doi:10.18129/B9.bioc.S4Vectors
## <https://doi.org/10.18129/B9.bioc.S4Vectors>, R package version
## 0.42.1, <https://bioconductor.org/packages/S4Vectors>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {S4Vectors: Foundation of vector-like and list-like containers in
## Bioconductor},
## author = {Hervé Pagès and Michael Lawrence and Patrick Aboyoun},
## year = {2024},
## note = {R package version 0.42.1},
## url = {https://bioconductor.org/packages/S4Vectors},
## doi = {10.18129/B9.bioc.S4Vectors},
## }
##
## [[15]]
## To cite package 'Biobase' in publications use:
##
## Orchestrating high-throughput genomic analysis with Bioconductor. W.
## Huber, V.J. Carey, R. Gentleman, ..., M. Morgan Nature Methods,
## 2015:12, 115.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## author = {W. Huber and V. J. Carey and R. Gentleman and S. Anders and M. Carlson and B. S. Carvalho and H. C. Bravo and S. Davis and L. Gatto and T. Girke and R. Gottardo and F. Hahne and K. D. Hansen and R. A. Irizarry and M. Lawrence and M. I. Love and J. MacDonald and V. Obenchain and A. K. {Ole's} and H. {Pag`es} and A. Reyes and P. Shannon and G. K. Smyth and D. Tenenbaum and L. Waldron and M. Morgan},
## title = {{O}rchestrating high-throughput genomic analysis with {B}ioconductor},
## journal = {Nature Methods},
## year = {2015},
## volume = {12},
## number = {2},
## pages = {115--121},
## url = {http://www.nature.com/nmeth/journal/v12/n2/full/nmeth.3252.html},
## }
##
## [[16]]
## To cite package 'BiocGenerics' in publications use:
##
## Orchestrating high-throughput genomic analysis with Bioconductor. W.
## Huber, V.J. Carey, R. Gentleman, ..., M. Morgan Nature Methods,
## 2015:12, 115.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## author = {{Huber} and {W.} and {Carey} and V. J. and {Gentleman} and {R.} and {Anders} and {S.} and {Carlson} and {M.} and {Carvalho} and B. S. and {Bravo} and H. C. and {Davis} and {S.} and {Gatto} and {L.} and {Girke} and {T.} and {Gottardo} and {R.} and {Hahne} and {F.} and {Hansen} and K. D. and {Irizarry} and R. A. and {Lawrence} and {M.} and {Love} and M. I. and {MacDonald} and {J.} and {Obenchain} and {V.} and {{Ole's}} and A. K. and {{Pag`es}} and {H.} and {Reyes} and {A.} and {Shannon} and {P.} and {Smyth} and G. K. and {Tenenbaum} and {D.} and {Waldron} and {L.} and {Morgan} and {M.}},
## title = {{O}rchestrating high-throughput genomic analysis with {B}ioconductor},
## journal = {Nature Methods},
## year = {2015},
## volume = {12},
## number = {2},
## pages = {115--121},
## url = {http://www.nature.com/nmeth/journal/v12/n2/full/nmeth.3252.html},
## }
##
## [[17]]
## Please cite S. Xu (2024) for using clusterProfiler. In addition, please
## cite G. Yu (2010) when using GOSemSim, G. Yu (2015) when using DOSE and
## G. Yu (2015) when using ChIPseeker.
##
## S Xu, E Hu, Y Cai, Z Xie, X Luo, L Zhan, W Tang, Q Wang, B Liu, R
## Wang, W Xie, T Wu, L Xie, G Yu. Using clusterProfiler to characterize
## multiomics data. Nature Protocols. 2024,
## doi:10.1038/s41596-024-01020-z
##
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L
## Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal
## enrichment tool for interpreting omics data. The Innovation. 2021,
## 2(3):100141
##
## Guangchuang Yu, Li-Gen Wang, Yanyan Han and Qing-Yu He.
## clusterProfiler: an R package for comparing biological themes among
## gene clusters. OMICS: A Journal of Integrative Biology 2012,
## 16(5):284-287
##
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
##
## [[18]]
## See Section 1.2 in the User's Guide for more detail about how to cite
## the different edgeR pipelines.
##
## Chen Y, Chen L, Lun ATL, Baldoni PL, Smyth GK (2024). edgeR 4.0:
## powerful differential analysis of sequencing data with expanded
## functionality and improved support for small counts and larger
## datasets. bioRxiv doi: 10.1101/2024.01.21.576131
##
## Chen Y, Lun ATL, Smyth GK (2016). From reads to genes to pathways:
## differential expression analysis of RNA-Seq experiments using
## Rsubread and the edgeR quasi-likelihood pipeline. F1000Research 5,
## 1438
##
## McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression
## analysis of multifactor RNA-Seq experiments with respect to
## biological variation. Nucleic Acids Research 40(10), 4288-4297
##
## Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a Bioconductor
## package for differential expression analysis of digital gene
## expression data. Bioinformatics 26(1), 139-140
##
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
##
## [[19]]
## To cite package 'limma' in publications use:
##
## Ritchie, M.E., Phipson, B., Wu, D., Hu, Y., Law, C.W., Shi, W., and
## Smyth, G.K. (2015). limma powers differential expression analyses for
## RNA-sequencing and microarray studies. Nucleic Acids Research 43(7),
## e47.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## author = {Matthew E Ritchie and Belinda Phipson and Di Wu and Yifang Hu and Charity W Law and Wei Shi and Gordon K Smyth},
## title = {{limma} powers differential expression analyses for {RNA}-sequencing and microarray studies},
## journal = {Nucleic Acids Research},
## year = {2015},
## volume = {43},
## number = {7},
## pages = {e47},
## doi = {10.1093/nar/gkv007},
## }
##
## [[20]]
## To cite lubridate in publications use:
##
## Garrett Grolemund, Hadley Wickham (2011). Dates and Times Made Easy
## with lubridate. Journal of Statistical Software, 40(3), 1-25. URL
## https://www.jstatsoft.org/v40/i03/.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {Dates and Times Made Easy with {lubridate}},
## author = {Garrett Grolemund and Hadley Wickham},
## journal = {Journal of Statistical Software},
## year = {2011},
## volume = {40},
## number = {3},
## pages = {1--25},
## url = {https://www.jstatsoft.org/v40/i03/},
## }
##
## [[21]]
## To cite package 'forcats' in publications use:
##
## Wickham H (2023). _forcats: Tools for Working with Categorical
## Variables (Factors)_. R package version 1.0.0,
## <https://CRAN.R-project.org/package=forcats>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {forcats: Tools for Working with Categorical Variables (Factors)},
## author = {Hadley Wickham},
## year = {2023},
## note = {R package version 1.0.0},
## url = {https://CRAN.R-project.org/package=forcats},
## }
##
## [[22]]
## To cite package 'stringr' in publications use:
##
## Wickham H (2023). _stringr: Simple, Consistent Wrappers for Common
## String Operations_. R package version 1.5.1,
## <https://CRAN.R-project.org/package=stringr>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {stringr: Simple, Consistent Wrappers for Common String Operations},
## author = {Hadley Wickham},
## year = {2023},
## note = {R package version 1.5.1},
## url = {https://CRAN.R-project.org/package=stringr},
## }
##
## [[23]]
## To cite package 'dplyr' in publications use:
##
## Wickham H, François R, Henry L, Müller K, Vaughan D (2023). _dplyr: A
## Grammar of Data Manipulation_. R package version 1.1.4,
## <https://CRAN.R-project.org/package=dplyr>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {dplyr: A Grammar of Data Manipulation},
## author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller and Davis Vaughan},
## year = {2023},
## note = {R package version 1.1.4},
## url = {https://CRAN.R-project.org/package=dplyr},
## }
##
## [[24]]
## To cite package 'purrr' in publications use:
##
## Wickham H, Henry L (2023). _purrr: Functional Programming Tools_. R
## package version 1.0.2, <https://CRAN.R-project.org/package=purrr>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {purrr: Functional Programming Tools},
## author = {Hadley Wickham and Lionel Henry},
## year = {2023},
## note = {R package version 1.0.2},
## url = {https://CRAN.R-project.org/package=purrr},
## }
##
## [[25]]
## To cite package 'readr' in publications use:
##
## Wickham H, Hester J, Bryan J (2024). _readr: Read Rectangular Text
## Data_. R package version 2.1.5,
## <https://CRAN.R-project.org/package=readr>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {readr: Read Rectangular Text Data},
## author = {Hadley Wickham and Jim Hester and Jennifer Bryan},
## year = {2024},
## note = {R package version 2.1.5},
## url = {https://CRAN.R-project.org/package=readr},
## }
##
## [[26]]
## To cite package 'tidyr' in publications use:
##
## Wickham H, Vaughan D, Girlich M (2024). _tidyr: Tidy Messy Data_. R
## package version 1.3.1, <https://CRAN.R-project.org/package=tidyr>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {tidyr: Tidy Messy Data},
## author = {Hadley Wickham and Davis Vaughan and Maximilian Girlich},
## year = {2024},
## note = {R package version 1.3.1},
## url = {https://CRAN.R-project.org/package=tidyr},
## }
##
## [[27]]
## To cite package 'tibble' in publications use:
##
## Müller K, Wickham H (2023). _tibble: Simple Data Frames_. R package
## version 3.2.1, <https://CRAN.R-project.org/package=tibble>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {tibble: Simple Data Frames},
## author = {Kirill Müller and Hadley Wickham},
## year = {2023},
## note = {R package version 3.2.1},
## url = {https://CRAN.R-project.org/package=tibble},
## }
##
## [[28]]
## To cite ggplot2 in publications, please use
##
## H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
## Springer-Verlag New York, 2016.
##
## A BibTeX entry for LaTeX users is
##
## @Book{,
## author = {Hadley Wickham},
## title = {ggplot2: Elegant Graphics for Data Analysis},
## publisher = {Springer-Verlag New York},
## year = {2016},
## isbn = {978-3-319-24277-4},
## url = {https://ggplot2.tidyverse.org},
## }
##
## [[29]]
## To cite package 'tidyverse' in publications use:
##
## Wickham H, Averick M, Bryan J, Chang W, McGowan LD, François R,
## Grolemund G, Hayes A, Henry L, Hester J, Kuhn M, Pedersen TL, Miller
## E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V,
## Takahashi K, Vaughan D, Wilke C, Woo K, Yutani H (2019). "Welcome to
## the tidyverse." _Journal of Open Source Software_, *4*(43), 1686.
## doi:10.21105/joss.01686 <https://doi.org/10.21105/joss.01686>.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {Welcome to the {tidyverse}},
## author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
## year = {2019},
## journal = {Journal of Open Source Software},
## volume = {4},
## number = {43},
## pages = {1686},
## doi = {10.21105/joss.01686},
## }
##
## [[30]]
## To cite package 'readxl' in publications use:
##
## Wickham H, Bryan J (2023). _readxl: Read Excel Files_. R package
## version 1.4.3, <https://CRAN.R-project.org/package=readxl>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {readxl: Read Excel Files},
## author = {Hadley Wickham and Jennifer Bryan},
## year = {2023},
## note = {R package version 1.4.3},
## url = {https://CRAN.R-project.org/package=readxl},
## }