library(DESeq2) #for differential expression analysis
library(tidyverse)
library(ggnewscale) #add extra scale to ggplot
library(ggtext) #text in ggplot
library(viridis) #to change colour scale
library(RColorBrewer) #to manage colours
library(patchwork) #for combining graphs
library(stringr) #for manipulating strings
library(pheatmap) #for heatmaps
library(DEGreport) #to create report of DE results
library(ggplot2) #for graphs
library(ggrepel) #to label individual points on ggplot graph
library(factoextra) #for PCA
library(apeglm) #for shrinking log2fold change estimates
library(ashr) #for shrinking log2fold change estimates
library(knitr) #for kable
library(splitstackshape) #for spliting concatenated values into separate values
library(ggvenn) #for venn diagrams
library(patchwork) #for combining graphs
library(Cairo) #for cairo graphics to change font, save ggplots as pdfs etc
library(extrafont) #for extra text font families
#for funtional enrichment analysis
library(DOSE)
library(pathview)
library(clusterProfiler)
library(enrichplot)
library(ggnewscale)
library(ggupset)
# count data
count_data <- read.delim("data/corset-counts.txt", row.names = 1)
# meta data
meta_data1 <- read_csv("data/metadata.csv", trim_ws = TRUE)
meta_data1 <- meta_data1 %>%
filter(Squid == "PS506" | Squid == "PS508" | Squid == "PS509" | Squid == "PS510" | Squid == "PS513" | Squid == "PS514" | Squid == "PS518" | Squid == "PS522" | Squid == "PS527" | Squid == "PS528" | Squid == "PS529" | Squid == "PS534" | Squid == "PS539" | Squid == "PS547" | Squid == "PS551" | Squid == "PS557" | Squid == "PS561" | Squid == "PS584" | Squid == "PS626" | Squid == "PS636") %>%
mutate(CO2 = factor(CO2, levels=c('Ambient', 'Elevated')))
meta_data2 <- meta_data1
meta_data1$Squid = paste0(meta_data1$Squid, '_CNS')
meta_data2$Squid = paste0(meta_data2$Squid, '_eyes')
meta_data1 <- meta_data1 %>%
mutate(tissue = "CNS")
meta_data2 <- meta_data2 %>%
mutate(tissue = "eyes")
meta_data <- meta_data1 %>%
full_join(meta_data2)
meta_data <- meta_data %>%
unite("sampletype", CO2, tissue, remove = FALSE)
#Make Squid in meta_data same order as in count_data (alphabetical) and make into row names (rather than a normal column)
meta_data <- meta_data %>%
dplyr::arrange(Squid) %>%
column_to_rownames(var = "Squid")
save(count_data, file = 'data/count_data.RData')
save(meta_data, file = 'data/meta_data.RData')
#Subset count_data for just CNS
count_data_CNS <- count_data %>%
select(contains("CNS"))
save(count_data_CNS, file = 'data/count_data_CNS.RData')
#Subset meta_data for just CNS
meta_data_CNS <- meta_data %>%
filter(tissue == "CNS")
save(meta_data_CNS, file = 'data/meta_data_CNS.RData')
# Check they match
all(colnames(count_data_CNS) %in% rownames(meta_data_CNS))
all(colnames(count_data_CNS) == rownames(meta_data_CNS))
#Subset count_data for eyes
count_data_eyes <- count_data %>%
select(contains("eyes"))
save(count_data_eyes, file = 'data/count_data_eyes.RData')
#Subset meta_data for eyes
meta_data_eyes <- meta_data %>%
filter(tissue == "eyes")
save(meta_data_eyes, file = 'data/meta_data_eyes.RData')
#Check they match
all(colnames(count_data_eyes) %in% rownames(meta_data_eyes))
all(colnames(count_data_eyes) == rownames(meta_data_eyes))
counts_CNS <- count_data_CNS %>%
summarise(across(everything(), ~ sum(., na.rm = TRUE))) %>%
pivot_longer(everything(), names_to = 'squid', values_to = 'tot_raw_counts')
ggplot(counts_CNS, aes(x = squid, y = tot_raw_counts)) +
geom_point() +
geom_text_repel(aes(label = squid), max.overlaps = 2)
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
counts_eyes <- count_data_eyes %>%
summarise(across(everything(), ~ sum(., na.rm = TRUE))) %>%
pivot_longer(everything(), names_to = 'squid', values_to = 'tot_raw_counts')
ggplot(counts_eyes, aes(x = squid, y = tot_raw_counts)) +
geom_point() +
geom_text_repel(aes(label = squid), max.overlaps = 2)
CNS: PS548 has more counts than others - see if normalisation has any effect on this
Eyes: All samples have similar amount of counts
count_data_CNS_long <- count_data_CNS %>%
tibble::rownames_to_column(var = 'gene') %>%
pivot_longer(cols = -gene, names_to = 'squid', values_to = 'count')
ggplot(count_data_CNS_long, aes(x = count)) +
geom_histogram(stat = "bin", bins = 200) +
facet_grid(squid ~ .) +
labs(x = "raw count", y = "No. of genes with this raw count")
count_data_eyes_long <- count_data_eyes %>%
tibble::rownames_to_column(var = 'gene') %>%
pivot_longer(cols = -gene, names_to = 'squid', values_to = 'count')
ggplot(count_data_eyes_long, aes(x = count)) +
geom_histogram(stat = "bin", bins = 200) +
facet_grid(squid ~ .) +
labs(x = "raw count", y = "No. of genes with this raw count")
Looks normal, shows the common features of RNAseq data:
#Do for one group at a time
Ambient_CNS_counts <- count_data %>%
select("PS506_CNS", "PS508_CNS", "PS513_CNS", "PS514_CNS", "PS518_CNS", "PS529_CNS", "PS534_CNS", "PS551_CNS", "PS557_CNS", "PS561_CNS")
mean_counts <- apply(Ambient_CNS_counts, 1, mean) #The second argument '1' of 'apply' function indicates the function being applied to rows. Use '2' if applied to columns
variance_counts <- apply(Ambient_CNS_counts, 1, var)
df <- data.frame(mean_counts, variance_counts)
Ambient_CNS_mean_var <- ggplot(df) +
geom_point(aes(x=mean_counts, y=variance_counts)) +
scale_y_log10() +
scale_x_log10() +
geom_abline(intercept = 0, slope = 1, color="red") +
ggtitle("Ambient_CNS mean vs variance")
Ambient_CNS_mean_var
Elevated_CNS_counts <- count_data %>%
select("PS509_CNS", "PS510_CNS", "PS522_CNS", "PS527_CNS", "PS528_CNS", "PS539_CNS", "PS547_CNS", "PS584_CNS", "PS626_CNS", "PS636_CNS")
mean_counts <- apply(Elevated_CNS_counts, 1, mean)
variance_counts <- apply(Ambient_CNS_counts, 1, var)
df <- data.frame(mean_counts, variance_counts)
Elevated_CNS_mean_var <- ggplot(df) +
geom_point(aes(x=mean_counts, y=variance_counts)) +
scale_y_log10() +
scale_x_log10() +
geom_abline(intercept = 0, slope = 1, color="red") +
ggtitle("Elevated_CNS mean vs variance")
Elevated_CNS_mean_var
Ambient_eyes_counts <- count_data %>%
select("PS506_eyes", "PS508_eyes", "PS513_eyes", "PS514_eyes", "PS518_eyes", "PS529_eyes", "PS534_eyes", "PS551_eyes", "PS557_eyes", "PS561_eyes")
mean_counts <- apply(Ambient_eyes_counts, 1, mean)
variance_counts <- apply(Ambient_eyes_counts, 1, var)
df <- data.frame(mean_counts, variance_counts)
Ambient_eyes_mean_var <- ggplot(df) +
geom_point(aes(x=mean_counts, y=variance_counts)) +
scale_y_log10() +
scale_x_log10() +
geom_abline(intercept = 0, slope = 1, color="red") +
ggtitle("Ambient_eyes mean vs variance")
Ambient_eyes_mean_var
Elevated_eyes_counts <- count_data %>%
select("PS509_eyes", "PS510_eyes", "PS522_eyes", "PS527_eyes", "PS528_eyes", "PS539_eyes", "PS547_eyes", "PS584_eyes", "PS626_eyes", "PS636_eyes")
mean_counts <- apply(Elevated_eyes_counts, 1, mean)
variance_counts <- apply(Ambient_eyes_counts, 1, var)
df <- data.frame(mean_counts, variance_counts)
Elevated_eyes_mean_var <- ggplot(df) +
geom_point(aes(x=mean_counts, y=variance_counts)) +
scale_y_log10() +
scale_x_log10() +
geom_abline(intercept = 0, slope = 1, color="red") +
ggtitle("Elevated_eyes mean vs variance")
Elevated_eyes_mean_var
Entire dataset
#Check sample names match in count_data and meta_data
all(colnames(count_data) %in% rownames(meta_data))
all(colnames(count_data) == rownames(meta_data))
#Create DESeq2 Dataset object
dds <- DESeqDataSetFromMatrix(count_data, colData = meta_data, design = ~ sampletype)
# Run analysis - runs through the steps of estimating size factors, estimates dispersion and runs Wald statistics on NB model
# These steps can all be done separately = estimateSizeFactors, estimateDispersions, nbinomWaldTest
dds <- DESeq(dds)
save(dds, file = 'results/dds.RData')
Dataset subset for CNS
#Create DESeqDataset and run analysis on data subset for CNS
dds_CNS <- DESeqDataSetFromMatrix(count_data_CNS, colData = meta_data_CNS, design = ~ sampletype)
dds_CNS <- DESeq(dds_CNS)
save(dds_CNS, file = 'results/dds_CNS.RData')
Dataset subset for eyes
#Create DESeqDataset and run analysis on data subset for eyes
dds_eyes <- DESeqDataSetFromMatrix(count_data_eyes, colData = meta_data_eyes, design = ~ sampletype)
dds_eyes <- DESeq(dds_eyes)
save(dds_eyes, file = 'results/dds_eyes.RData')
raw_counts <- counts(dds, normalized=FALSE)
raw_counts_tb <- raw_counts %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
save(raw_counts, file = 'data/raw_counts.RData')
save(raw_counts_tb, file = 'data/raw_counts_tb.RData')
raw_counts_CNS <- counts(dds_CNS, normalized=FALSE)
raw_counts_CNS_tb <- raw_counts_CNS %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
save(raw_counts_CNS, file = 'data/raw_counts_CNS.RData')
save(raw_counts_CNS_tb, file = 'data/raw_counts_CNS_tb.RData')
raw_counts_eyes <- counts(dds_eyes, normalized=FALSE)
raw_counts_eyes_tb <- raw_counts_eyes %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
save(raw_counts_eyes, file = 'data/raw_counts_eyes.RData')
save(raw_counts_eyes_tb, file = 'data/raw_counts_eyes_tb.RData')
#All
normalised_counts <- counts(dds, normalized=TRUE)
normalised_counts_tb <- normalised_counts %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
save(normalised_counts, file = 'data/normalised_counts.RData')
save(normalised_counts_tb, file = 'data/normalised_counts_tb.RData')
#CNS
normalised_counts_CNS <- counts(dds_CNS, normalized=TRUE)
normalised_counts_CNS_tb <- normalised_counts_CNS %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
save(normalised_counts_CNS, file = 'data/normalised_counts_CNS.RData')
save(normalised_counts_CNS_tb, file = 'data/normalised_counts_CNS_tb.RData')
#Eyes
normalised_counts_eyes <- counts(dds_eyes, normalized=TRUE)
normalised_counts_eyes_tb <- normalised_counts_eyes %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
save(normalised_counts_eyes, file = 'data/normalised_counts_eyes.RData')
save(normalised_counts_eyes_tb, file = 'data/normalised_counts_eyes_tb.RData')
rld <- rlog(dds, blind=TRUE)
rld_CNS <- rlog(dds_CNS, blind=TRUE)
rld_eyes <- rlog(dds_eyes, blind=TRUE)
save(rld_CNS, file = 'data/rld_CNS.RData')
save(rld, file = 'data/rld.RData')
save(rld_eyes, file = 'data/rld_eyes.RData')
# Extract results from DESeq analysis
res_CNS_unshrunken <- results(dds_CNS, name = "sampletype_Elevated_CNS_vs_Ambient_CNS", alpha = 0.05)
save(res_CNS_unshrunken, file = "results/res_CNS_unshrunken.RData")
res_CNS_unshrunken_tb <- res_CNS_unshrunken %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
save(res_CNS_unshrunken_tb, file = "results/res_CNS_unshrunken_tb.RData")
# Extract results from DESeq analysis
res_eyes_unshrunken <- results(dds_eyes, name = "sampletype_Elevated_eyes_vs_Ambient_eyes", alpha = 0.05)
save(res_eyes_unshrunken, file = "results/res_eyes_unshrunken.RData")
res_eyes_unshrunken_tb <- res_eyes_unshrunken %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
save(res_eyes_unshrunken_tb, file = "results/res_eyes_unshrunken_tb.RData")
res_CNS_shrunken_ashr <- lfcShrink(dds_CNS, coef="sampletype_Elevated_CNS_vs_Ambient_CNS", res=res_CNS_unshrunken, type = "ashr")
save(res_CNS_shrunken_ashr, file = "results/res_CNS_shrunken_ashr.RData")
res_CNS_shrunken_ashr_tb <- res_CNS_shrunken_ashr %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
save(res_CNS_shrunken_ashr_tb, file = "results/res_CNS_shrunken_ashr_tb.RData")
res_eyes_shrunken_ashr <- lfcShrink(dds_eyes, coef="sampletype_Elevated_eyes_vs_Ambient_eyes", res=res_eyes_unshrunken, type = "ashr")
save(res_eyes_shrunken_ashr, file = "results/res_eyes_shrunken_ashr.RData")
res_eyes_shrunken_ashr_tb <- res_eyes_shrunken_ashr %>%
data.frame() %>%
rownames_to_column(var="gene") %>%
as_tibble()
save(res_eyes_shrunken_ashr_tb, file = "results/res_eyes_shrunken_ashr_tb.RData")
rld$sampletype <- factor(rld$sampletype, levels = c("Ambient_CNS", "Elevated_CNS", "Ambient_eyes", "Elevated_eyes"))
All <- plotPCA(rld, intgroup="sampletype", ntop = 50000) +
scale_colour_discrete(name = "", labels = c("CNS - ambient CO2", "CNS - elevated CO2", "eyes - ambient CO2", "eyes - elevated CO2")) +
labs(title = "All genes") +
geom_text_repel(aes(label = name)) +
coord_cartesian(ylim = c(-80, 80)) +
theme_classic()
All
top500 <- plotPCA(rld, intgroup="sampletype", ntop = 500) +
scale_colour_discrete(name = "", labels = c("CNS - ambient CO2", "CNS - elevated CO2", "eyes - ambient CO2", "eyes - elevated CO2")) +
geom_text_repel(aes(label = name)) +
labs(title = "Top 500 genes") +
coord_cartesian(ylim = c(-20, 20)) +
theme_classic()
top500
rld_CNS$sampletype <- factor(rld_CNS$sampletype, levels = c("Ambient_CNS", "Elevated_CNS"))
All_CNS <- plotPCA(rld_CNS, intgroup="sampletype", ntop = 50000) +
scale_colour_discrete(name = "", labels = c("CNS - ambient CO2", "CNS - elevated CO2")) +
stat_ellipse() +
labs(title = "All genes") +
geom_text_repel(aes(label = name)) +
coord_cartesian(ylim = c(-80, 80)) +
theme_classic()
All_CNS
top500_CNS <- plotPCA(rld_CNS, intgroup="sampletype", ntop = 500) +
scale_colour_discrete(name = "", labels = c("CNS - ambient CO2", "CNS - elevated CO2")) +
stat_ellipse() +
geom_text_repel(aes(label = name)) +
labs(title = "Top 500 genes") +
coord_cartesian(ylim = c(-20, 20)) +
theme_classic()
top500_CNS
rld_eyes$sampletype <- factor(rld_eyes$sampletype, levels = c("Ambient_eyes", "Elevated_eyes"))
All_eyes <- plotPCA(rld_eyes, intgroup="sampletype", ntop = 50000) +
scale_colour_discrete(name = "", labels = c("eyes - ambient CO2", "eyes - elevated CO2")) +
stat_ellipse() +
labs(title = "All genes") +
geom_text_repel(aes(label = name)) +
coord_cartesian(ylim = c(-80, 80)) +
theme_classic()
All_eyes
top500_eyes <- plotPCA(rld_eyes, intgroup="sampletype", ntop = 500) +
scale_colour_discrete(name = "", labels = c("eyes - ambient CO2", "eyes - elevated CO2")) +
stat_ellipse() +
geom_text_repel(aes(label = name)) +
labs(title = "Top 500 genes") +
coord_cartesian(ylim = c(-20, 20)) +
theme_classic()
top500_eyes
Heatmaps using all genes
# Extract the rlog matrix from the object
rld_mat <- assay(rld)
# Compute pairwise correlation values
rld_cor <- cor(rld_mat) ## cor() is a base R function
#meta_data for heatmap annotation
meta_data_heatmap <- meta_data %>%
select("sampletype")
#plot heatmap
pheatmap(rld_cor, annotation_row = meta_data_heatmap, annotation_col = meta_data_heatmap)
rld_mat_CNS <- assay(rld_CNS)
rld_cor_CNS <- cor(rld_mat_CNS)
meta_data_heatmap_CNS <- meta_data %>%
filter(tissue == "CNS") %>%
select("sampletype")
pheatmap(rld_cor_CNS, annotation_row = meta_data_heatmap_CNS, annotation_col = meta_data_heatmap_CNS)
rld_mat_eyes <- assay(rld_eyes)
rld_cor_eyes <- cor(rld_mat_eyes)
meta_data_heatmap_eyes <- meta_data %>%
filter(tissue == "eyes") %>%
select("sampletype")
pheatmap(rld_cor_eyes, annotation_row = meta_data_heatmap_eyes, annotation_col = meta_data_heatmap_eyes)
# Total number of normalized counts per sample
tot_normalised_counts_df <- colSums(counts(dds_CNS, normalized=T)) %>%
data.frame() %>%
rownames_to_column(var="squid") %>%
as_tibble() %>%
rename(tot_normalised_counts = '.')
plot_tot_normalised_counts <- ggplot(tot_normalised_counts_df, aes(x = squid, y = tot_normalised_counts)) +
geom_point() +
scale_x_discrete(guide = guide_axis(angle = 90))
plot_tot_normalised_counts
# Total number of normalized counts per sample
tot_normalised_counts_df <- colSums(counts(dds_eyes, normalized=T)) %>%
data.frame() %>%
rownames_to_column(var="squid") %>%
as_tibble() %>%
rename(tot_normalised_counts = '.')
plot_tot_normalised_counts <- ggplot(tot_normalised_counts_df, aes(x = squid, y = tot_normalised_counts)) +
geom_point() +
scale_x_discrete(guide = guide_axis(angle = 90))
plot_tot_normalised_counts
ggplot(res_CNS_unshrunken_tb, aes(x = baseMean)) +
geom_histogram(bins = 10000) +
scale_y_sqrt() +
labs(y = 'No. of genes') +
coord_cartesian(xlim = c(0, 1000000))
ggplot(res_eyes_unshrunken_tb, aes(x = baseMean)) +
geom_histogram(bins = 10000) +
scale_y_sqrt() +
labs(y = 'No. of genes') +
coord_cartesian(xlim = c(0, 1000000))
res_CNS_shrunken_ashr_tb_basemean <- res_CNS_shrunken_ashr_tb %>%
mutate(genelabels = "") %>%
arrange(-baseMean)
# Populate the genelabels column with contents of the gene column for the first 10 rows, i.e. the top 10 high;y expressed genes
res_CNS_shrunken_ashr_tb_basemean$genelabels[1:10] <- as.character(res_CNS_shrunken_ashr_tb_basemean$gene[1:10])
plot_basemean_LFC_ashr_CNS <- ggplot(res_CNS_shrunken_ashr_tb_basemean, aes(x = log2FoldChange, y = baseMean)) +
geom_point() +
geom_text_repel(aes(label = genelabels))
plot_basemean_LFC_ashr_CNS
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
res_eyes_shrunken_ashr_tb_basemean <- res_eyes_shrunken_ashr_tb %>%
mutate(genelabels = "") %>%
arrange(-baseMean)
# Populate the genelabels column with contents of the gene column for the first 10 rows, i.e. the top 10 high;y expressed genes
res_eyes_shrunken_ashr_tb_basemean$genelabels[1:10] <- as.character(res_eyes_shrunken_ashr_tb_basemean$gene[1:10])
plot_basemean_LFC_ashr_eyes <- ggplot(res_eyes_shrunken_ashr_tb_basemean, aes(x = log2FoldChange, y = baseMean)) +
geom_point() +
geom_text_repel(aes(label = genelabels))
plot_basemean_LFC_ashr_eyes
plotDispEsts(dds_CNS)
plotDispEsts(dds_eyes)
annotated_transcriptome <- read.csv('data/annotated_transcriptome_ORFs.csv')
save(annotated_transcriptome, file = 'data/annotated_transcriptome.RData')
transcript2gene_map <- read.delim('data/corset-clusters.txt')
save(transcript2gene_map, file = 'data/transcript2gene_map.RData')
annotated_res_CNS_shrunken_ashr_tb <- res_CNS_shrunken_ashr_tb %>%
full_join(transcript2gene_map) %>%
full_join(annotated_transcriptome, by = c("transcript" = "Sequence.Name"))
save(annotated_res_CNS_shrunken_ashr_tb, file = 'results/annotated_res_CNS_shrunken_ashr_tb.RData')
annotated_res_eyes_shrunken_ashr_tb <- res_eyes_shrunken_ashr_tb %>%
full_join(transcript2gene_map) %>%
full_join(annotated_transcriptome, by = c("transcript" = "Sequence.Name"))
save(annotated_res_eyes_shrunken_ashr_tb, file = 'results/annotated_res_eyes_shrunken_ashr_tb.RData')
# Summarize results
summary(res_CNS_shrunken_ashr, alpha = 0.05)
##
## out of 27401 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 14, 0.051%
## LFC < 0 (down) : 11, 0.04%
## outliers [1] : 0, 0%
## low counts [2] : 6, 0.022%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Extract sig DE genes
sig_DE_CNS_ashr <- annotated_res_CNS_shrunken_ashr_tb %>%
filter(padj < 0.05)
save(sig_DE_CNS_ashr, file = 'results/sig_DE_CNS_ashr.RData')
# Summarize results
summary(res_eyes_shrunken_ashr, alpha = 0.05)
##
## out of 27400 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 6, 0.022%
## LFC < 0 (down) : 5, 0.018%
## outliers [1] : 0, 0%
## low counts [2] : 1071, 3.9%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Extract sig DE genes
sig_DE_eyes_ashr <- annotated_res_eyes_shrunken_ashr_tb %>%
filter(padj < 0.05)
save(sig_DE_eyes_ashr, file = 'results/sig_DE_eyes_ashr.RData')
# Unshrunken results
plotMA(res_CNS_unshrunken, alpha = 0.05, main = "MA plot unshrunken LFC in CNS", ylim=c(-2,2))
# Shrunken results
plotMA(res_CNS_shrunken_ashr, alpha = 0.05, main = "MA plot ashr shrunken LFC in CNS", ylim=c(-2,2))
# Unshrunken results
plotMA(res_eyes_unshrunken, alpha = 0.05, main = "MA plot unshrunken LFC in eyes", ylim=c(-2,2))
# Shrunken results
plotMA(res_eyes_shrunken_ashr, alpha = 0.05, main = "MA plot ashr shrunken LFC in eyes", ylim=c(-2,2))
# Obtain logical vector where TRUE values denote padj values < 0.05
# Create an empty column to indicate which genes to label
# Sort by padj values
res_CNS_shrunken_ashr_tb_volcano <- res_CNS_shrunken_ashr_tb %>%
mutate(threshold_DE = padj < 0.05) %>%
mutate(genelabels = "") %>%
arrange(padj)
# Populate the genelabels column with contents of the gene column for the first 25 rows, i.e. all sig DE genes
res_CNS_shrunken_ashr_tb_volcano$genelabels[1:25] <- as.character(res_CNS_shrunken_ashr_tb_volcano$gene[1:25])
# Volcano plot
volcano_CNS_ashr <- ggplot(res_CNS_shrunken_ashr_tb_volcano, aes(x = log2FoldChange, y = -log10(padj))) +
geom_point(aes(colour = threshold_DE)) +
geom_text_repel(aes(label = genelabels)) +
ggtitle("Volcano plot CNS (ashr shrinkage method): p < 0.05") +
xlab("log2 fold change") +
ylab("-log10 adjusted p-value") +
theme(legend.position = "none",
plot.title = element_text(size = rel(1.5), hjust = 0.5),
axis.title = element_text(size = rel(1.25)))
volcano_CNS_ashr
# Obtain logical vector where TRUE values denote padj values < 0.05
# Create an empty column to indicate which genes to label
# Sort by padj values
res_eyes_shrunken_ashr_tb_volcano <- res_eyes_shrunken_ashr_tb %>%
mutate(threshold_DE = padj < 0.05) %>%
mutate(genelabels = "") %>%
arrange(padj)
# Populate the genelabels column with contents of the gene column for the first 25 rows, i.e. all sig DE genes
res_eyes_shrunken_ashr_tb_volcano$genelabels[1:25] <- as.character(res_eyes_shrunken_ashr_tb_volcano$gene[1:25])
# Volcano plot
volcano_eyes_ashr <- ggplot(res_eyes_shrunken_ashr_tb_volcano, aes(x = log2FoldChange, y = -log10(padj))) +
geom_point(aes(colour = threshold_DE)) +
geom_text_repel(aes(label = genelabels)) +
ggtitle("Volcano plot eyes (ashr shrinkage method): p < 0.05") +
xlab("log2 fold change") +
ylab("-log10 adjusted p-value") +
theme(legend.position = "none",
plot.title = element_text(size = rel(1.5), hjust = 0.5),
axis.title = element_text(size = rel(1.25)))
volcano_eyes_ashr
## Warning: Removed 1083 rows containing missing values (`geom_point()`).
## Warning: Removed 1083 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Significantly DE genes
meta_data_CNS_heatmap <- meta_data_CNS %>%
select("sampletype")
# Use rlog transformed data
rld_mat_CNS <- assay(rld_CNS)
rld_mat_CNS_tb <- rld_mat_CNS %>%
as.data.frame() %>%
rownames_to_column(var = "gene") %>%
as_tibble()
#Extract only the sig DE genes
rld_mat_CNS_heatmap <- rld_mat_CNS_tb %>%
filter(gene %in% sig_DE_CNS_ashr$gene) %>%
column_to_rownames(var = "gene")
pheatmap(rld_mat_CNS_heatmap,
cluster_rows = T,
clustering_method = "complete",
show_rownames = T,
annotation_col = meta_data_CNS_heatmap,
border_color = NA,
fontsize = 10,
scale = "row", # scales the data to Z scores (by gene)
fontsize_row = 10,
main = "Heatmap of significantly DE genes in CNS")
Top 100 genes (sorted by adjusted p-value)
#Extract only the top 100 genes (by padj)
top100 <- res_CNS_shrunken_ashr_tb %>%
arrange(padj) %>%
slice_head(n = 100)
rld_mat_CNS_heatmap <- rld_mat_CNS_tb %>%
filter(gene %in% top100$gene) %>%
column_to_rownames(var = "gene")
pheatmap(rld_mat_CNS_heatmap,
cluster_rows = T,
clustering_method = "complete",
show_rownames = F,
annotation_col = meta_data_CNS_heatmap,
border_color = NA,
fontsize = 10,
scale = "row", # scales the data to Z scores (by gene)
fontsize_row = 10,
main = "Heatmap of top 100 genes in CNS")
Significantly DE genes
meta_data_eyes_heatmap <- meta_data_eyes %>%
select("sampletype")
# Use rlog transformed data
rld_mat_eyes <- assay(rld_eyes)
rld_mat_eyes_tb <- rld_mat_eyes %>%
as.data.frame() %>%
rownames_to_column(var = "gene") %>%
as_tibble()
#Extract only the sig DE genes
rld_mat_eyes_heatmap <- rld_mat_eyes_tb %>%
filter(gene %in% sig_DE_eyes_ashr$gene) %>%
column_to_rownames(var = "gene")
pheatmap(rld_mat_eyes_heatmap,
cluster_rows = T,
clustering_method = "complete",
show_rownames = T,
annotation_col = meta_data_eyes_heatmap,
border_color = NA,
fontsize = 10,
scale = "row", # scales the data to Z scores (by gene)
fontsize_row = 10,
main = "Heatmap of significantly DE genes in eyes")
Top 100 genes (sorted by adjusted p-value)
#Extract only the top 100 genes (by padj)
top100 <- res_eyes_shrunken_ashr_tb %>%
arrange(padj) %>%
slice_head(n = 100)
rld_mat_eyes_heatmap <- rld_mat_eyes_tb %>%
filter(gene %in% top100$gene) %>%
column_to_rownames(var = "gene")
pheatmap(rld_mat_eyes_heatmap,
cluster_rows = T,
clustering_method = "complete",
show_rownames = F,
annotation_col = meta_data_eyes_heatmap,
border_color = NA,
fontsize = 10,
scale = "row", # scales the data to Z scores (by gene)
fontsize_row = 10,
main = "Heatmap of top 100 genes in eyes")
sig_DE_CNS_normalised_counts <- normalised_counts_CNS_tb %>%
filter(gene %in% sig_DE_CNS_ashr$gene)
sig_DE_CNS_normalised_counts <- sig_DE_CNS_normalised_counts %>%
gather(colnames(normalised_counts_CNS_tb)[2:21], key = "Squid", value = "normalised_counts")
# Combine with meta_data_CNS so can include treatment info in graph
sig_DE_CNS_normalised_counts_metadata <- (meta_data_CNS %>% rownames_to_column(var = "Squid")) %>%
inner_join(sig_DE_CNS_normalised_counts)
# Dot plot
jitter <- position_jitter(width = 0.15, height = 0)
ggplot(sig_DE_CNS_normalised_counts_metadata, aes(x = gene, y = normalised_counts, color = sampletype)) +
geom_point(position = jitter) +
scale_y_log10() +
xlab("Genes") +
ylab("log10 Normalized Counts") +
ggtitle("All significantly DE genes in CNS") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(plot.title = element_text(hjust = 0.5))
# Box plot
ggplot(sig_DE_CNS_normalised_counts_metadata, aes(x = gene, y = normalised_counts, color = sampletype)) +
geom_boxplot() +
scale_y_log10() +
xlab("Genes") +
ylab("log10 Normalized Counts") +
ggtitle("All significantly DE genes in CNS") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(plot.title = element_text(hjust = 0.5))
sig_DE_eyes_normalised_counts <- normalised_counts_eyes_tb %>%
filter(gene %in% sig_DE_eyes_ashr$gene)
sig_DE_eyes_normalised_counts <- sig_DE_eyes_normalised_counts %>%
gather(colnames(normalised_counts_eyes_tb)[2:21], key = "Squid", value = "normalised_counts")
# Combine with meta_data_eyes so can include treatment info in graph
sig_DE_eyes_normalised_counts_metadata <- (meta_data_eyes %>% rownames_to_column(var = "Squid")) %>%
inner_join(sig_DE_eyes_normalised_counts)
# Dot plot
jitter <- position_jitter(width = 0.15, height = 0)
# jitter log10 y
ggplot(sig_DE_eyes_normalised_counts_metadata, aes(x = gene, y = normalised_counts, color = sampletype)) +
geom_point(position = jitter) +
#geom_text_repel(aes(label = Squid), size = 2, position = jitter) +
scale_y_log10() +
xlab("Genes") +
ylab("log10 Normalized Counts") +
ggtitle("All significantly DE genes in eyes") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(plot.title = element_text(hjust = 0.5))
## Warning: Transformation introduced infinite values in continuous y-axis
# Box plot
ggplot(sig_DE_eyes_normalised_counts_metadata, aes(x = gene, y = normalised_counts, color = sampletype)) +
geom_boxplot() +
scale_y_log10() +
xlab("Genes") +
ylab("log10 Normalized Counts") +
ggtitle("All significantly DE genes in eyes") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(plot.title = element_text(hjust = 0.5))
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 21 rows containing non-finite values (`stat_boxplot()`).
# Look closer at genes that don't appear to be DE
# look at cluster-5283.10996
df <- sig_DE_eyes_normalised_counts_metadata %>%
filter(gene == "Cluster-5283.10996")
ggplot(df, aes(x = gene, y = normalised_counts, color = sampletype)) +
geom_point(position = jitter) +
geom_text_repel(aes(label = Squid), size = 2, position = jitter) +
scale_y_log10() +
xlab("Genes") +
ylab("Normalized Counts") +
ggtitle("Cluster-5283.10996") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(plot.title = element_text(hjust = 0.5))
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# look at cluster-5744.0
df <- sig_DE_eyes_normalised_counts_metadata %>%
filter(gene == "Cluster-5744.0")
ggplot(df, aes(x = gene, y = normalised_counts, color = sampletype)) +
geom_point(position = jitter) +
geom_text_repel(aes(label = Squid), size = 2, position = jitter) +
xlab("Genes") +
ylab("Normalized Counts") +
ggtitle("Cluster-5744.0 look at low counts") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(plot.title = element_text(hjust = 0.5)) +
coord_cartesian(ylim = c(0, 10))
# look at cluster-73.0
df <- sig_DE_eyes_normalised_counts_metadata %>%
filter(gene == "Cluster-73.0")
ggplot(df, aes(x = gene, y = normalised_counts, color = sampletype)) +
geom_point(position = jitter) +
geom_text_repel(aes(label = Squid), size = 2, position = jitter) +
xlab("Genes") +
ylab("Normalized Counts") +
ggtitle("Cluster-73.0 count not on log10 \nscale to look at low counts") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(plot.title = element_text(hjust = 0.5))
## Warning: ggrepel: 18 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Cluster-5283.10996 does not show clear DE
There is a lot of variation and overlap between CO2 levels
LFC is 1.5 but this is not clear by looking at the graph
Even outliers are matched - 2 ambient squid (PS514_eyes and PS518_eyes) with high expression and 2 elevated squid (PS527_eyes and PS636_eyes) with high expression
Cluster-5744.0 also does not show clear DE
There is a lot of variation and overlap between CO2 levels
LFC is -0.7 but this is not clear by looking at the graph
Genes with low expression show overlap between the two CO2 levels
Even outliers are matched - 2 ambient squid (PS514_eyes and PS518_eyes) with high expression and 2 elevated squid (PS527_eyes and PS636_eyes) with high expression
Cluster-73.0 DE looks driven by 2 outliers
All samples have 0 expression but PS534_eyes and PS506_eyes (both ambient) have very high expression.
LFC is -24: surprised shrinkage didn’t deal with this
Therefore visual inspection shows clear DE of all DE genes in the CNS, but in the eyes, 3 of the genes looks problematic - remove these
# 3 genes don't look DE when look at graphs - remove these
sig_DE_eyes_ashr_final <- annotated_res_eyes_shrunken_ashr_tb %>%
filter(padj < 0.05) %>%
filter(!gene %in% c("Cluster-5283.10996", "Cluster-5744.0", "Cluster-73.0"))
save(sig_DE_eyes_ashr_final, file = 'results/sig_DE_eyes_ashr_final.RData')
DE_CNS <- sig_DE_CNS_ashr %>%
distinct(gene)
DE_eyes <- sig_DE_eyes_ashr_final %>%
distinct(gene)
x <- c(list(DE_CNS$gene), list(DE_eyes$gene))
names(x) <- c('CNS\n', 'Eyes\n')
ggvenn(x,
show_percentage = FALSE,
text_size = 0,
fill_color = c('#238A8DFF', '#55C667FF'),
fill_alpha = 0.8) +
annotate('text', label = '12 up\n11 down', x = -1, y = 0) +
annotate('text', label = '3 up\n3 down', x = 1, y = 0) +
annotate('text', label = '2 up', x = 0, y = 0) +
annotate('text', label = '25 DE genes', x = -1, y = 1.2) +
annotate('text', label = '8 DE genes', x = 1, y = 1.2)
meta_data_CNS_heatmap <- meta_data_CNS %>%
select("sampletype")
# Use rlog transformed data
rld_mat_CNS <- assay(rld_CNS)
rld_mat_CNS_tb <- rld_mat_CNS %>%
as.data.frame() %>%
rownames_to_column(var = "gene") %>%
as_tibble()
## ONLY SIG DE GENES ##
rld_mat_CNS_heatmap <- rld_mat_CNS_tb %>%
filter(gene %in% sig_DE_CNS_ashr$gene) %>%
column_to_rownames(var = "gene")
pheatmap(rld_mat_CNS_heatmap,
cluster_rows = T,
clustering_method = "complete",
show_rownames = T,
annotation_col = meta_data_CNS_heatmap,
border_color = NA,
fontsize = 10,
scale = "row", # scales the data to Z scores (by gene)
fontsize_row = 10,
main = "Heatmap of significantly DE genes in CNS")
# change colour scheme with color = viridis(100)
## TOP 100 GENES
top100 <- res_CNS_shrunken_ashr_tb %>%
arrange(padj) %>%
slice_head(n = 100)
rld_mat_CNS_heatmap <- rld_mat_CNS_tb %>%
filter(gene %in% top100$gene) %>%
column_to_rownames(var = "gene")
pheatmap(rld_mat_CNS_heatmap,
cluster_rows = T,
clustering_method = "complete",
show_rownames = F,
annotation_col = meta_data_CNS_heatmap,
border_color = NA,
fontsize = 10,
scale = "row", # scales the data to Z scores (by gene)
fontsize_row = 10,
main = "Heatmap of top 100 genes in CNS")
meta_data_eyes_heatmap <- meta_data_eyes %>%
select("sampletype")
# Use rlog transformed data
rld_mat_eyes <- assay(rld_eyes)
rld_mat_eyes_tb <- rld_mat_eyes %>%
as.data.frame() %>%
rownames_to_column(var = "gene") %>%
as_tibble()
## ONLY SIG DE GENES ##
rld_mat_eyes_heatmap <- rld_mat_eyes_tb %>%
filter(gene %in% sig_DE_eyes_ashr_final$gene) %>%
column_to_rownames(var = "gene")
pheatmap(rld_mat_eyes_heatmap,
cluster_rows = T,
clustering_method = "complete",
show_rownames = T,
annotation_col = meta_data_eyes_heatmap,
border_color = NA,
fontsize = 10,
scale = "row", # scales the data to Z scores (by gene)
fontsize_row = 10,
main = "Heatmap of significantly DE genes in eyes")
## TOP 100 GENES ##
top100 <- res_eyes_shrunken_ashr_tb %>%
arrange(padj) %>%
slice_head(n = 100)
rld_mat_eyes_heatmap <- rld_mat_eyes_tb %>%
filter(gene %in% top100$gene) %>%
column_to_rownames(var = "gene")
pheatmap(rld_mat_eyes_heatmap,
cluster_rows = T,
clustering_method = "complete",
show_rownames = F,
annotation_col = meta_data_eyes_heatmap,
border_color = NA,
fontsize = 10,
scale = "row", # scales the data to Z scores (by gene)
fontsize_row = 10,
main = "Heatmap of top 100 genes in eyes")
sig_DE_CNS_ashr %>%
select(gene, Sequence.Description, padj, log2FoldChange, lfcSE) %>%
arrange(-log2FoldChange) %>%
distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
kable(caption = 'Significantly DE genes in CNS')
gene | Sequence.Description | padj | log2FoldChange | lfcSE |
---|---|---|---|---|
Cluster-5283.1476 | protein FAM204A isoform X1 | 0.0044818 | 1.3394024 | 0.3855364 |
Cluster-7057.2 | prickle-like protein 3 isoform X4 | 0.0340854 | 0.9631720 | 0.3995900 |
Cluster-5283.10634 | mitogen-activated protein kinase kinase kinase kinase 5-like isoform X4 | 0.0385885 | 0.9215585 | 0.5599760 |
Cluster-5283.10634 | mitogen-activated protein kinase kinase kinase kinase 3-like isoform X3 | 0.0385885 | 0.9215585 | 0.5599760 |
Cluster-7096.4 | synaptobrevin homolog YKT6 | 0.0385885 | 0.8850380 | 0.4291597 |
Cluster-5283.4688 | zinc finger protein 271-like | 0.0385885 | 0.8581222 | 0.4381018 |
Cluster-5283.11841 | chromatin accessibility complex protein 1 | 0.0457181 | 0.7541101 | 0.4949590 |
Cluster-6236.3 | nucleoside diphosphate kinase 6-like | 0.0385885 | 0.6277556 | 0.6588387 |
Cluster-5283.11516 | Hypothetical predicted protein | 0.0385885 | 0.5997121 | 0.2692648 |
Cluster-7175.0 | —NA— | 0.0385885 | 0.5955914 | 0.2308095 |
Cluster-5283.537 | putative N-acetylated-alpha-linked acidic dipeptidase isoform X4 | 0.0385885 | 0.5832244 | 0.2579140 |
Cluster-7445.2 | hypothetical protein OCBIM_22018451mg | 0.0457181 | 0.5257109 | 0.6274280 |
Cluster-5590.1 | transferrin-like protein | 0.0385885 | 0.4800865 | 0.6305837 |
Cluster-5046.4 | PREDICTED: uncharacterized protein LOC106875384 isoform X2 | 0.0385885 | 0.3107304 | 0.5556629 |
Cluster-2174.0 | E3 ubiquitin-protein ligase synoviolin B-like | 0.0385885 | 0.2741695 | 0.1471531 |
Cluster-5283.4947 | —NA— | 0.0340833 | -0.3743971 | 0.1330268 |
Cluster-5283.6450 | cytochrome b561 domain-containing protein 2 | 0.0385885 | -0.4632560 | 0.6431749 |
Cluster-1257.0 | Microtubule-associated proteins 1A/1B light chain 3B | 0.0445542 | -0.5404408 | 0.2593373 |
Cluster-1257.0 | microtubule-associated proteins 1A/1B light chain 3A | 0.0445542 | -0.5404408 | 0.2593373 |
Cluster-5283.3831 | proton myo-inositol cotransporter | 0.0026007 | -0.5563098 | 0.1130174 |
Cluster-7238.6 | —NA— | 0.0385885 | -0.7648719 | 0.3262858 |
Cluster-5283.750 | unnamed protein product | 0.0340833 | -0.7888475 | 0.6961008 |
Cluster-5283.3189 | gamma-secretase subunit PEN-2 | 0.0385885 | -0.7952439 | 0.4251421 |
Cluster-5290.5 | cadherin EGF LAG seven-pass G-type receptor 3 | 0.0385885 | -0.8019407 | 0.5279131 |
Cluster-5283.883 | methylcrotonoyl-CoA carboxylase beta chain, mitochondrial | 0.0340833 | -0.8088622 | 0.2704246 |
Cluster-5283.2490 | —NA— | 0.0385885 | -0.8291220 | 0.5330048 |
Cluster-3464.0 | Hypothetical predicted protein | 0.0340833 | -0.9442758 | 0.3183627 |
sig_DE_eyes_ashr_final %>%
select(gene, Sequence.Description, padj, log2FoldChange) %>%
arrange(-log2FoldChange) %>%
distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
kable(caption = 'Significantly DE genes in eyes - final set (3 genes removed due to not showing clear DE upon visual inspection of the normalised counts across CO2 levels')
gene | Sequence.Description | padj | log2FoldChange |
---|---|---|---|
Cluster-5283.5132 | probable flavin-containing monoamine oxidase A | 0.0048141 | 2.2916297 |
Cluster-1772.0 | synaptic vesicular amine transporter | 0.0116265 | 2.0443468 |
Cluster-5283.1476 | protein FAM204A isoform X1 | 0.0116265 | 1.4711580 |
Cluster-7096.4 | synaptobrevin homolog YKT6 | 0.0000059 | 1.2616376 |
Cluster-7187.3 | cystathionine beta-synthase | 0.0383992 | 0.8890046 |
Cluster-5283.10628 | transcription initiation factor IIE subunit beta | 0.0224947 | -0.9911208 |
Cluster-5283.11079 | —NA— | 0.0183964 | -1.4212532 |
Cluster-1000.0 | N-acetylmuramoyl-L-alanine amidase | 0.0016092 | -2.8983839 |
CNS <- sig_DE_CNS_ashr %>%
select(gene, transcript, Sequence.Description, padj, log2FoldChange) %>%
rename(padj_CNS = padj,
log2FoldChange_CNS = log2FoldChange)
eyes <- sig_DE_eyes_ashr_final %>%
select(gene, transcript, Sequence.Description, padj, log2FoldChange) %>%
rename(padj_eyes = padj,
log2FoldChange_eyes = log2FoldChange)
CNS_eyes <- CNS %>%
filter(gene %in% eyes$gene)
eyes_CNS <- eyes %>%
filter(gene %in% CNS$gene)
sig_DE_CNS_and_eyes_ashr <- CNS_eyes %>%
full_join(eyes_CNS)
save(sig_DE_CNS_and_eyes_ashr, file = 'results/sig_DE_CNS_and_eyes_ashr.RData')
sig_DE_CNS_and_eyes_ashr %>%
select(gene, Sequence.Description, padj_CNS, log2FoldChange_CNS, padj_eyes, log2FoldChange_eyes) %>%
kable(caption = 'Genes significantly DE in both CNS and eyes')
gene | Sequence.Description | padj_CNS | log2FoldChange_CNS | padj_eyes | log2FoldChange_eyes |
---|---|---|---|---|---|
Cluster-5283.1476 | protein FAM204A isoform X1 | 0.0044818 | 1.339402 | 0.0116265 | 1.471158 |
Cluster-7096.4 | synaptobrevin homolog YKT6 | 0.0385885 | 0.885038 | 0.0000059 | 1.261638 |
Prep transcriptome
# Remove letters form gene names as struggles with this
annotated_transcriptome_rename <- annotated_transcriptome %>%
rename(transcript = Sequence.Name) %>%
mutate(transcript = gsub('transcript/', '', transcript))
save(annotated_transcriptome_rename, file = 'data/annotated_transcriptome_rename.RData')
corset_clusters_2_transcripts_renamed <- corset_clusters_2_transcripts %>%
rename(transcript = TXNAME,
gene = GENEID) %>%
mutate(transcript = gsub('transcript/', '', transcript),
gene = gsub('Cluster-', '', gene))
save(corset_clusters_2_transcripts_renamed, file = 'data/corset_clusters_2_transcripts_renamed.RData')
Prep CNS data
# Remove letters form gene names as struggles with this
res_CNS_shrunken_ashr_tb_rename <- res_CNS_shrunken_ashr_tb %>%
mutate(gene = gsub('Cluster-', '', gene))
# Create gene list - assumes that 1st column is ID and 2nd column is fold change
res_CNS <- res_CNS_shrunken_ashr_tb_rename %>%
select(gene, log2FoldChange) %>%
as.data.frame()
gene_LFC_CNS_ashr <- res_CNS[,2]
names(gene_LFC_CNS_ashr) <- as.character(res_CNS[,1])
gene_LFC_CNS_ashr <- sort(gene_LFC_CNS_ashr, decreasing = TRUE)
head(gene_LFC_CNS_ashr)
save(gene_LFC_CNS_ashr, file = 'results/gene_LFC_CNS_ashr.RData')
Prep eyes data
# Remove letters form gene names as struggles with this
res_eyes_shrunken_ashr_tb_rename <- res_eyes_shrunken_ashr_tb %>%
mutate(gene = gsub('Cluster-', '', gene))
# Create gene list - assumes that 1st column is ID and 2nd column is fold change
res_eyes <- res_eyes_shrunken_ashr_tb_rename %>%
select(gene, log2FoldChange) %>%
as.data.frame()
gene_LFC_eyes_ashr <- res_eyes[,2]
names(gene_LFC_eyes_ashr) <- as.character(res_eyes[,1])
gene_LFC_eyes_ashr <- sort(gene_LFC_eyes_ashr, decreasing = TRUE)
head(gene_LFC_eyes_ashr)
save(gene_LFC_eyes_ashr, file = 'results/gene_LFC_eyes_ashr.RData')
GO terms data for CNS
# ANNOTATION GO TERMS
GO_annot_data_CNS <- res_CNS_shrunken_ashr_tb_rename %>%
full_join(corset_clusters_2_transcripts_renamed) %>%
inner_join(annotated_transcriptome_rename) %>%
select(Annotation.GO.ID, gene, Annotation.GO.Term)
#Change df format so one GO ID per row so a transcript with multiple GO IDs takes multiple rows
GO_annot_data_CNS_split <- cSplit(GO_annot_data_CNS, c('Annotation.GO.ID', 'Annotation.GO.Term'), ';', direction = "long")
# TERM2GENE is a data.frame with first column of (GO) term ID and second column of corresponding mapped gene
term2gene_annot_CNS <- GO_annot_data_CNS_split %>%
select(Annotation.GO.ID, gene) %>%
as.data.frame()
save(term2gene_annot_CNS, file = 'data/term2gene_annot_CNS.RData')
# TERM2NAME is a data.frame with first column of (GO) term ID and second column of corresponding term name
term2name_annot_CNS <- GO_annot_data_CNS_split %>%
select(Annotation.GO.ID, Annotation.GO.Term) %>%
as.data.frame()
save(term2name_annot_CNS, file = 'data/term2name_annot_CNS.RData')
GO terms data for eyes
# ANNOTATION GO TERMS
GO_annot_data_eyes <- res_eyes_shrunken_ashr_tb_rename %>%
full_join(corset_clusters_2_transcripts_renamed) %>%
inner_join(annotated_transcriptome_rename) %>%
select(Annotation.GO.ID, gene, Annotation.GO.Term)
#Change df format so one GO ID per row so a transcript with multiple GO IDs takes multiple rows
GO_annot_data_eyes_split <- cSplit(GO_annot_data_eyes, c('Annotation.GO.ID', 'Annotation.GO.Term'), ';', direction = "long")
# TERM2GENE is a data.frame with first column of (GO) term ID and second column of corresponding mapped gene
term2gene_annot_eyes <- GO_annot_data_eyes_split %>%
select(Annotation.GO.ID, gene) %>%
as.data.frame()
save(term2gene_annot_eyes, file = 'data/term2gene_annot_eyes.RData')
# TERM2NAME is a data.frame with first column of (GO) term ID and second column of corresponding term name
term2name_annot_eyes <- GO_annot_data_eyes_split %>%
select(Annotation.GO.ID, Annotation.GO.Term) %>%
as.data.frame()
save(term2name_annot_eyes, file = 'data/term2name_annot_eyes.RData')
# Will get different results for the GSEA because the permutations performed use random reordering - use set.seed for reproducibility
set.seed(83276453)
gsea_CNS_ashr_annot_exp0 <- GSEA(
geneList = gene_LFC_CNS_ashr,
exponent = 0, # weight of each step (0 = same as classic in omicsbox)
minGSSize = 15, # change from default 10 to 15 (what used in omicsbox and suggested in Subramanian 2005)
maxGSSize = 500,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
TERM2GENE = term2gene_annot_CNS,
TERM2NAME = term2name_annot_CNS,
verbose = TRUE,
seed = TRUE,
by = "fgsea"
)
# Warning messages: 1: In preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, There are ties in the preranked stats (0.01% of the list). The order of those tied genes will be arbitrary, which may produce unexpected results.
save(gsea_CNS_ashr_annot_exp0, file = 'results/gsea_CNS_ashr_annot_exp0.RData')
# extract dataframe from gsea object
gsea_CNS_ashr_annot_exp0_summary <- fortify(
gsea_CNS_ashr_annot_exp0,
showCategory = 300,
by = "Count"
)
save(gsea_CNS_ashr_annot_exp0_summary, file = 'results/gsea_CNS_ashr_annot_exp0_summary.RData')
# Will get different results for the GSEA because the permutations performed use random reordering - use set.seed fpr reproducibility
set.seed(83276453)
gsea_eyes_ashr_annot_exp0 <- GSEA(
geneList = gene_LFC_eyes_ashr,
exponent = 0, # weight of each step (0 = same as classic in omicsbox)
minGSSize = 15, # change from default 10 to 15 (what used in omicsbox and suggested in Subramanian 2005)
maxGSSize = 500,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
TERM2GENE = term2gene_annot_eyes,
TERM2NAME = term2name_annot_eyes,
verbose = TRUE,
seed = TRUE,
by = "fgsea"
)
# Warning messages: 1: In preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, There are ties in the preranked stats (0.01% of the list). The order of those tied genes will be arbitrary, which may produce unexpected results.
save(gsea_eyes_ashr_annot_exp0, file = 'results/gsea_eyes_ashr_annot_exp0.RData')
# extract dataframe from gsea object
gsea_eyes_ashr_annot_exp0_summary <- fortify(
gsea_eyes_ashr_annot_exp0,
showCategory = 300,
by = "Count"
)
save(gsea_eyes_ashr_annot_exp0_summary, file = 'results/gsea_eyes_ashr_annot_exp0_summary.RData')
# Plot all GO terms for CNS and eyes on same graph
# CNS and eyes share same y axis on dotplot - want to order by NES of CNS, and show overlaps of GO terms found in CNS and eyes
# Below method so GO terms in both tissues are not twice on the y axis
CNS <- gsea_CNS_ashr_annot_exp0_summary %>%
mutate(tissue = "CNS")
eyes <- gsea_eyes_ashr_annot_exp0_summary %>%
mutate(tissue = "eyes")
CNS_NES <- CNS %>%
select(ID, Description, NES) %>%
rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_NES <- eyes %>%
select(ID, Description, NES) %>%
rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_NES <- CNS_NES %>%
full_join(eyes_NES) %>%
ungroup() %>%
arrange(NES_CNS, NES_eyes) %>%
mutate(order_NES = as.factor(row_number())) %>%
pivot_longer(c(NES_CNS, NES_eyes), names_to = "tissue", values_to = "NES") %>%
mutate(tissue = gsub("NES_", "", tissue)) %>%
arrange(NES)
CNS_p.adjust <- CNS %>%
select(ID, Description, p.adjust) %>%
rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_p.adjust <- eyes %>%
select(ID, Description, p.adjust) %>%
rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_p.adjust <- CNS_p.adjust %>%
full_join(eyes_p.adjust) %>%
ungroup() %>%
arrange(p.adjust_CNS) %>%
mutate(order_p.adjust = as.factor(row_number())) %>%
pivot_longer(c(p.adjust_CNS, p.adjust_eyes), names_to = "tissue", values_to = "p.adjust") %>%
mutate(tissue = gsub("p.adjust_", "", tissue)) %>%
arrange(Description)
CNS_Count <- CNS %>%
select(ID, Description, Count) %>%
rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_Count <- eyes %>%
select(ID, Description, Count) %>%
rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_Count <- CNS_Count %>%
full_join(eyes_Count) %>%
ungroup() %>%
arrange(Count_CNS) %>%
mutate(order_Count = as.factor(row_number())) %>%
pivot_longer(c(Count_CNS, Count_eyes), names_to = "tissue", values_to = "Count") %>%
mutate(tissue = gsub("Count_", "", tissue))
df_all <- df_NES %>%
full_join(df_p.adjust) %>%
full_join(df_Count) %>%
mutate(Description_wrapped = str_wrap(Description, width = 30))
ggplot(df_all, aes(x = NES, y = order_NES, colour = p.adjust, size = Count)) +
geom_point() +
scale_y_discrete(
breaks = df_all$order_NES,
labels = df_all$Description_wrapped) +
ggplot2::facet_grid(. ~ tissue) +
scale_colour_viridis_c(direction = -1) +
geom_vline(xintercept = 0) +
xlab("Normalised enrichment score") +
ylab("") +
labs(colour = "adjusted p value", size = "No. core enrichment\ngenes in GO term", title = "GSEA - significant GO terms")
## Warning: Removed 96 rows containing missing values (`geom_point()`).
Pull out all core enrichment genes from an interesting cluster of GO terms that were found significant with gene set enrichment analysis
Prep data CNS
gsea_CNS_transcripts <- cSplit(gsea_CNS_ashr_annot_exp0_summary, 'core_enrichment', '/', direction = "long", type.convert = FALSE) %>%
rename_with(~paste0(., "_gsea")) %>%
rename(gene = core_enrichment_gsea)
annotated_res_CNS_shrunken_ashr_tb <- annotated_res_CNS_shrunken_ashr_tb %>%
select(gene, transcript, Sequence.Description, log2FoldChange, lfcSE, padj) %>%
rename(log2FoldChange_ashr = log2FoldChange,
lfcSE_ashr = lfcSE,
padj_ashr = padj) %>%
mutate(gene = gsub('Cluster-', '', gene),
transcript = gsub("transcript/", "", transcript))
gsea_exp0_transcripts_GO_CNS <- gsea_CNS_transcripts %>%
full_join(annotated_res_CNS_shrunken_ashr_tb)
save(gsea_exp0_transcripts_GO_CNS, file = 'results/gsea_exp0_transcripts_GO_CNS.RData')
Prep data eyes
gsea_eyes_transcripts <- cSplit(gsea_eyes_ashr_annot_exp0_summary, 'core_enrichment', '/', direction = "long", type.convert = FALSE) %>%
rename_with(~paste0(., "_gsea")) %>%
rename(gene = core_enrichment_gsea)
annotated_res_eyes_shrunken_ashr_tb <- annotated_res_eyes_shrunken_ashr_tb %>%
select(gene, transcript, Sequence.Description, log2FoldChange, lfcSE, padj) %>%
rename(log2FoldChange_ashr = log2FoldChange,
lfcSE_ashr = lfcSE,
padj_ashr = padj) %>%
mutate(gene = gsub('Cluster-', '', gene),
transcript = gsub("transcript/", "", transcript))
gsea_exp0_transcripts_GO_eyes <- gsea_eyes_transcripts %>%
full_join(annotated_res_eyes_shrunken_ashr_tb)
save(gsea_exp0_transcripts_GO_eyes, file = 'results/gsea_exp0_transcripts_GO_eyes.RData')
# Pull out genes in the ion channel cluster of GO terms
ion_channel_cluster_CNS <- gsea_exp0_transcripts_GO_CNS %>%
select(gene, transcript, Sequence.Description, Description_gsea) %>%
filter(Description_gsea %in% c('ionotropic glutamate receptor signaling pathway', 'ionotropic glutamate receptor activity', 'postsynaptic membrane', 'excitatory postsynaptic potential', 'ion transmembrane transport', 'acetylcholine-gated cation-selective channel activity', 'transmembrane signaling receptor activity', 'extracellular ligand-gated ion channel activity', 'synapse')) %>%
distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
arrange(Sequence.Description)
ion_channel_cluster_eyes <- gsea_exp0_transcripts_GO_eyes %>%
select(gene, transcript, Sequence.Description, Description_gsea) %>%
filter(Description_gsea %in% c('ion transmembrane transport', 'ion channel activity')) %>%
distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
arrange(Sequence.Description)
ion_channel_cluster_CNS_eyes <- ion_channel_cluster_CNS %>%
filter(gene %in% ion_channel_cluster_eyes$gene)
save(ion_channel_cluster_CNS, ion_channel_cluster_eyes, ion_channel_cluster_CNS_eyes, file = 'results/ion_channel_cluster.RData')
ion_channel_cluster_CNS %>%
select(gene, Sequence.Description) %>%
kable(caption = 'Core enrichment genes within significantly enriched GO terms of the ion channel cluster in the CNS')
gene | Sequence.Description |
---|---|
5283.10289 | —NA— |
5283.7784 | —NA— |
5283.10291 | —NA— |
3318.0 | —NA— |
5283.9460 | —NA— |
5283.4105 | —NA— |
5283.9217 | —NA— |
5661.1 | acetylcholine receptor subunit alpha-like 1 isoform X1 |
5661.0 | acetylcholine receptor subunit alpha-like isoform X2 |
5283.4487 | acetylcholine receptor subunit beta-like 1 isoform X2 |
5283.4488 | acetylcholine receptor subunit beta-like 1 isoform X2 |
5283.4489 | acetylcholine receptor subunit beta-like 1 isoform X2 |
5283.7592 | cyclic nucleotide-gated cation channel alpha-3 |
5283.7592 | cyclic nucleotide-gated cation channel alpha-3-like |
6610.0 | cyclic nucleotide-gated cation channel beta-3-like isoform X1 |
6610.2 | cyclic nucleotide-gated cation channel beta-3-like isoform X2 |
6610.0 | cyclic nucleotide-gated cation channel beta-3-like isoform X7 |
5283.9458 | cyclic nucleotide-gated olfactory channel-like |
5283.9460 | cyclic nucleotide-gated olfactory channel-like |
7253.3 | Cys-loop ligand-gated ion channel |
7253.4 | cys-loop ligand-gated ion channel-like isoform X2 |
7253.1 | cys-loop ligand-gated ion channel-like isoform X2 |
7253.0 | cys-loop ligand-gated ion channel-like isoform X2 |
7253.2 | cys-loop ligand-gated ion channel-like isoform X2 |
7253.3 | cys-loop ligand-gated ion channel-like isoform X2 |
7253.6 | cys-loop ligand-gated ion channel-like isoform X3 |
7253.5 | cys-loop ligand-gated ion channel-like isoform X3 |
5517.0 | dnaJ homolog subfamily C member 5 |
5517.0 | dnaJ homolog subfamily C member 5-like isoform X1 |
5517.0 | dnaJ homolog subfamily C member 5-like isoform X2 |
5517.0 | dnaJ homolog subfamily C member 5 isoform X1 |
5057.2 | excitatory amino acid transporter 1-like |
5057.1 | excitatory amino acid transporter 1-like |
5283.8663 | F-box/SPRY domain-containing protein 1 |
5283.8664 | F-box/SPRY domain-containing protein 1 |
5283.8665 | F-box/SPRY domain-containing protein 1-like |
5283.9038 | gamma-aminobutyric acid receptor alpha-like |
3318.0 | gamma-aminobutyric acid receptor alpha-like |
5283.9039 | gamma-aminobutyric acid receptor alpha-like |
3050.4 | glutamate-gated chloride channel isoform X5 |
5283.11461 | glutamate receptor |
602.0 | glutamate receptor-like |
5283.11461 | glutamate receptor-like |
5283.4759 | glutamate receptor-like |
5283.8285 | glutamate receptor 1-like |
5283.4759 | glutamate receptor 1-like isoform X1 |
5283.4759 | glutamate receptor 2-like |
5283.4759 | glutamate receptor 2 isoform X2 |
5283.4759 | glutamate receptor 3 isoform X1 |
5283.8285 | glutamate receptor 4 |
5283.3373 | glutamate receptor ionotropic, kainate 2-like |
5283.3372 | glutamate receptor ionotropic, kainate 2-like |
5283.4614 | glutamate receptor ionotropic, kainate 2-like |
5283.4105 | glutamate receptor ionotropic, kainate 2-like |
5283.5622 | glutamate receptor ionotropic, kainate 2-like |
7335.0 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
7335.3 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
2901.0 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
6740.0 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
7335.4 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
7335.5 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
6740.1 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
6740.2 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
7335.6 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
7335.7 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
7335.1 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
6740.4 | glutamate receptor ionotropic, kainate 2-like isoform X2 |
6740.3 | glutamate receptor ionotropic, kainate 2 isoform X1 |
1742.0 | glutamate receptor ionotropic, NMDA 2B-like |
4067.1 | glutamate receptor ionotropic, NMDA 3A-like |
3215.0 | glutamate receptor ionotropic, NMDA 3A-like |
3215.1 | glutamate receptor ionotropic, NMDA 3A-like |
4067.0 | glutamate receptor ionotropic, NMDA 3A-like |
3215.2 | glutamate receptor ionotropic, NMDA 3A-like isoform X2 |
7335.2 | glutamate receptor subunit protein GluR6 |
3050.1 | glycine receptor subunit alpha-3 isoform X2 |
3050.0 | glycine receptor subunit alphaZ1-like |
2499.0 | glycine receptor subunit alphaZ1-like |
5283.8285 | GRIA4 protein |
5283.2592 | HCN channel protein |
5283.2593 | HCN channel protein |
5283.9211 | Hypothetical predicted protein |
5283.9212 | Hypothetical predicted protein |
7253.3 | Hypothetical predicted protein |
7211.2 | Hypothetical predicted protein |
5283.1960 | inositol 1,4,5-trisphosphate receptor type 1-like isoform X2 |
7360.0 | intersectin-1-like isoform X1 |
7360.0 | intersectin-1-like isoform X2 |
7360.0 | intersectin-1-like isoform X3 |
7360.0 | intersectin-1 isoform X10 |
7360.0 | intersectin-1 isoform X13 |
7360.0 | intersectin-1 isoform X8 |
7211.2 | liprin-alpha-1-like isoform X2 |
7211.2 | liprin-alpha-1-like isoform X3 |
7211.2 | liprin-alpha-1-like isoform X6 |
7211.2 | liprin-alpha-1-like isoform X9 |
7211.2 | liprin-alpha-1 isoform X3 |
7211.2 | liprin-alpha-1 isoform X4 |
7211.3 | liprin-alpha-1 isoform X8 |
7211.2 | liprin-alpha-1 isoform X8 |
6774.1 | muscarinic acetylcholine receptor M1-like |
6774.2 | muscarinic acetylcholine receptor M2-like protein |
5283.11306 | Na+/K+ ATPase beta subunit |
5283.5376 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.5377 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.1 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.4 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.7 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.8 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.7784 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.3847 | neuronal acetylcholine receptor subunit alpha-10-like |
5505.0 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.10 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.3 | neuronal acetylcholine receptor subunit alpha-10-like |
3136.0 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.6 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.5 | neuronal acetylcholine receptor subunit alpha-10-like |
6423.0 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.3849 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.4563 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.4564 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.4565 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.0 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.9 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.4560 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.10464 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.4562 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.2 | neuronal acetylcholine receptor subunit alpha-10-like |
6423.1 | neuronal acetylcholine receptor subunit alpha-10-like |
5505.0 | neuronal acetylcholine receptor subunit alpha-10-like isoform X1 |
5283.997 | neuronal acetylcholine receptor subunit alpha-10-like isoform X1 |
5283.2323 | neuronal acetylcholine receptor subunit alpha-10-like isoform X3 |
5283.997 | neuronal acetylcholine receptor subunit alpha-10 isoform X1 |
5283.8846 | neuronal acetylcholine receptor subunit alpha-3-like |
5283.8847 | neuronal acetylcholine receptor subunit alpha-3-like |
5283.8850 | neuronal acetylcholine receptor subunit alpha-3-like |
5283.8849 | neuronal acetylcholine receptor subunit alpha-3-like |
5283.8848 | neuronal acetylcholine receptor subunit alpha-3-like |
5283.8851 | neuronal acetylcholine receptor subunit alpha-3-like |
878.0 | neuronal acetylcholine receptor subunit alpha-5-like |
5283.10289 | neuronal acetylcholine receptor subunit alpha-5-like isoform X1 |
5283.10287 | neuronal acetylcholine receptor subunit alpha-5-like isoform X2 |
5283.10291 | neuronal acetylcholine receptor subunit alpha-5-like isoform X2 |
5283.10286 | neuronal acetylcholine receptor subunit alpha-5-like isoform X2 |
5283.10288 | neuronal acetylcholine receptor subunit alpha-5-like isoform X2 |
5283.10467 | neuronal acetylcholine receptor subunit alpha-6-like |
5283.10753 | neuronal acetylcholine receptor subunit alpha-7 isoform X1 |
5283.9321 | PDZ domain-containing protein 11 |
2261.1 | phosphatidylinositol 3,4,5-trisphosphate 3-phosphatase TPTE2-like isoform X3 |
4605.3 | piezo-type mechanosensitive ion channel component 1-like isoform X1 |
1571.0 | potassium voltage-gated channel subfamily H member 6 isoform X1 |
5283.2593 | potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 2-like |
5283.2593 | potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X2 |
5283.2593 | potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X4 |
5283.11306 | probable sodium/potassium-transporting ATPase subunit beta-3 |
5283.7180 | Protein unc-13 B |
5283.7180 | protein unc-13 homolog A-like |
5283.7180 | protein unc-13 homolog B-like isoform X1 |
5283.7180 | protein unc-13 homolog B-like isoform X2 |
5283.7177 | protein unc-13 homolog B-like isoform X3 |
5283.7180 | protein unc-13 homolog B-like isoform X4 |
5283.7180 | protein unc-13 homolog B isoform X1 |
5283.7476 | Receptor-type tyrosine-protein phosphatase N2 |
5283.7474 | Receptor-type tyrosine-protein phosphatase N2 |
5283.7476 | receptor-type tyrosine-protein phosphatase N2-like |
6010.2 | sideroflexin-1 |
6010.1 | sideroflexin-1 |
6165.7 | sideroflexin-2 |
4048.1 | sideroflexin-5 isoform X1 |
5283.9214 | sodium/potassium/calcium exchanger 2-like isoform X2 |
5283.9214 | sodium/potassium/calcium exchanger Nckx30C |
5283.9214 | sodium/potassium/calcium exchanger Nckx30C-like |
5283.9211 | sodium/potassium/calcium exchanger Nckx30C isoform X2 |
5899.6 | synaptosomal-associated protein 25 |
5899.7 | synaptosomal-associated protein 25 |
5899.5 | synaptosomal-associated protein 25 |
5899.1 | synaptosomal-associated protein 25 |
6236.5 | synaptosomal-associated protein 25 |
6236.6 | synaptosomal-associated protein 25 |
5899.3 | synaptosomal-associated protein 25 |
5899.0 | synaptosomal-associated protein 25-like isoform X2 |
6236.6 | synaptosomal-associated protein 25 isoform X1 |
5283.6899 | syntenin-1 |
5283.6896 | syntenin-1 |
5283.4554 | toll-like receptor Tollo |
5283.3487 | toll-like receptor Tollo |
5283.4187 | toll-like receptor Tollo |
5283.1236 | transient receptor potential cation channel subfamily M member 3-like |
3379.0 | transient receptor potential cation channel subfamily V member 5-like |
4897.2 | Translational regulator orb2 |
5359.2 | Two pore calcium channel protein 1 |
5359.2 | two pore calcium channel protein 1-like |
4307.0 | tyrosine-protein kinase transmembrane receptor ROR1-like isoform X1 |
5747.0 | tyrosine-protein kinase transmembrane receptor Ror2-like |
5283.10289 | uncharacterized protein LOC118470684 isoform X6 |
5283.9217 | vesicle-associated membrane protein/synaptobrevin-binding protein |
5283.9217 | vesicle-associated membrane protein/synaptobrevin-binding protein-like isoform X1 |
5283.2593 | voltage-activated ion channel, putative |
ion_channel_cluster_eyes %>%
select(gene, Sequence.Description) %>%
kable(caption = 'Core enrichment genes within significantly enriched GO terms of the ion channel cluster in the eyes')
gene | Sequence.Description |
---|---|
5283.9460 | —NA— |
5283.7784 | —NA— |
3318.0 | —NA— |
5661.1 | acetylcholine receptor subunit alpha-like 1 isoform X1 |
5661.0 | acetylcholine receptor subunit alpha-like isoform X2 |
5283.4488 | acetylcholine receptor subunit beta-like 1 isoform X2 |
5283.4489 | acetylcholine receptor subunit beta-like 1 isoform X2 |
5283.7592 | cyclic nucleotide-gated cation channel alpha-3 |
5283.7592 | cyclic nucleotide-gated cation channel alpha-3-like |
6610.0 | cyclic nucleotide-gated cation channel beta-3-like isoform X1 |
6610.1 | cyclic nucleotide-gated cation channel beta-3-like isoform X1 |
6610.2 | cyclic nucleotide-gated cation channel beta-3-like isoform X2 |
6610.0 | cyclic nucleotide-gated cation channel beta-3-like isoform X7 |
6610.1 | cyclic nucleotide-gated cation channel beta-3-like isoform X7 |
5283.9460 | cyclic nucleotide-gated olfactory channel-like |
5283.9458 | cyclic nucleotide-gated olfactory channel-like |
7253.3 | Cys-loop ligand-gated ion channel |
7253.3 | cys-loop ligand-gated ion channel-like isoform X2 |
7253.4 | cys-loop ligand-gated ion channel-like isoform X2 |
7253.0 | cys-loop ligand-gated ion channel-like isoform X2 |
7253.6 | cys-loop ligand-gated ion channel-like isoform X3 |
5283.9038 | gamma-aminobutyric acid receptor alpha-like |
5283.9040 | gamma-aminobutyric acid receptor alpha-like |
3318.0 | gamma-aminobutyric acid receptor alpha-like |
4411.0 | glutamate [NMDA] receptor subunit 1-like isoform X1 |
5283.8285 | glutamate receptor 1-like |
5283.8285 | glutamate receptor 4 |
6740.0 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
7335.0 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
7335.6 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
7335.1 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
7335.3 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
2901.0 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
6740.2 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
6740.3 | glutamate receptor ionotropic, kainate 2 isoform X1 |
1742.0 | glutamate receptor ionotropic, NMDA 2B-like |
4067.0 | glutamate receptor ionotropic, NMDA 3A-like |
3215.2 | glutamate receptor ionotropic, NMDA 3A-like isoform X2 |
2499.0 | glycine receptor subunit alphaZ1-like |
5283.8285 | GRIA4 protein |
5283.2593 | HCN channel protein |
5283.2592 | HCN channel protein |
7253.3 | Hypothetical predicted protein |
5496.1 | muscle calcium channel subunit alpha-1-like isoform X1 |
6426.1 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.10 | neuronal acetylcholine receptor subunit alpha-10-like |
6423.3 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.7 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.5 | neuronal acetylcholine receptor subunit alpha-10-like |
3136.0 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.5377 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.7784 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.3848 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.6 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.10464 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.995 | neuronal acetylcholine receptor subunit alpha-10-like isoform X1 |
5283.996 | neuronal acetylcholine receptor subunit alpha-10-like isoform X1 |
5283.994 | neuronal acetylcholine receptor subunit alpha-10-like isoform X1 |
5283.2323 | neuronal acetylcholine receptor subunit alpha-10-like isoform X3 |
5283.8846 | neuronal acetylcholine receptor subunit alpha-3-like |
5283.8847 | neuronal acetylcholine receptor subunit alpha-3-like |
878.0 | neuronal acetylcholine receptor subunit alpha-5-like |
5283.10290 | neuronal acetylcholine receptor subunit alpha-5-like isoform X1 |
5283.10467 | neuronal acetylcholine receptor subunit alpha-6-like |
2261.2 | phosphatidylinositol 3,4,5-trisphosphate 3-phosphatase TPTE2-like isoform X3 |
4605.3 | piezo-type mechanosensitive ion channel component 1-like isoform X1 |
5283.2593 | potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 2-like |
5283.2593 | potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X2 |
5283.2593 | potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X4 |
5283.9459 | serine-rich adhesin for platelets-like |
6010.2 | sideroflexin-1 |
6010.0 | sideroflexin-1 |
6165.9 | sideroflexin-2 |
5283.8067 | transient receptor potential-gamma protein-like |
5283.8067 | transient receptor potential-gamma protein-like isoform X1 |
5283.2593 | voltage-activated ion channel, putative |
All core enrichment genes from the cluster of GO terms related to ion channels, that were found significant with gene set enrichment analysis in both the CNS and eyes.
ion_channel_cluster_CNS_eyes %>%
select(gene, Sequence.Description) %>%
kable(caption = 'Core enrichment genes within significantly enriched GO terms of the ion channel cluster in both the CNS and eyes')
gene | Sequence.Description |
---|---|
5283.7784 | —NA— |
3318.0 | —NA— |
5283.9460 | —NA— |
5661.1 | acetylcholine receptor subunit alpha-like 1 isoform X1 |
5661.0 | acetylcholine receptor subunit alpha-like isoform X2 |
5283.4488 | acetylcholine receptor subunit beta-like 1 isoform X2 |
5283.4489 | acetylcholine receptor subunit beta-like 1 isoform X2 |
5283.7592 | cyclic nucleotide-gated cation channel alpha-3 |
5283.7592 | cyclic nucleotide-gated cation channel alpha-3-like |
6610.0 | cyclic nucleotide-gated cation channel beta-3-like isoform X1 |
6610.2 | cyclic nucleotide-gated cation channel beta-3-like isoform X2 |
6610.0 | cyclic nucleotide-gated cation channel beta-3-like isoform X7 |
5283.9458 | cyclic nucleotide-gated olfactory channel-like |
5283.9460 | cyclic nucleotide-gated olfactory channel-like |
7253.3 | Cys-loop ligand-gated ion channel |
7253.4 | cys-loop ligand-gated ion channel-like isoform X2 |
7253.0 | cys-loop ligand-gated ion channel-like isoform X2 |
7253.3 | cys-loop ligand-gated ion channel-like isoform X2 |
7253.6 | cys-loop ligand-gated ion channel-like isoform X3 |
5283.9038 | gamma-aminobutyric acid receptor alpha-like |
3318.0 | gamma-aminobutyric acid receptor alpha-like |
5283.8285 | glutamate receptor 1-like |
5283.8285 | glutamate receptor 4 |
7335.0 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
7335.3 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
2901.0 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
6740.0 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
6740.2 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
7335.6 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
7335.1 | glutamate receptor ionotropic, kainate 2-like isoform X1 |
6740.3 | glutamate receptor ionotropic, kainate 2 isoform X1 |
1742.0 | glutamate receptor ionotropic, NMDA 2B-like |
4067.0 | glutamate receptor ionotropic, NMDA 3A-like |
3215.2 | glutamate receptor ionotropic, NMDA 3A-like isoform X2 |
2499.0 | glycine receptor subunit alphaZ1-like |
5283.8285 | GRIA4 protein |
5283.2592 | HCN channel protein |
5283.2593 | HCN channel protein |
7253.3 | Hypothetical predicted protein |
5283.5377 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.1 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.7 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.7784 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.10 | neuronal acetylcholine receptor subunit alpha-10-like |
3136.0 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.6 | neuronal acetylcholine receptor subunit alpha-10-like |
6426.5 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.10464 | neuronal acetylcholine receptor subunit alpha-10-like |
5283.2323 | neuronal acetylcholine receptor subunit alpha-10-like isoform X3 |
5283.8846 | neuronal acetylcholine receptor subunit alpha-3-like |
5283.8847 | neuronal acetylcholine receptor subunit alpha-3-like |
878.0 | neuronal acetylcholine receptor subunit alpha-5-like |
5283.10467 | neuronal acetylcholine receptor subunit alpha-6-like |
4605.3 | piezo-type mechanosensitive ion channel component 1-like isoform X1 |
5283.2593 | potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 2-like |
5283.2593 | potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X2 |
5283.2593 | potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X4 |
6010.2 | sideroflexin-1 |
5283.2593 | voltage-activated ion channel, putative |
# Pull out genes in the ion transport, voltage cluster of GO terms
ion_transport_voltage_cluster_CNS <- gsea_exp0_transcripts_GO_CNS %>%
select(gene, transcript, Sequence.Description, Description_gsea) %>%
filter(Description_gsea %in% c('potassium ion transport', 'potassium channel activity', 'potassium ion transmembrane transport', 'voltage-gated potassium channel activity', 'regulation of ion transmembrane transport', 'voltage-gated calcium channel complex', 'calcium ion transmembrane transport')) %>%
distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
arrange(Sequence.Description)
save(ion_transport_voltage_cluster_CNS, file = 'results/ion_transport_voltage_cluster.RData')
ion_transport_voltage_cluster_CNS %>%
select(gene, Sequence.Description) %>%
kable(caption = 'Core enrichment genes within significantly enriched GO terms of the ion transport, voltage cluster in the CNS')
gene | Sequence.Description |
---|---|
6427.3 | —NA— |
5283.9460 | —NA— |
6093.6 | ATP-binding cassette sub-family C member 9-like isoform X1 |
6093.4 | ATP-binding cassette sub-family C member 9-like isoform X1 |
6093.4 | ATP-binding cassette sub-family C member 9-like isoform X2 |
6093.4 | ATP-binding cassette sub-family C member 9-like isoform X3 |
6093.1 | ATP-binding cassette sub-family C member 9 isoform X1 |
6093.0 | ATP-binding cassette sub-family C member 9 isoform X1 |
5283.3940 | CACNA1G |
5283.7244 | CACNA1G |
1506.0 | calcium-activated potassium channel slowpoke isoform X15 |
1506.1 | calcium-activated potassium channel subunit alpha-1 isoform X10 |
1506.1 | calcium-activated potassium channel subunit alpha-1 isoform X8 |
1506.0 | calcium-activated potassium channel subunit alpha-1 isoform X8 |
7412.0 | calcium channel beta subunit |
5283.10202 | calcium uniporter protein, mitochondrial-like |
5283.7592 | cyclic nucleotide-gated cation channel alpha-3 |
5283.7592 | cyclic nucleotide-gated cation channel alpha-3-like |
5283.9460 | cyclic nucleotide-gated olfactory channel-like |
1329.0 | G protein-activated inward rectifier potassium channel 2-like |
5187.3 | G protein-activated inward rectifier potassium channel 3-like |
5283.7191 | G protein-activated inward rectifier potassium channel 3-like |
5283.7192 | G protein-activated inward rectifier potassium channel 3-like |
5187.7 | G protein-activated inward rectifier potassium channel 3-like |
5283.2593 | HCN channel protein |
5496.1 | muscle calcium channel subunit alpha-1-like isoform X1 |
5496.0 | muscle calcium channel subunit alpha-1-like isoform X3 |
1933.0 | muscle calcium channel subunit alpha-1-like isoform X4 |
5283.11306 | Na+/K+ ATPase beta subunit |
206.0 | neuronal calcium sensor 1 |
7325.1 | plasma membrane calcium-transporting ATPase 2-like isoform X1 |
7325.6 | plasma membrane calcium-transporting ATPase 2-like isoform X1 |
7325.1 | plasma membrane calcium-transporting ATPase 2 isoform X1 |
7325.5 | plasma membrane calcium-transporting ATPase 2 isoform X1 |
7325.2 | plasma membrane calcium-transporting ATPase 2 isoform X1 |
7107.0 | potassium channel Kv1 |
5283.9860 | potassium channel subfamily K member 1-like |
5283.4027 | potassium channel subfamily K member 4-like |
5283.1287 | potassium channel subfamily T member 2-like isoform X1 |
5283.1286 | potassium channel subfamily T member 2-like isoform X1 |
5283.5242 | potassium channel subfamily T member 2-like isoform X1 |
5283.1287 | potassium channel subfamily T member 2-like isoform X2 |
5283.5240 | potassium channel subfamily T member 2-like isoform X2 |
5283.5241 | potassium channel subfamily T member 2-like isoform X2 |
5283.5239 | potassium channel subfamily T member 2-like isoform X3 |
5283.5241 | potassium channel subfamily T member 2-like isoform X5 |
7032.0 | potassium intermediate/small conductance calcium-activated channel subfamily N member 3 |
7032.1 | potassium intermediate/small conductance calcium-activated channel subfamily N member 3 |
7109.1 | potassium voltage-gated channel protein Shab-like |
5283.2059 | potassium voltage-gated channel protein Shaker-like |
5283.2058 | potassium voltage-gated channel protein Shaker-like |
5283.2060 | potassium voltage-gated channel protein Shaker-like |
5283.2159 | potassium voltage-gated channel protein Shal-like |
5283.2158 | potassium voltage-gated channel protein Shal-like |
5283.2160 | potassium voltage-gated channel protein Shal-like isoform X1 |
5283.2157 | potassium voltage-gated channel protein Shal-like isoform X1 |
7066.1 | potassium voltage-gated channel protein Shaw-like |
7066.0 | potassium voltage-gated channel protein Shaw-like |
6752.2 | potassium voltage-gated channel protein Shaw-like isoform X1 |
6752.0 | potassium voltage-gated channel protein Shaw-like isoform X1 |
6752.1 | potassium voltage-gated channel protein Shaw-like isoform X1 |
995.0 | potassium voltage-gated channel subfamily A member 1-like |
7109.0 | potassium voltage-gated channel subfamily B member 2 isoform X6 |
7109.1 | potassium voltage-gated channel subfamily B member 2 isoform X6 |
5283.1038 | potassium voltage-gated channel subfamily H member 4 |
5283.1038 | potassium voltage-gated channel subfamily H member 4 isoform X1 |
1571.0 | potassium voltage-gated channel subfamily H member 6 isoform X1 |
1768.0 | potassium voltage-gated channel subfamily H member 7-like isoform X1 |
5283.1038 | potassium voltage-gated channel subfamily H member 8-like |
3824.0 | potassium voltage-gated channel subfamily KQT member 1-like isoform X1 |
3824.1 | potassium voltage-gated channel subfamily KQT member 1-like isoform X1 |
5283.2593 | potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 2-like |
5283.2593 | potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X2 |
5283.2593 | potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X4 |
7109.1 | probable serine/threonine-protein kinase DDB_G0282963 |
5283.11306 | probable sodium/potassium-transporting ATPase subunit beta-3 |
6905.0 | putative Na+/K+-ATPase alpha subunit |
5283.4923 | riboflavin-binding protein-like |
5283.324 | ryanodine receptor-like isoform X11 |
5283.324 | ryanodine receptor-like isoform X12 |
5283.324 | ryanodine receptor-like isoform X15 |
5283.324 | ryanodine receptor-like isoform X17 |
5283.324 | ryanodine receptor-like isoform X2 |
5283.324 | ryanodine receptor-like isoform X7 |
5283.324 | ryanodine receptor 2 |
5283.324 | ryanodine receptor 2-like isoform X10 |
1483.0 | short transient receptor potential channel 3-like |
1418.0 | short transient receptor potential channel 3-like isoform X1 |
7032.1 | small conductance calcium-activated potassium channel protein-like |
7032.2 | small conductance calcium-activated potassium channel protein 2 isoform X1 |
7032.1 | small conductance calcium-activated potassium channel protein 2 isoform X1 |
7381.7 | sodium channel |
5283.2924 | sodium channel protein 1 brain |
5283.2924 | sodium channel protein 1 brain-like |
5283.2922 | sodium channel protein 1 brain-like |
5283.2924 | sodium channel protein 1 brain-like isoform X1 |
5283.2924 | sodium channel protein 1 brain isoform X1 |
5283.2924 | sodium channel protein para isoform X2 |
5283.2922 | sodium channel protein para isoform X2 |
7381.6 | sodium channel protein type 4 subunit alpha A-like isoform X1 |
5283.2924 | sodium channel protein type 4 subunit alpha B-like |
7381.2 | sodium channel protein type 4 subunit alpha B-like isoform X1 |
7381.3 | sodium channel protein type 4 subunit alpha B-like isoform X1 |
7381.1 | sodium channel protein type 4 subunit alpha B-like isoform X1 |
7381.5 | sodium channel protein type 4 subunit alpha B-like isoform X1 |
7381.4 | sodium channel protein type 4 subunit alpha B-like isoform X1 |
5283.9598 | sodium/potassium-transporting ATPase subunit beta-like |
5283.9595 | sodium/potassium-transporting ATPase subunit beta-like |
5283.9596 | sodium/potassium-transporting ATPase subunit beta-like |
5283.9600 | sodium/potassium-transporting ATPase subunit beta-like |
4619.0 | sodium/potassium-transporting ATPase subunit beta-like |
5283.9599 | sodium/potassium-transporting ATPase subunit beta-like |
4619.1 | sodium/potassium-transporting ATPase subunit beta-like |
5283.9594 | sodium/potassium-transporting ATPase subunit beta-like |
5283.9214 | sodium/potassium/calcium exchanger 2-like isoform X2 |
7433.2 | sodium/potassium/calcium exchanger 4-like |
7433.0 | sodium/potassium/calcium exchanger 4-like |
7433.3 | sodium/potassium/calcium exchanger 4-like |
7433.1 | sodium/potassium/calcium exchanger 4-like |
5283.9214 | sodium/potassium/calcium exchanger Nckx30C |
5283.9214 | sodium/potassium/calcium exchanger Nckx30C-like |
5283.9213 | sodium/potassium/calcium exchanger Nckx30C-like |
6396.2 | solute carrier family 12 member 4-like isoform X1 |
6396.3 | solute carrier family 12 member 4-like isoform X1 |
6396.1 | solute carrier family 12 member 4-like isoform X2 |
6396.9 | solute carrier family 12 member 4-like isoform X2 |
3790.0 | transient-receptor-potential-like protein |
3790.0 | transient-receptor-potential-like protein isoform X1 |
3790.1 | transient-receptor-potential-like protein isoform X1 |
5283.3687 | transient receptor potential-gamma protein-like |
5811.9 | transient receptor potential cation channel subfamily M member-like 2 isoform X1 |
6627.0 | trimeric intracellular cation channel type 1B.1-like |
7122.2 | TWiK family of potassium channels protein 18-like |
7122.1 | TWiK family of potassium channels protein 18-like |
7122.0 | TWiK family of potassium channels protein 18-like |
6244.0 | two pore potassium channel protein sup-9-like |
5283.2593 | voltage-activated ion channel, putative |
6427.3 | voltage-dependent calcium channel |
6427.2 | voltage-dependent calcium channel |
1684.1 | voltage-dependent calcium channel |
6427.3 | voltage-dependent calcium channel type A subunit alpha-1-like |
6427.0 | voltage-dependent calcium channel type A subunit alpha-1-like |
6427.3 | voltage-dependent calcium channel type A subunit alpha-1-like isoform X6 |
1684.0 | voltage-dependent calcium channel type A subunit alpha-1 isoform X2 |
5283.3939 | voltage-dependent T-type calcium channel subunit alpha-1G-like |
5283.3940 | voltage-dependent T-type calcium channel subunit alpha-1G-like isoform X2 |
5283.7246 | voltage-dependent T-type calcium channel subunit alpha-1G-like isoform X2 |
5283.7243 | voltage-dependent T-type calcium channel subunit alpha-1H-like |
5283.7245 | voltage-dependent T-type calcium channel subunit alpha-1H-like |
5283.7242 | voltage-dependent T-type calcium channel subunit alpha-1I-like |
5283.7244 | voltage-dependent T-type calcium channel subunit alpha-1I-like |
5283.10568 | voltage-gated potassium channel subunit beta-2-like isoform X1 |
5283.10569 | voltage-gated potassium channel subunit beta-2-like isoform X1 |
7381.5 | voltage-gated sodium channel invertebrate type 1 |
# Pull out genes in the GPCR cluster of GO terms
GPCR_cluster_CNS <- gsea_exp0_transcripts_GO_CNS %>%
select(gene, transcript, Sequence.Description, Description_gsea) %>%
filter(Description_gsea %in% c('G protein-coupled receptor activity', 'G protein-coupled receptor signaling pathway')) %>%
distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
arrange(Sequence.Description)
save(GPCR_cluster_CNS, file = 'results/GPCR_cluster.RData')
GPCR_cluster_CNS %>%
select(gene, Sequence.Description) %>%
kable(caption = 'Core enrichment genes within significantly enriched GO terms of the GPCR cluster in the CNS')
gene | Sequence.Description |
---|---|
5283.11228 | —NA— |
6174.0 | —NA— |
5283.7636 | —NA— |
5283.4060 | —NA— |
3985.0 | 5-hydroxytryptamine receptor-like |
396.0 | 5-hydroxytryptamine receptor 1D-like |
5283.11786 | 5-hydroxytryptamine receptor 2C-like |
5283.11785 | 5-hydroxytryptamine receptor 2C-like |
5283.1508 | 5-hydroxytryptamine receptor 4-like |
5283.9162 | adhesion G-protein coupled receptor D1-like |
5283.9162 | adhesion G-protein coupled receptor D2-like |
252.1 | adhesion G protein-coupled receptor A3-like |
6594.0 | adhesion G protein-coupled receptor L2-like isoform X1 |
6594.0 | adhesion G protein-coupled receptor L2 isoform X1 |
2554.0 | allatostatin-A receptor |
1650.0 | alpha-2A adrenergic receptor |
5283.10427 | cadherin EGF LAG seven-pass G-type receptor 1-like |
5283.10430 | cadherin EGF LAG seven-pass G-type receptor 1-like |
5283.10428 | cadherin EGF LAG seven-pass G-type receptor 1-like |
5596.4 | cadherin EGF LAG seven-pass G-type receptor 1 isoform X3 |
5596.1 | cadherin EGF LAG seven-pass G-type receptor 1 isoform X3 |
5596.3 | cadherin EGF LAG seven-pass G-type receptor 2 isoform X1 |
5596.2 | cadherin EGF LAG seven-pass G-type receptor 2 isoform X5 |
5283.11833 | calcitonin gene-related peptide type 1 receptor-like isoform X1 |
5283.2398 | cAMP-dependent protein kinase catalytic subunit beta |
5283.2398 | cAMP-dependent protein kinase catalytic subunit isoform X5 |
5283.2397 | cAMP-dependent protein kinase catalytic subunit isoform X5 |
5283.9030 | cardioacceleratory peptide receptor-like isoform X1 |
5283.5948 | catalytic subunit of protein kinase A |
2171.0 | cholecystokinin receptor type A-like |
3084.0 | cholecystokinin receptor type A-like |
5283.8548 | corticotropin-releasing factor receptor 2-like |
6072.0 | CX3C chemokine receptor 1 isoform X1 |
5283.11354 | dopamine receptor 1-like |
5283.11353 | dopamine receptor 1-like |
3912.1 | dopamine receptor 1-like |
3716.0 | FMRFamide receptor-like |
1567.0 | FMRFamide receptor-like |
3871.0 | FMRFamide receptor-like |
5415.1 | FMRFamide receptor-like |
6899.1 | frizzled-9 |
6899.1 | frizzled-9-like |
6899.1 | Frizzled 10A |
6899.0 | Frizzled 10A |
6899.1 | frizzled 9/10 |
5283.9573 | G-protein coupled receptor 143-like |
5283.9574 | G-protein coupled receptor 143-like |
1827.0 | G-protein coupled receptor 161-like |
4382.0 | G-protein coupled receptor Mth-like |
5283.11232 | GABR1 protein |
5283.11232 | gamma-aminobutyric acid type B receptor subunit 1-like |
5283.11229 | gamma-aminobutyric acid type B receptor subunit 1-like |
5283.8308 | Gamma-aminobutyric acid type B receptor subunit 2 |
5283.10141 | gonadotropin-releasing hormone receptor |
5283.2535 | growth hormone secretagogue receptor type 1-like |
5283.732 | guanine nucleotide-binding protein subunit beta-5 |
5283.2425 | Hypothetical predicted protein |
5283.4222 | leucine-rich repeat-containing G-protein coupled receptor 5-like |
5283.4293 | leucine-rich repeat-containing G-protein coupled receptor 5-like |
5283.4294 | leucine-rich repeat-containing G-protein coupled receptor 5-like |
5283.10762 | lissencephaly-1 homolog |
5283.10762 | lissencephaly-1 homolog B |
5283.4221 | lutropin-choriogonadotropic hormone receptor isoform X3 |
5283.11295 | melatonin receptor type 1A-like |
5283.11297 | melatonin receptor type 1A-like |
5283.7577 | metabotropic glutamate receptor 1-like |
5283.7570 | metabotropic glutamate receptor 1-like |
5283.7573 | metabotropic glutamate receptor 1-like |
5283.7569 | metabotropic glutamate receptor 1-like |
5283.7574 | metabotropic glutamate receptor 1-like |
1651.0 | metabotropic glutamate receptor 3 |
5283.7578 | metabotropic glutamate receptor 5-like |
5283.7575 | metabotropic glutamate receptor 5-like |
6075.0 | neuropeptide FF receptor 1-like |
6075.1 | neuropeptide FF receptor 1-like |
1357.0 | octopamine receptor Oamb-like |
1357.1 | octopamine receptor Oamb-like |
2184.1 | octopamine receptor Oamb isoform X1 |
2664.0 | orexin receptor type 2-like |
5283.9529 | orexin receptor type 2-like |
3932.1 | parathyroid hormone/parathyroid hormone-related peptide receptor isoform X3 |
6072.0 | PREDICTED: uncharacterized protein LOC106880461 |
4782.0 | prolactin-releasing peptide receptor-like |
5283.2398 | protein kinase A |
5283.2397 | protein kinase A |
1909.1 | pyrokinin-1 receptor-like |
5283.1602 | regulator of G-protein signaling 4-like |
5283.1599 | regulator of G-protein signaling 4-like |
5283.1146 | retinochrome |
5283.8098 | retinochrome |
5283.1143 | retinochrome |
5283.1145 | retinochrome |
5283.1142 | retinochrome |
5283.7855 | rhodopsin |
5283.7636 | S-antigen protein-like |
3932.0 | secretin receptor-like isoform X1 |
6876.2 | somatostatin receptor type 2-like |
5629.0 | somatostatin receptor type 3-like |
5984.0 | thyrotropin-releasing hormone receptor-like |
5629.0 | thyrotropin-releasing hormone receptor-like |
5283.1199 | type-1 angiotensin II receptor isoform X1 |
5283.1602 | unnamed protein product |
gsea_result_CNS_ashr_pos <- gsea_CNS_ashr_annot_exp0_summary %>%
filter(NES > 0) %>%
select(ID, Description, setSize, enrichmentScore, NES, pvalue, qvalues, p.adjust, rank, leading_edge) %>%
rename(NAME = ID) %>%
mutate('GS<br> follow link to MSigDB' = '') %>%
rename('GS DETAILS' = Description,
SIZE = setSize,
ES = enrichmentScore,
'NOM p-val' = pvalue,
'FDR q-val' = qvalues,
'FWER p-val' = p.adjust,
'RANK AT MAX' = rank,
'LEADING EDGE' = leading_edge) %>%
relocate('GS<br> follow link to MSigDB', .after = NAME)
write.table(gsea_result_CNS_ashr_pos, file = 'gsea_cytoscape/gsea_result_CNS_ashr_pos.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)
gsea_result_CNS_ashr_neg <- gsea_CNS_ashr_annot_exp0_summary %>%
filter(NES < 0) %>%
select(ID, Description, setSize, enrichmentScore, NES, pvalue, qvalues, p.adjust, rank, leading_edge) %>%
rename(NAME = ID) %>%
mutate('GS<br> follow link to MSigDB' = '') %>%
rename('GS DETAILS' = Description,
SIZE = setSize,
ES = enrichmentScore,
'NOM p-val' = pvalue,
'FDR q-val' = qvalues,
'FWER p-val' = p.adjust,
'RANK AT MAX' = rank,
'LEADING EDGE' = leading_edge) %>%
relocate('GS<br> follow link to MSigDB', .after = NAME)
write.table(gsea_result_CNS_ashr_neg, file = 'gsea_cytoscape/gsea_result_CNS_ashr_neg.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)
gsea_result_eyes_ashr_pos <- gsea_eyes_ashr_annot_exp0_summary %>%
filter(NES > 0) %>%
select(ID, Description, setSize, enrichmentScore, NES, pvalue, qvalues, p.adjust, rank, leading_edge) %>%
rename(NAME = ID) %>%
mutate('GS<br> follow link to MSigDB' = '') %>%
rename('GS DETAILS' = Description,
SIZE = setSize,
ES = enrichmentScore,
'NOM p-val' = pvalue,
'FDR q-val' = qvalues,
'FWER p-val' = p.adjust,
'RANK AT MAX' = rank,
'LEADING EDGE' = leading_edge) %>%
relocate('GS<br> follow link to MSigDB', .after = NAME)
write.table(gsea_result_eyes_ashr_pos, file = 'gsea_cytoscape/gsea_result_eyes_ashr_pos.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)
gsea_result_eyes_ashr_neg <- gsea_eyes_ashr_annot_exp0_summary %>%
filter(NES < 0) %>%
select(ID, Description, setSize, enrichmentScore, NES, pvalue, qvalues, p.adjust, rank, leading_edge) %>%
rename(NAME = ID) %>%
mutate('GS<br> follow link to MSigDB' = '') %>%
rename('GS DETAILS' = Description,
SIZE = setSize,
ES = enrichmentScore,
'NOM p-val' = pvalue,
'FDR q-val' = qvalues,
'FWER p-val' = p.adjust,
'RANK AT MAX' = rank,
'LEADING EDGE' = leading_edge) %>%
relocate('GS<br> follow link to MSigDB', .after = NAME)
write.table(gsea_result_eyes_ashr_neg, file = 'gsea_cytoscape/gsea_result_eyes_ashr_neg.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)
res_CNS_shrunken_ashr_tb_rename <- res_CNS_shrunken_ashr_tb %>%
mutate(gene = gsub('Cluster-', '', gene))
res_CNS <- res_CNS_shrunken_ashr_tb_rename %>%
select(gene, log2FoldChange) %>%
as.data.frame()
write.table(res_CNS, file = 'gsea_cytoscape/gsea_CNS_ashr_rank.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)
res_eyes_shrunken_ashr_tb_rename <- res_eyes_shrunken_ashr_tb %>%
mutate(gene = gsub('Cluster-', '', gene))
res_eyes <- res_eyes_shrunken_ashr_tb_rename %>%
select(gene, log2FoldChange) %>%
as.data.frame()
write.table(res_eyes, file = 'gsea_cytoscape/gsea_eyes_ashr_rank.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)
normalised_counts_CNS_tb_rename <- normalised_counts_CNS_tb %>%
mutate(gene = gsub("Cluster-", "", gene)) %>%
rename_with(~ gsub("CNS", "CNS_ambient", .x), c("PS506_CNS", "PS508_CNS", "PS513_CNS", "PS514_CNS", "PS518_CNS", "PS529_CNS", "PS534_CNS", "PS551_CNS", "PS557_CNS", "PS561_CNS")) %>%
rename_with(~ gsub("CNS", "CNS_elevated", .x), c("PS509_CNS", "PS510_CNS", "PS522_CNS", "PS527_CNS", "PS528_CNS", "PS539_CNS", "PS547_CNS", "PS584_CNS", "PS626_CNS", "PS636_CNS")) %>%
relocate(ends_with("ambient"), .before = ends_with("elevated"))
normalised_counts_CNS_annotated <- normalised_counts_CNS_tb_rename %>%
full_join(corset_clusters_2_transcripts_renamed) %>%
full_join(annotated_transcriptome_rename) %>%
select(gene, Sequence.Description, PS506_CNS_ambient:PS636_CNS_elevated) %>%
distinct(gene, .keep_all = TRUE)
write.table(normalised_counts_CNS_annotated, file = 'gsea_cytoscape/gsea_CNS_ashr_expression.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)
normalised_counts_eyes_tb_rename <- normalised_counts_eyes_tb %>%
mutate(gene = gsub("Cluster-", "", gene)) %>%
rename_with(~ gsub("eyes", "eyes_ambient", .x), c("PS506_eyes", "PS508_eyes", "PS513_eyes", "PS514_eyes", "PS518_eyes", "PS529_eyes", "PS534_eyes", "PS551_eyes", "PS557_eyes", "PS561_eyes")) %>%
rename_with(~ gsub("eyes", "eyes_elevated", .x), c("PS509_eyes", "PS510_eyes", "PS522_eyes", "PS527_eyes", "PS528_eyes", "PS539_eyes", "PS547_eyes", "PS584_eyes", "PS626_eyes", "PS636_eyes")) %>%
relocate(ends_with("ambient"), .before = ends_with("elevated"))
normalised_counts_eyes_annotated <- normalised_counts_eyes_tb_rename %>%
full_join(corset_clusters_2_transcripts_renamed) %>%
full_join(annotated_transcriptome_rename) %>%
select(gene, Sequence.Description, PS506_eyes_ambient:PS636_eyes_elevated) %>%
distinct(gene, .keep_all = TRUE)
write.table(normalised_counts_eyes_annotated, file = 'gsea_cytoscape/gsea_eyes_ashr_expression.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)
gmt <- read.delim(file = 'data/blast2go_gene_sets.txt')
gmt <- gmt %>%
as_tibble()
# need corresponding genes for the transcript list
gmt_split <- cSplit(gmt, splitCols = 'Sequences', ',', direction = "long")
gmt_genes <- gmt_split %>%
rename(transcript = Sequences) %>%
mutate(transcript = gsub("transcript/", '', transcript)) %>%
full_join(corset_clusters_2_transcripts_renamed) %>%
select(GO.ID, gene)
#collapse so a list of tab delimited genes for each GO term - should have 5731 rows
gmt_genes_list <- gmt_genes %>%
drop_na() %>%
group_by(GO.ID) %>%
dplyr::summarise(gene = paste(unique(gene), collapse='\t'))
# Add GO term/description to gmt_genes
# Used Annotation.GO.ID for GSEA in ClusterProfiler
# So extract all Annotation.GO.ID and corresponding Annotation.GO.Term and add to gmt_genes and should have all the GO terms with their description that were use din the analysis
GO <- annotated_transcriptome %>%
select(Annotation.GO.ID, Annotation.GO.Term) %>%
rename(GO.ID = Annotation.GO.ID,
GO.Term = Annotation.GO.Term)
GO_split <- cSplit(GO, c('GO.ID', 'GO.Term'), ';', direction = "long")
GO_split <- GO_split %>%
distinct(GO.ID, .keep_all = TRUE) # transcriptome contains genes annotated with same GO terms so pull out only one of each GO
gmt_genes_GO <- gmt_genes_list %>%
left_join(GO_split) %>%
select(GO.ID, GO.Term, gene)
write.table(gmt_genes_GO, file = 'gsea_cytoscape/genesets.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = FALSE)
Calculate some statistics regarding annotation of the transcriptome
No._no_blast_hits <- sum(is.na(annotated_transcriptome$Blast.Top.Hit.Score))
No._transcripts <- nrow(annotated_transcriptome)
(No._transcripts - No._no_blast_hits)/No._transcripts*100
93% transcripts had a blast hit
# Replace blank cells with 'NA'
annotated_transcriptome$Mapping.GO.ID[annotated_transcriptome$Mapping.GO.ID == ""] <- NA
No._no_GO_mapping <- sum(is.na(annotated_transcriptome$Mapping.GO.ID))
No._transcripts <- nrow(annotated_transcriptome)
(No._transcripts - No._no_GO_mapping)/No._transcripts*100
73% of transcripts succesfully GO mapped
# Replace blank cells with 'NA'
annotated_transcriptome$Annotation.GO.ID[annotated_transcriptome$Annotation.GO.ID == ""] <- NA
No._no_complete_annotation <- sum(is.na(annotated_transcriptome$Annotation.GO.ID))
No._transcripts <- nrow(annotated_transcriptome)
(No._transcripts - No._no_complete_annotation)/No._transcripts*100
69% of transcripts completely annotated
Make PCA
rld$sampletype <- factor(rld$sampletype, levels = c("Ambient_CNS", "Elevated_CNS", "Ambient_eyes", "Elevated_eyes"))
pcaData <- plotPCA(rld, intgroup = "sampletype", ntop = 50000000, returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
( figure_PCA <- ggplot(pcaData, aes(x = PC1, y = PC2,
fill = factor(sampletype),
shape = factor(sampletype)),
colour = "black") +
geom_point(size = 2) +
scale_shape_manual(values=c(21, 22, 23, 24),
name = "",
labels = c(bquote("CNS: Current-day CO"[2]),
bquote("CNS: Elevated CO"[2]),
bquote("Eyes: Current-day CO"[2]),
bquote("Eyes: Elevated CO"[2]))) +
scale_fill_manual(values = c('#FFFFFF', '#FDE725', '#FFFFFF', '#FDE725'),
name = "",
labels = c(bquote("CNS: Current-day CO"[2]),
bquote("CNS: Elevated CO"[2]),
bquote("Eyes: Current-day CO"[2]),
bquote("Eyes: Elevated CO"[2]))) +
xlab(paste0("PC1: ", percentVar[1], "% variance")) +
ylab(paste0("PC2: ", percentVar[2], "% variance")) +
coord_cartesian(ylim = c(-85, 85), xlim = c(-152, 152)) +
scale_x_continuous(breaks = c(-100, 0, 100),
labels = c(-100, 0, 100)) +
scale_y_continuous(breaks = c(-80, -40, 0, 40),
labels = c(-80, -40, 0, 40)) +
theme_classic() +
theme(axis.title = element_text(size = 10),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
legend.text = element_text(size = 8),
legend.position = c(0.5,1.1)) +
guides(fill = guide_legend(nrow = 2, byrow = FALSE),
shape = guide_legend(nrow = 2, byrow = FALSE))
)
save(figure_PCA, file = 'publication_figures/figure_PCA.RData')
Make Venn diagrams
DE_CNS <- sig_DE_CNS_ashr %>%
distinct(gene)
DE_eyes <- sig_DE_eyes_ashr_final %>%
distinct(gene)
x <- c(list(DE_CNS$gene), list(DE_eyes$gene))
names(x) <- c('CNS', 'Eyes')
(venn_de <- ggvenn(x,
show_percentage = FALSE,
text_size = 0,
set_name_size = 4,
fill_color = c('#4040FF', '#BFBFFF'),
fill_alpha = 0.8) +
annotate('text', label = '23', x = -1, y = 0.25, size = 4) +
annotate("rect", xmin = -1.2, xmax = -0.8, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
annotate('text', label = '2', x = 0, y = 0.25, size = 4) +
annotate("rect", xmin = -0.2, xmax = 0.2, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
annotate('text', label = '6', x = 1, y = 0.25, size = 4) +
annotate("rect", xmin = 0.8, xmax = 1.2, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
annotate('text', label = expression('12 ' *symbol('\255')), x = -1, y = -0.25) +
annotate('text', label = expression('11 ' *symbol('\257')), x = -1, y = -0.55) +
annotate('text', label = expression('3 ' *symbol('\255')), x = 1, y = -0.25) +
annotate('text', label = expression('3 ' *symbol('\257')), x = 1, y = -0.55) +
annotate('text', label = expression('2 ' *symbol('\255')), x = 0, y = -0.25) +
theme(plot.margin = unit(c(0,0,0,0), "pt"))
)
save(venn_de, file = 'publication_figures/venn_de.RData')
GSEA_CNS <- gsea_CNS_ashr_annot_exp0_summary %>%
distinct(ID)
GSEA_eyes <- gsea_eyes_ashr_annot_exp0_summary %>%
distinct(ID)
x <- c(list(GSEA_CNS$ID), list(GSEA_eyes$ID))
names(x) <- c('CNS', 'Eyes')
(venn_gsea <- ggvenn(x,
show_percentage = FALSE,
text_size = 0,
set_name_size = 4,
fill_color = c('#4040FF', '#BFBFFF'),
fill_alpha = 0.8) +
annotate('text', label = '89', x = -1, y = 0.25, size = 4) +
annotate("rect", xmin = -1.2, xmax = -0.8, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
annotate('text', label = '10', x = 0, y = 0.35, size = 4) +
annotate("rect", xmin = -0.15, xmax = 0.15, ymin = 0.22, ymax = 0.45, fill = NA, colour = 'black', size = 0.5) +
annotate('text', label = '7', x = 1, y = 0.25, size = 4) +
annotate("rect", xmin = 0.8, xmax = 1.2, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
annotate('text', label = expression('69 ' *symbol('\255')), x = -1, y = -0.25) +
annotate('text', label = expression('20 ' *symbol('\257')), x = -1, y = -0.55) +
annotate('text', label = expression('5 ' *symbol('\255')), x = 1, y = -0.25) +
annotate('text', label = expression('2 ' *symbol('\257')), x = 1, y = -0.55) +
annotate('text', label = expression('5 ' *symbol('\255'), ' / ' *symbol('\255')), x = -0.1, y = 0.1) +
annotate('text', label = expression('2 ' *symbol('\255'), ' / ' *symbol('\257')), x = -0.1, y = -0.05) +
annotate('text', label = expression('1 ' *symbol('\257'), ' / ' *symbol('\257')), x = -0.1, y = -0.2) +
annotate('text', label = expression('2 ' *symbol('\257'), ' / ' *symbol('\255')), x = -0.1, y = -0.35) +
theme(plot.margin = unit(c(0,0,0,0), "pt"))
)
save(venn_gsea, file = 'publication_figures/venn_gsea.RData')
Combine into one figure
(figure_DE_GSEA <- (
(
plot_spacer()
|
(figure_PCA + theme(plot.margin = unit(c(30,50,30,50), "pt")))
|
plot_spacer()
) +
plot_layout(widths = c(0.3,1,0.3))
)
/
(
(
(venn_de + theme(plot.margin = unit(c(0,0,0,0), "pt")))
|
(venn_gsea + theme(plot.margin = unit(c(0,0,0,0), "pt")))
) +
plot_layout(widths = c(1,1))
) +
plot_layout(heights = c(1,1)) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(face = 'bold'))
)
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
The top blast hit species distribution .txt file is exported from OmicsBox: First run BLAST statistics, then go to tab ‘Chart: Top-hit species distribution’ and in the toolbar click ‘Data as text’.
species <- read.delim(file = 'data/top_hit_species_distribution_annotated_transcriptome_orfs.txt')
save(species, file = 'publication_figures/species.RData')
# Read in .txt file output from OmicsBox
a <- species %>%
rename(No._top_hits = X.BLAST.Top.Hits) %>%
arrange(-No._top_hits) %>%
slice_head(n = 20) %>%
mutate(order = as.factor(row_number()))
b <- species %>%
rename(No._top_hits = X.BLAST.Top.Hits) %>%
arrange(No._top_hits) %>%
slice_head(n = nrow(species) - 20) %>%
summarize(No._top_hits = sum(No._top_hits)) %>%
mutate(Species = 'others')
species_data <- a %>%
full_join(b) %>%
arrange(order)
(species_distr <- ggplot(species_data, aes(x = No._top_hits, y = rev(order))) +
geom_col(colour = 'black', size = 0.05, fill = '#FF8080') +
scale_y_discrete(
breaks = species_data$order,
labels = rev(c('squid: *Sepia pharaonis*', 'octopus: *Octopus sinensis*', 'octopus: *Octopus bimaculoides*', 'squid: *Doryteuthis opalescens*', 'squid: *Idiosepius paradoxus*', 'squid: *Doryteuthis pealeii*', 'aphid: *Acyrthosiphon pisum*', 'starfish: *Acanthaster planci*', 'scallop: *Mizuhopecten yessoensis*', 'scallop: *Pecten maximus*', 'freshwater snail: *Pomacea canaliculata*', 'mussel: *Mytilus galloprovincialis*', 'limpet: *Lottia gigantea*', 'butterfly: *Danaus plexippus plexippus*', 'sea slug: *Aplysia californica*', 'cuttlefish: *Sepia officinalis*', 'squid: *Nototodarus sloanii*', 'sea anemone: *Exaiptasia diaphana*', 'squid: *Euprymna scolopes*', 'oyster: *Cassostrea virginica*', 'others'))) +
scale_x_continuous(breaks = c(0, 2500, 5000, 7500, 10000, 12500, 15000, 17500),
labels = c(0, 2500, 5000, 7500, 10000, 12500, 15000, 17500),
expand = expansion(mult = c(0.005, 0.02))) +
labs(x = "Number of BLAST top hits", y = "") +
coord_cartesian(xlim = c(0, 17800)) +
theme_classic() +
theme(axis.text.y = element_markdown(size = 8),
axis.title = element_text(size = 10),
axis.text.x = element_text(size = 8)) )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Plot all GO terms for CNS and eyes on same graph
# CNS and eyes share same y axis on dotplot - want to order by NES of CNS, and show overlaps of GO terms found in CNS and eyes
# Below method so GO terms in both tissues are not twice on the y axis
CNS <- gsea_CNS_ashr_annot_exp0_summary %>%
mutate(tissue = "CNS")
eyes <- gsea_eyes_ashr_annot_exp0_summary %>%
mutate(tissue = "eyes")
CNS_NES <- CNS %>%
dplyr::select(ID, Description, NES) %>%
rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_NES <- eyes %>%
dplyr::select(ID, Description, NES) %>%
rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_NES <- CNS_NES %>%
full_join(eyes_NES) %>%
ungroup() %>%
arrange(NES_CNS, NES_eyes) %>%
dplyr::mutate(order_NES = as.factor(row_number())) %>%
pivot_longer(c(NES_CNS, NES_eyes), names_to = "tissue", values_to = "NES") %>%
mutate(tissue = gsub("NES_", "", tissue)) %>%
arrange(NES)
CNS_p.adjust <- CNS %>%
dplyr::select(ID, Description, p.adjust) %>%
rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_p.adjust <- eyes %>%
dplyr::select(ID, Description, p.adjust) %>%
rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_p.adjust <- CNS_p.adjust %>%
full_join(eyes_p.adjust) %>%
ungroup() %>%
arrange(p.adjust_CNS) %>%
dplyr::mutate(order_p.adjust = as.factor(row_number())) %>%
pivot_longer(c(p.adjust_CNS, p.adjust_eyes), names_to = "tissue", values_to = "p.adjust") %>%
dplyr::mutate(tissue = gsub("p.adjust_", "", tissue)) %>%
arrange(Description)
CNS_Count <- CNS %>%
dplyr::select(ID, Description, Count) %>%
rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_Count <- eyes %>%
dplyr::select(ID, Description, Count) %>%
rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_Count <- CNS_Count %>%
full_join(eyes_Count) %>%
ungroup() %>%
arrange(Count_CNS) %>%
dplyr::mutate(order_Count = as.factor(row_number())) %>%
pivot_longer(c(Count_CNS, Count_eyes), names_to = "tissue", values_to = "Count") %>%
dplyr::mutate(tissue = gsub("Count_", "", tissue))
df_all <- df_NES %>%
full_join(df_p.adjust) %>%
full_join(df_Count) %>%
dplyr::mutate(Description_wrapped = str_wrap(Description, width = 50))
figure_GSEA <- (ggplot(df_all, aes(x = NES, y = order_NES, colour = p.adjust, size = Count)) +
geom_point() +
scale_y_discrete(
breaks = df_all$order_NES,
labels = df_all$Description_wrapped) +
ggplot2::facet_grid(. ~ tissue) +
scale_colour_gradient(low = '#4040FF', high = '#E3E3E3',
name = "padj",
guide = guide_colourbar(barwidth = 0.5,
barheight = 8,
ticks = FALSE)) +
scale_size(name = "Count") +
geom_vline(xintercept = 0) +
labs(x = "Normalised enrichment score", y = "") +
theme(panel.border = element_rect(fill = NA, size = 0.5),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(colour = 'grey', size = 0.2),
panel.background = element_blank(),
strip.background = element_rect(fill = NA, size = 0.5, colour = 'black'),
axis.title = element_text(size = 8),
axis.text.x = element_text(size = 6),
axis.text.y = element_text(size = 4),
legend.title = element_text(size = 8),
legend.text = element_text(size = 6),
legend.key = element_blank(),
strip.text = element_text(size = 8, face = 'bold')) )
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
figure_GSEA
## Warning: Removed 96 rows containing missing values (`geom_point()`).
R version 4.0.4 (2021-02-15) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 10 x64 (build 18363)
Matrix products: default
locale: [1] LC_COLLATE=English_Australia.1252
LC_CTYPE=English_Australia.1252
[3] LC_MONETARY=English_Australia.1252 LC_NUMERIC=C
[5] LC_TIME=English_Australia.1252
attached base packages: [1] grid parallel stats4 stats graphics
grDevices utils datasets methods
[10] base
other attached packages: [1] ggupset_0.3.0 ggnewscale_0.4.5
enrichplot_1.10.2
[4] clusterProfiler_3.18.1 pathview_1.30.1 DOSE_3.16.0
[7] ggvenn_0.1.8 splitstackshape_1.4.8 knitr_1.31
[10] ashr_2.2-47 apeglm_1.12.0 factoextra_1.0.7
[13] ggrepel_0.9.1 DEGreport_1.26.0 pheatmap_1.0.12
[16] patchwork_1.1.1 RColorBrewer_1.1-2 viridis_0.5.1
[19] viridisLite_0.3.0 forcats_0.5.1 stringr_1.4.0
[22] dplyr_1.0.5 purrr_0.3.4 readr_1.4.0
[25] tidyr_1.1.3 tibble_3.1.0 ggplot2_3.3.3
[28] tidyverse_1.3.0 DESeq2_1.30.1 SummarizedExperiment_1.20.0 [31]
Biobase_2.50.0 MatrixGenerics_1.2.1 matrixStats_0.58.0
[34] GenomicRanges_1.42.0 GenomeInfoDb_1.26.4 IRanges_2.24.1
[37] S4Vectors_0.28.1 BiocGenerics_0.36.0
loaded via a namespace (and not attached): [1] utf8_1.2.1
tidyselect_1.1.0 RSQLite_2.2.4
[4] AnnotationDbi_1.52.0 BiocParallel_1.24.1 scatterpie_0.1.5
[7] munsell_0.5.0 withr_2.4.1 colorspace_2.0-1
[10] GOSemSim_2.16.1 rstudioapi_0.13 KEGGgraph_1.50.0
[13] lasso2_1.2-21.1 bbmle_1.0.23.1 GenomeInfoDbData_1.2.4
[16] mixsqp_0.3-43 mnormt_2.0.2 polyclip_1.10-0
[19] farver_2.1.0 bit64_4.0.5 downloader_0.4
[22] coda_0.19-4 vctrs_0.3.6 generics_0.1.0
[25] xfun_0.22 R6_2.5.0 graphlayouts_0.7.1
[28] clue_0.3-58 invgamma_1.1 locfit_1.5-9.4
[31] bitops_1.0-6 cachem_1.0.4 reshape_0.8.8
[34] fgsea_1.16.0 DelayedArray_0.16.2 assertthat_0.2.1
[37] scales_1.1.1 ggraph_2.0.5 gtable_0.3.0
[40] Cairo_1.5-12.2 tidygraph_1.2.0 rlang_0.4.11
[43] genefilter_1.72.1 GlobalOptions_0.1.2 splines_4.0.4
[46] broom_0.7.5 BiocManager_1.30.10 yaml_2.2.1
[49] reshape2_1.4.4 modelr_0.1.8 backports_1.2.1
[52] qvalue_2.22.0 tools_4.0.4 psych_2.0.12
[55] logging_0.10-108 ellipsis_0.3.1 ggdendro_0.1.22
[58] Rcpp_1.0.6 plyr_1.8.6 zlibbioc_1.36.0
[61] RCurl_1.98-1.2 GetoptLong_1.0.5 cowplot_1.1.1
[64] haven_2.3.1 cluster_2.1.0 fs_1.5.0
[67] magrittr_2.0.1 data.table_1.14.0 DO.db_2.9
[70] circlize_0.4.12 reprex_1.0.0 truncnorm_1.0-8
[73] tmvnsim_1.0-2 mvtnorm_1.1-1 SQUAREM_2021.1
[76] hms_1.0.0 evaluate_0.14 xtable_1.8-4
[79] XML_3.99-0.5 emdbook_1.3.12 readxl_1.3.1
[82] gridExtra_2.3 shape_1.4.5 compiler_4.0.4
[85] bdsmatrix_1.3-4 shadowtext_0.0.7 crayon_1.4.1
[88] htmltools_0.5.1.1 geneplotter_1.68.0 lubridate_1.7.10
[91] DBI_1.1.1 tweenr_1.0.1 dbplyr_2.1.0
[94] ComplexHeatmap_2.6.2 MASS_7.3-53 Matrix_1.3-2
[97] cli_2.3.1 igraph_1.2.6 pkgconfig_2.0.3
[100] rvcheck_0.1.8 numDeriv_2016.8-1.1 xml2_1.3.2
[103] annotate_1.68.0 XVector_0.30.0 rvest_1.0.0
[106] digest_0.6.27 ConsensusClusterPlus_1.54.0 graph_1.68.0
[109] Biostrings_2.58.0 rmarkdown_2.7 cellranger_1.1.0
[112] fastmatch_1.1-0 edgeR_3.32.1 rjson_0.2.20
[115] lifecycle_1.0.0 nlme_3.1-152 jsonlite_1.7.2
[118] limma_3.46.0 fansi_0.4.2 pillar_1.5.1
[121] lattice_0.20-41 Nozzle.R1_1.1-1 KEGGREST_1.30.1
[124] fastmap_1.1.0 httr_1.4.2 survival_3.2-7
[127] GO.db_3.12.1 glue_1.4.2 png_0.1-7
[130] bit_4.0.4 Rgraphviz_2.34.0 ggforce_0.3.2
[133] stringi_1.5.3 blob_1.2.1 org.Hs.eg.db_3.12.0
[136] memoise_2.0.0 irlba_2.3.3