Preamble

Packages

library(DESeq2)       #for differential expression analysis
library(tidyverse)
library(ggnewscale)   #add extra scale to ggplot
library(ggtext)       #text in ggplot
library(viridis)      #to change colour scale
library(RColorBrewer) #to manage colours
library(patchwork)    #for combining graphs
library(stringr)      #for manipulating strings
library(pheatmap)     #for heatmaps
library(DEGreport)    #to create report of DE results
library(ggplot2)      #for graphs
library(ggrepel)      #to label individual points on ggplot graph
library(factoextra)   #for PCA
library(apeglm)       #for shrinking log2fold change estimates
library(ashr)         #for shrinking log2fold change estimates
library(knitr)        #for kable
library(splitstackshape)  #for spliting concatenated values into separate values
library(ggvenn)       #for venn diagrams
library(patchwork)    #for combining graphs
library(Cairo)        #for cairo graphics to change font, save ggplots as pdfs etc
library(extrafont)  #for extra text font families

#for funtional enrichment analysis
library(DOSE)
library(pathview)
library(clusterProfiler)
library(enrichplot)
library(ggnewscale)
library(ggupset)

Set up data

# count data
count_data <- read.delim("data/corset-counts.txt", row.names = 1)

# meta data
meta_data1 <- read_csv("data/metadata.csv", trim_ws = TRUE)

meta_data1 <- meta_data1 %>%
  filter(Squid == "PS506" | Squid == "PS508" | Squid == "PS509" | Squid == "PS510" | Squid == "PS513" | Squid == "PS514" | Squid == "PS518" | Squid == "PS522" | Squid == "PS527" | Squid == "PS528" | Squid == "PS529" | Squid == "PS534" | Squid == "PS539" | Squid == "PS547" | Squid == "PS551" | Squid == "PS557" | Squid == "PS561" | Squid == "PS584" | Squid == "PS626" | Squid == "PS636") %>%
  mutate(CO2 = factor(CO2, levels=c('Ambient', 'Elevated')))

meta_data2 <- meta_data1

meta_data1$Squid = paste0(meta_data1$Squid, '_CNS')
meta_data2$Squid = paste0(meta_data2$Squid, '_eyes')

meta_data1 <- meta_data1 %>%
  mutate(tissue = "CNS")

meta_data2 <- meta_data2 %>%
  mutate(tissue = "eyes")

meta_data <- meta_data1 %>%
  full_join(meta_data2)

meta_data <- meta_data %>%
  unite("sampletype", CO2, tissue, remove = FALSE)

#Make Squid in meta_data same order as in count_data (alphabetical) and make into row names (rather than a normal column)
meta_data <- meta_data %>%
  dplyr::arrange(Squid) %>%
  column_to_rownames(var = "Squid")

save(count_data, file = 'data/count_data.RData')
save(meta_data, file = 'data/meta_data.RData')
#Subset count_data for just CNS
count_data_CNS <- count_data %>%
  select(contains("CNS"))
save(count_data_CNS, file = 'data/count_data_CNS.RData')

#Subset meta_data for just CNS
meta_data_CNS <- meta_data %>%
  filter(tissue == "CNS")
save(meta_data_CNS, file = 'data/meta_data_CNS.RData')

# Check they match
all(colnames(count_data_CNS) %in% rownames(meta_data_CNS))
all(colnames(count_data_CNS) == rownames(meta_data_CNS))
#Subset count_data for eyes
count_data_eyes <- count_data %>%
  select(contains("eyes"))
save(count_data_eyes, file = 'data/count_data_eyes.RData')

#Subset meta_data for eyes
meta_data_eyes <- meta_data %>%
  filter(tissue == "eyes")
save(meta_data_eyes, file = 'data/meta_data_eyes.RData')

#Check they match
all(colnames(count_data_eyes) %in% rownames(meta_data_eyes))
all(colnames(count_data_eyes) == rownames(meta_data_eyes))

Explore Raw Data

Total raw counts per Individual

counts_CNS <- count_data_CNS %>%
  summarise(across(everything(), ~ sum(., na.rm = TRUE))) %>% 
  pivot_longer(everything(), names_to = 'squid', values_to = 'tot_raw_counts')

ggplot(counts_CNS, aes(x = squid, y = tot_raw_counts)) +
  geom_point() +
  geom_text_repel(aes(label = squid), max.overlaps = 2)
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

counts_eyes <- count_data_eyes %>%
  summarise(across(everything(), ~ sum(., na.rm = TRUE))) %>% 
  pivot_longer(everything(), names_to = 'squid', values_to = 'tot_raw_counts')

ggplot(counts_eyes, aes(x = squid, y = tot_raw_counts)) +
  geom_point() +
  geom_text_repel(aes(label = squid), max.overlaps = 2)

CNS: PS548 has more counts than others - see if normalisation has any effect on this

Eyes: All samples have similar amount of counts

Raw counts across genes within an individual

count_data_CNS_long <- count_data_CNS %>%
  tibble::rownames_to_column(var = 'gene') %>%
  pivot_longer(cols = -gene, names_to = 'squid', values_to = 'count')

ggplot(count_data_CNS_long, aes(x = count)) +
  geom_histogram(stat = "bin", bins = 200) +
  facet_grid(squid ~ .) +
  labs(x = "raw count", y = "No. of genes with this raw count")

count_data_eyes_long <- count_data_eyes %>%
  tibble::rownames_to_column(var = 'gene') %>%
  pivot_longer(cols = -gene, names_to = 'squid', values_to = 'count')

ggplot(count_data_eyes_long, aes(x = count)) +
  geom_histogram(stat = "bin", bins = 200) +
  facet_grid(squid ~ .) +
  labs(x = "raw count", y = "No. of genes with this raw count")

Looks normal, shows the common features of RNAseq data:

  • a low number of counts associated with a large proportion of genes
  • a long right tail due to the lack of any upper limit for expression
  • large dynamic range

Mean vs Variance

#Do for one group at a time
Ambient_CNS_counts <- count_data %>%
  select("PS506_CNS", "PS508_CNS", "PS513_CNS", "PS514_CNS", "PS518_CNS", "PS529_CNS", "PS534_CNS", "PS551_CNS", "PS557_CNS", "PS561_CNS")

mean_counts <- apply(Ambient_CNS_counts, 1, mean)        #The second argument '1' of 'apply' function indicates the function being applied to rows. Use '2' if applied to columns 
variance_counts <- apply(Ambient_CNS_counts, 1, var)
df <- data.frame(mean_counts, variance_counts)

Ambient_CNS_mean_var <- ggplot(df) +
  geom_point(aes(x=mean_counts, y=variance_counts)) + 
  scale_y_log10() +
  scale_x_log10() +
  geom_abline(intercept = 0, slope = 1, color="red") +
  ggtitle("Ambient_CNS mean vs variance")
Ambient_CNS_mean_var

Elevated_CNS_counts <- count_data %>%
  select("PS509_CNS", "PS510_CNS", "PS522_CNS", "PS527_CNS", "PS528_CNS", "PS539_CNS", "PS547_CNS", "PS584_CNS", "PS626_CNS", "PS636_CNS")

mean_counts <- apply(Elevated_CNS_counts, 1, mean)
variance_counts <- apply(Ambient_CNS_counts, 1, var)
df <- data.frame(mean_counts, variance_counts)

Elevated_CNS_mean_var <- ggplot(df) +
  geom_point(aes(x=mean_counts, y=variance_counts)) + 
  scale_y_log10() +
  scale_x_log10() +
  geom_abline(intercept = 0, slope = 1, color="red") +
  ggtitle("Elevated_CNS mean vs variance")
Elevated_CNS_mean_var

Ambient_eyes_counts <- count_data %>%
  select("PS506_eyes", "PS508_eyes", "PS513_eyes", "PS514_eyes", "PS518_eyes", "PS529_eyes", "PS534_eyes", "PS551_eyes", "PS557_eyes", "PS561_eyes")

mean_counts <- apply(Ambient_eyes_counts, 1, mean)
variance_counts <- apply(Ambient_eyes_counts, 1, var)
df <- data.frame(mean_counts, variance_counts)

Ambient_eyes_mean_var <- ggplot(df) +
  geom_point(aes(x=mean_counts, y=variance_counts)) + 
  scale_y_log10() +
  scale_x_log10() +
  geom_abline(intercept = 0, slope = 1, color="red") +
  ggtitle("Ambient_eyes mean vs variance")
Ambient_eyes_mean_var

Elevated_eyes_counts <- count_data %>%
  select("PS509_eyes", "PS510_eyes", "PS522_eyes", "PS527_eyes", "PS528_eyes", "PS539_eyes", "PS547_eyes", "PS584_eyes", "PS626_eyes", "PS636_eyes")

mean_counts <- apply(Elevated_eyes_counts, 1, mean)
variance_counts <- apply(Ambient_eyes_counts, 1, var)
df <- data.frame(mean_counts, variance_counts)

Elevated_eyes_mean_var <- ggplot(df) +
  geom_point(aes(x=mean_counts, y=variance_counts)) + 
  scale_y_log10() +
  scale_x_log10() +
  geom_abline(intercept = 0, slope = 1, color="red") +
  ggtitle("Elevated_eyes mean vs variance")
Elevated_eyes_mean_var

  • Each black point is a gene
  • Red line represents if mean = variance
  • The variance across replicates tends to be greater than the mean (red line), especially for genes with large mean expression levels.
  • More scatter/heteroscedascity in lowly expressed genes in the elevated groups.
  • Indicates data doesn’t fit poisson distribution and better to use the negative binomial

Run DE Analysis (Walds Test)

Create DESeq dataset and run analysis

Entire dataset

#Check sample names match in count_data and meta_data
all(colnames(count_data) %in% rownames(meta_data))
all(colnames(count_data) == rownames(meta_data))

#Create DESeq2 Dataset object
dds <- DESeqDataSetFromMatrix(count_data, colData = meta_data, design = ~ sampletype)

# Run analysis - runs through the steps of estimating size factors, estimates dispersion and runs Wald statistics on NB model
# These steps can all be done separately = estimateSizeFactors, estimateDispersions, nbinomWaldTest
dds <- DESeq(dds)

save(dds, file = 'results/dds.RData')

Dataset subset for CNS

#Create DESeqDataset and run analysis on data subset for CNS
dds_CNS <- DESeqDataSetFromMatrix(count_data_CNS, colData = meta_data_CNS, design = ~ sampletype)

dds_CNS <- DESeq(dds_CNS)

save(dds_CNS, file = 'results/dds_CNS.RData')

Dataset subset for eyes

#Create DESeqDataset and run analysis on data subset for eyes
dds_eyes <- DESeqDataSetFromMatrix(count_data_eyes, colData = meta_data_eyes, design = ~ sampletype)

dds_eyes <- DESeq(dds_eyes)

save(dds_eyes, file = 'results/dds_eyes.RData')

Save raw count data

raw_counts <- counts(dds, normalized=FALSE)
raw_counts_tb <- raw_counts %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
save(raw_counts, file = 'data/raw_counts.RData')
save(raw_counts_tb, file = 'data/raw_counts_tb.RData')

raw_counts_CNS <- counts(dds_CNS, normalized=FALSE)
raw_counts_CNS_tb <- raw_counts_CNS %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

save(raw_counts_CNS, file = 'data/raw_counts_CNS.RData')
save(raw_counts_CNS_tb, file = 'data/raw_counts_CNS_tb.RData')

raw_counts_eyes <- counts(dds_eyes, normalized=FALSE)
raw_counts_eyes_tb <- raw_counts_eyes %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
save(raw_counts_eyes, file = 'data/raw_counts_eyes.RData')
save(raw_counts_eyes_tb, file = 'data/raw_counts_eyes_tb.RData')

Save normalised count data

  • Normalised counts used for data visualisation
#All
normalised_counts <- counts(dds, normalized=TRUE)
normalised_counts_tb <- normalised_counts %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

save(normalised_counts, file = 'data/normalised_counts.RData')
save(normalised_counts_tb, file = 'data/normalised_counts_tb.RData')

#CNS
normalised_counts_CNS <- counts(dds_CNS, normalized=TRUE)
normalised_counts_CNS_tb <- normalised_counts_CNS %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

save(normalised_counts_CNS, file = 'data/normalised_counts_CNS.RData')
save(normalised_counts_CNS_tb, file = 'data/normalised_counts_CNS_tb.RData')


#Eyes
normalised_counts_eyes <- counts(dds_eyes, normalized=TRUE)
normalised_counts_eyes_tb <- normalised_counts_eyes %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

save(normalised_counts_eyes, file = 'data/normalised_counts_eyes.RData')
save(normalised_counts_eyes_tb, file = 'data/normalised_counts_eyes_tb.RData')

Save rlog transformed data

  • rlog transforming normalised counts improves the distances/clustering for the PCA and heirarchical clustering visualization methods by moderating the variance across the mean
  • blind=TRUE for transformation unbiased to sample condition information
rld <- rlog(dds, blind=TRUE)
rld_CNS <- rlog(dds_CNS, blind=TRUE)
rld_eyes <- rlog(dds_eyes, blind=TRUE)

save(rld_CNS, file = 'data/rld_CNS.RData')
save(rld, file = 'data/rld.RData')
save(rld_eyes, file = 'data/rld_eyes.RData')

Extract DESeq results (unshrunken)

# Extract results from DESeq analysis
res_CNS_unshrunken <- results(dds_CNS, name = "sampletype_Elevated_CNS_vs_Ambient_CNS", alpha = 0.05)
save(res_CNS_unshrunken, file = "results/res_CNS_unshrunken.RData")

res_CNS_unshrunken_tb <- res_CNS_unshrunken %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
save(res_CNS_unshrunken_tb, file = "results/res_CNS_unshrunken_tb.RData")
# Extract results from DESeq analysis
res_eyes_unshrunken <- results(dds_eyes, name = "sampletype_Elevated_eyes_vs_Ambient_eyes", alpha = 0.05)
save(res_eyes_unshrunken, file = "results/res_eyes_unshrunken.RData")

res_eyes_unshrunken_tb <- res_eyes_unshrunken %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
save(res_eyes_unshrunken_tb, file = "results/res_eyes_unshrunken_tb.RData")

Extract DESeq results (shrunken with ashr)

  • Shrinkage of log2 fold change (LFC) estimates generates more accurate log2 fold change estimates
  • Shrinks LFC towards zero when the info for a gene is low (e.g. low counts, high dispersion values)
res_CNS_shrunken_ashr <- lfcShrink(dds_CNS, coef="sampletype_Elevated_CNS_vs_Ambient_CNS", res=res_CNS_unshrunken, type = "ashr")
save(res_CNS_shrunken_ashr, file = "results/res_CNS_shrunken_ashr.RData")

res_CNS_shrunken_ashr_tb <- res_CNS_shrunken_ashr %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
save(res_CNS_shrunken_ashr_tb, file = "results/res_CNS_shrunken_ashr_tb.RData")
res_eyes_shrunken_ashr <- lfcShrink(dds_eyes, coef="sampletype_Elevated_eyes_vs_Ambient_eyes", res=res_eyes_unshrunken, type = "ashr")
save(res_eyes_shrunken_ashr, file = "results/res_eyes_shrunken_ashr.RData")

res_eyes_shrunken_ashr_tb <- res_eyes_shrunken_ashr %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
save(res_eyes_shrunken_ashr_tb, file = "results/res_eyes_shrunken_ashr_tb.RData")

Sample level Quality Control

PCA Plots

All data from both tissues

rld$sampletype <- factor(rld$sampletype, levels = c("Ambient_CNS", "Elevated_CNS", "Ambient_eyes", "Elevated_eyes"))

All <- plotPCA(rld, intgroup="sampletype", ntop = 50000) +
  scale_colour_discrete(name = "", labels = c("CNS - ambient CO2", "CNS - elevated CO2", "eyes - ambient CO2", "eyes - elevated CO2")) +
  labs(title = "All genes") +
  geom_text_repel(aes(label = name)) +
  coord_cartesian(ylim = c(-80, 80)) +
  theme_classic()
All

top500 <- plotPCA(rld, intgroup="sampletype", ntop = 500) +
  scale_colour_discrete(name = "", labels = c("CNS - ambient CO2", "CNS - elevated CO2", "eyes - ambient CO2", "eyes - elevated CO2")) +
  geom_text_repel(aes(label = name)) +
  labs(title = "Top 500 genes") +
  coord_cartesian(ylim = c(-20, 20)) +
  theme_classic()
top500

  • Main variation is between tissue types (explained by PC1)
  • Eyes has more variation than the CNS - so use DE analysis done on data subset for CNS and eyes separately

CNS

rld_CNS$sampletype <- factor(rld_CNS$sampletype, levels = c("Ambient_CNS", "Elevated_CNS"))

All_CNS <- plotPCA(rld_CNS, intgroup="sampletype", ntop = 50000) +
  scale_colour_discrete(name = "", labels = c("CNS - ambient CO2", "CNS - elevated CO2")) +
  stat_ellipse() +
  labs(title = "All genes") +
  geom_text_repel(aes(label = name)) +
  coord_cartesian(ylim = c(-80, 80)) +
  theme_classic()
All_CNS

top500_CNS <- plotPCA(rld_CNS, intgroup="sampletype", ntop = 500) +
  scale_colour_discrete(name = "", labels = c("CNS - ambient CO2", "CNS - elevated CO2")) +
  stat_ellipse() +
  geom_text_repel(aes(label = name)) +
  labs(title = "Top 500 genes") +
  coord_cartesian(ylim = c(-20, 20)) +
  theme_classic()
top500_CNS

Eyes

rld_eyes$sampletype <- factor(rld_eyes$sampletype, levels = c("Ambient_eyes", "Elevated_eyes"))

All_eyes <- plotPCA(rld_eyes, intgroup="sampletype", ntop = 50000) +
  scale_colour_discrete(name = "", labels = c("eyes - ambient CO2", "eyes - elevated CO2")) +
  stat_ellipse() +
  labs(title = "All genes") +
  geom_text_repel(aes(label = name)) +
  coord_cartesian(ylim = c(-80, 80)) +
  theme_classic()
All_eyes

top500_eyes <- plotPCA(rld_eyes, intgroup="sampletype", ntop = 500) +
  scale_colour_discrete(name = "", labels = c("eyes - ambient CO2", "eyes - elevated CO2")) +
  stat_ellipse() +
  geom_text_repel(aes(label = name)) +
  labs(title = "Top 500 genes") +
  coord_cartesian(ylim = c(-20, 20)) +
  theme_classic()
top500_eyes

Heatmaps

Heatmaps using all genes

All data from both tissues

# Extract the rlog matrix from the object
rld_mat <- assay(rld)
# Compute pairwise correlation values
rld_cor <- cor(rld_mat)    ## cor() is a base R function

#meta_data for heatmap annotation
meta_data_heatmap <- meta_data %>%
  select("sampletype")

#plot heatmap
pheatmap(rld_cor, annotation_row = meta_data_heatmap, annotation_col = meta_data_heatmap)

  • Cluster by tissue type but not by CO2

CNS

rld_mat_CNS <- assay(rld_CNS)
rld_cor_CNS <- cor(rld_mat_CNS) 

meta_data_heatmap_CNS <- meta_data %>%
  filter(tissue == "CNS") %>%
  select("sampletype")

pheatmap(rld_cor_CNS, annotation_row = meta_data_heatmap_CNS, annotation_col = meta_data_heatmap_CNS)

Eyes

rld_mat_eyes <- assay(rld_eyes)
rld_cor_eyes <- cor(rld_mat_eyes) 

meta_data_heatmap_eyes <- meta_data %>%
  filter(tissue == "eyes") %>%
  select("sampletype")

pheatmap(rld_cor_eyes, annotation_row = meta_data_heatmap_eyes, annotation_col = meta_data_heatmap_eyes)

Explore DESeq2 Data

Total normalised Counts per individual

CNS

# Total number of normalized counts per sample
tot_normalised_counts_df <- colSums(counts(dds_CNS, normalized=T)) %>%
  data.frame() %>%
  rownames_to_column(var="squid") %>% 
  as_tibble() %>%
  rename(tot_normalised_counts = '.')

plot_tot_normalised_counts <- ggplot(tot_normalised_counts_df, aes(x = squid, y = tot_normalised_counts)) +
  geom_point() +
  scale_x_discrete(guide = guide_axis(angle = 90))

plot_tot_normalised_counts

  • Normalisation has dealt with PS548_CNS having more counts than other samples
  • PS636_CNS has slightly more normalised counts than other samples, but within the same order of magnitude

Eyes

# Total number of normalized counts per sample
tot_normalised_counts_df <- colSums(counts(dds_eyes, normalized=T)) %>%
  data.frame() %>%
  rownames_to_column(var="squid") %>% 
  as_tibble() %>%
  rename(tot_normalised_counts = '.')

plot_tot_normalised_counts <- ggplot(tot_normalised_counts_df, aes(x = squid, y = tot_normalised_counts)) +
  geom_point() +
  scale_x_discrete(guide = guide_axis(angle = 90))

plot_tot_normalised_counts

  • All eye samples have similar amounts of normalised counts

BaseMean level of expression across genes

  • BaseMean = the average of the normalised count values, dividing by size factors, taken over all samples

CNS

ggplot(res_CNS_unshrunken_tb, aes(x = baseMean)) +
  geom_histogram(bins = 10000) +
  scale_y_sqrt() +
  labs(y = 'No. of genes') +
  coord_cartesian(xlim = c(0, 1000000))

Eyes

ggplot(res_eyes_unshrunken_tb, aes(x = baseMean)) +
  geom_histogram(bins = 10000) +
  scale_y_sqrt() +
  labs(y = 'No. of genes') +
  coord_cartesian(xlim = c(0, 1000000))

  • Looks normal and common of RNAseq data - a low number of counts associated with a large proportion of genes and a long right tail due to the lack of any upper limit for expression.

BaseMean level of expression across samples

CNS

res_CNS_shrunken_ashr_tb_basemean <- res_CNS_shrunken_ashr_tb %>%
  mutate(genelabels = "") %>% 
  arrange(-baseMean)

# Populate the genelabels column with contents of the gene  column for the first 10 rows, i.e. the top 10 high;y expressed genes
res_CNS_shrunken_ashr_tb_basemean$genelabels[1:10] <- as.character(res_CNS_shrunken_ashr_tb_basemean$gene[1:10])


plot_basemean_LFC_ashr_CNS <- ggplot(res_CNS_shrunken_ashr_tb_basemean, aes(x = log2FoldChange, y = baseMean)) +
  geom_point() +
  geom_text_repel(aes(label = genelabels))

plot_basemean_LFC_ashr_CNS
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Eyes

res_eyes_shrunken_ashr_tb_basemean <- res_eyes_shrunken_ashr_tb %>%
  mutate(genelabels = "") %>% 
  arrange(-baseMean)

# Populate the genelabels column with contents of the gene  column for the first 10 rows, i.e. the top 10 high;y expressed genes
res_eyes_shrunken_ashr_tb_basemean$genelabels[1:10] <- as.character(res_eyes_shrunken_ashr_tb_basemean$gene[1:10])


plot_basemean_LFC_ashr_eyes <- ggplot(res_eyes_shrunken_ashr_tb_basemean, aes(x = log2FoldChange, y = baseMean)) +
  geom_point() +
  geom_text_repel(aes(label = genelabels))

plot_basemean_LFC_ashr_eyes

  • As expected, only a few genes have high basemean expression

Dispersion Estimates

  • To ensure the data is a good fit for the DESeq2 model

CNS

plotDispEsts(dds_CNS)

Eyes

plotDispEsts(dds_eyes)

  • Looks as expected for both CNS and eyes - the data is scattered around the curve, with the dispersion decreasing with increasing mean expression levels.
  • Thus the data is a good fit for the DESeq2 model

DE Results

Annotate DESeq2 results

annotated_transcriptome <- read.csv('data/annotated_transcriptome_ORFs.csv')
save(annotated_transcriptome, file = 'data/annotated_transcriptome.RData')
transcript2gene_map <- read.delim('data/corset-clusters.txt')
save(transcript2gene_map, file = 'data/transcript2gene_map.RData')
annotated_res_CNS_shrunken_ashr_tb <- res_CNS_shrunken_ashr_tb %>%
  full_join(transcript2gene_map) %>%
  full_join(annotated_transcriptome, by = c("transcript" = "Sequence.Name"))
save(annotated_res_CNS_shrunken_ashr_tb, file = 'results/annotated_res_CNS_shrunken_ashr_tb.RData')
annotated_res_eyes_shrunken_ashr_tb <- res_eyes_shrunken_ashr_tb %>%
  full_join(transcript2gene_map) %>%
  full_join(annotated_transcriptome, by = c("transcript" = "Sequence.Name"))
save(annotated_res_eyes_shrunken_ashr_tb, file = 'results/annotated_res_eyes_shrunken_ashr_tb.RData')

Significantly DE genes

CNS

# Summarize results
summary(res_CNS_shrunken_ashr, alpha = 0.05)
## 
## out of 27401 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 14, 0.051%
## LFC < 0 (down)     : 11, 0.04%
## outliers [1]       : 0, 0%
## low counts [2]     : 6, 0.022%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Extract sig DE genes
sig_DE_CNS_ashr <- annotated_res_CNS_shrunken_ashr_tb %>%
  filter(padj < 0.05)
save(sig_DE_CNS_ashr, file = 'results/sig_DE_CNS_ashr.RData')

Eyes

# Summarize results
summary(res_eyes_shrunken_ashr, alpha = 0.05)
## 
## out of 27400 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 6, 0.022%
## LFC < 0 (down)     : 5, 0.018%
## outliers [1]       : 0, 0%
## low counts [2]     : 1071, 3.9%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Extract sig DE genes
sig_DE_eyes_ashr <- annotated_res_eyes_shrunken_ashr_tb %>%
  filter(padj < 0.05)
save(sig_DE_eyes_ashr, file = 'results/sig_DE_eyes_ashr.RData')

MA Plots

  • Allows us to evaluate the magnitude of fold changes and how they are distributed relative to mean expression.
  • Generally, we would expect to see significant genes across the full range of expression levels.

CNS

# Unshrunken results
plotMA(res_CNS_unshrunken, alpha = 0.05, main = "MA plot unshrunken LFC in CNS", ylim=c(-2,2))

# Shrunken results
plotMA(res_CNS_shrunken_ashr, alpha = 0.05, main = "MA plot ashr shrunken LFC in CNS", ylim=c(-2,2))

Eyes

# Unshrunken results
plotMA(res_eyes_unshrunken, alpha = 0.05, main = "MA plot unshrunken LFC in eyes", ylim=c(-2,2))

# Shrunken results
plotMA(res_eyes_shrunken_ashr, alpha = 0.05, main = "MA plot ashr shrunken LFC in eyes", ylim=c(-2,2))

  • Significantly DE genes (green) are spread across LFC values
  • Ashr shrinkage has shrunk a lot of the LFCs

Volcano Plots

CNS

# Obtain logical vector where TRUE values denote padj values < 0.05
# Create an empty column to indicate which genes to label
# Sort by padj values 
res_CNS_shrunken_ashr_tb_volcano <- res_CNS_shrunken_ashr_tb %>%
  mutate(threshold_DE = padj < 0.05) %>% 
  mutate(genelabels = "") %>% 
  arrange(padj)

# Populate the genelabels column with contents of the gene  column for the first 25 rows, i.e. all sig DE genes 
res_CNS_shrunken_ashr_tb_volcano$genelabels[1:25] <- as.character(res_CNS_shrunken_ashr_tb_volcano$gene[1:25])

# Volcano plot
volcano_CNS_ashr <- ggplot(res_CNS_shrunken_ashr_tb_volcano, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(aes(colour = threshold_DE)) +
  geom_text_repel(aes(label = genelabels)) +
  ggtitle("Volcano plot CNS (ashr shrinkage method): p < 0.05") +
  xlab("log2 fold change") + 
  ylab("-log10 adjusted p-value") +
  theme(legend.position = "none",
        plot.title = element_text(size = rel(1.5), hjust = 0.5),
        axis.title = element_text(size = rel(1.25))) 
volcano_CNS_ashr

  • Removed 19 rows containing missing values (geom_point)
  • 6 have low counts (shown in summary table)
  • 6 have NA for padj only in res_table = row is filtered by automatic independent filtering, for having a low mean normalized count.
  • 13 have NA in pvalue and padj = row contains a sample with an extreme count outlier, detected by Cook’s distance.

Eyes

# Obtain logical vector where TRUE values denote padj values < 0.05
# Create an empty column to indicate which genes to label
# Sort by padj values 
res_eyes_shrunken_ashr_tb_volcano <- res_eyes_shrunken_ashr_tb %>%
  mutate(threshold_DE = padj < 0.05) %>% 
  mutate(genelabels = "") %>% 
  arrange(padj)

# Populate the genelabels column with contents of the gene  column for the first 25 rows, i.e. all sig DE genes 
res_eyes_shrunken_ashr_tb_volcano$genelabels[1:25] <- as.character(res_eyes_shrunken_ashr_tb_volcano$gene[1:25])

# Volcano plot
volcano_eyes_ashr <- ggplot(res_eyes_shrunken_ashr_tb_volcano, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(aes(colour = threshold_DE)) +
  geom_text_repel(aes(label = genelabels)) +
  ggtitle("Volcano plot eyes (ashr shrinkage method): p < 0.05") +
  xlab("log2 fold change") + 
  ylab("-log10 adjusted p-value") +
  theme(legend.position = "none",
        plot.title = element_text(size = rel(1.5), hjust = 0.5),
        axis.title = element_text(size = rel(1.25))) 
volcano_eyes_ashr
## Warning: Removed 1083 rows containing missing values (`geom_point()`).
## Warning: Removed 1083 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

  • Removed 1083 rows containing missing values (geom_point)
  • 1071 = low counts (shown in summary table)
  • 12 = have NA in pvalue and padj = row contains a sample with an extreme count outlier, detected by Cook’s distance.

Heatmaps

CNS

Significantly DE genes

meta_data_CNS_heatmap <- meta_data_CNS %>%
  select("sampletype")

# Use rlog transformed data
rld_mat_CNS <- assay(rld_CNS)

rld_mat_CNS_tb <- rld_mat_CNS %>%
  as.data.frame() %>%
  rownames_to_column(var = "gene") %>% 
  as_tibble()

#Extract only the sig DE genes
rld_mat_CNS_heatmap <- rld_mat_CNS_tb %>% 
  filter(gene %in% sig_DE_CNS_ashr$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_CNS_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = T,
         annotation_col = meta_data_CNS_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of significantly DE genes in CNS")

Top 100 genes (sorted by adjusted p-value)

#Extract only the top 100 genes (by padj)
top100 <- res_CNS_shrunken_ashr_tb %>%
  arrange(padj) %>%
  slice_head(n = 100)

rld_mat_CNS_heatmap <- rld_mat_CNS_tb %>% 
  filter(gene %in% top100$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_CNS_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = F,
         annotation_col = meta_data_CNS_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of top 100 genes in CNS")

Eyes

Significantly DE genes

meta_data_eyes_heatmap <- meta_data_eyes %>%
  select("sampletype")

# Use rlog transformed data
rld_mat_eyes <- assay(rld_eyes)

rld_mat_eyes_tb <- rld_mat_eyes %>%
  as.data.frame() %>%
  rownames_to_column(var = "gene") %>% 
  as_tibble()

#Extract only the sig DE genes
rld_mat_eyes_heatmap <- rld_mat_eyes_tb %>% 
  filter(gene %in% sig_DE_eyes_ashr$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_eyes_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = T,
         annotation_col = meta_data_eyes_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of significantly DE genes in eyes")

Top 100 genes (sorted by adjusted p-value)

#Extract only the top 100 genes (by padj)
top100 <- res_eyes_shrunken_ashr_tb %>%
  arrange(padj) %>%
  slice_head(n = 100)

rld_mat_eyes_heatmap <- rld_mat_eyes_tb %>% 
  filter(gene %in% top100$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_eyes_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = F,
         annotation_col = meta_data_eyes_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of top 100 genes in eyes")

Plot gene expression

CNS

sig_DE_CNS_normalised_counts <- normalised_counts_CNS_tb %>%
  filter(gene %in% sig_DE_CNS_ashr$gene)

sig_DE_CNS_normalised_counts <- sig_DE_CNS_normalised_counts %>%
  gather(colnames(normalised_counts_CNS_tb)[2:21], key = "Squid", value = "normalised_counts")

# Combine with meta_data_CNS so can include treatment info in graph
sig_DE_CNS_normalised_counts_metadata <- (meta_data_CNS %>% rownames_to_column(var = "Squid")) %>%
  inner_join(sig_DE_CNS_normalised_counts)

# Dot plot
jitter <- position_jitter(width = 0.15, height = 0)
ggplot(sig_DE_CNS_normalised_counts_metadata, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_point(position = jitter) +
  scale_y_log10() +
  xlab("Genes") +
  ylab("log10 Normalized Counts") +
  ggtitle("All significantly DE genes in CNS") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))

# Box plot
ggplot(sig_DE_CNS_normalised_counts_metadata, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_boxplot() +
  scale_y_log10() +
  xlab("Genes") +
  ylab("log10 Normalized Counts") +
  ggtitle("All significantly DE genes in CNS") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))

  • All genes identified as significantly differentially expressed show clear DE

Eyes

sig_DE_eyes_normalised_counts <- normalised_counts_eyes_tb %>%
  filter(gene %in% sig_DE_eyes_ashr$gene)

sig_DE_eyes_normalised_counts <- sig_DE_eyes_normalised_counts %>%
  gather(colnames(normalised_counts_eyes_tb)[2:21], key = "Squid", value = "normalised_counts")

# Combine with meta_data_eyes so can include treatment info in graph
sig_DE_eyes_normalised_counts_metadata <- (meta_data_eyes %>% rownames_to_column(var = "Squid")) %>%
  inner_join(sig_DE_eyes_normalised_counts)

# Dot plot
jitter <- position_jitter(width = 0.15, height = 0)
# jitter log10 y
ggplot(sig_DE_eyes_normalised_counts_metadata, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_point(position = jitter) +
  #geom_text_repel(aes(label = Squid), size = 2, position = jitter) +
  scale_y_log10() +
  xlab("Genes") +
  ylab("log10 Normalized Counts") +
  ggtitle("All significantly DE genes in eyes") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))
## Warning: Transformation introduced infinite values in continuous y-axis

# Box plot
ggplot(sig_DE_eyes_normalised_counts_metadata, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_boxplot() +
  scale_y_log10() +
  xlab("Genes") +
  ylab("log10 Normalized Counts") +
  ggtitle("All significantly DE genes in eyes") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 21 rows containing non-finite values (`stat_boxplot()`).

  • Most genes identified as significantly differentially expressed show clear DE
  • However, a few genes don’t show clear DE so look at these closer
# Look closer at genes that don't appear to be DE

# look at cluster-5283.10996
df <- sig_DE_eyes_normalised_counts_metadata %>%
  filter(gene == "Cluster-5283.10996")

ggplot(df, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_point(position = jitter) +
  geom_text_repel(aes(label = Squid), size = 2, position = jitter) +
  scale_y_log10() +
  xlab("Genes") +
  ylab("Normalized Counts") +
  ggtitle("Cluster-5283.10996") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5)) 
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# look at cluster-5744.0
df <- sig_DE_eyes_normalised_counts_metadata %>%
  filter(gene == "Cluster-5744.0")

ggplot(df, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_point(position = jitter) +
  geom_text_repel(aes(label = Squid), size = 2, position = jitter) +
  xlab("Genes") +
  ylab("Normalized Counts") +
  ggtitle("Cluster-5744.0 look at low counts") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5)) +
  coord_cartesian(ylim = c(0, 10))

# look at cluster-73.0
df <- sig_DE_eyes_normalised_counts_metadata %>%
  filter(gene == "Cluster-73.0")

ggplot(df, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_point(position = jitter) +
  geom_text_repel(aes(label = Squid), size = 2, position = jitter) +
  xlab("Genes") +
  ylab("Normalized Counts") +
  ggtitle("Cluster-73.0 count not on log10 \nscale to look at low counts") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))
## Warning: ggrepel: 18 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

  • Cluster-5283.10996 does not show clear DE

  • There is a lot of variation and overlap between CO2 levels

  • LFC is 1.5 but this is not clear by looking at the graph

  • Even outliers are matched - 2 ambient squid (PS514_eyes and PS518_eyes) with high expression and 2 elevated squid (PS527_eyes and PS636_eyes) with high expression

  • Cluster-5744.0 also does not show clear DE

  • There is a lot of variation and overlap between CO2 levels

  • LFC is -0.7 but this is not clear by looking at the graph

  • Genes with low expression show overlap between the two CO2 levels

  • Even outliers are matched - 2 ambient squid (PS514_eyes and PS518_eyes) with high expression and 2 elevated squid (PS527_eyes and PS636_eyes) with high expression

  • Cluster-73.0 DE looks driven by 2 outliers

  • All samples have 0 expression but PS534_eyes and PS506_eyes (both ambient) have very high expression.

  • LFC is -24: surprised shrinkage didn’t deal with this

Therefore visual inspection shows clear DE of all DE genes in the CNS, but in the eyes, 3 of the genes looks problematic - remove these

Final DE results

# 3 genes don't look DE when look at graphs - remove these
sig_DE_eyes_ashr_final <- annotated_res_eyes_shrunken_ashr_tb %>%
  filter(padj < 0.05) %>%
  filter(!gene %in% c("Cluster-5283.10996", "Cluster-5744.0", "Cluster-73.0"))
save(sig_DE_eyes_ashr_final, file = 'results/sig_DE_eyes_ashr_final.RData')

Venn diagram

DE_CNS <- sig_DE_CNS_ashr %>%
  distinct(gene)

DE_eyes <- sig_DE_eyes_ashr_final %>%
  distinct(gene)

x <- c(list(DE_CNS$gene), list(DE_eyes$gene))
names(x) <- c('CNS\n', 'Eyes\n')
ggvenn(x,
       show_percentage = FALSE,
       text_size = 0,
       fill_color = c('#238A8DFF', '#55C667FF'),
       fill_alpha = 0.8) +
  annotate('text', label = '12 up\n11 down', x = -1, y = 0) +
  annotate('text', label = '3 up\n3 down', x = 1, y = 0) +
  annotate('text', label = '2 up', x = 0, y = 0) +
  annotate('text', label = '25 DE genes', x = -1, y = 1.2) +
  annotate('text', label = '8 DE genes', x = 1, y = 1.2)

Heatmap

CNS

meta_data_CNS_heatmap <- meta_data_CNS %>%
  select("sampletype")

# Use rlog transformed data
rld_mat_CNS <- assay(rld_CNS)

rld_mat_CNS_tb <- rld_mat_CNS %>%
  as.data.frame() %>%
  rownames_to_column(var = "gene") %>% 
  as_tibble()

## ONLY SIG DE GENES ##
rld_mat_CNS_heatmap <- rld_mat_CNS_tb %>% 
  filter(gene %in% sig_DE_CNS_ashr$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_CNS_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = T,
         annotation_col = meta_data_CNS_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of significantly DE genes in CNS")

# change colour scheme with color = viridis(100)

## TOP 100 GENES

top100 <- res_CNS_shrunken_ashr_tb %>%
  arrange(padj) %>%
  slice_head(n = 100)

rld_mat_CNS_heatmap <- rld_mat_CNS_tb %>% 
  filter(gene %in% top100$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_CNS_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = F,
         annotation_col = meta_data_CNS_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of top 100 genes in CNS")

Eyes

meta_data_eyes_heatmap <- meta_data_eyes %>%
  select("sampletype")

# Use rlog transformed data
rld_mat_eyes <- assay(rld_eyes)

rld_mat_eyes_tb <- rld_mat_eyes %>%
  as.data.frame() %>%
  rownames_to_column(var = "gene") %>% 
  as_tibble()

## ONLY SIG DE GENES ##
rld_mat_eyes_heatmap <- rld_mat_eyes_tb %>% 
  filter(gene %in% sig_DE_eyes_ashr_final$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_eyes_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = T,
         annotation_col = meta_data_eyes_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of significantly DE genes in eyes")

## TOP 100 GENES ##

top100 <- res_eyes_shrunken_ashr_tb %>%
  arrange(padj) %>%
  slice_head(n = 100)

rld_mat_eyes_heatmap <- rld_mat_eyes_tb %>% 
  filter(gene %in% top100$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_eyes_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = F,
         annotation_col = meta_data_eyes_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of top 100 genes in eyes")

Table of significantly DE genes

CNS

sig_DE_CNS_ashr %>%
  select(gene, Sequence.Description, padj, log2FoldChange, lfcSE) %>%
  arrange(-log2FoldChange) %>% 
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  kable(caption = 'Significantly DE genes in CNS')
Significantly DE genes in CNS
gene Sequence.Description padj log2FoldChange lfcSE
Cluster-5283.1476 protein FAM204A isoform X1 0.0044818 1.3394024 0.3855364
Cluster-7057.2 prickle-like protein 3 isoform X4 0.0340854 0.9631720 0.3995900
Cluster-5283.10634 mitogen-activated protein kinase kinase kinase kinase 5-like isoform X4 0.0385885 0.9215585 0.5599760
Cluster-5283.10634 mitogen-activated protein kinase kinase kinase kinase 3-like isoform X3 0.0385885 0.9215585 0.5599760
Cluster-7096.4 synaptobrevin homolog YKT6 0.0385885 0.8850380 0.4291597
Cluster-5283.4688 zinc finger protein 271-like 0.0385885 0.8581222 0.4381018
Cluster-5283.11841 chromatin accessibility complex protein 1 0.0457181 0.7541101 0.4949590
Cluster-6236.3 nucleoside diphosphate kinase 6-like 0.0385885 0.6277556 0.6588387
Cluster-5283.11516 Hypothetical predicted protein 0.0385885 0.5997121 0.2692648
Cluster-7175.0 —NA— 0.0385885 0.5955914 0.2308095
Cluster-5283.537 putative N-acetylated-alpha-linked acidic dipeptidase isoform X4 0.0385885 0.5832244 0.2579140
Cluster-7445.2 hypothetical protein OCBIM_22018451mg 0.0457181 0.5257109 0.6274280
Cluster-5590.1 transferrin-like protein 0.0385885 0.4800865 0.6305837
Cluster-5046.4 PREDICTED: uncharacterized protein LOC106875384 isoform X2 0.0385885 0.3107304 0.5556629
Cluster-2174.0 E3 ubiquitin-protein ligase synoviolin B-like 0.0385885 0.2741695 0.1471531
Cluster-5283.4947 —NA— 0.0340833 -0.3743971 0.1330268
Cluster-5283.6450 cytochrome b561 domain-containing protein 2 0.0385885 -0.4632560 0.6431749
Cluster-1257.0 Microtubule-associated proteins 1A/1B light chain 3B 0.0445542 -0.5404408 0.2593373
Cluster-1257.0 microtubule-associated proteins 1A/1B light chain 3A 0.0445542 -0.5404408 0.2593373
Cluster-5283.3831 proton myo-inositol cotransporter 0.0026007 -0.5563098 0.1130174
Cluster-7238.6 —NA— 0.0385885 -0.7648719 0.3262858
Cluster-5283.750 unnamed protein product 0.0340833 -0.7888475 0.6961008
Cluster-5283.3189 gamma-secretase subunit PEN-2 0.0385885 -0.7952439 0.4251421
Cluster-5290.5 cadherin EGF LAG seven-pass G-type receptor 3 0.0385885 -0.8019407 0.5279131
Cluster-5283.883 methylcrotonoyl-CoA carboxylase beta chain, mitochondrial 0.0340833 -0.8088622 0.2704246
Cluster-5283.2490 —NA— 0.0385885 -0.8291220 0.5330048
Cluster-3464.0 Hypothetical predicted protein 0.0340833 -0.9442758 0.3183627

Eyes

sig_DE_eyes_ashr_final %>%
  select(gene, Sequence.Description, padj, log2FoldChange) %>%
  arrange(-log2FoldChange) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  kable(caption = 'Significantly DE genes in eyes - final set (3 genes  removed due to not showing clear DE upon visual inspection of the normalised counts across CO2 levels')
Significantly DE genes in eyes - final set (3 genes removed due to not showing clear DE upon visual inspection of the normalised counts across CO2 levels
gene Sequence.Description padj log2FoldChange
Cluster-5283.5132 probable flavin-containing monoamine oxidase A 0.0048141 2.2916297
Cluster-1772.0 synaptic vesicular amine transporter 0.0116265 2.0443468
Cluster-5283.1476 protein FAM204A isoform X1 0.0116265 1.4711580
Cluster-7096.4 synaptobrevin homolog YKT6 0.0000059 1.2616376
Cluster-7187.3 cystathionine beta-synthase 0.0383992 0.8890046
Cluster-5283.10628 transcription initiation factor IIE subunit beta 0.0224947 -0.9911208
Cluster-5283.11079 —NA— 0.0183964 -1.4212532
Cluster-1000.0 N-acetylmuramoyl-L-alanine amidase 0.0016092 -2.8983839

Significantly DE in both tissues

CNS <- sig_DE_CNS_ashr %>%
  select(gene, transcript, Sequence.Description, padj, log2FoldChange) %>%
  rename(padj_CNS = padj,
         log2FoldChange_CNS = log2FoldChange)

eyes <- sig_DE_eyes_ashr_final %>%
  select(gene, transcript, Sequence.Description, padj, log2FoldChange) %>%
  rename(padj_eyes = padj,
         log2FoldChange_eyes = log2FoldChange)

CNS_eyes <- CNS %>%
  filter(gene %in% eyes$gene)

eyes_CNS <- eyes %>%
  filter(gene %in% CNS$gene)

sig_DE_CNS_and_eyes_ashr <- CNS_eyes %>%
  full_join(eyes_CNS)

save(sig_DE_CNS_and_eyes_ashr, file = 'results/sig_DE_CNS_and_eyes_ashr.RData')
sig_DE_CNS_and_eyes_ashr %>%
  select(gene, Sequence.Description, padj_CNS, log2FoldChange_CNS, padj_eyes, log2FoldChange_eyes) %>%
  kable(caption = 'Genes significantly DE in both CNS and eyes')
Genes significantly DE in both CNS and eyes
gene Sequence.Description padj_CNS log2FoldChange_CNS padj_eyes log2FoldChange_eyes
Cluster-5283.1476 protein FAM204A isoform X1 0.0044818 1.339402 0.0116265 1.471158
Cluster-7096.4 synaptobrevin homolog YKT6 0.0385885 0.885038 0.0000059 1.261638

Functional Enrichment Analysis

Prepare data

Prep transcriptome

# Remove letters form gene names as struggles with this
annotated_transcriptome_rename <- annotated_transcriptome %>%
  rename(transcript = Sequence.Name) %>% 
  mutate(transcript = gsub('transcript/', '', transcript))
save(annotated_transcriptome_rename, file = 'data/annotated_transcriptome_rename.RData')

corset_clusters_2_transcripts_renamed <- corset_clusters_2_transcripts %>%
  rename(transcript = TXNAME,
         gene = GENEID) %>% 
  mutate(transcript = gsub('transcript/', '', transcript),
         gene = gsub('Cluster-', '', gene))
save(corset_clusters_2_transcripts_renamed, file = 'data/corset_clusters_2_transcripts_renamed.RData')

Prep CNS data

# Remove letters form gene names as struggles with this
res_CNS_shrunken_ashr_tb_rename <- res_CNS_shrunken_ashr_tb %>%
  mutate(gene = gsub('Cluster-', '', gene))

# Create gene list - assumes that 1st column is ID and 2nd column is fold change
res_CNS <- res_CNS_shrunken_ashr_tb_rename %>%
  select(gene, log2FoldChange) %>% 
  as.data.frame()
gene_LFC_CNS_ashr <- res_CNS[,2]
names(gene_LFC_CNS_ashr) <- as.character(res_CNS[,1])
gene_LFC_CNS_ashr <- sort(gene_LFC_CNS_ashr, decreasing = TRUE)
head(gene_LFC_CNS_ashr)
save(gene_LFC_CNS_ashr, file = 'results/gene_LFC_CNS_ashr.RData')

Prep eyes data

# Remove letters form gene names as struggles with this
res_eyes_shrunken_ashr_tb_rename <- res_eyes_shrunken_ashr_tb %>%
  mutate(gene = gsub('Cluster-', '', gene))

# Create gene list - assumes that 1st column is ID and 2nd column is fold change
res_eyes <- res_eyes_shrunken_ashr_tb_rename %>%
  select(gene, log2FoldChange) %>% 
  as.data.frame()
gene_LFC_eyes_ashr <- res_eyes[,2]
names(gene_LFC_eyes_ashr) <- as.character(res_eyes[,1])
gene_LFC_eyes_ashr <- sort(gene_LFC_eyes_ashr, decreasing = TRUE)
head(gene_LFC_eyes_ashr)
save(gene_LFC_eyes_ashr, file = 'results/gene_LFC_eyes_ashr.RData')

GO terms data for CNS

# ANNOTATION GO TERMS
GO_annot_data_CNS <- res_CNS_shrunken_ashr_tb_rename %>%
  full_join(corset_clusters_2_transcripts_renamed) %>%
  inner_join(annotated_transcriptome_rename) %>%
  select(Annotation.GO.ID, gene, Annotation.GO.Term)

#Change df format so one GO ID per row so a transcript with multiple GO IDs takes multiple rows
GO_annot_data_CNS_split <- cSplit(GO_annot_data_CNS, c('Annotation.GO.ID', 'Annotation.GO.Term'), ';', direction = "long") 

# TERM2GENE is a data.frame with first column of (GO) term ID and second column of corresponding mapped gene 
term2gene_annot_CNS <- GO_annot_data_CNS_split %>%
  select(Annotation.GO.ID, gene) %>%
  as.data.frame()
save(term2gene_annot_CNS, file = 'data/term2gene_annot_CNS.RData')

# TERM2NAME is a data.frame with first column of (GO) term ID and second column of corresponding term name
term2name_annot_CNS <- GO_annot_data_CNS_split %>%
  select(Annotation.GO.ID, Annotation.GO.Term) %>% 
  as.data.frame()
save(term2name_annot_CNS, file = 'data/term2name_annot_CNS.RData')

GO terms data for eyes

# ANNOTATION GO TERMS
GO_annot_data_eyes <- res_eyes_shrunken_ashr_tb_rename %>%
  full_join(corset_clusters_2_transcripts_renamed) %>%
  inner_join(annotated_transcriptome_rename) %>%
  select(Annotation.GO.ID, gene, Annotation.GO.Term)

#Change df format so one GO ID per row so a transcript with multiple GO IDs takes multiple rows
GO_annot_data_eyes_split <- cSplit(GO_annot_data_eyes, c('Annotation.GO.ID', 'Annotation.GO.Term'), ';', direction = "long")

# TERM2GENE is a data.frame with first column of (GO) term ID and second column of corresponding mapped gene 
term2gene_annot_eyes <- GO_annot_data_eyes_split %>%
  select(Annotation.GO.ID, gene) %>%
  as.data.frame()
save(term2gene_annot_eyes, file = 'data/term2gene_annot_eyes.RData')

# TERM2NAME is a data.frame with first column of (GO) term ID and second column of corresponding term name
term2name_annot_eyes <- GO_annot_data_eyes_split %>%
  select(Annotation.GO.ID, Annotation.GO.Term) %>% 
  as.data.frame()
save(term2name_annot_eyes, file = 'data/term2name_annot_eyes.RData')

Gene set enrichment analysis

  • Gene set enrichment analysis using the log2FoldChange values of all genes from DESeq2 and the annotated GO terms as the gene sets’
  • GSEA uses all genes and aggregates the per gene statistics across genes within a gene set, therefore making it possible to detect situations where all genes in a predefined set change in a small but coordinated way. Useful since it is likely that many relevant phenotypic differences are manifested by small but consistent changes in a set of genes.
  • Using GSEA in clusterProfiler which utilises fgsea, minimum gene set size = 15, max gene set size = 500, unweighted, benjamini-hochberg method for multiple comparisons, adjusted p value cut off of 0.05

CNS

# Will get different results for the GSEA because the permutations performed use random reordering - use set.seed for reproducibility
set.seed(83276453)
gsea_CNS_ashr_annot_exp0 <- GSEA(
  geneList = gene_LFC_CNS_ashr,
  exponent = 0,             # weight of each step (0 = same as classic in omicsbox)
  minGSSize = 15,           # change from default 10 to 15 (what used in omicsbox and suggested in Subramanian 2005)
  maxGSSize = 500,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  TERM2GENE = term2gene_annot_CNS,
  TERM2NAME = term2name_annot_CNS,
  verbose = TRUE,
  seed = TRUE,
  by = "fgsea"
)
# Warning messages: 1: In preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, There are ties in the preranked stats (0.01% of the list). The order of those tied genes will be arbitrary, which may produce unexpected results.
save(gsea_CNS_ashr_annot_exp0, file = 'results/gsea_CNS_ashr_annot_exp0.RData')

# extract dataframe from gsea object
gsea_CNS_ashr_annot_exp0_summary <- fortify(
  gsea_CNS_ashr_annot_exp0,
  showCategory = 300,
  by = "Count"
)
save(gsea_CNS_ashr_annot_exp0_summary, file = 'results/gsea_CNS_ashr_annot_exp0_summary.RData')

Eyes

# Will get different results for the GSEA because the permutations performed use random reordering - use set.seed fpr reproducibility
set.seed(83276453)
gsea_eyes_ashr_annot_exp0 <- GSEA(
  geneList = gene_LFC_eyes_ashr,
  exponent = 0,             # weight of each step (0 = same as classic in omicsbox)
  minGSSize = 15,           # change from default 10 to 15 (what used in omicsbox and suggested in Subramanian 2005)
  maxGSSize = 500,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  TERM2GENE = term2gene_annot_eyes,
  TERM2NAME = term2name_annot_eyes,
  verbose = TRUE,
  seed = TRUE,
  by = "fgsea"
)
# Warning messages: 1: In preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, There are ties in the preranked stats (0.01% of the list). The order of those tied genes will be arbitrary, which may produce unexpected results.
save(gsea_eyes_ashr_annot_exp0, file = 'results/gsea_eyes_ashr_annot_exp0.RData')

# extract dataframe from gsea object
gsea_eyes_ashr_annot_exp0_summary <- fortify(
  gsea_eyes_ashr_annot_exp0,
  showCategory = 300,
  by = "Count"
)
save(gsea_eyes_ashr_annot_exp0_summary, file = 'results/gsea_eyes_ashr_annot_exp0_summary.RData')

Dot plot

# Plot all GO terms for CNS and eyes on same graph

# CNS and eyes share same y axis on dotplot - want to order by NES of CNS, and show overlaps of GO terms found in CNS and eyes
# Below method so GO terms in both tissues are not twice on the y axis
CNS <- gsea_CNS_ashr_annot_exp0_summary %>%
  mutate(tissue = "CNS")
eyes <- gsea_eyes_ashr_annot_exp0_summary %>%
  mutate(tissue = "eyes")

CNS_NES <- CNS %>%
  select(ID, Description, NES) %>%
  rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_NES <- eyes %>%
  select(ID, Description, NES) %>% 
  rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_NES <- CNS_NES %>%
  full_join(eyes_NES) %>%
  ungroup() %>% 
  arrange(NES_CNS, NES_eyes) %>%
  mutate(order_NES = as.factor(row_number())) %>%
  pivot_longer(c(NES_CNS, NES_eyes), names_to = "tissue", values_to = "NES") %>%
  mutate(tissue = gsub("NES_", "", tissue)) %>%
  arrange(NES)


CNS_p.adjust <- CNS %>%
  select(ID, Description, p.adjust) %>%
  rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_p.adjust <- eyes %>%
  select(ID, Description, p.adjust) %>% 
  rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_p.adjust <- CNS_p.adjust %>%
  full_join(eyes_p.adjust) %>% 
  ungroup() %>% 
  arrange(p.adjust_CNS) %>%
  mutate(order_p.adjust = as.factor(row_number())) %>% 
  pivot_longer(c(p.adjust_CNS, p.adjust_eyes), names_to = "tissue", values_to = "p.adjust") %>%
  mutate(tissue = gsub("p.adjust_", "", tissue)) %>%
  arrange(Description)

CNS_Count <- CNS %>%
  select(ID, Description, Count) %>%
  rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_Count <- eyes %>%
  select(ID, Description, Count) %>% 
  rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_Count <- CNS_Count %>%
  full_join(eyes_Count) %>% 
  ungroup() %>% 
  arrange(Count_CNS) %>%
  mutate(order_Count = as.factor(row_number())) %>% 
  pivot_longer(c(Count_CNS, Count_eyes), names_to = "tissue", values_to = "Count") %>%
  mutate(tissue = gsub("Count_", "", tissue))

df_all <- df_NES %>%
  full_join(df_p.adjust) %>%
  full_join(df_Count) %>%
  mutate(Description_wrapped = str_wrap(Description, width = 30))

ggplot(df_all, aes(x = NES, y = order_NES, colour = p.adjust, size = Count)) +
  geom_point() +
  scale_y_discrete(
    breaks = df_all$order_NES,
    labels = df_all$Description_wrapped) +
  ggplot2::facet_grid(. ~ tissue) +
  scale_colour_viridis_c(direction = -1) +
  geom_vline(xintercept = 0) +
  xlab("Normalised enrichment score") +
  ylab("") +
  labs(colour = "adjusted p value", size = "No. core enrichment\ngenes in GO term", title = "GSEA - significant GO terms")
## Warning: Removed 96 rows containing missing values (`geom_point()`).

  • CNS = 99 enriched GO terms in CNS
    • 75 upregulated
    • 24 downregulated
  • Eyes = 17 enriched GO terms in eyes
    • 12 upregulated
    • 5 downregulated
  • 10 GO terms enriched in both tissues

Core enrichment genes

Pull out all core enrichment genes from an interesting cluster of GO terms that were found significant with gene set enrichment analysis

Prep data CNS

gsea_CNS_transcripts <- cSplit(gsea_CNS_ashr_annot_exp0_summary, 'core_enrichment', '/', direction = "long", type.convert = FALSE) %>%
  rename_with(~paste0(., "_gsea")) %>% 
  rename(gene = core_enrichment_gsea)

annotated_res_CNS_shrunken_ashr_tb <- annotated_res_CNS_shrunken_ashr_tb %>% 
  select(gene, transcript, Sequence.Description, log2FoldChange, lfcSE, padj) %>%
  rename(log2FoldChange_ashr = log2FoldChange,
         lfcSE_ashr = lfcSE,
         padj_ashr = padj) %>%
  mutate(gene = gsub('Cluster-', '', gene),
         transcript = gsub("transcript/", "", transcript))

gsea_exp0_transcripts_GO_CNS <- gsea_CNS_transcripts %>%
  full_join(annotated_res_CNS_shrunken_ashr_tb)
save(gsea_exp0_transcripts_GO_CNS, file = 'results/gsea_exp0_transcripts_GO_CNS.RData')

Prep data eyes

gsea_eyes_transcripts <- cSplit(gsea_eyes_ashr_annot_exp0_summary, 'core_enrichment', '/', direction = "long", type.convert = FALSE) %>%
  rename_with(~paste0(., "_gsea")) %>% 
  rename(gene = core_enrichment_gsea)

annotated_res_eyes_shrunken_ashr_tb <- annotated_res_eyes_shrunken_ashr_tb %>% 
  select(gene, transcript, Sequence.Description, log2FoldChange, lfcSE, padj) %>%
  rename(log2FoldChange_ashr = log2FoldChange,
         lfcSE_ashr = lfcSE,
         padj_ashr = padj) %>%
  mutate(gene = gsub('Cluster-', '', gene),
         transcript = gsub("transcript/", "", transcript))

gsea_exp0_transcripts_GO_eyes <- gsea_eyes_transcripts %>%
  full_join(annotated_res_eyes_shrunken_ashr_tb)
save(gsea_exp0_transcripts_GO_eyes, file = 'results/gsea_exp0_transcripts_GO_eyes.RData')

Ion channel cluster of GO terms

  • All core enrichment genes from the cluster of GO terms related to ion channels, that were found significant with gene set enrichment analysis
# Pull out genes in the ion channel cluster of GO terms
ion_channel_cluster_CNS <- gsea_exp0_transcripts_GO_CNS %>%
  select(gene, transcript, Sequence.Description, Description_gsea) %>%
  filter(Description_gsea %in% c('ionotropic glutamate receptor signaling pathway', 'ionotropic glutamate receptor activity', 'postsynaptic membrane', 'excitatory postsynaptic potential', 'ion transmembrane transport', 'acetylcholine-gated cation-selective channel activity', 'transmembrane signaling receptor activity', 'extracellular ligand-gated ion channel activity', 'synapse')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

ion_channel_cluster_eyes <- gsea_exp0_transcripts_GO_eyes %>%
  select(gene, transcript, Sequence.Description, Description_gsea) %>%
  filter(Description_gsea %in% c('ion transmembrane transport', 'ion channel activity')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

ion_channel_cluster_CNS_eyes <- ion_channel_cluster_CNS %>%
  filter(gene %in% ion_channel_cluster_eyes$gene)

save(ion_channel_cluster_CNS, ion_channel_cluster_eyes, ion_channel_cluster_CNS_eyes, file = 'results/ion_channel_cluster.RData')

CNS

  • All core enrichment genes from the cluster of GO terms related to ion channels that were found significant with gene set enrichment analysis in the CNS.
  • This cluster of GO terms includes the following GO terms:
    • ionotropic glutamate receptor signaling pathway
    • ionotropic glutamate receptor activity
    • postsynaptic membrane
    • excitatory postsynaptic potential
    • ion transmembrane transport
    • acetylcholine-gated cation-selective channel activity
    • transmembrane signaling receptor activity
    • extracellular ligand-gated ion channel activity
    • synapse
ion_channel_cluster_CNS %>%
  select(gene, Sequence.Description) %>%
  kable(caption = 'Core enrichment genes within significantly enriched GO terms of the ion channel cluster in the CNS')
Core enrichment genes within significantly enriched GO terms of the ion channel cluster in the CNS
gene Sequence.Description
5283.10289 —NA—
5283.7784 —NA—
5283.10291 —NA—
3318.0 —NA—
5283.9460 —NA—
5283.4105 —NA—
5283.9217 —NA—
5661.1 acetylcholine receptor subunit alpha-like 1 isoform X1
5661.0 acetylcholine receptor subunit alpha-like isoform X2
5283.4487 acetylcholine receptor subunit beta-like 1 isoform X2
5283.4488 acetylcholine receptor subunit beta-like 1 isoform X2
5283.4489 acetylcholine receptor subunit beta-like 1 isoform X2
5283.7592 cyclic nucleotide-gated cation channel alpha-3
5283.7592 cyclic nucleotide-gated cation channel alpha-3-like
6610.0 cyclic nucleotide-gated cation channel beta-3-like isoform X1
6610.2 cyclic nucleotide-gated cation channel beta-3-like isoform X2
6610.0 cyclic nucleotide-gated cation channel beta-3-like isoform X7
5283.9458 cyclic nucleotide-gated olfactory channel-like
5283.9460 cyclic nucleotide-gated olfactory channel-like
7253.3 Cys-loop ligand-gated ion channel
7253.4 cys-loop ligand-gated ion channel-like isoform X2
7253.1 cys-loop ligand-gated ion channel-like isoform X2
7253.0 cys-loop ligand-gated ion channel-like isoform X2
7253.2 cys-loop ligand-gated ion channel-like isoform X2
7253.3 cys-loop ligand-gated ion channel-like isoform X2
7253.6 cys-loop ligand-gated ion channel-like isoform X3
7253.5 cys-loop ligand-gated ion channel-like isoform X3
5517.0 dnaJ homolog subfamily C member 5
5517.0 dnaJ homolog subfamily C member 5-like isoform X1
5517.0 dnaJ homolog subfamily C member 5-like isoform X2
5517.0 dnaJ homolog subfamily C member 5 isoform X1
5057.2 excitatory amino acid transporter 1-like
5057.1 excitatory amino acid transporter 1-like
5283.8663 F-box/SPRY domain-containing protein 1
5283.8664 F-box/SPRY domain-containing protein 1
5283.8665 F-box/SPRY domain-containing protein 1-like
5283.9038 gamma-aminobutyric acid receptor alpha-like
3318.0 gamma-aminobutyric acid receptor alpha-like
5283.9039 gamma-aminobutyric acid receptor alpha-like
3050.4 glutamate-gated chloride channel isoform X5
5283.11461 glutamate receptor
602.0 glutamate receptor-like
5283.11461 glutamate receptor-like
5283.4759 glutamate receptor-like
5283.8285 glutamate receptor 1-like
5283.4759 glutamate receptor 1-like isoform X1
5283.4759 glutamate receptor 2-like
5283.4759 glutamate receptor 2 isoform X2
5283.4759 glutamate receptor 3 isoform X1
5283.8285 glutamate receptor 4
5283.3373 glutamate receptor ionotropic, kainate 2-like
5283.3372 glutamate receptor ionotropic, kainate 2-like
5283.4614 glutamate receptor ionotropic, kainate 2-like
5283.4105 glutamate receptor ionotropic, kainate 2-like
5283.5622 glutamate receptor ionotropic, kainate 2-like
7335.0 glutamate receptor ionotropic, kainate 2-like isoform X1
7335.3 glutamate receptor ionotropic, kainate 2-like isoform X1
2901.0 glutamate receptor ionotropic, kainate 2-like isoform X1
6740.0 glutamate receptor ionotropic, kainate 2-like isoform X1
7335.4 glutamate receptor ionotropic, kainate 2-like isoform X1
7335.5 glutamate receptor ionotropic, kainate 2-like isoform X1
6740.1 glutamate receptor ionotropic, kainate 2-like isoform X1
6740.2 glutamate receptor ionotropic, kainate 2-like isoform X1
7335.6 glutamate receptor ionotropic, kainate 2-like isoform X1
7335.7 glutamate receptor ionotropic, kainate 2-like isoform X1
7335.1 glutamate receptor ionotropic, kainate 2-like isoform X1
6740.4 glutamate receptor ionotropic, kainate 2-like isoform X2
6740.3 glutamate receptor ionotropic, kainate 2 isoform X1
1742.0 glutamate receptor ionotropic, NMDA 2B-like
4067.1 glutamate receptor ionotropic, NMDA 3A-like
3215.0 glutamate receptor ionotropic, NMDA 3A-like
3215.1 glutamate receptor ionotropic, NMDA 3A-like
4067.0 glutamate receptor ionotropic, NMDA 3A-like
3215.2 glutamate receptor ionotropic, NMDA 3A-like isoform X2
7335.2 glutamate receptor subunit protein GluR6
3050.1 glycine receptor subunit alpha-3 isoform X2
3050.0 glycine receptor subunit alphaZ1-like
2499.0 glycine receptor subunit alphaZ1-like
5283.8285 GRIA4 protein
5283.2592 HCN channel protein
5283.2593 HCN channel protein
5283.9211 Hypothetical predicted protein
5283.9212 Hypothetical predicted protein
7253.3 Hypothetical predicted protein
7211.2 Hypothetical predicted protein
5283.1960 inositol 1,4,5-trisphosphate receptor type 1-like isoform X2
7360.0 intersectin-1-like isoform X1
7360.0 intersectin-1-like isoform X2
7360.0 intersectin-1-like isoform X3
7360.0 intersectin-1 isoform X10
7360.0 intersectin-1 isoform X13
7360.0 intersectin-1 isoform X8
7211.2 liprin-alpha-1-like isoform X2
7211.2 liprin-alpha-1-like isoform X3
7211.2 liprin-alpha-1-like isoform X6
7211.2 liprin-alpha-1-like isoform X9
7211.2 liprin-alpha-1 isoform X3
7211.2 liprin-alpha-1 isoform X4
7211.3 liprin-alpha-1 isoform X8
7211.2 liprin-alpha-1 isoform X8
6774.1 muscarinic acetylcholine receptor M1-like
6774.2 muscarinic acetylcholine receptor M2-like protein
5283.11306 Na+/K+ ATPase beta subunit
5283.5376 neuronal acetylcholine receptor subunit alpha-10-like
5283.5377 neuronal acetylcholine receptor subunit alpha-10-like
6426.1 neuronal acetylcholine receptor subunit alpha-10-like
6426.4 neuronal acetylcholine receptor subunit alpha-10-like
6426.7 neuronal acetylcholine receptor subunit alpha-10-like
6426.8 neuronal acetylcholine receptor subunit alpha-10-like
5283.7784 neuronal acetylcholine receptor subunit alpha-10-like
5283.3847 neuronal acetylcholine receptor subunit alpha-10-like
5505.0 neuronal acetylcholine receptor subunit alpha-10-like
6426.10 neuronal acetylcholine receptor subunit alpha-10-like
6426.3 neuronal acetylcholine receptor subunit alpha-10-like
3136.0 neuronal acetylcholine receptor subunit alpha-10-like
6426.6 neuronal acetylcholine receptor subunit alpha-10-like
6426.5 neuronal acetylcholine receptor subunit alpha-10-like
6423.0 neuronal acetylcholine receptor subunit alpha-10-like
5283.3849 neuronal acetylcholine receptor subunit alpha-10-like
5283.4563 neuronal acetylcholine receptor subunit alpha-10-like
5283.4564 neuronal acetylcholine receptor subunit alpha-10-like
5283.4565 neuronal acetylcholine receptor subunit alpha-10-like
6426.0 neuronal acetylcholine receptor subunit alpha-10-like
6426.9 neuronal acetylcholine receptor subunit alpha-10-like
5283.4560 neuronal acetylcholine receptor subunit alpha-10-like
5283.10464 neuronal acetylcholine receptor subunit alpha-10-like
5283.4562 neuronal acetylcholine receptor subunit alpha-10-like
6426.2 neuronal acetylcholine receptor subunit alpha-10-like
6423.1 neuronal acetylcholine receptor subunit alpha-10-like
5505.0 neuronal acetylcholine receptor subunit alpha-10-like isoform X1
5283.997 neuronal acetylcholine receptor subunit alpha-10-like isoform X1
5283.2323 neuronal acetylcholine receptor subunit alpha-10-like isoform X3
5283.997 neuronal acetylcholine receptor subunit alpha-10 isoform X1
5283.8846 neuronal acetylcholine receptor subunit alpha-3-like
5283.8847 neuronal acetylcholine receptor subunit alpha-3-like
5283.8850 neuronal acetylcholine receptor subunit alpha-3-like
5283.8849 neuronal acetylcholine receptor subunit alpha-3-like
5283.8848 neuronal acetylcholine receptor subunit alpha-3-like
5283.8851 neuronal acetylcholine receptor subunit alpha-3-like
878.0 neuronal acetylcholine receptor subunit alpha-5-like
5283.10289 neuronal acetylcholine receptor subunit alpha-5-like isoform X1
5283.10287 neuronal acetylcholine receptor subunit alpha-5-like isoform X2
5283.10291 neuronal acetylcholine receptor subunit alpha-5-like isoform X2
5283.10286 neuronal acetylcholine receptor subunit alpha-5-like isoform X2
5283.10288 neuronal acetylcholine receptor subunit alpha-5-like isoform X2
5283.10467 neuronal acetylcholine receptor subunit alpha-6-like
5283.10753 neuronal acetylcholine receptor subunit alpha-7 isoform X1
5283.9321 PDZ domain-containing protein 11
2261.1 phosphatidylinositol 3,4,5-trisphosphate 3-phosphatase TPTE2-like isoform X3
4605.3 piezo-type mechanosensitive ion channel component 1-like isoform X1
1571.0 potassium voltage-gated channel subfamily H member 6 isoform X1
5283.2593 potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 2-like
5283.2593 potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X2
5283.2593 potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X4
5283.11306 probable sodium/potassium-transporting ATPase subunit beta-3
5283.7180 Protein unc-13 B
5283.7180 protein unc-13 homolog A-like
5283.7180 protein unc-13 homolog B-like isoform X1
5283.7180 protein unc-13 homolog B-like isoform X2
5283.7177 protein unc-13 homolog B-like isoform X3
5283.7180 protein unc-13 homolog B-like isoform X4
5283.7180 protein unc-13 homolog B isoform X1
5283.7476 Receptor-type tyrosine-protein phosphatase N2
5283.7474 Receptor-type tyrosine-protein phosphatase N2
5283.7476 receptor-type tyrosine-protein phosphatase N2-like
6010.2 sideroflexin-1
6010.1 sideroflexin-1
6165.7 sideroflexin-2
4048.1 sideroflexin-5 isoform X1
5283.9214 sodium/potassium/calcium exchanger 2-like isoform X2
5283.9214 sodium/potassium/calcium exchanger Nckx30C
5283.9214 sodium/potassium/calcium exchanger Nckx30C-like
5283.9211 sodium/potassium/calcium exchanger Nckx30C isoform X2
5899.6 synaptosomal-associated protein 25
5899.7 synaptosomal-associated protein 25
5899.5 synaptosomal-associated protein 25
5899.1 synaptosomal-associated protein 25
6236.5 synaptosomal-associated protein 25
6236.6 synaptosomal-associated protein 25
5899.3 synaptosomal-associated protein 25
5899.0 synaptosomal-associated protein 25-like isoform X2
6236.6 synaptosomal-associated protein 25 isoform X1
5283.6899 syntenin-1
5283.6896 syntenin-1
5283.4554 toll-like receptor Tollo
5283.3487 toll-like receptor Tollo
5283.4187 toll-like receptor Tollo
5283.1236 transient receptor potential cation channel subfamily M member 3-like
3379.0 transient receptor potential cation channel subfamily V member 5-like
4897.2 Translational regulator orb2
5359.2 Two pore calcium channel protein 1
5359.2 two pore calcium channel protein 1-like
4307.0 tyrosine-protein kinase transmembrane receptor ROR1-like isoform X1
5747.0 tyrosine-protein kinase transmembrane receptor Ror2-like
5283.10289 uncharacterized protein LOC118470684 isoform X6
5283.9217 vesicle-associated membrane protein/synaptobrevin-binding protein
5283.9217 vesicle-associated membrane protein/synaptobrevin-binding protein-like isoform X1
5283.2593 voltage-activated ion channel, putative

Eyes

  • All core enrichment genes from the cluster of GO terms related to ion channels that were found significant with gene set enrichment analysis in the eyes.
  • This cluster of GO terms includes the following GO terms:
    • ion transmembrane transport
    • ion channel activity
ion_channel_cluster_eyes %>%
  select(gene, Sequence.Description) %>%
  kable(caption = 'Core enrichment genes within significantly enriched GO terms of the ion channel cluster in the eyes')
Core enrichment genes within significantly enriched GO terms of the ion channel cluster in the eyes
gene Sequence.Description
5283.9460 —NA—
5283.7784 —NA—
3318.0 —NA—
5661.1 acetylcholine receptor subunit alpha-like 1 isoform X1
5661.0 acetylcholine receptor subunit alpha-like isoform X2
5283.4488 acetylcholine receptor subunit beta-like 1 isoform X2
5283.4489 acetylcholine receptor subunit beta-like 1 isoform X2
5283.7592 cyclic nucleotide-gated cation channel alpha-3
5283.7592 cyclic nucleotide-gated cation channel alpha-3-like
6610.0 cyclic nucleotide-gated cation channel beta-3-like isoform X1
6610.1 cyclic nucleotide-gated cation channel beta-3-like isoform X1
6610.2 cyclic nucleotide-gated cation channel beta-3-like isoform X2
6610.0 cyclic nucleotide-gated cation channel beta-3-like isoform X7
6610.1 cyclic nucleotide-gated cation channel beta-3-like isoform X7
5283.9460 cyclic nucleotide-gated olfactory channel-like
5283.9458 cyclic nucleotide-gated olfactory channel-like
7253.3 Cys-loop ligand-gated ion channel
7253.3 cys-loop ligand-gated ion channel-like isoform X2
7253.4 cys-loop ligand-gated ion channel-like isoform X2
7253.0 cys-loop ligand-gated ion channel-like isoform X2
7253.6 cys-loop ligand-gated ion channel-like isoform X3
5283.9038 gamma-aminobutyric acid receptor alpha-like
5283.9040 gamma-aminobutyric acid receptor alpha-like
3318.0 gamma-aminobutyric acid receptor alpha-like
4411.0 glutamate [NMDA] receptor subunit 1-like isoform X1
5283.8285 glutamate receptor 1-like
5283.8285 glutamate receptor 4
6740.0 glutamate receptor ionotropic, kainate 2-like isoform X1
7335.0 glutamate receptor ionotropic, kainate 2-like isoform X1
7335.6 glutamate receptor ionotropic, kainate 2-like isoform X1
7335.1 glutamate receptor ionotropic, kainate 2-like isoform X1
7335.3 glutamate receptor ionotropic, kainate 2-like isoform X1
2901.0 glutamate receptor ionotropic, kainate 2-like isoform X1
6740.2 glutamate receptor ionotropic, kainate 2-like isoform X1
6740.3 glutamate receptor ionotropic, kainate 2 isoform X1
1742.0 glutamate receptor ionotropic, NMDA 2B-like
4067.0 glutamate receptor ionotropic, NMDA 3A-like
3215.2 glutamate receptor ionotropic, NMDA 3A-like isoform X2
2499.0 glycine receptor subunit alphaZ1-like
5283.8285 GRIA4 protein
5283.2593 HCN channel protein
5283.2592 HCN channel protein
7253.3 Hypothetical predicted protein
5496.1 muscle calcium channel subunit alpha-1-like isoform X1
6426.1 neuronal acetylcholine receptor subunit alpha-10-like
6426.10 neuronal acetylcholine receptor subunit alpha-10-like
6423.3 neuronal acetylcholine receptor subunit alpha-10-like
6426.7 neuronal acetylcholine receptor subunit alpha-10-like
6426.5 neuronal acetylcholine receptor subunit alpha-10-like
3136.0 neuronal acetylcholine receptor subunit alpha-10-like
5283.5377 neuronal acetylcholine receptor subunit alpha-10-like
5283.7784 neuronal acetylcholine receptor subunit alpha-10-like
5283.3848 neuronal acetylcholine receptor subunit alpha-10-like
6426.6 neuronal acetylcholine receptor subunit alpha-10-like
5283.10464 neuronal acetylcholine receptor subunit alpha-10-like
5283.995 neuronal acetylcholine receptor subunit alpha-10-like isoform X1
5283.996 neuronal acetylcholine receptor subunit alpha-10-like isoform X1
5283.994 neuronal acetylcholine receptor subunit alpha-10-like isoform X1
5283.2323 neuronal acetylcholine receptor subunit alpha-10-like isoform X3
5283.8846 neuronal acetylcholine receptor subunit alpha-3-like
5283.8847 neuronal acetylcholine receptor subunit alpha-3-like
878.0 neuronal acetylcholine receptor subunit alpha-5-like
5283.10290 neuronal acetylcholine receptor subunit alpha-5-like isoform X1
5283.10467 neuronal acetylcholine receptor subunit alpha-6-like
2261.2 phosphatidylinositol 3,4,5-trisphosphate 3-phosphatase TPTE2-like isoform X3
4605.3 piezo-type mechanosensitive ion channel component 1-like isoform X1
5283.2593 potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 2-like
5283.2593 potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X2
5283.2593 potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X4
5283.9459 serine-rich adhesin for platelets-like
6010.2 sideroflexin-1
6010.0 sideroflexin-1
6165.9 sideroflexin-2
5283.8067 transient receptor potential-gamma protein-like
5283.8067 transient receptor potential-gamma protein-like isoform X1
5283.2593 voltage-activated ion channel, putative

Both tissues

All core enrichment genes from the cluster of GO terms related to ion channels, that were found significant with gene set enrichment analysis in both the CNS and eyes.

ion_channel_cluster_CNS_eyes %>%
  select(gene, Sequence.Description) %>%
  kable(caption = 'Core enrichment genes within significantly enriched GO terms of the ion channel cluster in both the CNS and eyes')
Core enrichment genes within significantly enriched GO terms of the ion channel cluster in both the CNS and eyes
gene Sequence.Description
5283.7784 —NA—
3318.0 —NA—
5283.9460 —NA—
5661.1 acetylcholine receptor subunit alpha-like 1 isoform X1
5661.0 acetylcholine receptor subunit alpha-like isoform X2
5283.4488 acetylcholine receptor subunit beta-like 1 isoform X2
5283.4489 acetylcholine receptor subunit beta-like 1 isoform X2
5283.7592 cyclic nucleotide-gated cation channel alpha-3
5283.7592 cyclic nucleotide-gated cation channel alpha-3-like
6610.0 cyclic nucleotide-gated cation channel beta-3-like isoform X1
6610.2 cyclic nucleotide-gated cation channel beta-3-like isoform X2
6610.0 cyclic nucleotide-gated cation channel beta-3-like isoform X7
5283.9458 cyclic nucleotide-gated olfactory channel-like
5283.9460 cyclic nucleotide-gated olfactory channel-like
7253.3 Cys-loop ligand-gated ion channel
7253.4 cys-loop ligand-gated ion channel-like isoform X2
7253.0 cys-loop ligand-gated ion channel-like isoform X2
7253.3 cys-loop ligand-gated ion channel-like isoform X2
7253.6 cys-loop ligand-gated ion channel-like isoform X3
5283.9038 gamma-aminobutyric acid receptor alpha-like
3318.0 gamma-aminobutyric acid receptor alpha-like
5283.8285 glutamate receptor 1-like
5283.8285 glutamate receptor 4
7335.0 glutamate receptor ionotropic, kainate 2-like isoform X1
7335.3 glutamate receptor ionotropic, kainate 2-like isoform X1
2901.0 glutamate receptor ionotropic, kainate 2-like isoform X1
6740.0 glutamate receptor ionotropic, kainate 2-like isoform X1
6740.2 glutamate receptor ionotropic, kainate 2-like isoform X1
7335.6 glutamate receptor ionotropic, kainate 2-like isoform X1
7335.1 glutamate receptor ionotropic, kainate 2-like isoform X1
6740.3 glutamate receptor ionotropic, kainate 2 isoform X1
1742.0 glutamate receptor ionotropic, NMDA 2B-like
4067.0 glutamate receptor ionotropic, NMDA 3A-like
3215.2 glutamate receptor ionotropic, NMDA 3A-like isoform X2
2499.0 glycine receptor subunit alphaZ1-like
5283.8285 GRIA4 protein
5283.2592 HCN channel protein
5283.2593 HCN channel protein
7253.3 Hypothetical predicted protein
5283.5377 neuronal acetylcholine receptor subunit alpha-10-like
6426.1 neuronal acetylcholine receptor subunit alpha-10-like
6426.7 neuronal acetylcholine receptor subunit alpha-10-like
5283.7784 neuronal acetylcholine receptor subunit alpha-10-like
6426.10 neuronal acetylcholine receptor subunit alpha-10-like
3136.0 neuronal acetylcholine receptor subunit alpha-10-like
6426.6 neuronal acetylcholine receptor subunit alpha-10-like
6426.5 neuronal acetylcholine receptor subunit alpha-10-like
5283.10464 neuronal acetylcholine receptor subunit alpha-10-like
5283.2323 neuronal acetylcholine receptor subunit alpha-10-like isoform X3
5283.8846 neuronal acetylcholine receptor subunit alpha-3-like
5283.8847 neuronal acetylcholine receptor subunit alpha-3-like
878.0 neuronal acetylcholine receptor subunit alpha-5-like
5283.10467 neuronal acetylcholine receptor subunit alpha-6-like
4605.3 piezo-type mechanosensitive ion channel component 1-like isoform X1
5283.2593 potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 2-like
5283.2593 potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X2
5283.2593 potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X4
6010.2 sideroflexin-1
5283.2593 voltage-activated ion channel, putative

Ion transport cluster of GO terms

  • All core enrichment genes from the cluster of GO terms related to ion transport that were found significant with gene set enrichment analysis in the CNS (no ion transport GO terms found significant in the eyes)
  • This cluster of GO terms includes the following GO terms:
    • potassium channel activity
    • potassium ion transmembrane transport
    • voltage-gated potassium channel activity
    • regulation of ion transmembrane transport
    • voltage-gated calcium channel complex
    • calcium ion transmembrane transport
# Pull out genes in the ion transport, voltage cluster of GO terms
ion_transport_voltage_cluster_CNS <- gsea_exp0_transcripts_GO_CNS %>%
  select(gene, transcript, Sequence.Description, Description_gsea) %>%
  filter(Description_gsea %in% c('potassium ion transport', 'potassium channel activity', 'potassium ion transmembrane transport', 'voltage-gated potassium channel activity', 'regulation of ion transmembrane transport', 'voltage-gated calcium channel complex', 'calcium ion transmembrane transport')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

save(ion_transport_voltage_cluster_CNS, file = 'results/ion_transport_voltage_cluster.RData')
ion_transport_voltage_cluster_CNS %>%
  select(gene, Sequence.Description) %>%
  kable(caption = 'Core enrichment genes within significantly enriched GO terms of the ion transport, voltage cluster in the CNS')
Core enrichment genes within significantly enriched GO terms of the ion transport, voltage cluster in the CNS
gene Sequence.Description
6427.3 —NA—
5283.9460 —NA—
6093.6 ATP-binding cassette sub-family C member 9-like isoform X1
6093.4 ATP-binding cassette sub-family C member 9-like isoform X1
6093.4 ATP-binding cassette sub-family C member 9-like isoform X2
6093.4 ATP-binding cassette sub-family C member 9-like isoform X3
6093.1 ATP-binding cassette sub-family C member 9 isoform X1
6093.0 ATP-binding cassette sub-family C member 9 isoform X1
5283.3940 CACNA1G
5283.7244 CACNA1G
1506.0 calcium-activated potassium channel slowpoke isoform X15
1506.1 calcium-activated potassium channel subunit alpha-1 isoform X10
1506.1 calcium-activated potassium channel subunit alpha-1 isoform X8
1506.0 calcium-activated potassium channel subunit alpha-1 isoform X8
7412.0 calcium channel beta subunit
5283.10202 calcium uniporter protein, mitochondrial-like
5283.7592 cyclic nucleotide-gated cation channel alpha-3
5283.7592 cyclic nucleotide-gated cation channel alpha-3-like
5283.9460 cyclic nucleotide-gated olfactory channel-like
1329.0 G protein-activated inward rectifier potassium channel 2-like
5187.3 G protein-activated inward rectifier potassium channel 3-like
5283.7191 G protein-activated inward rectifier potassium channel 3-like
5283.7192 G protein-activated inward rectifier potassium channel 3-like
5187.7 G protein-activated inward rectifier potassium channel 3-like
5283.2593 HCN channel protein
5496.1 muscle calcium channel subunit alpha-1-like isoform X1
5496.0 muscle calcium channel subunit alpha-1-like isoform X3
1933.0 muscle calcium channel subunit alpha-1-like isoform X4
5283.11306 Na+/K+ ATPase beta subunit
206.0 neuronal calcium sensor 1
7325.1 plasma membrane calcium-transporting ATPase 2-like isoform X1
7325.6 plasma membrane calcium-transporting ATPase 2-like isoform X1
7325.1 plasma membrane calcium-transporting ATPase 2 isoform X1
7325.5 plasma membrane calcium-transporting ATPase 2 isoform X1
7325.2 plasma membrane calcium-transporting ATPase 2 isoform X1
7107.0 potassium channel Kv1
5283.9860 potassium channel subfamily K member 1-like
5283.4027 potassium channel subfamily K member 4-like
5283.1287 potassium channel subfamily T member 2-like isoform X1
5283.1286 potassium channel subfamily T member 2-like isoform X1
5283.5242 potassium channel subfamily T member 2-like isoform X1
5283.1287 potassium channel subfamily T member 2-like isoform X2
5283.5240 potassium channel subfamily T member 2-like isoform X2
5283.5241 potassium channel subfamily T member 2-like isoform X2
5283.5239 potassium channel subfamily T member 2-like isoform X3
5283.5241 potassium channel subfamily T member 2-like isoform X5
7032.0 potassium intermediate/small conductance calcium-activated channel subfamily N member 3
7032.1 potassium intermediate/small conductance calcium-activated channel subfamily N member 3
7109.1 potassium voltage-gated channel protein Shab-like
5283.2059 potassium voltage-gated channel protein Shaker-like
5283.2058 potassium voltage-gated channel protein Shaker-like
5283.2060 potassium voltage-gated channel protein Shaker-like
5283.2159 potassium voltage-gated channel protein Shal-like
5283.2158 potassium voltage-gated channel protein Shal-like
5283.2160 potassium voltage-gated channel protein Shal-like isoform X1
5283.2157 potassium voltage-gated channel protein Shal-like isoform X1
7066.1 potassium voltage-gated channel protein Shaw-like
7066.0 potassium voltage-gated channel protein Shaw-like
6752.2 potassium voltage-gated channel protein Shaw-like isoform X1
6752.0 potassium voltage-gated channel protein Shaw-like isoform X1
6752.1 potassium voltage-gated channel protein Shaw-like isoform X1
995.0 potassium voltage-gated channel subfamily A member 1-like
7109.0 potassium voltage-gated channel subfamily B member 2 isoform X6
7109.1 potassium voltage-gated channel subfamily B member 2 isoform X6
5283.1038 potassium voltage-gated channel subfamily H member 4
5283.1038 potassium voltage-gated channel subfamily H member 4 isoform X1
1571.0 potassium voltage-gated channel subfamily H member 6 isoform X1
1768.0 potassium voltage-gated channel subfamily H member 7-like isoform X1
5283.1038 potassium voltage-gated channel subfamily H member 8-like
3824.0 potassium voltage-gated channel subfamily KQT member 1-like isoform X1
3824.1 potassium voltage-gated channel subfamily KQT member 1-like isoform X1
5283.2593 potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 2-like
5283.2593 potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X2
5283.2593 potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X4
7109.1 probable serine/threonine-protein kinase DDB_G0282963
5283.11306 probable sodium/potassium-transporting ATPase subunit beta-3
6905.0 putative Na+/K+-ATPase alpha subunit
5283.4923 riboflavin-binding protein-like
5283.324 ryanodine receptor-like isoform X11
5283.324 ryanodine receptor-like isoform X12
5283.324 ryanodine receptor-like isoform X15
5283.324 ryanodine receptor-like isoform X17
5283.324 ryanodine receptor-like isoform X2
5283.324 ryanodine receptor-like isoform X7
5283.324 ryanodine receptor 2
5283.324 ryanodine receptor 2-like isoform X10
1483.0 short transient receptor potential channel 3-like
1418.0 short transient receptor potential channel 3-like isoform X1
7032.1 small conductance calcium-activated potassium channel protein-like
7032.2 small conductance calcium-activated potassium channel protein 2 isoform X1
7032.1 small conductance calcium-activated potassium channel protein 2 isoform X1
7381.7 sodium channel
5283.2924 sodium channel protein 1 brain
5283.2924 sodium channel protein 1 brain-like
5283.2922 sodium channel protein 1 brain-like
5283.2924 sodium channel protein 1 brain-like isoform X1
5283.2924 sodium channel protein 1 brain isoform X1
5283.2924 sodium channel protein para isoform X2
5283.2922 sodium channel protein para isoform X2
7381.6 sodium channel protein type 4 subunit alpha A-like isoform X1
5283.2924 sodium channel protein type 4 subunit alpha B-like
7381.2 sodium channel protein type 4 subunit alpha B-like isoform X1
7381.3 sodium channel protein type 4 subunit alpha B-like isoform X1
7381.1 sodium channel protein type 4 subunit alpha B-like isoform X1
7381.5 sodium channel protein type 4 subunit alpha B-like isoform X1
7381.4 sodium channel protein type 4 subunit alpha B-like isoform X1
5283.9598 sodium/potassium-transporting ATPase subunit beta-like
5283.9595 sodium/potassium-transporting ATPase subunit beta-like
5283.9596 sodium/potassium-transporting ATPase subunit beta-like
5283.9600 sodium/potassium-transporting ATPase subunit beta-like
4619.0 sodium/potassium-transporting ATPase subunit beta-like
5283.9599 sodium/potassium-transporting ATPase subunit beta-like
4619.1 sodium/potassium-transporting ATPase subunit beta-like
5283.9594 sodium/potassium-transporting ATPase subunit beta-like
5283.9214 sodium/potassium/calcium exchanger 2-like isoform X2
7433.2 sodium/potassium/calcium exchanger 4-like
7433.0 sodium/potassium/calcium exchanger 4-like
7433.3 sodium/potassium/calcium exchanger 4-like
7433.1 sodium/potassium/calcium exchanger 4-like
5283.9214 sodium/potassium/calcium exchanger Nckx30C
5283.9214 sodium/potassium/calcium exchanger Nckx30C-like
5283.9213 sodium/potassium/calcium exchanger Nckx30C-like
6396.2 solute carrier family 12 member 4-like isoform X1
6396.3 solute carrier family 12 member 4-like isoform X1
6396.1 solute carrier family 12 member 4-like isoform X2
6396.9 solute carrier family 12 member 4-like isoform X2
3790.0 transient-receptor-potential-like protein
3790.0 transient-receptor-potential-like protein isoform X1
3790.1 transient-receptor-potential-like protein isoform X1
5283.3687 transient receptor potential-gamma protein-like
5811.9 transient receptor potential cation channel subfamily M member-like 2 isoform X1
6627.0 trimeric intracellular cation channel type 1B.1-like
7122.2 TWiK family of potassium channels protein 18-like
7122.1 TWiK family of potassium channels protein 18-like
7122.0 TWiK family of potassium channels protein 18-like
6244.0 two pore potassium channel protein sup-9-like
5283.2593 voltage-activated ion channel, putative
6427.3 voltage-dependent calcium channel
6427.2 voltage-dependent calcium channel
1684.1 voltage-dependent calcium channel
6427.3 voltage-dependent calcium channel type A subunit alpha-1-like
6427.0 voltage-dependent calcium channel type A subunit alpha-1-like
6427.3 voltage-dependent calcium channel type A subunit alpha-1-like isoform X6
1684.0 voltage-dependent calcium channel type A subunit alpha-1 isoform X2
5283.3939 voltage-dependent T-type calcium channel subunit alpha-1G-like
5283.3940 voltage-dependent T-type calcium channel subunit alpha-1G-like isoform X2
5283.7246 voltage-dependent T-type calcium channel subunit alpha-1G-like isoform X2
5283.7243 voltage-dependent T-type calcium channel subunit alpha-1H-like
5283.7245 voltage-dependent T-type calcium channel subunit alpha-1H-like
5283.7242 voltage-dependent T-type calcium channel subunit alpha-1I-like
5283.7244 voltage-dependent T-type calcium channel subunit alpha-1I-like
5283.10568 voltage-gated potassium channel subunit beta-2-like isoform X1
5283.10569 voltage-gated potassium channel subunit beta-2-like isoform X1
7381.5 voltage-gated sodium channel invertebrate type 1

GPCR cluster of GO terms

  • All core enrichment genes from the cluster of GO terms related to GPCRs that were found significant with gene set enrichment analysis in the CNS (no ion transport GO terms found significant in the eyes)
  • This cluster of GO terms includes the following GO terms:
    • G protein-coupled receptor activity
    • G protein-coupled receptor signaling pathway
# Pull out genes in the GPCR cluster of GO terms
GPCR_cluster_CNS <- gsea_exp0_transcripts_GO_CNS %>%
  select(gene, transcript, Sequence.Description, Description_gsea) %>%
  filter(Description_gsea %in% c('G protein-coupled receptor activity', 'G protein-coupled receptor signaling pathway')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)


save(GPCR_cluster_CNS, file = 'results/GPCR_cluster.RData')
GPCR_cluster_CNS %>%
  select(gene, Sequence.Description) %>%
  kable(caption = 'Core enrichment genes within significantly enriched GO terms of the GPCR cluster in the CNS')
Core enrichment genes within significantly enriched GO terms of the GPCR cluster in the CNS
gene Sequence.Description
5283.11228 —NA—
6174.0 —NA—
5283.7636 —NA—
5283.4060 —NA—
3985.0 5-hydroxytryptamine receptor-like
396.0 5-hydroxytryptamine receptor 1D-like
5283.11786 5-hydroxytryptamine receptor 2C-like
5283.11785 5-hydroxytryptamine receptor 2C-like
5283.1508 5-hydroxytryptamine receptor 4-like
5283.9162 adhesion G-protein coupled receptor D1-like
5283.9162 adhesion G-protein coupled receptor D2-like
252.1 adhesion G protein-coupled receptor A3-like
6594.0 adhesion G protein-coupled receptor L2-like isoform X1
6594.0 adhesion G protein-coupled receptor L2 isoform X1
2554.0 allatostatin-A receptor
1650.0 alpha-2A adrenergic receptor
5283.10427 cadherin EGF LAG seven-pass G-type receptor 1-like
5283.10430 cadherin EGF LAG seven-pass G-type receptor 1-like
5283.10428 cadherin EGF LAG seven-pass G-type receptor 1-like
5596.4 cadherin EGF LAG seven-pass G-type receptor 1 isoform X3
5596.1 cadherin EGF LAG seven-pass G-type receptor 1 isoform X3
5596.3 cadherin EGF LAG seven-pass G-type receptor 2 isoform X1
5596.2 cadherin EGF LAG seven-pass G-type receptor 2 isoform X5
5283.11833 calcitonin gene-related peptide type 1 receptor-like isoform X1
5283.2398 cAMP-dependent protein kinase catalytic subunit beta
5283.2398 cAMP-dependent protein kinase catalytic subunit isoform X5
5283.2397 cAMP-dependent protein kinase catalytic subunit isoform X5
5283.9030 cardioacceleratory peptide receptor-like isoform X1
5283.5948 catalytic subunit of protein kinase A
2171.0 cholecystokinin receptor type A-like
3084.0 cholecystokinin receptor type A-like
5283.8548 corticotropin-releasing factor receptor 2-like
6072.0 CX3C chemokine receptor 1 isoform X1
5283.11354 dopamine receptor 1-like
5283.11353 dopamine receptor 1-like
3912.1 dopamine receptor 1-like
3716.0 FMRFamide receptor-like
1567.0 FMRFamide receptor-like
3871.0 FMRFamide receptor-like
5415.1 FMRFamide receptor-like
6899.1 frizzled-9
6899.1 frizzled-9-like
6899.1 Frizzled 10A
6899.0 Frizzled 10A
6899.1 frizzled 9/10
5283.9573 G-protein coupled receptor 143-like
5283.9574 G-protein coupled receptor 143-like
1827.0 G-protein coupled receptor 161-like
4382.0 G-protein coupled receptor Mth-like
5283.11232 GABR1 protein
5283.11232 gamma-aminobutyric acid type B receptor subunit 1-like
5283.11229 gamma-aminobutyric acid type B receptor subunit 1-like
5283.8308 Gamma-aminobutyric acid type B receptor subunit 2
5283.10141 gonadotropin-releasing hormone receptor
5283.2535 growth hormone secretagogue receptor type 1-like
5283.732 guanine nucleotide-binding protein subunit beta-5
5283.2425 Hypothetical predicted protein
5283.4222 leucine-rich repeat-containing G-protein coupled receptor 5-like
5283.4293 leucine-rich repeat-containing G-protein coupled receptor 5-like
5283.4294 leucine-rich repeat-containing G-protein coupled receptor 5-like
5283.10762 lissencephaly-1 homolog
5283.10762 lissencephaly-1 homolog B
5283.4221 lutropin-choriogonadotropic hormone receptor isoform X3
5283.11295 melatonin receptor type 1A-like
5283.11297 melatonin receptor type 1A-like
5283.7577 metabotropic glutamate receptor 1-like
5283.7570 metabotropic glutamate receptor 1-like
5283.7573 metabotropic glutamate receptor 1-like
5283.7569 metabotropic glutamate receptor 1-like
5283.7574 metabotropic glutamate receptor 1-like
1651.0 metabotropic glutamate receptor 3
5283.7578 metabotropic glutamate receptor 5-like
5283.7575 metabotropic glutamate receptor 5-like
6075.0 neuropeptide FF receptor 1-like
6075.1 neuropeptide FF receptor 1-like
1357.0 octopamine receptor Oamb-like
1357.1 octopamine receptor Oamb-like
2184.1 octopamine receptor Oamb isoform X1
2664.0 orexin receptor type 2-like
5283.9529 orexin receptor type 2-like
3932.1 parathyroid hormone/parathyroid hormone-related peptide receptor isoform X3
6072.0 PREDICTED: uncharacterized protein LOC106880461
4782.0 prolactin-releasing peptide receptor-like
5283.2398 protein kinase A
5283.2397 protein kinase A
1909.1 pyrokinin-1 receptor-like
5283.1602 regulator of G-protein signaling 4-like
5283.1599 regulator of G-protein signaling 4-like
5283.1146 retinochrome
5283.8098 retinochrome
5283.1143 retinochrome
5283.1145 retinochrome
5283.1142 retinochrome
5283.7855 rhodopsin
5283.7636 S-antigen protein-like
3932.0 secretin receptor-like isoform X1
6876.2 somatostatin receptor type 2-like
5629.0 somatostatin receptor type 3-like
5984.0 thyrotropin-releasing hormone receptor-like
5629.0 thyrotropin-releasing hormone receptor-like
5283.1199 type-1 angiotensin II receptor isoform X1
5283.1602 unnamed protein product

Export functional enrichment results for Cytoscape

  • Enrichmentmap within Cytoscape was used to visualise the GSEA results
  • In order to do this, the GSEA results need to be exported from R in the correct format to be used within Cytoscape.

GSEA results file

gsea_result_CNS_ashr_pos <- gsea_CNS_ashr_annot_exp0_summary %>%
  filter(NES > 0) %>%
  select(ID, Description, setSize, enrichmentScore, NES, pvalue, qvalues, p.adjust, rank, leading_edge) %>%
  rename(NAME = ID) %>%
  mutate('GS<br> follow link to MSigDB' = '') %>%
  rename('GS DETAILS' = Description,
         SIZE = setSize,
         ES = enrichmentScore,
         'NOM p-val' = pvalue,
         'FDR q-val' = qvalues,
         'FWER p-val' = p.adjust,
         'RANK AT MAX' = rank,
         'LEADING EDGE' = leading_edge) %>%
  relocate('GS<br> follow link to MSigDB', .after = NAME)

write.table(gsea_result_CNS_ashr_pos, file = 'gsea_cytoscape/gsea_result_CNS_ashr_pos.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)  


gsea_result_CNS_ashr_neg <- gsea_CNS_ashr_annot_exp0_summary %>%
  filter(NES < 0) %>%
  select(ID, Description, setSize, enrichmentScore, NES, pvalue, qvalues, p.adjust, rank, leading_edge) %>%
  rename(NAME = ID) %>%
  mutate('GS<br> follow link to MSigDB' = '') %>%
  rename('GS DETAILS' = Description,
         SIZE = setSize,
         ES = enrichmentScore,
         'NOM p-val' = pvalue,
         'FDR q-val' = qvalues,
         'FWER p-val' = p.adjust,
         'RANK AT MAX' = rank,
         'LEADING EDGE' = leading_edge) %>%
  relocate('GS<br> follow link to MSigDB', .after = NAME)

write.table(gsea_result_CNS_ashr_neg, file = 'gsea_cytoscape/gsea_result_CNS_ashr_neg.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)  
gsea_result_eyes_ashr_pos <- gsea_eyes_ashr_annot_exp0_summary %>%
  filter(NES > 0) %>%
  select(ID, Description, setSize, enrichmentScore, NES, pvalue, qvalues, p.adjust, rank, leading_edge) %>%
  rename(NAME = ID) %>%
  mutate('GS<br> follow link to MSigDB' = '') %>%
  rename('GS DETAILS' = Description,
         SIZE = setSize,
         ES = enrichmentScore,
         'NOM p-val' = pvalue,
         'FDR q-val' = qvalues,
         'FWER p-val' = p.adjust,
         'RANK AT MAX' = rank,
         'LEADING EDGE' = leading_edge) %>%
  relocate('GS<br> follow link to MSigDB', .after = NAME)

write.table(gsea_result_eyes_ashr_pos, file = 'gsea_cytoscape/gsea_result_eyes_ashr_pos.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)  


gsea_result_eyes_ashr_neg <- gsea_eyes_ashr_annot_exp0_summary %>%
  filter(NES < 0) %>%
  select(ID, Description, setSize, enrichmentScore, NES, pvalue, qvalues, p.adjust, rank, leading_edge) %>%
  rename(NAME = ID) %>%
  mutate('GS<br> follow link to MSigDB' = '') %>%
  rename('GS DETAILS' = Description,
         SIZE = setSize,
         ES = enrichmentScore,
         'NOM p-val' = pvalue,
         'FDR q-val' = qvalues,
         'FWER p-val' = p.adjust,
         'RANK AT MAX' = rank,
         'LEADING EDGE' = leading_edge) %>%
  relocate('GS<br> follow link to MSigDB', .after = NAME)

write.table(gsea_result_eyes_ashr_neg, file = 'gsea_cytoscape/gsea_result_eyes_ashr_neg.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)  

Rank file

  • column 1 = gene, column 2 = log2FoldChange, ordered
res_CNS_shrunken_ashr_tb_rename <- res_CNS_shrunken_ashr_tb %>%
  mutate(gene = gsub('Cluster-', '', gene))

res_CNS <- res_CNS_shrunken_ashr_tb_rename %>%
  select(gene, log2FoldChange) %>% 
  as.data.frame()

write.table(res_CNS, file = 'gsea_cytoscape/gsea_CNS_ashr_rank.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)
res_eyes_shrunken_ashr_tb_rename <- res_eyes_shrunken_ashr_tb %>%
  mutate(gene = gsub('Cluster-', '', gene))

res_eyes <- res_eyes_shrunken_ashr_tb_rename %>%
  select(gene, log2FoldChange) %>% 
  as.data.frame()

write.table(res_eyes, file = 'gsea_cytoscape/gsea_eyes_ashr_rank.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)
  • Then manually change .txt to .rnk

Expression data file

  • txt file
  • name (–tab–) description (–tab–) sample1 name (–tab–) sample2 name etc…
normalised_counts_CNS_tb_rename <- normalised_counts_CNS_tb %>%
  mutate(gene = gsub("Cluster-", "", gene)) %>%
  rename_with(~ gsub("CNS", "CNS_ambient", .x), c("PS506_CNS", "PS508_CNS", "PS513_CNS", "PS514_CNS", "PS518_CNS", "PS529_CNS", "PS534_CNS", "PS551_CNS", "PS557_CNS", "PS561_CNS")) %>%
  rename_with(~ gsub("CNS", "CNS_elevated", .x), c("PS509_CNS", "PS510_CNS", "PS522_CNS", "PS527_CNS", "PS528_CNS", "PS539_CNS", "PS547_CNS", "PS584_CNS", "PS626_CNS", "PS636_CNS")) %>%
  relocate(ends_with("ambient"), .before = ends_with("elevated"))

normalised_counts_CNS_annotated <- normalised_counts_CNS_tb_rename %>%
  full_join(corset_clusters_2_transcripts_renamed) %>%
  full_join(annotated_transcriptome_rename) %>%
  select(gene, Sequence.Description, PS506_CNS_ambient:PS636_CNS_elevated) %>%
  distinct(gene, .keep_all = TRUE)

write.table(normalised_counts_CNS_annotated, file = 'gsea_cytoscape/gsea_CNS_ashr_expression.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)
normalised_counts_eyes_tb_rename <- normalised_counts_eyes_tb %>%
  mutate(gene = gsub("Cluster-", "", gene)) %>%
  rename_with(~ gsub("eyes", "eyes_ambient", .x), c("PS506_eyes", "PS508_eyes", "PS513_eyes", "PS514_eyes", "PS518_eyes", "PS529_eyes", "PS534_eyes", "PS551_eyes", "PS557_eyes", "PS561_eyes")) %>%
  rename_with(~ gsub("eyes", "eyes_elevated", .x), c("PS509_eyes", "PS510_eyes", "PS522_eyes", "PS527_eyes", "PS528_eyes", "PS539_eyes", "PS547_eyes", "PS584_eyes", "PS626_eyes", "PS636_eyes")) %>%
  relocate(ends_with("ambient"), .before = ends_with("elevated"))

normalised_counts_eyes_annotated <- normalised_counts_eyes_tb_rename %>%
  full_join(corset_clusters_2_transcripts_renamed) %>%
  full_join(annotated_transcriptome_rename) %>%
  select(gene, Sequence.Description, PS506_eyes_ambient:PS636_eyes_elevated) %>%
  distinct(gene, .keep_all = TRUE)

write.table(normalised_counts_eyes_annotated, file = 'gsea_cytoscape/gsea_eyes_ashr_expression.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)

Gene Sets file (GMT file)

  • geneset name (–tab–) description (–tab–) a list of tab-delimited genes
  • Open annotated transcriptome in OmicsBox > File > Export > Export annotations > Export Sequences per GO (Gene Sets): Gives GO.ID column 1 and list of transcripts column 2
gmt <- read.delim(file = 'data/blast2go_gene_sets.txt')
gmt <- gmt %>%
  as_tibble()

# need corresponding genes for the transcript list
gmt_split <- cSplit(gmt, splitCols = 'Sequences', ',', direction = "long") 
gmt_genes <- gmt_split %>%
  rename(transcript = Sequences) %>% 
  mutate(transcript = gsub("transcript/", '', transcript)) %>%
  full_join(corset_clusters_2_transcripts_renamed) %>%
  select(GO.ID, gene)
#collapse so a list of tab delimited genes for each GO term - should have 5731 rows
gmt_genes_list <- gmt_genes %>%
  drop_na() %>%
  group_by(GO.ID) %>%
  dplyr::summarise(gene = paste(unique(gene), collapse='\t'))

# Add GO term/description to gmt_genes
# Used Annotation.GO.ID for GSEA in ClusterProfiler
# So extract all Annotation.GO.ID and corresponding Annotation.GO.Term and add to gmt_genes and should have all the GO terms with their description that were use din the analysis

GO <- annotated_transcriptome %>%
  select(Annotation.GO.ID, Annotation.GO.Term) %>%
  rename(GO.ID = Annotation.GO.ID,
         GO.Term = Annotation.GO.Term)
GO_split <- cSplit(GO, c('GO.ID', 'GO.Term'), ';', direction = "long") 

GO_split <- GO_split %>%
  distinct(GO.ID, .keep_all = TRUE) # transcriptome contains genes annotated with same GO terms so pull out only one of each GO

gmt_genes_GO <- gmt_genes_list %>%
  left_join(GO_split) %>%
  select(GO.ID, GO.Term, gene)

write.table(gmt_genes_GO, file = 'gsea_cytoscape/genesets.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = FALSE)
  • Then manually change .txt to .gmt

Annotation Statistics

Calculate some statistics regarding annotation of the transcriptome

Percentage of transcripts with a blast hit

No._no_blast_hits <- sum(is.na(annotated_transcriptome$Blast.Top.Hit.Score))
No._transcripts <- nrow(annotated_transcriptome)
(No._transcripts - No._no_blast_hits)/No._transcripts*100

93% transcripts had a blast hit

Percentage of transcripts succesfully GO mapped

# Replace blank cells with 'NA'
annotated_transcriptome$Mapping.GO.ID[annotated_transcriptome$Mapping.GO.ID == ""] <- NA


No._no_GO_mapping <- sum(is.na(annotated_transcriptome$Mapping.GO.ID))
No._transcripts <- nrow(annotated_transcriptome)
(No._transcripts - No._no_GO_mapping)/No._transcripts*100

73% of transcripts succesfully GO mapped

Percentage of transcripts completely annotated

# Replace blank cells with 'NA'
annotated_transcriptome$Annotation.GO.ID[annotated_transcriptome$Annotation.GO.ID == ""] <- NA

No._no_complete_annotation <- sum(is.na(annotated_transcriptome$Annotation.GO.ID))
No._transcripts <- nrow(annotated_transcriptome)
(No._transcripts - No._no_complete_annotation)/No._transcripts*100

69% of transcripts completely annotated

Publication Figures

Figure: DE and GSEA Results

Make PCA

rld$sampletype <- factor(rld$sampletype, levels = c("Ambient_CNS", "Elevated_CNS", "Ambient_eyes", "Elevated_eyes"))

pcaData <- plotPCA(rld, intgroup = "sampletype", ntop = 50000000, returnData = TRUE)
    
percentVar <- round(100 * attr(pcaData, "percentVar")) 
    

( figure_PCA <- ggplot(pcaData, aes(x = PC1, y = PC2,
                                         fill = factor(sampletype),
                                         shape = factor(sampletype)),
                            colour = "black") + 
    geom_point(size = 2) + 
    scale_shape_manual(values=c(21, 22, 23, 24),
                       name = "",
                       labels = c(bquote("CNS: Current-day CO"[2]),
                                  bquote("CNS: Elevated CO"[2]),
                                  bquote("Eyes: Current-day CO"[2]),
                                  bquote("Eyes: Elevated CO"[2]))) + 
    scale_fill_manual(values = c('#FFFFFF', '#FDE725', '#FFFFFF', '#FDE725'),
                      name = "",
                      labels = c(bquote("CNS: Current-day CO"[2]),
                                 bquote("CNS: Elevated CO"[2]),
                                 bquote("Eyes: Current-day CO"[2]),
                                 bquote("Eyes: Elevated CO"[2]))) +
    xlab(paste0("PC1: ", percentVar[1], "% variance")) + 
    ylab(paste0("PC2: ", percentVar[2], "% variance")) + 
    coord_cartesian(ylim = c(-85, 85), xlim = c(-152, 152)) +
    scale_x_continuous(breaks = c(-100, 0, 100),
                       labels = c(-100, 0, 100)) +
    scale_y_continuous(breaks = c(-80, -40, 0, 40),
                       labels = c(-80, -40, 0, 40)) +
    theme_classic() +
    theme(axis.title = element_text(size = 10),
          axis.text.x = element_text(size = 8),
          axis.text.y = element_text(size = 8),
          legend.text = element_text(size = 8),
          legend.position = c(0.5,1.1)) +
    guides(fill = guide_legend(nrow = 2, byrow = FALSE),
           shape = guide_legend(nrow = 2, byrow = FALSE))
)

save(figure_PCA, file = 'publication_figures/figure_PCA.RData')

Make Venn diagrams

DE_CNS <- sig_DE_CNS_ashr %>%
  distinct(gene)

DE_eyes <- sig_DE_eyes_ashr_final %>%
  distinct(gene)

x <- c(list(DE_CNS$gene), list(DE_eyes$gene))
names(x) <- c('CNS', 'Eyes')
(venn_de <- ggvenn(x,
               show_percentage = FALSE,
               text_size = 0,
               set_name_size = 4,
               fill_color = c('#4040FF', '#BFBFFF'),
               fill_alpha = 0.8) +
  annotate('text', label = '23', x = -1, y = 0.25, size = 4) +
  annotate("rect", xmin = -1.2, xmax = -0.8, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
  annotate('text', label = '2', x = 0, y = 0.25, size = 4) +
  annotate("rect", xmin = -0.2, xmax = 0.2, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
  annotate('text', label = '6', x = 1, y = 0.25, size = 4) +
  annotate("rect", xmin = 0.8, xmax = 1.2, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
  annotate('text', label = expression('12 ' *symbol('\255')), x = -1, y = -0.25) +
  annotate('text', label = expression('11 ' *symbol('\257')), x = -1, y = -0.55) +
  annotate('text', label = expression('3 ' *symbol('\255')), x = 1, y = -0.25) +
  annotate('text', label = expression('3 ' *symbol('\257')), x = 1, y = -0.55) +
  annotate('text', label = expression('2 ' *symbol('\255')), x = 0, y = -0.25) +
    theme(plot.margin = unit(c(0,0,0,0), "pt"))
  )

save(venn_de, file = 'publication_figures/venn_de.RData')
GSEA_CNS <- gsea_CNS_ashr_annot_exp0_summary %>%
  distinct(ID)

GSEA_eyes <- gsea_eyes_ashr_annot_exp0_summary %>%
  distinct(ID)


x <- c(list(GSEA_CNS$ID), list(GSEA_eyes$ID))
names(x) <- c('CNS', 'Eyes')
(venn_gsea <- ggvenn(x,
                show_percentage = FALSE,
                text_size = 0,
                set_name_size = 4,
                fill_color = c('#4040FF', '#BFBFFF'),
                fill_alpha = 0.8) +
    annotate('text', label = '89', x = -1, y = 0.25, size = 4) +
    annotate("rect", xmin = -1.2, xmax = -0.8, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
    annotate('text', label = '10', x = 0, y = 0.35, size = 4) +
    annotate("rect", xmin = -0.15, xmax = 0.15, ymin = 0.22, ymax = 0.45, fill = NA, colour = 'black', size = 0.5) +
    annotate('text', label = '7', x = 1, y = 0.25, size = 4) +
    annotate("rect", xmin = 0.8, xmax = 1.2, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
    annotate('text', label = expression('69 ' *symbol('\255')), x = -1, y = -0.25) +
    annotate('text', label = expression('20 ' *symbol('\257')), x = -1, y = -0.55) +
    annotate('text', label = expression('5 ' *symbol('\255')), x = 1, y = -0.25) +
    annotate('text', label = expression('2 ' *symbol('\257')), x = 1, y = -0.55) +
    annotate('text', label = expression('5 ' *symbol('\255'), '           / ' *symbol('\255')), x = -0.1, y = 0.1) +
    annotate('text', label = expression('2 ' *symbol('\255'), '           / ' *symbol('\257')), x = -0.1, y = -0.05) +
    annotate('text', label = expression('1 ' *symbol('\257'), '           / ' *symbol('\257')), x = -0.1, y = -0.2) +
    annotate('text', label = expression('2 ' *symbol('\257'), '           / ' *symbol('\255')), x = -0.1, y = -0.35) +
    theme(plot.margin = unit(c(0,0,0,0), "pt"))
)

save(venn_gsea, file = 'publication_figures/venn_gsea.RData')

Combine into one figure

(figure_DE_GSEA <- (
  (
  plot_spacer() 
  |
    (figure_PCA + theme(plot.margin = unit(c(30,50,30,50), "pt")))
  |
    plot_spacer()
) +
  plot_layout(widths = c(0.3,1,0.3))
)
         /
           (
             (
             (venn_de + theme(plot.margin = unit(c(0,0,0,0), "pt"))) 
             | 
              (venn_gsea + theme(plot.margin = unit(c(0,0,0,0), "pt"))) 
           ) +
             plot_layout(widths = c(1,1))
           ) +
   plot_layout(heights = c(1,1)) +
   plot_annotation(tag_levels = 'A') &
   theme(plot.tag = element_text(face = 'bold'))
   
 )
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

Supplementary figure: Top hit species distribution

The top blast hit species distribution .txt file is exported from OmicsBox: First run BLAST statistics, then go to tab ‘Chart: Top-hit species distribution’ and in the toolbar click ‘Data as text’.

species <- read.delim(file = 'data/top_hit_species_distribution_annotated_transcriptome_orfs.txt')
save(species, file = 'publication_figures/species.RData')
# Read in .txt file output from OmicsBox


a <- species %>%
  rename(No._top_hits = X.BLAST.Top.Hits) %>%
  arrange(-No._top_hits) %>%
  slice_head(n = 20) %>%
  mutate(order = as.factor(row_number()))

b <- species %>%
  rename(No._top_hits = X.BLAST.Top.Hits) %>%
  arrange(No._top_hits) %>%
  slice_head(n = nrow(species) - 20) %>%
  summarize(No._top_hits = sum(No._top_hits)) %>%
  mutate(Species = 'others') 

species_data <- a %>%
  full_join(b) %>%
  arrange(order)


(species_distr <- ggplot(species_data, aes(x = No._top_hits, y = rev(order))) +
    geom_col(colour = 'black', size = 0.05, fill = '#FF8080') +
    scale_y_discrete(
      breaks = species_data$order,
      labels = rev(c('squid: *Sepia pharaonis*', 'octopus: *Octopus sinensis*', 'octopus: *Octopus bimaculoides*', 'squid: *Doryteuthis opalescens*', 'squid: *Idiosepius paradoxus*', 'squid: *Doryteuthis pealeii*', 'aphid: *Acyrthosiphon pisum*', 'starfish: *Acanthaster planci*', 'scallop: *Mizuhopecten yessoensis*', 'scallop: *Pecten maximus*', 'freshwater snail: *Pomacea canaliculata*', 'mussel: *Mytilus galloprovincialis*', 'limpet: *Lottia gigantea*', 'butterfly: *Danaus plexippus plexippus*', 'sea slug: *Aplysia californica*', 'cuttlefish: *Sepia officinalis*', 'squid: *Nototodarus sloanii*', 'sea anemone: *Exaiptasia diaphana*', 'squid: *Euprymna scolopes*', 'oyster: *Cassostrea virginica*', 'others'))) +
    scale_x_continuous(breaks = c(0, 2500, 5000, 7500, 10000, 12500, 15000, 17500),
                       labels = c(0, 2500, 5000, 7500, 10000, 12500, 15000, 17500),
                       expand = expansion(mult = c(0.005, 0.02))) +
    labs(x = "Number of BLAST top hits", y = "") +
    coord_cartesian(xlim = c(0, 17800)) +
    theme_classic() +
    theme(axis.text.y = element_markdown(size = 8), 
          axis.title = element_text(size = 10),
          axis.text.x = element_text(size = 8)) )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Supplementary figure: GSEA Dotplot

# Plot all GO terms for CNS and eyes on same graph

# CNS and eyes share same y axis on dotplot - want to order by NES of CNS, and show overlaps of GO terms found in CNS and eyes
# Below method so GO terms in both tissues are not twice on the y axis
CNS <- gsea_CNS_ashr_annot_exp0_summary %>%
  mutate(tissue = "CNS")
eyes <- gsea_eyes_ashr_annot_exp0_summary %>%
  mutate(tissue = "eyes")

CNS_NES <- CNS %>%
  dplyr::select(ID, Description, NES) %>%
  rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_NES <- eyes %>%
  dplyr::select(ID, Description, NES) %>% 
  rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_NES <- CNS_NES %>%
  full_join(eyes_NES) %>%
  ungroup() %>% 
  arrange(NES_CNS, NES_eyes) %>%
  dplyr::mutate(order_NES = as.factor(row_number())) %>%
  pivot_longer(c(NES_CNS, NES_eyes), names_to = "tissue", values_to = "NES") %>%
  mutate(tissue = gsub("NES_", "", tissue)) %>%
  arrange(NES)


CNS_p.adjust <- CNS %>%
  dplyr::select(ID, Description, p.adjust) %>%
  rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_p.adjust <- eyes %>%
  dplyr::select(ID, Description, p.adjust) %>% 
  rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_p.adjust <- CNS_p.adjust %>%
  full_join(eyes_p.adjust) %>% 
  ungroup() %>% 
  arrange(p.adjust_CNS) %>%
  dplyr::mutate(order_p.adjust = as.factor(row_number())) %>% 
  pivot_longer(c(p.adjust_CNS, p.adjust_eyes), names_to = "tissue", values_to = "p.adjust") %>%
  dplyr::mutate(tissue = gsub("p.adjust_", "", tissue)) %>%
  arrange(Description)

CNS_Count <- CNS %>%
  dplyr::select(ID, Description, Count) %>%
  rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_Count <- eyes %>%
  dplyr::select(ID, Description, Count) %>% 
  rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_Count <- CNS_Count %>%
  full_join(eyes_Count) %>% 
  ungroup() %>% 
  arrange(Count_CNS) %>%
  dplyr::mutate(order_Count = as.factor(row_number())) %>% 
  pivot_longer(c(Count_CNS, Count_eyes), names_to = "tissue", values_to = "Count") %>%
  dplyr::mutate(tissue = gsub("Count_", "", tissue))

df_all <- df_NES %>%
  full_join(df_p.adjust) %>%
  full_join(df_Count) %>%
  dplyr::mutate(Description_wrapped = str_wrap(Description, width = 50))

figure_GSEA <- (ggplot(df_all, aes(x = NES, y = order_NES, colour = p.adjust, size = Count)) +
  geom_point() +
  scale_y_discrete(
    breaks = df_all$order_NES,
    labels = df_all$Description_wrapped) +
  ggplot2::facet_grid(. ~ tissue) +
  scale_colour_gradient(low = '#4040FF', high = '#E3E3E3',
                        name = "padj",
                        guide = guide_colourbar(barwidth = 0.5,
                                                barheight = 8,
                                                ticks = FALSE)) +
  scale_size(name = "Count") +
  geom_vline(xintercept = 0) +
  labs(x = "Normalised enrichment score", y = "") +
  theme(panel.border = element_rect(fill = NA, size = 0.5),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(colour = 'grey', size = 0.2),
        panel.background = element_blank(),
        strip.background = element_rect(fill = NA, size = 0.5, colour = 'black'),
        axis.title = element_text(size = 8),
        axis.text.x = element_text(size = 6),
        axis.text.y = element_text(size = 4),
        legend.title = element_text(size = 8),
        legend.text = element_text(size = 6),
        legend.key = element_blank(),
        strip.text = element_text(size = 8, face = 'bold')) )
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
figure_GSEA
## Warning: Removed 96 rows containing missing values (`geom_point()`).

SessionInfo

R version 4.0.4 (2021-02-15) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 10 x64 (build 18363)

Matrix products: default

locale: [1] LC_COLLATE=English_Australia.1252 LC_CTYPE=English_Australia.1252
[3] LC_MONETARY=English_Australia.1252 LC_NUMERIC=C
[5] LC_TIME=English_Australia.1252

attached base packages: [1] grid parallel stats4 stats graphics grDevices utils datasets methods
[10] base

other attached packages: [1] ggupset_0.3.0 ggnewscale_0.4.5 enrichplot_1.10.2
[4] clusterProfiler_3.18.1 pathview_1.30.1 DOSE_3.16.0
[7] ggvenn_0.1.8 splitstackshape_1.4.8 knitr_1.31
[10] ashr_2.2-47 apeglm_1.12.0 factoextra_1.0.7
[13] ggrepel_0.9.1 DEGreport_1.26.0 pheatmap_1.0.12
[16] patchwork_1.1.1 RColorBrewer_1.1-2 viridis_0.5.1
[19] viridisLite_0.3.0 forcats_0.5.1 stringr_1.4.0
[22] dplyr_1.0.5 purrr_0.3.4 readr_1.4.0
[25] tidyr_1.1.3 tibble_3.1.0 ggplot2_3.3.3
[28] tidyverse_1.3.0 DESeq2_1.30.1 SummarizedExperiment_1.20.0 [31] Biobase_2.50.0 MatrixGenerics_1.2.1 matrixStats_0.58.0
[34] GenomicRanges_1.42.0 GenomeInfoDb_1.26.4 IRanges_2.24.1
[37] S4Vectors_0.28.1 BiocGenerics_0.36.0

loaded via a namespace (and not attached): [1] utf8_1.2.1 tidyselect_1.1.0 RSQLite_2.2.4
[4] AnnotationDbi_1.52.0 BiocParallel_1.24.1 scatterpie_0.1.5
[7] munsell_0.5.0 withr_2.4.1 colorspace_2.0-1
[10] GOSemSim_2.16.1 rstudioapi_0.13 KEGGgraph_1.50.0
[13] lasso2_1.2-21.1 bbmle_1.0.23.1 GenomeInfoDbData_1.2.4
[16] mixsqp_0.3-43 mnormt_2.0.2 polyclip_1.10-0
[19] farver_2.1.0 bit64_4.0.5 downloader_0.4
[22] coda_0.19-4 vctrs_0.3.6 generics_0.1.0
[25] xfun_0.22 R6_2.5.0 graphlayouts_0.7.1
[28] clue_0.3-58 invgamma_1.1 locfit_1.5-9.4
[31] bitops_1.0-6 cachem_1.0.4 reshape_0.8.8
[34] fgsea_1.16.0 DelayedArray_0.16.2 assertthat_0.2.1
[37] scales_1.1.1 ggraph_2.0.5 gtable_0.3.0
[40] Cairo_1.5-12.2 tidygraph_1.2.0 rlang_0.4.11
[43] genefilter_1.72.1 GlobalOptions_0.1.2 splines_4.0.4
[46] broom_0.7.5 BiocManager_1.30.10 yaml_2.2.1
[49] reshape2_1.4.4 modelr_0.1.8 backports_1.2.1
[52] qvalue_2.22.0 tools_4.0.4 psych_2.0.12
[55] logging_0.10-108 ellipsis_0.3.1 ggdendro_0.1.22
[58] Rcpp_1.0.6 plyr_1.8.6 zlibbioc_1.36.0
[61] RCurl_1.98-1.2 GetoptLong_1.0.5 cowplot_1.1.1
[64] haven_2.3.1 cluster_2.1.0 fs_1.5.0
[67] magrittr_2.0.1 data.table_1.14.0 DO.db_2.9
[70] circlize_0.4.12 reprex_1.0.0 truncnorm_1.0-8
[73] tmvnsim_1.0-2 mvtnorm_1.1-1 SQUAREM_2021.1
[76] hms_1.0.0 evaluate_0.14 xtable_1.8-4
[79] XML_3.99-0.5 emdbook_1.3.12 readxl_1.3.1
[82] gridExtra_2.3 shape_1.4.5 compiler_4.0.4
[85] bdsmatrix_1.3-4 shadowtext_0.0.7 crayon_1.4.1
[88] htmltools_0.5.1.1 geneplotter_1.68.0 lubridate_1.7.10
[91] DBI_1.1.1 tweenr_1.0.1 dbplyr_2.1.0
[94] ComplexHeatmap_2.6.2 MASS_7.3-53 Matrix_1.3-2
[97] cli_2.3.1 igraph_1.2.6 pkgconfig_2.0.3
[100] rvcheck_0.1.8 numDeriv_2016.8-1.1 xml2_1.3.2
[103] annotate_1.68.0 XVector_0.30.0 rvest_1.0.0
[106] digest_0.6.27 ConsensusClusterPlus_1.54.0 graph_1.68.0
[109] Biostrings_2.58.0 rmarkdown_2.7 cellranger_1.1.0
[112] fastmatch_1.1-0 edgeR_3.32.1 rjson_0.2.20
[115] lifecycle_1.0.0 nlme_3.1-152 jsonlite_1.7.2
[118] limma_3.46.0 fansi_0.4.2 pillar_1.5.1
[121] lattice_0.20-41 Nozzle.R1_1.1-1 KEGGREST_1.30.1
[124] fastmap_1.1.0 httr_1.4.2 survival_3.2-7
[127] GO.db_3.12.1 glue_1.4.2 png_0.1-7
[130] bit_4.0.4 Rgraphviz_2.34.0 ggforce_0.3.2
[133] stringi_1.5.3 blob_1.2.1 org.Hs.eg.db_3.12.0
[136] memoise_2.0.0 irlba_2.3.3