Preamble

Packages

library(DESeq2)       #for differential expression analysis
library(tidyverse)
library(ggnewscale)   #add extra scale to ggplot
library(ggtext)       #text in ggplot
library(viridis)      #to change colour scale
library(RColorBrewer) #to manage colours
library(patchwork)    #for combining graphs
library(stringr)      #for manipulating strings
library(pheatmap)     #for heatmaps
library(DEGreport)    #to create report of DE results
library(ggplot2)      #for graphs
library(ggrepel)      #to label individual points on ggplot graph
library(factoextra)   #for PCA
library(apeglm)       #for shrinking log2fold change estimates
library(ashr)         #for shrinking log2fold change estimates
library(knitr)        #for kable
library(splitstackshape)  #for spliting concatenated values into separate values
library(ggvenn)       #for venn diagrams
library(patchwork)    #for combining graphs
library(Cairo)        #for cairo graphics to change font, save ggplots as pdfs etc
library(extrafont)  #for extra text font families

#for funtional enrichment analysis
library(DOSE)
library(pathview)
library(clusterProfiler)
library(enrichplot)
library(ggnewscale)
library(ggupset)

Set up data

# count data
count_data <- read.delim("data/corset-counts.txt", row.names = 1)

# meta data
meta_data1 <- read_csv("data/metadata.csv", trim_ws = TRUE)

meta_data1 <- meta_data1 %>%
  filter(Squid == "PS506" | Squid == "PS508" | Squid == "PS509" | Squid == "PS510" | Squid == "PS513" | Squid == "PS514" | Squid == "PS518" | Squid == "PS522" | Squid == "PS527" | Squid == "PS528" | Squid == "PS529" | Squid == "PS534" | Squid == "PS539" | Squid == "PS547" | Squid == "PS551" | Squid == "PS557" | Squid == "PS561" | Squid == "PS584" | Squid == "PS626" | Squid == "PS636") %>%
  mutate(CO2 = factor(CO2, levels=c('Ambient', 'Elevated')))

meta_data2 <- meta_data1

meta_data1$Squid = paste0(meta_data1$Squid, '_CNS')
meta_data2$Squid = paste0(meta_data2$Squid, '_eyes')

meta_data1 <- meta_data1 %>%
  mutate(tissue = "CNS")

meta_data2 <- meta_data2 %>%
  mutate(tissue = "eyes")

meta_data <- meta_data1 %>%
  full_join(meta_data2)

meta_data <- meta_data %>%
  unite("sampletype", CO2, tissue, remove = FALSE)

#Make Squid in meta_data same order as in count_data (alphabetical) and make into row names (rather than a normal column)
meta_data <- meta_data %>%
  dplyr::arrange(Squid) %>%
  column_to_rownames(var = "Squid")

save(count_data, file = 'data/count_data.RData')
save(meta_data, file = 'data/meta_data.RData')

#Subset count_data for just CNS
count_data_CNS <- count_data %>%
  select(contains("CNS"))
save(count_data_CNS, file = 'data/count_data_CNS.RData')

#Subset meta_data for just CNS
meta_data_CNS <- meta_data %>%
  filter(tissue == "CNS")
save(meta_data_CNS, file = 'data/meta_data_CNS.RData')

# Check they match
all(colnames(count_data_CNS) %in% rownames(meta_data_CNS))
all(colnames(count_data_CNS) == rownames(meta_data_CNS))

#Subset count_data for eyes
count_data_eyes <- count_data %>%
  select(contains("eyes"))
save(count_data_eyes, file = 'data/count_data_eyes.RData')

#Subset meta_data for eyes
meta_data_eyes <- meta_data %>%
  filter(tissue == "eyes")
save(meta_data_eyes, file = 'data/meta_data_eyes.RData')

#Check they match
all(colnames(count_data_eyes) %in% rownames(meta_data_eyes))
all(colnames(count_data_eyes) == rownames(meta_data_eyes))

Explore Raw Data

Total raw counts per Individual

counts_CNS <- count_data_CNS %>%
  summarise(across(everything(), ~ sum(., na.rm = TRUE))) %>% 
  pivot_longer(everything(), names_to = 'squid', values_to = 'tot_raw_counts')

ggplot(counts_CNS, aes(x = squid, y = tot_raw_counts)) +
  geom_point() +
  geom_text_repel(aes(label = squid), max.overlaps = 2)

## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

counts_eyes <- count_data_eyes %>%
  summarise(across(everything(), ~ sum(., na.rm = TRUE))) %>% 
  pivot_longer(everything(), names_to = 'squid', values_to = 'tot_raw_counts')

ggplot(counts_eyes, aes(x = squid, y = tot_raw_counts)) +
  geom_point() +
  geom_text_repel(aes(label = squid), max.overlaps = 2)

CNS: PS548 has more counts than others - see if normalisation has any effect on this

Eyes: All samples have similar amount of counts

Raw counts across genes within an individual

count_data_CNS_long <- count_data_CNS %>%
  tibble::rownames_to_column(var = 'gene') %>%
  pivot_longer(cols = -gene, names_to = 'squid', values_to = 'count')

ggplot(count_data_CNS_long, aes(x = count)) +
  geom_histogram(stat = "bin", bins = 200) +
  facet_grid(squid ~ .) +
  labs(x = "raw count", y = "No. of genes with this raw count")

count_data_eyes_long <- count_data_eyes %>%
  tibble::rownames_to_column(var = 'gene') %>%
  pivot_longer(cols = -gene, names_to = 'squid', values_to = 'count')

ggplot(count_data_eyes_long, aes(x = count)) +
  geom_histogram(stat = "bin", bins = 200) +
  facet_grid(squid ~ .) +
  labs(x = "raw count", y = "No. of genes with this raw count")

Looks normal, shows the common features of RNAseq data:

a low number of counts associated with a large proportion of genes
a long right tail due to the lack of any upper limit for expression
large dynamic range

Mean vs Variance

#Do for one group at a time
Ambient_CNS_counts <- count_data %>%
  select("PS506_CNS", "PS508_CNS", "PS513_CNS", "PS514_CNS", "PS518_CNS", "PS529_CNS", "PS534_CNS", "PS551_CNS", "PS557_CNS", "PS561_CNS")

mean_counts <- apply(Ambient_CNS_counts, 1, mean)        #The second argument '1' of 'apply' function indicates the function being applied to rows. Use '2' if applied to columns 
variance_counts <- apply(Ambient_CNS_counts, 1, var)
df <- data.frame(mean_counts, variance_counts)

Ambient_CNS_mean_var <- ggplot(df) +
  geom_point(aes(x=mean_counts, y=variance_counts)) + 
  scale_y_log10() +
  scale_x_log10() +
  geom_abline(intercept = 0, slope = 1, color="red") +
  ggtitle("Ambient_CNS mean vs variance")
Ambient_CNS_mean_var

Elevated_CNS_counts <- count_data %>%
  select("PS509_CNS", "PS510_CNS", "PS522_CNS", "PS527_CNS", "PS528_CNS", "PS539_CNS", "PS547_CNS", "PS584_CNS", "PS626_CNS", "PS636_CNS")

mean_counts <- apply(Elevated_CNS_counts, 1, mean)
variance_counts <- apply(Ambient_CNS_counts, 1, var)
df <- data.frame(mean_counts, variance_counts)

Elevated_CNS_mean_var <- ggplot(df) +
  geom_point(aes(x=mean_counts, y=variance_counts)) + 
  scale_y_log10() +
  scale_x_log10() +
  geom_abline(intercept = 0, slope = 1, color="red") +
  ggtitle("Elevated_CNS mean vs variance")
Elevated_CNS_mean_var

Ambient_eyes_counts <- count_data %>%
  select("PS506_eyes", "PS508_eyes", "PS513_eyes", "PS514_eyes", "PS518_eyes", "PS529_eyes", "PS534_eyes", "PS551_eyes", "PS557_eyes", "PS561_eyes")

mean_counts <- apply(Ambient_eyes_counts, 1, mean)
variance_counts <- apply(Ambient_eyes_counts, 1, var)
df <- data.frame(mean_counts, variance_counts)

Ambient_eyes_mean_var <- ggplot(df) +
  geom_point(aes(x=mean_counts, y=variance_counts)) + 
  scale_y_log10() +
  scale_x_log10() +
  geom_abline(intercept = 0, slope = 1, color="red") +
  ggtitle("Ambient_eyes mean vs variance")
Ambient_eyes_mean_var

Elevated_eyes_counts <- count_data %>%
  select("PS509_eyes", "PS510_eyes", "PS522_eyes", "PS527_eyes", "PS528_eyes", "PS539_eyes", "PS547_eyes", "PS584_eyes", "PS626_eyes", "PS636_eyes")

mean_counts <- apply(Elevated_eyes_counts, 1, mean)
variance_counts <- apply(Ambient_eyes_counts, 1, var)
df <- data.frame(mean_counts, variance_counts)

Elevated_eyes_mean_var <- ggplot(df) +
  geom_point(aes(x=mean_counts, y=variance_counts)) + 
  scale_y_log10() +
  scale_x_log10() +
  geom_abline(intercept = 0, slope = 1, color="red") +
  ggtitle("Elevated_eyes mean vs variance")
Elevated_eyes_mean_var

Each black point is a gene
Red line represents if mean = variance
The variance across replicates tends to be greater than the mean (red line), especially for genes with large mean expression levels.
More scatter/heteroscedascity in lowly expressed genes in the elevated groups.
Indicates data doesn’t fit poisson distribution and better to use the negative binomial

Run DE Analysis (Walds Test)

Create DESeq dataset and run analysis

Entire dataset

#Check sample names match in count_data and meta_data
all(colnames(count_data) %in% rownames(meta_data))
all(colnames(count_data) == rownames(meta_data))

#Create DESeq2 Dataset object
dds <- DESeqDataSetFromMatrix(count_data, colData = meta_data, design = ~ sampletype)

# Run analysis - runs through the steps of estimating size factors, estimates dispersion and runs Wald statistics on NB model
# These steps can all be done separately = estimateSizeFactors, estimateDispersions, nbinomWaldTest
dds <- DESeq(dds)

save(dds, file = 'results/dds.RData')

Dataset subset for CNS

#Create DESeqDataset and run analysis on data subset for CNS
dds_CNS <- DESeqDataSetFromMatrix(count_data_CNS, colData = meta_data_CNS, design = ~ sampletype)

dds_CNS <- DESeq(dds_CNS)

save(dds_CNS, file = 'results/dds_CNS.RData')

Dataset subset for eyes

#Create DESeqDataset and run analysis on data subset for eyes
dds_eyes <- DESeqDataSetFromMatrix(count_data_eyes, colData = meta_data_eyes, design = ~ sampletype)

dds_eyes <- DESeq(dds_eyes)

save(dds_eyes, file = 'results/dds_eyes.RData')

Save raw count data

raw_counts <- counts(dds, normalized=FALSE)
raw_counts_tb <- raw_counts %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
save(raw_counts, file = 'data/raw_counts.RData')
save(raw_counts_tb, file = 'data/raw_counts_tb.RData')

raw_counts_CNS <- counts(dds_CNS, normalized=FALSE)
raw_counts_CNS_tb <- raw_counts_CNS %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

save(raw_counts_CNS, file = 'data/raw_counts_CNS.RData')
save(raw_counts_CNS_tb, file = 'data/raw_counts_CNS_tb.RData')

raw_counts_eyes <- counts(dds_eyes, normalized=FALSE)
raw_counts_eyes_tb <- raw_counts_eyes %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
save(raw_counts_eyes, file = 'data/raw_counts_eyes.RData')
save(raw_counts_eyes_tb, file = 'data/raw_counts_eyes_tb.RData')

Save normalised count data

Normalised counts used for data visualisation

#All
normalised_counts <- counts(dds, normalized=TRUE)
normalised_counts_tb <- normalised_counts %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

save(normalised_counts, file = 'data/normalised_counts.RData')
save(normalised_counts_tb, file = 'data/normalised_counts_tb.RData')

#CNS
normalised_counts_CNS <- counts(dds_CNS, normalized=TRUE)
normalised_counts_CNS_tb <- normalised_counts_CNS %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

save(normalised_counts_CNS, file = 'data/normalised_counts_CNS.RData')
save(normalised_counts_CNS_tb, file = 'data/normalised_counts_CNS_tb.RData')


#Eyes
normalised_counts_eyes <- counts(dds_eyes, normalized=TRUE)
normalised_counts_eyes_tb <- normalised_counts_eyes %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

save(normalised_counts_eyes, file = 'data/normalised_counts_eyes.RData')
save(normalised_counts_eyes_tb, file = 'data/normalised_counts_eyes_tb.RData')

Save rlog transformed data

rlog transforming normalised counts improves the distances/clustering for the PCA and heirarchical clustering visualization methods by moderating the variance across the mean
blind=TRUE for transformation unbiased to sample condition information

rld <- rlog(dds, blind=TRUE)
rld_CNS <- rlog(dds_CNS, blind=TRUE)
rld_eyes <- rlog(dds_eyes, blind=TRUE)

save(rld_CNS, file = 'data/rld_CNS.RData')
save(rld, file = 'data/rld.RData')
save(rld_eyes, file = 'data/rld_eyes.RData')

Extract DESeq results (unshrunken)

# Extract results from DESeq analysis
res_CNS_unshrunken <- results(dds_CNS, name = "sampletype_Elevated_CNS_vs_Ambient_CNS", alpha = 0.05)
save(res_CNS_unshrunken, file = "results/res_CNS_unshrunken.RData")

res_CNS_unshrunken_tb <- res_CNS_unshrunken %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
save(res_CNS_unshrunken_tb, file = "results/res_CNS_unshrunken_tb.RData")

# Extract results from DESeq analysis
res_eyes_unshrunken <- results(dds_eyes, name = "sampletype_Elevated_eyes_vs_Ambient_eyes", alpha = 0.05)
save(res_eyes_unshrunken, file = "results/res_eyes_unshrunken.RData")

res_eyes_unshrunken_tb <- res_eyes_unshrunken %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
save(res_eyes_unshrunken_tb, file = "results/res_eyes_unshrunken_tb.RData")

Extract DESeq results (shrunken with ashr)

Shrinkage of log2 fold change (LFC) estimates generates more accurate log2 fold change estimates
Shrinks LFC towards zero when the info for a gene is low (e.g. low counts, high dispersion values)

res_CNS_shrunken_ashr <- lfcShrink(dds_CNS, coef="sampletype_Elevated_CNS_vs_Ambient_CNS", res=res_CNS_unshrunken, type = "ashr")
save(res_CNS_shrunken_ashr, file = "results/res_CNS_shrunken_ashr.RData")

res_CNS_shrunken_ashr_tb <- res_CNS_shrunken_ashr %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
save(res_CNS_shrunken_ashr_tb, file = "results/res_CNS_shrunken_ashr_tb.RData")

res_eyes_shrunken_ashr <- lfcShrink(dds_eyes, coef="sampletype_Elevated_eyes_vs_Ambient_eyes", res=res_eyes_unshrunken, type = "ashr")
save(res_eyes_shrunken_ashr, file = "results/res_eyes_shrunken_ashr.RData")

res_eyes_shrunken_ashr_tb <- res_eyes_shrunken_ashr %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
save(res_eyes_shrunken_ashr_tb, file = "results/res_eyes_shrunken_ashr_tb.RData")

Sample level Quality Control

PCA Plots

All data from both tissues

rld$sampletype <- factor(rld$sampletype, levels = c("Ambient_CNS", "Elevated_CNS", "Ambient_eyes", "Elevated_eyes"))

All <- plotPCA(rld, intgroup="sampletype", ntop = 50000) +
  scale_colour_discrete(name = "", labels = c("CNS - ambient CO2", "CNS - elevated CO2", "eyes - ambient CO2", "eyes - elevated CO2")) +
  labs(title = "All genes") +
  geom_text_repel(aes(label = name)) +
  coord_cartesian(ylim = c(-80, 80)) +
  theme_classic()
All

top500 <- plotPCA(rld, intgroup="sampletype", ntop = 500) +
  scale_colour_discrete(name = "", labels = c("CNS - ambient CO2", "CNS - elevated CO2", "eyes - ambient CO2", "eyes - elevated CO2")) +
  geom_text_repel(aes(label = name)) +
  labs(title = "Top 500 genes") +
  coord_cartesian(ylim = c(-20, 20)) +
  theme_classic()
top500

Main variation is between tissue types (explained by PC1)
Eyes has more variation than the CNS - so use DE analysis done on data subset for CNS and eyes separately

CNS

rld_CNS$sampletype <- factor(rld_CNS$sampletype, levels = c("Ambient_CNS", "Elevated_CNS"))

All_CNS <- plotPCA(rld_CNS, intgroup="sampletype", ntop = 50000) +
  scale_colour_discrete(name = "", labels = c("CNS - ambient CO2", "CNS - elevated CO2")) +
  stat_ellipse() +
  labs(title = "All genes") +
  geom_text_repel(aes(label = name)) +
  coord_cartesian(ylim = c(-80, 80)) +
  theme_classic()
All_CNS

top500_CNS <- plotPCA(rld_CNS, intgroup="sampletype", ntop = 500) +
  scale_colour_discrete(name = "", labels = c("CNS - ambient CO2", "CNS - elevated CO2")) +
  stat_ellipse() +
  geom_text_repel(aes(label = name)) +
  labs(title = "Top 500 genes") +
  coord_cartesian(ylim = c(-20, 20)) +
  theme_classic()
top500_CNS

Eyes

rld_eyes$sampletype <- factor(rld_eyes$sampletype, levels = c("Ambient_eyes", "Elevated_eyes"))

All_eyes <- plotPCA(rld_eyes, intgroup="sampletype", ntop = 50000) +
  scale_colour_discrete(name = "", labels = c("eyes - ambient CO2", "eyes - elevated CO2")) +
  stat_ellipse() +
  labs(title = "All genes") +
  geom_text_repel(aes(label = name)) +
  coord_cartesian(ylim = c(-80, 80)) +
  theme_classic()
All_eyes

top500_eyes <- plotPCA(rld_eyes, intgroup="sampletype", ntop = 500) +
  scale_colour_discrete(name = "", labels = c("eyes - ambient CO2", "eyes - elevated CO2")) +
  stat_ellipse() +
  geom_text_repel(aes(label = name)) +
  labs(title = "Top 500 genes") +
  coord_cartesian(ylim = c(-20, 20)) +
  theme_classic()
top500_eyes

Heatmaps

Heatmaps using all genes

All data from both tissues

# Extract the rlog matrix from the object
rld_mat <- assay(rld)
# Compute pairwise correlation values
rld_cor <- cor(rld_mat)    ## cor() is a base R function

#meta_data for heatmap annotation
meta_data_heatmap <- meta_data %>%
  select("sampletype")

#plot heatmap
pheatmap(rld_cor, annotation_row = meta_data_heatmap, annotation_col = meta_data_heatmap)

Cluster by tissue type but not by CO2

CNS

rld_mat_CNS <- assay(rld_CNS)
rld_cor_CNS <- cor(rld_mat_CNS) 

meta_data_heatmap_CNS <- meta_data %>%
  filter(tissue == "CNS") %>%
  select("sampletype")

pheatmap(rld_cor_CNS, annotation_row = meta_data_heatmap_CNS, annotation_col = meta_data_heatmap_CNS)

Eyes

rld_mat_eyes <- assay(rld_eyes)
rld_cor_eyes <- cor(rld_mat_eyes) 

meta_data_heatmap_eyes <- meta_data %>%
  filter(tissue == "eyes") %>%
  select("sampletype")

pheatmap(rld_cor_eyes, annotation_row = meta_data_heatmap_eyes, annotation_col = meta_data_heatmap_eyes)

Explore DESeq2 Data

Total normalised Counts per individual

CNS

# Total number of normalized counts per sample
tot_normalised_counts_df <- colSums(counts(dds_CNS, normalized=T)) %>%
  data.frame() %>%
  rownames_to_column(var="squid") %>% 
  as_tibble() %>%
  rename(tot_normalised_counts = '.')

plot_tot_normalised_counts <- ggplot(tot_normalised_counts_df, aes(x = squid, y = tot_normalised_counts)) +
  geom_point() +
  scale_x_discrete(guide = guide_axis(angle = 90))

plot_tot_normalised_counts

Normalisation has dealt with PS548_CNS having more counts than other samples
PS636_CNS has slightly more normalised counts than other samples, but within the same order of magnitude

Eyes

# Total number of normalized counts per sample
tot_normalised_counts_df <- colSums(counts(dds_eyes, normalized=T)) %>%
  data.frame() %>%
  rownames_to_column(var="squid") %>% 
  as_tibble() %>%
  rename(tot_normalised_counts = '.')

plot_tot_normalised_counts <- ggplot(tot_normalised_counts_df, aes(x = squid, y = tot_normalised_counts)) +
  geom_point() +
  scale_x_discrete(guide = guide_axis(angle = 90))

plot_tot_normalised_counts

All eye samples have similar amounts of normalised counts

BaseMean level of expression across genes

BaseMean = the average of the normalised count values, dividing by size factors, taken over all samples

CNS

ggplot(res_CNS_unshrunken_tb, aes(x = baseMean)) +
  geom_histogram(bins = 10000) +
  scale_y_sqrt() +
  labs(y = 'No. of genes') +
  coord_cartesian(xlim = c(0, 1000000))

Eyes

ggplot(res_eyes_unshrunken_tb, aes(x = baseMean)) +
  geom_histogram(bins = 10000) +
  scale_y_sqrt() +
  labs(y = 'No. of genes') +
  coord_cartesian(xlim = c(0, 1000000))

Looks normal and common of RNAseq data - a low number of counts associated with a large proportion of genes and a long right tail due to the lack of any upper limit for expression.

BaseMean level of expression across samples

CNS

res_CNS_shrunken_ashr_tb_basemean <- res_CNS_shrunken_ashr_tb %>%
  mutate(genelabels = "") %>% 
  arrange(-baseMean)

# Populate the genelabels column with contents of the gene  column for the first 10 rows, i.e. the top 10 high;y expressed genes
res_CNS_shrunken_ashr_tb_basemean$genelabels[1:10] <- as.character(res_CNS_shrunken_ashr_tb_basemean$gene[1:10])


plot_basemean_LFC_ashr_CNS <- ggplot(res_CNS_shrunken_ashr_tb_basemean, aes(x = log2FoldChange, y = baseMean)) +
  geom_point() +
  geom_text_repel(aes(label = genelabels))

plot_basemean_LFC_ashr_CNS

## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Eyes

res_eyes_shrunken_ashr_tb_basemean <- res_eyes_shrunken_ashr_tb %>%
  mutate(genelabels = "") %>% 
  arrange(-baseMean)

# Populate the genelabels column with contents of the gene  column for the first 10 rows, i.e. the top 10 high;y expressed genes
res_eyes_shrunken_ashr_tb_basemean$genelabels[1:10] <- as.character(res_eyes_shrunken_ashr_tb_basemean$gene[1:10])


plot_basemean_LFC_ashr_eyes <- ggplot(res_eyes_shrunken_ashr_tb_basemean, aes(x = log2FoldChange, y = baseMean)) +
  geom_point() +
  geom_text_repel(aes(label = genelabels))

plot_basemean_LFC_ashr_eyes

As expected, only a few genes have high basemean expression

Dispersion Estimates

To ensure the data is a good fit for the DESeq2 model

CNS

plotDispEsts(dds_CNS)

Eyes

plotDispEsts(dds_eyes)

Looks as expected for both CNS and eyes - the data is scattered around the curve, with the dispersion decreasing with increasing mean expression levels.
Thus the data is a good fit for the DESeq2 model

DE Results

Annotate DESeq2 results

annotated_transcriptome <- read.csv('data/annotated_transcriptome_ORFs.csv')
save(annotated_transcriptome, file = 'data/annotated_transcriptome.RData')

transcript2gene_map <- read.delim('data/corset-clusters.txt')
save(transcript2gene_map, file = 'data/transcript2gene_map.RData')

annotated_res_CNS_shrunken_ashr_tb <- res_CNS_shrunken_ashr_tb %>%
  full_join(transcript2gene_map) %>%
  full_join(annotated_transcriptome, by = c("transcript" = "Sequence.Name"))
save(annotated_res_CNS_shrunken_ashr_tb, file = 'results/annotated_res_CNS_shrunken_ashr_tb.RData')

annotated_res_eyes_shrunken_ashr_tb <- res_eyes_shrunken_ashr_tb %>%
  full_join(transcript2gene_map) %>%
  full_join(annotated_transcriptome, by = c("transcript" = "Sequence.Name"))
save(annotated_res_eyes_shrunken_ashr_tb, file = 'results/annotated_res_eyes_shrunken_ashr_tb.RData')

Significantly DE genes

CNS

# Summarize results
summary(res_CNS_shrunken_ashr, alpha = 0.05)

## 
## out of 27401 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 14, 0.051%
## LFC < 0 (down)     : 11, 0.04%
## outliers [1]       : 0, 0%
## low counts [2]     : 6, 0.022%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

# Extract sig DE genes
sig_DE_CNS_ashr <- annotated_res_CNS_shrunken_ashr_tb %>%
  filter(padj < 0.05)
save(sig_DE_CNS_ashr, file = 'results/sig_DE_CNS_ashr.RData')

Eyes

# Summarize results
summary(res_eyes_shrunken_ashr, alpha = 0.05)

## 
## out of 27400 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 6, 0.022%
## LFC < 0 (down)     : 5, 0.018%
## outliers [1]       : 0, 0%
## low counts [2]     : 1071, 3.9%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

# Extract sig DE genes
sig_DE_eyes_ashr <- annotated_res_eyes_shrunken_ashr_tb %>%
  filter(padj < 0.05)
save(sig_DE_eyes_ashr, file = 'results/sig_DE_eyes_ashr.RData')

MA Plots

Allows us to evaluate the magnitude of fold changes and how they are distributed relative to mean expression.
Generally, we would expect to see significant genes across the full range of expression levels.

CNS

# Unshrunken results
plotMA(res_CNS_unshrunken, alpha = 0.05, main = "MA plot unshrunken LFC in CNS", ylim=c(-2,2))

# Shrunken results
plotMA(res_CNS_shrunken_ashr, alpha = 0.05, main = "MA plot ashr shrunken LFC in CNS", ylim=c(-2,2))

Eyes

# Unshrunken results
plotMA(res_eyes_unshrunken, alpha = 0.05, main = "MA plot unshrunken LFC in eyes", ylim=c(-2,2))

# Shrunken results
plotMA(res_eyes_shrunken_ashr, alpha = 0.05, main = "MA plot ashr shrunken LFC in eyes", ylim=c(-2,2))

Significantly DE genes (green) are spread across LFC values
Ashr shrinkage has shrunk a lot of the LFCs

Volcano Plots

CNS

# Obtain logical vector where TRUE values denote padj values < 0.05
# Create an empty column to indicate which genes to label
# Sort by padj values 
res_CNS_shrunken_ashr_tb_volcano <- res_CNS_shrunken_ashr_tb %>%
  mutate(threshold_DE = padj < 0.05) %>% 
  mutate(genelabels = "") %>% 
  arrange(padj)

# Populate the genelabels column with contents of the gene  column for the first 25 rows, i.e. all sig DE genes 
res_CNS_shrunken_ashr_tb_volcano$genelabels[1:25] <- as.character(res_CNS_shrunken_ashr_tb_volcano$gene[1:25])

# Volcano plot
volcano_CNS_ashr <- ggplot(res_CNS_shrunken_ashr_tb_volcano, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(aes(colour = threshold_DE)) +
  geom_text_repel(aes(label = genelabels)) +
  ggtitle("Volcano plot CNS (ashr shrinkage method): p < 0.05") +
  xlab("log2 fold change") + 
  ylab("-log10 adjusted p-value") +
  theme(legend.position = "none",
        plot.title = element_text(size = rel(1.5), hjust = 0.5),
        axis.title = element_text(size = rel(1.25))) 
volcano_CNS_ashr

Removed 19 rows containing missing values (geom_point)
6 have low counts (shown in summary table)
6 have NA for padj only in res_table = row is filtered by automatic independent filtering, for having a low mean normalized count.
13 have NA in pvalue and padj = row contains a sample with an extreme count outlier, detected by Cook’s distance.

Eyes

# Obtain logical vector where TRUE values denote padj values < 0.05
# Create an empty column to indicate which genes to label
# Sort by padj values 
res_eyes_shrunken_ashr_tb_volcano <- res_eyes_shrunken_ashr_tb %>%
  mutate(threshold_DE = padj < 0.05) %>% 
  mutate(genelabels = "") %>% 
  arrange(padj)

# Populate the genelabels column with contents of the gene  column for the first 25 rows, i.e. all sig DE genes 
res_eyes_shrunken_ashr_tb_volcano$genelabels[1:25] <- as.character(res_eyes_shrunken_ashr_tb_volcano$gene[1:25])

# Volcano plot
volcano_eyes_ashr <- ggplot(res_eyes_shrunken_ashr_tb_volcano, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(aes(colour = threshold_DE)) +
  geom_text_repel(aes(label = genelabels)) +
  ggtitle("Volcano plot eyes (ashr shrinkage method): p < 0.05") +
  xlab("log2 fold change") + 
  ylab("-log10 adjusted p-value") +
  theme(legend.position = "none",
        plot.title = element_text(size = rel(1.5), hjust = 0.5),
        axis.title = element_text(size = rel(1.25))) 
volcano_eyes_ashr

## Warning: Removed 1083 rows containing missing values (`geom_point()`).

## Warning: Removed 1083 rows containing missing values (`geom_text_repel()`).

## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Removed 1083 rows containing missing values (geom_point)
1071 = low counts (shown in summary table)
12 = have NA in pvalue and padj = row contains a sample with an extreme count outlier, detected by Cook’s distance.

Heatmaps

CNS

Significantly DE genes

meta_data_CNS_heatmap <- meta_data_CNS %>%
  select("sampletype")

# Use rlog transformed data
rld_mat_CNS <- assay(rld_CNS)

rld_mat_CNS_tb <- rld_mat_CNS %>%
  as.data.frame() %>%
  rownames_to_column(var = "gene") %>% 
  as_tibble()

#Extract only the sig DE genes
rld_mat_CNS_heatmap <- rld_mat_CNS_tb %>% 
  filter(gene %in% sig_DE_CNS_ashr$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_CNS_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = T,
         annotation_col = meta_data_CNS_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of significantly DE genes in CNS")

Top 100 genes (sorted by adjusted p-value)

#Extract only the top 100 genes (by padj)
top100 <- res_CNS_shrunken_ashr_tb %>%
  arrange(padj) %>%
  slice_head(n = 100)

rld_mat_CNS_heatmap <- rld_mat_CNS_tb %>% 
  filter(gene %in% top100$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_CNS_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = F,
         annotation_col = meta_data_CNS_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of top 100 genes in CNS")

Eyes

Significantly DE genes

meta_data_eyes_heatmap <- meta_data_eyes %>%
  select("sampletype")

# Use rlog transformed data
rld_mat_eyes <- assay(rld_eyes)

rld_mat_eyes_tb <- rld_mat_eyes %>%
  as.data.frame() %>%
  rownames_to_column(var = "gene") %>% 
  as_tibble()

#Extract only the sig DE genes
rld_mat_eyes_heatmap <- rld_mat_eyes_tb %>% 
  filter(gene %in% sig_DE_eyes_ashr$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_eyes_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = T,
         annotation_col = meta_data_eyes_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of significantly DE genes in eyes")

Top 100 genes (sorted by adjusted p-value)

#Extract only the top 100 genes (by padj)
top100 <- res_eyes_shrunken_ashr_tb %>%
  arrange(padj) %>%
  slice_head(n = 100)

rld_mat_eyes_heatmap <- rld_mat_eyes_tb %>% 
  filter(gene %in% top100$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_eyes_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = F,
         annotation_col = meta_data_eyes_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of top 100 genes in eyes")

Plot gene expression

CNS

sig_DE_CNS_normalised_counts <- normalised_counts_CNS_tb %>%
  filter(gene %in% sig_DE_CNS_ashr$gene)

sig_DE_CNS_normalised_counts <- sig_DE_CNS_normalised_counts %>%
  gather(colnames(normalised_counts_CNS_tb)[2:21], key = "Squid", value = "normalised_counts")

# Combine with meta_data_CNS so can include treatment info in graph
sig_DE_CNS_normalised_counts_metadata <- (meta_data_CNS %>% rownames_to_column(var = "Squid")) %>%
  inner_join(sig_DE_CNS_normalised_counts)

# Dot plot
jitter <- position_jitter(width = 0.15, height = 0)
ggplot(sig_DE_CNS_normalised_counts_metadata, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_point(position = jitter) +
  scale_y_log10() +
  xlab("Genes") +
  ylab("log10 Normalized Counts") +
  ggtitle("All significantly DE genes in CNS") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))

# Box plot
ggplot(sig_DE_CNS_normalised_counts_metadata, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_boxplot() +
  scale_y_log10() +
  xlab("Genes") +
  ylab("log10 Normalized Counts") +
  ggtitle("All significantly DE genes in CNS") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))

All genes identified as significantly differentially expressed show clear DE

Eyes

sig_DE_eyes_normalised_counts <- normalised_counts_eyes_tb %>%
  filter(gene %in% sig_DE_eyes_ashr$gene)

sig_DE_eyes_normalised_counts <- sig_DE_eyes_normalised_counts %>%
  gather(colnames(normalised_counts_eyes_tb)[2:21], key = "Squid", value = "normalised_counts")

# Combine with meta_data_eyes so can include treatment info in graph
sig_DE_eyes_normalised_counts_metadata <- (meta_data_eyes %>% rownames_to_column(var = "Squid")) %>%
  inner_join(sig_DE_eyes_normalised_counts)

# Dot plot
jitter <- position_jitter(width = 0.15, height = 0)
# jitter log10 y
ggplot(sig_DE_eyes_normalised_counts_metadata, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_point(position = jitter) +
  #geom_text_repel(aes(label = Squid), size = 2, position = jitter) +
  scale_y_log10() +
  xlab("Genes") +
  ylab("log10 Normalized Counts") +
  ggtitle("All significantly DE genes in eyes") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))

## Warning: Transformation introduced infinite values in continuous y-axis

# Box plot
ggplot(sig_DE_eyes_normalised_counts_metadata, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_boxplot() +
  scale_y_log10() +
  xlab("Genes") +
  ylab("log10 Normalized Counts") +
  ggtitle("All significantly DE genes in eyes") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Removed 21 rows containing non-finite values (`stat_boxplot()`).

Most genes identified as significantly differentially expressed show clear DE
However, a few genes don’t show clear DE so look at these closer

# Look closer at genes that don't appear to be DE

# look at cluster-5283.10996
df <- sig_DE_eyes_normalised_counts_metadata %>%
  filter(gene == "Cluster-5283.10996")

ggplot(df, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_point(position = jitter) +
  geom_text_repel(aes(label = Squid), size = 2, position = jitter) +
  scale_y_log10() +
  xlab("Genes") +
  ylab("Normalized Counts") +
  ggtitle("Cluster-5283.10996") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))

## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# look at cluster-5744.0
df <- sig_DE_eyes_normalised_counts_metadata %>%
  filter(gene == "Cluster-5744.0")

ggplot(df, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_point(position = jitter) +
  geom_text_repel(aes(label = Squid), size = 2, position = jitter) +
  xlab("Genes") +
  ylab("Normalized Counts") +
  ggtitle("Cluster-5744.0 look at low counts") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5)) +
  coord_cartesian(ylim = c(0, 10))

# look at cluster-73.0
df <- sig_DE_eyes_normalised_counts_metadata %>%
  filter(gene == "Cluster-73.0")

ggplot(df, aes(x = gene, y = normalised_counts, color = sampletype)) +
  geom_point(position = jitter) +
  geom_text_repel(aes(label = Squid), size = 2, position = jitter) +
  xlab("Genes") +
  ylab("Normalized Counts") +
  ggtitle("Cluster-73.0 count not on log10 \nscale to look at low counts") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))

## Warning: ggrepel: 18 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Cluster-5283.10996 does not show clear DE
There is a lot of variation and overlap between CO2 levels
LFC is 1.5 but this is not clear by looking at the graph
Even outliers are matched - 2 ambient squid (PS514_eyes and PS518_eyes) with high expression and 2 elevated squid (PS527_eyes and PS636_eyes) with high expression
Cluster-5744.0 also does not show clear DE
There is a lot of variation and overlap between CO2 levels
LFC is -0.7 but this is not clear by looking at the graph
Genes with low expression show overlap between the two CO2 levels
Even outliers are matched - 2 ambient squid (PS514_eyes and PS518_eyes) with high expression and 2 elevated squid (PS527_eyes and PS636_eyes) with high expression
Cluster-73.0 DE looks driven by 2 outliers
All samples have 0 expression but PS534_eyes and PS506_eyes (both ambient) have very high expression.
LFC is -24: surprised shrinkage didn’t deal with this

Therefore visual inspection shows clear DE of all DE genes in the CNS, but in the eyes, 3 of the genes looks problematic - remove these

Final DE results

# 3 genes don't look DE when look at graphs - remove these
sig_DE_eyes_ashr_final <- annotated_res_eyes_shrunken_ashr_tb %>%
  filter(padj < 0.05) %>%
  filter(!gene %in% c("Cluster-5283.10996", "Cluster-5744.0", "Cluster-73.0"))
save(sig_DE_eyes_ashr_final, file = 'results/sig_DE_eyes_ashr_final.RData')

Venn diagram

DE_CNS <- sig_DE_CNS_ashr %>%
  distinct(gene)

DE_eyes <- sig_DE_eyes_ashr_final %>%
  distinct(gene)

x <- c(list(DE_CNS$gene), list(DE_eyes$gene))
names(x) <- c('CNS\n', 'Eyes\n')
ggvenn(x,
       show_percentage = FALSE,
       text_size = 0,
       fill_color = c('#238A8DFF', '#55C667FF'),
       fill_alpha = 0.8) +
  annotate('text', label = '12 up\n11 down', x = -1, y = 0) +
  annotate('text', label = '3 up\n3 down', x = 1, y = 0) +
  annotate('text', label = '2 up', x = 0, y = 0) +
  annotate('text', label = '25 DE genes', x = -1, y = 1.2) +
  annotate('text', label = '8 DE genes', x = 1, y = 1.2)

Heatmap

CNS

meta_data_CNS_heatmap <- meta_data_CNS %>%
  select("sampletype")

# Use rlog transformed data
rld_mat_CNS <- assay(rld_CNS)

rld_mat_CNS_tb <- rld_mat_CNS %>%
  as.data.frame() %>%
  rownames_to_column(var = "gene") %>% 
  as_tibble()

## ONLY SIG DE GENES ##
rld_mat_CNS_heatmap <- rld_mat_CNS_tb %>% 
  filter(gene %in% sig_DE_CNS_ashr$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_CNS_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = T,
         annotation_col = meta_data_CNS_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of significantly DE genes in CNS")

# change colour scheme with color = viridis(100)

## TOP 100 GENES

top100 <- res_CNS_shrunken_ashr_tb %>%
  arrange(padj) %>%
  slice_head(n = 100)

rld_mat_CNS_heatmap <- rld_mat_CNS_tb %>% 
  filter(gene %in% top100$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_CNS_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = F,
         annotation_col = meta_data_CNS_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of top 100 genes in CNS")

Eyes

meta_data_eyes_heatmap <- meta_data_eyes %>%
  select("sampletype")

# Use rlog transformed data
rld_mat_eyes <- assay(rld_eyes)

rld_mat_eyes_tb <- rld_mat_eyes %>%
  as.data.frame() %>%
  rownames_to_column(var = "gene") %>% 
  as_tibble()

## ONLY SIG DE GENES ##
rld_mat_eyes_heatmap <- rld_mat_eyes_tb %>% 
  filter(gene %in% sig_DE_eyes_ashr_final$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_eyes_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = T,
         annotation_col = meta_data_eyes_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of significantly DE genes in eyes")

## TOP 100 GENES ##

top100 <- res_eyes_shrunken_ashr_tb %>%
  arrange(padj) %>%
  slice_head(n = 100)

rld_mat_eyes_heatmap <- rld_mat_eyes_tb %>% 
  filter(gene %in% top100$gene) %>%
  column_to_rownames(var = "gene")

pheatmap(rld_mat_eyes_heatmap,
         cluster_rows = T, 
         clustering_method = "complete",
         show_rownames = F,
         annotation_col = meta_data_eyes_heatmap,
         border_color = NA, 
         fontsize = 10, 
         scale = "row", # scales the data to Z scores (by gene)
         fontsize_row = 10, 
         main = "Heatmap of top 100 genes in eyes")

Table of significantly DE genes

CNS

sig_DE_CNS_ashr %>%
  select(gene, Sequence.Description, padj, log2FoldChange, lfcSE) %>%
  arrange(-log2FoldChange) %>% 
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  kable(caption = 'Significantly DE genes in CNS')

Significantly DE genes in CNS
gene	Sequence.Description	padj	log2FoldChange	lfcSE
Cluster-5283.1476	protein FAM204A isoform X1	0.0044818	1.3394024	0.3855364
Cluster-7057.2	prickle-like protein 3 isoform X4	0.0340854	0.9631720	0.3995900
Cluster-5283.10634	mitogen-activated protein kinase kinase kinase kinase 5-like isoform X4	0.0385885	0.9215585	0.5599760
Cluster-5283.10634	mitogen-activated protein kinase kinase kinase kinase 3-like isoform X3	0.0385885	0.9215585	0.5599760
Cluster-7096.4	synaptobrevin homolog YKT6	0.0385885	0.8850380	0.4291597
Cluster-5283.4688	zinc finger protein 271-like	0.0385885	0.8581222	0.4381018
Cluster-5283.11841	chromatin accessibility complex protein 1	0.0457181	0.7541101	0.4949590
Cluster-6236.3	nucleoside diphosphate kinase 6-like	0.0385885	0.6277556	0.6588387
Cluster-5283.11516	Hypothetical predicted protein	0.0385885	0.5997121	0.2692648
Cluster-7175.0	—NA—	0.0385885	0.5955914	0.2308095
Cluster-5283.537	putative N-acetylated-alpha-linked acidic dipeptidase isoform X4	0.0385885	0.5832244	0.2579140
Cluster-7445.2	hypothetical protein OCBIM_22018451mg	0.0457181	0.5257109	0.6274280
Cluster-5590.1	transferrin-like protein	0.0385885	0.4800865	0.6305837
Cluster-5046.4	PREDICTED: uncharacterized protein LOC106875384 isoform X2	0.0385885	0.3107304	0.5556629
Cluster-2174.0	E3 ubiquitin-protein ligase synoviolin B-like	0.0385885	0.2741695	0.1471531
Cluster-5283.4947	—NA—	0.0340833	-0.3743971	0.1330268
Cluster-5283.6450	cytochrome b561 domain-containing protein 2	0.0385885	-0.4632560	0.6431749
Cluster-1257.0	Microtubule-associated proteins 1A/1B light chain 3B	0.0445542	-0.5404408	0.2593373
Cluster-1257.0	microtubule-associated proteins 1A/1B light chain 3A	0.0445542	-0.5404408	0.2593373
Cluster-5283.3831	proton myo-inositol cotransporter	0.0026007	-0.5563098	0.1130174
Cluster-7238.6	—NA—	0.0385885	-0.7648719	0.3262858
Cluster-5283.750	unnamed protein product	0.0340833	-0.7888475	0.6961008
Cluster-5283.3189	gamma-secretase subunit PEN-2	0.0385885	-0.7952439	0.4251421
Cluster-5290.5	cadherin EGF LAG seven-pass G-type receptor 3	0.0385885	-0.8019407	0.5279131
Cluster-5283.883	methylcrotonoyl-CoA carboxylase beta chain, mitochondrial	0.0340833	-0.8088622	0.2704246
Cluster-5283.2490	—NA—	0.0385885	-0.8291220	0.5330048
Cluster-3464.0	Hypothetical predicted protein	0.0340833	-0.9442758	0.3183627

Eyes

sig_DE_eyes_ashr_final %>%
  select(gene, Sequence.Description, padj, log2FoldChange) %>%
  arrange(-log2FoldChange) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  kable(caption = 'Significantly DE genes in eyes - final set (3 genes  removed due to not showing clear DE upon visual inspection of the normalised counts across CO2 levels')

Significantly DE genes in eyes - final set (3 genes removed due to not showing clear DE upon visual inspection of the normalised counts across CO2 levels
gene	Sequence.Description	padj	log2FoldChange
Cluster-5283.5132	probable flavin-containing monoamine oxidase A	0.0048141	2.2916297
Cluster-1772.0	synaptic vesicular amine transporter	0.0116265	2.0443468
Cluster-5283.1476	protein FAM204A isoform X1	0.0116265	1.4711580
Cluster-7096.4	synaptobrevin homolog YKT6	0.0000059	1.2616376
Cluster-7187.3	cystathionine beta-synthase	0.0383992	0.8890046
Cluster-5283.10628	transcription initiation factor IIE subunit beta	0.0224947	-0.9911208
Cluster-5283.11079	—NA—	0.0183964	-1.4212532
Cluster-1000.0	N-acetylmuramoyl-L-alanine amidase	0.0016092	-2.8983839

Significantly DE in both tissues

CNS <- sig_DE_CNS_ashr %>%
  select(gene, transcript, Sequence.Description, padj, log2FoldChange) %>%
  rename(padj_CNS = padj,
         log2FoldChange_CNS = log2FoldChange)

eyes <- sig_DE_eyes_ashr_final %>%
  select(gene, transcript, Sequence.Description, padj, log2FoldChange) %>%
  rename(padj_eyes = padj,
         log2FoldChange_eyes = log2FoldChange)

CNS_eyes <- CNS %>%
  filter(gene %in% eyes$gene)

eyes_CNS <- eyes %>%
  filter(gene %in% CNS$gene)

sig_DE_CNS_and_eyes_ashr <- CNS_eyes %>%
  full_join(eyes_CNS)

save(sig_DE_CNS_and_eyes_ashr, file = 'results/sig_DE_CNS_and_eyes_ashr.RData')

sig_DE_CNS_and_eyes_ashr %>%
  select(gene, Sequence.Description, padj_CNS, log2FoldChange_CNS, padj_eyes, log2FoldChange_eyes) %>%
  kable(caption = 'Genes significantly DE in both CNS and eyes')

Genes significantly DE in both CNS and eyes
gene	Sequence.Description	padj_CNS	log2FoldChange_CNS	padj_eyes	log2FoldChange_eyes
Cluster-5283.1476	protein FAM204A isoform X1	0.0044818	1.339402	0.0116265	1.471158
Cluster-7096.4	synaptobrevin homolog YKT6	0.0385885	0.885038	0.0000059	1.261638

Functional Enrichment Analysis

Prepare data

Prep transcriptome

# Remove letters form gene names as struggles with this
annotated_transcriptome_rename <- annotated_transcriptome %>%
  rename(transcript = Sequence.Name) %>% 
  mutate(transcript = gsub('transcript/', '', transcript))
save(annotated_transcriptome_rename, file = 'data/annotated_transcriptome_rename.RData')

corset_clusters_2_transcripts_renamed <- corset_clusters_2_transcripts %>%
  rename(transcript = TXNAME,
         gene = GENEID) %>% 
  mutate(transcript = gsub('transcript/', '', transcript),
         gene = gsub('Cluster-', '', gene))
save(corset_clusters_2_transcripts_renamed, file = 'data/corset_clusters_2_transcripts_renamed.RData')

Prep CNS data

# Remove letters form gene names as struggles with this
res_CNS_shrunken_ashr_tb_rename <- res_CNS_shrunken_ashr_tb %>%
  mutate(gene = gsub('Cluster-', '', gene))

# Create gene list - assumes that 1st column is ID and 2nd column is fold change
res_CNS <- res_CNS_shrunken_ashr_tb_rename %>%
  select(gene, log2FoldChange) %>% 
  as.data.frame()
gene_LFC_CNS_ashr <- res_CNS[,2]
names(gene_LFC_CNS_ashr) <- as.character(res_CNS[,1])
gene_LFC_CNS_ashr <- sort(gene_LFC_CNS_ashr, decreasing = TRUE)
head(gene_LFC_CNS_ashr)
save(gene_LFC_CNS_ashr, file = 'results/gene_LFC_CNS_ashr.RData')

Prep eyes data

# Remove letters form gene names as struggles with this
res_eyes_shrunken_ashr_tb_rename <- res_eyes_shrunken_ashr_tb %>%
  mutate(gene = gsub('Cluster-', '', gene))

# Create gene list - assumes that 1st column is ID and 2nd column is fold change
res_eyes <- res_eyes_shrunken_ashr_tb_rename %>%
  select(gene, log2FoldChange) %>% 
  as.data.frame()
gene_LFC_eyes_ashr <- res_eyes[,2]
names(gene_LFC_eyes_ashr) <- as.character(res_eyes[,1])
gene_LFC_eyes_ashr <- sort(gene_LFC_eyes_ashr, decreasing = TRUE)
head(gene_LFC_eyes_ashr)
save(gene_LFC_eyes_ashr, file = 'results/gene_LFC_eyes_ashr.RData')

GO terms data for CNS

# ANNOTATION GO TERMS
GO_annot_data_CNS <- res_CNS_shrunken_ashr_tb_rename %>%
  full_join(corset_clusters_2_transcripts_renamed) %>%
  inner_join(annotated_transcriptome_rename) %>%
  select(Annotation.GO.ID, gene, Annotation.GO.Term)

#Change df format so one GO ID per row so a transcript with multiple GO IDs takes multiple rows
GO_annot_data_CNS_split <- cSplit(GO_annot_data_CNS, c('Annotation.GO.ID', 'Annotation.GO.Term'), ';', direction = "long") 

# TERM2GENE is a data.frame with first column of (GO) term ID and second column of corresponding mapped gene 
term2gene_annot_CNS <- GO_annot_data_CNS_split %>%
  select(Annotation.GO.ID, gene) %>%
  as.data.frame()
save(term2gene_annot_CNS, file = 'data/term2gene_annot_CNS.RData')

# TERM2NAME is a data.frame with first column of (GO) term ID and second column of corresponding term name
term2name_annot_CNS <- GO_annot_data_CNS_split %>%
  select(Annotation.GO.ID, Annotation.GO.Term) %>% 
  as.data.frame()
save(term2name_annot_CNS, file = 'data/term2name_annot_CNS.RData')

GO terms data for eyes

# ANNOTATION GO TERMS
GO_annot_data_eyes <- res_eyes_shrunken_ashr_tb_rename %>%
  full_join(corset_clusters_2_transcripts_renamed) %>%
  inner_join(annotated_transcriptome_rename) %>%
  select(Annotation.GO.ID, gene, Annotation.GO.Term)

#Change df format so one GO ID per row so a transcript with multiple GO IDs takes multiple rows
GO_annot_data_eyes_split <- cSplit(GO_annot_data_eyes, c('Annotation.GO.ID', 'Annotation.GO.Term'), ';', direction = "long")

# TERM2GENE is a data.frame with first column of (GO) term ID and second column of corresponding mapped gene 
term2gene_annot_eyes <- GO_annot_data_eyes_split %>%
  select(Annotation.GO.ID, gene) %>%
  as.data.frame()
save(term2gene_annot_eyes, file = 'data/term2gene_annot_eyes.RData')

# TERM2NAME is a data.frame with first column of (GO) term ID and second column of corresponding term name
term2name_annot_eyes <- GO_annot_data_eyes_split %>%
  select(Annotation.GO.ID, Annotation.GO.Term) %>% 
  as.data.frame()
save(term2name_annot_eyes, file = 'data/term2name_annot_eyes.RData')

Gene set enrichment analysis

Gene set enrichment analysis using the log2FoldChange values of all genes from DESeq2 and the annotated GO terms as the gene sets’
GSEA uses all genes and aggregates the per gene statistics across genes within a gene set, therefore making it possible to detect situations where all genes in a predefined set change in a small but coordinated way. Useful since it is likely that many relevant phenotypic differences are manifested by small but consistent changes in a set of genes.
Using GSEA in clusterProfiler which utilises fgsea, minimum gene set size = 15, max gene set size = 500, unweighted, benjamini-hochberg method for multiple comparisons, adjusted p value cut off of 0.05

CNS

# Will get different results for the GSEA because the permutations performed use random reordering - use set.seed for reproducibility
set.seed(83276453)
gsea_CNS_ashr_annot_exp0 <- GSEA(
  geneList = gene_LFC_CNS_ashr,
  exponent = 0,             # weight of each step (0 = same as classic in omicsbox)
  minGSSize = 15,           # change from default 10 to 15 (what used in omicsbox and suggested in Subramanian 2005)
  maxGSSize = 500,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  TERM2GENE = term2gene_annot_CNS,
  TERM2NAME = term2name_annot_CNS,
  verbose = TRUE,
  seed = TRUE,
  by = "fgsea"
)
# Warning messages: 1: In preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, There are ties in the preranked stats (0.01% of the list). The order of those tied genes will be arbitrary, which may produce unexpected results.
save(gsea_CNS_ashr_annot_exp0, file = 'results/gsea_CNS_ashr_annot_exp0.RData')

# extract dataframe from gsea object
gsea_CNS_ashr_annot_exp0_summary <- fortify(
  gsea_CNS_ashr_annot_exp0,
  showCategory = 300,
  by = "Count"
)
save(gsea_CNS_ashr_annot_exp0_summary, file = 'results/gsea_CNS_ashr_annot_exp0_summary.RData')

Eyes

# Will get different results for the GSEA because the permutations performed use random reordering - use set.seed fpr reproducibility
set.seed(83276453)
gsea_eyes_ashr_annot_exp0 <- GSEA(
  geneList = gene_LFC_eyes_ashr,
  exponent = 0,             # weight of each step (0 = same as classic in omicsbox)
  minGSSize = 15,           # change from default 10 to 15 (what used in omicsbox and suggested in Subramanian 2005)
  maxGSSize = 500,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  TERM2GENE = term2gene_annot_eyes,
  TERM2NAME = term2name_annot_eyes,
  verbose = TRUE,
  seed = TRUE,
  by = "fgsea"
)
# Warning messages: 1: In preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, There are ties in the preranked stats (0.01% of the list). The order of those tied genes will be arbitrary, which may produce unexpected results.
save(gsea_eyes_ashr_annot_exp0, file = 'results/gsea_eyes_ashr_annot_exp0.RData')

# extract dataframe from gsea object
gsea_eyes_ashr_annot_exp0_summary <- fortify(
  gsea_eyes_ashr_annot_exp0,
  showCategory = 300,
  by = "Count"
)
save(gsea_eyes_ashr_annot_exp0_summary, file = 'results/gsea_eyes_ashr_annot_exp0_summary.RData')

Dot plot

# Plot all GO terms for CNS and eyes on same graph

# CNS and eyes share same y axis on dotplot - want to order by NES of CNS, and show overlaps of GO terms found in CNS and eyes
# Below method so GO terms in both tissues are not twice on the y axis
CNS <- gsea_CNS_ashr_annot_exp0_summary %>%
  mutate(tissue = "CNS")
eyes <- gsea_eyes_ashr_annot_exp0_summary %>%
  mutate(tissue = "eyes")

CNS_NES <- CNS %>%
  select(ID, Description, NES) %>%
  rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_NES <- eyes %>%
  select(ID, Description, NES) %>% 
  rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_NES <- CNS_NES %>%
  full_join(eyes_NES) %>%
  ungroup() %>% 
  arrange(NES_CNS, NES_eyes) %>%
  mutate(order_NES = as.factor(row_number())) %>%
  pivot_longer(c(NES_CNS, NES_eyes), names_to = "tissue", values_to = "NES") %>%
  mutate(tissue = gsub("NES_", "", tissue)) %>%
  arrange(NES)


CNS_p.adjust <- CNS %>%
  select(ID, Description, p.adjust) %>%
  rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_p.adjust <- eyes %>%
  select(ID, Description, p.adjust) %>% 
  rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_p.adjust <- CNS_p.adjust %>%
  full_join(eyes_p.adjust) %>% 
  ungroup() %>% 
  arrange(p.adjust_CNS) %>%
  mutate(order_p.adjust = as.factor(row_number())) %>% 
  pivot_longer(c(p.adjust_CNS, p.adjust_eyes), names_to = "tissue", values_to = "p.adjust") %>%
  mutate(tissue = gsub("p.adjust_", "", tissue)) %>%
  arrange(Description)

CNS_Count <- CNS %>%
  select(ID, Description, Count) %>%
  rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_Count <- eyes %>%
  select(ID, Description, Count) %>% 
  rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_Count <- CNS_Count %>%
  full_join(eyes_Count) %>% 
  ungroup() %>% 
  arrange(Count_CNS) %>%
  mutate(order_Count = as.factor(row_number())) %>% 
  pivot_longer(c(Count_CNS, Count_eyes), names_to = "tissue", values_to = "Count") %>%
  mutate(tissue = gsub("Count_", "", tissue))

df_all <- df_NES %>%
  full_join(df_p.adjust) %>%
  full_join(df_Count) %>%
  mutate(Description_wrapped = str_wrap(Description, width = 30))

ggplot(df_all, aes(x = NES, y = order_NES, colour = p.adjust, size = Count)) +
  geom_point() +
  scale_y_discrete(
    breaks = df_all$order_NES,
    labels = df_all$Description_wrapped) +
  ggplot2::facet_grid(. ~ tissue) +
  scale_colour_viridis_c(direction = -1) +
  geom_vline(xintercept = 0) +
  xlab("Normalised enrichment score") +
  ylab("") +
  labs(colour = "adjusted p value", size = "No. core enrichment\ngenes in GO term", title = "GSEA - significant GO terms")

## Warning: Removed 96 rows containing missing values (`geom_point()`).

CNS = 99 enriched GO terms in CNS
- 75 upregulated
- 24 downregulated
Eyes = 17 enriched GO terms in eyes
- 12 upregulated
- 5 downregulated
10 GO terms enriched in both tissues

Core enrichment genes

Pull out all core enrichment genes from an interesting cluster of GO terms that were found significant with gene set enrichment analysis

Prep data CNS

gsea_CNS_transcripts <- cSplit(gsea_CNS_ashr_annot_exp0_summary, 'core_enrichment', '/', direction = "long", type.convert = FALSE) %>%
  rename_with(~paste0(., "_gsea")) %>% 
  rename(gene = core_enrichment_gsea)

annotated_res_CNS_shrunken_ashr_tb <- annotated_res_CNS_shrunken_ashr_tb %>% 
  select(gene, transcript, Sequence.Description, log2FoldChange, lfcSE, padj) %>%
  rename(log2FoldChange_ashr = log2FoldChange,
         lfcSE_ashr = lfcSE,
         padj_ashr = padj) %>%
  mutate(gene = gsub('Cluster-', '', gene),
         transcript = gsub("transcript/", "", transcript))

gsea_exp0_transcripts_GO_CNS <- gsea_CNS_transcripts %>%
  full_join(annotated_res_CNS_shrunken_ashr_tb)
save(gsea_exp0_transcripts_GO_CNS, file = 'results/gsea_exp0_transcripts_GO_CNS.RData')

Prep data eyes

gsea_eyes_transcripts <- cSplit(gsea_eyes_ashr_annot_exp0_summary, 'core_enrichment', '/', direction = "long", type.convert = FALSE) %>%
  rename_with(~paste0(., "_gsea")) %>% 
  rename(gene = core_enrichment_gsea)

annotated_res_eyes_shrunken_ashr_tb <- annotated_res_eyes_shrunken_ashr_tb %>% 
  select(gene, transcript, Sequence.Description, log2FoldChange, lfcSE, padj) %>%
  rename(log2FoldChange_ashr = log2FoldChange,
         lfcSE_ashr = lfcSE,
         padj_ashr = padj) %>%
  mutate(gene = gsub('Cluster-', '', gene),
         transcript = gsub("transcript/", "", transcript))

gsea_exp0_transcripts_GO_eyes <- gsea_eyes_transcripts %>%
  full_join(annotated_res_eyes_shrunken_ashr_tb)
save(gsea_exp0_transcripts_GO_eyes, file = 'results/gsea_exp0_transcripts_GO_eyes.RData')

Ion channel cluster of GO terms

All core enrichment genes from the cluster of GO terms related to ion channels, that were found significant with gene set enrichment analysis

# Pull out genes in the ion channel cluster of GO terms
ion_channel_cluster_CNS <- gsea_exp0_transcripts_GO_CNS %>%
  select(gene, transcript, Sequence.Description, Description_gsea) %>%
  filter(Description_gsea %in% c('ionotropic glutamate receptor signaling pathway', 'ionotropic glutamate receptor activity', 'postsynaptic membrane', 'excitatory postsynaptic potential', 'ion transmembrane transport', 'acetylcholine-gated cation-selective channel activity', 'transmembrane signaling receptor activity', 'extracellular ligand-gated ion channel activity', 'synapse')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

ion_channel_cluster_eyes <- gsea_exp0_transcripts_GO_eyes %>%
  select(gene, transcript, Sequence.Description, Description_gsea) %>%
  filter(Description_gsea %in% c('ion transmembrane transport', 'ion channel activity')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

ion_channel_cluster_CNS_eyes <- ion_channel_cluster_CNS %>%
  filter(gene %in% ion_channel_cluster_eyes$gene)

save(ion_channel_cluster_CNS, ion_channel_cluster_eyes, ion_channel_cluster_CNS_eyes, file = 'results/ion_channel_cluster.RData')

CNS

All core enrichment genes from the cluster of GO terms related to ion channels that were found significant with gene set enrichment analysis in the CNS.
This cluster of GO terms includes the following GO terms:
- ionotropic glutamate receptor signaling pathway
- ionotropic glutamate receptor activity
- postsynaptic membrane
- excitatory postsynaptic potential
- ion transmembrane transport
- acetylcholine-gated cation-selective channel activity
- transmembrane signaling receptor activity
- extracellular ligand-gated ion channel activity
- synapse

ion_channel_cluster_CNS %>%
  select(gene, Sequence.Description) %>%
  kable(caption = 'Core enrichment genes within significantly enriched GO terms of the ion channel cluster in the CNS')

Core enrichment genes within significantly enriched GO terms of the ion channel cluster in the CNS
gene	Sequence.Description
5283.10289	—NA—
5283.7784	—NA—
5283.10291	—NA—
3318.0	—NA—
5283.9460	—NA—
5283.4105	—NA—
5283.9217	—NA—
5661.1	acetylcholine receptor subunit alpha-like 1 isoform X1
5661.0	acetylcholine receptor subunit alpha-like isoform X2
5283.4487	acetylcholine receptor subunit beta-like 1 isoform X2
5283.4488	acetylcholine receptor subunit beta-like 1 isoform X2
5283.4489	acetylcholine receptor subunit beta-like 1 isoform X2
5283.7592	cyclic nucleotide-gated cation channel alpha-3
5283.7592	cyclic nucleotide-gated cation channel alpha-3-like
6610.0	cyclic nucleotide-gated cation channel beta-3-like isoform X1
6610.2	cyclic nucleotide-gated cation channel beta-3-like isoform X2
6610.0	cyclic nucleotide-gated cation channel beta-3-like isoform X7
5283.9458	cyclic nucleotide-gated olfactory channel-like
5283.9460	cyclic nucleotide-gated olfactory channel-like
7253.3	Cys-loop ligand-gated ion channel
7253.4	cys-loop ligand-gated ion channel-like isoform X2
7253.1	cys-loop ligand-gated ion channel-like isoform X2
7253.0	cys-loop ligand-gated ion channel-like isoform X2
7253.2	cys-loop ligand-gated ion channel-like isoform X2
7253.3	cys-loop ligand-gated ion channel-like isoform X2
7253.6	cys-loop ligand-gated ion channel-like isoform X3
7253.5	cys-loop ligand-gated ion channel-like isoform X3
5517.0	dnaJ homolog subfamily C member 5
5517.0	dnaJ homolog subfamily C member 5-like isoform X1
5517.0	dnaJ homolog subfamily C member 5-like isoform X2
5517.0	dnaJ homolog subfamily C member 5 isoform X1
5057.2	excitatory amino acid transporter 1-like
5057.1	excitatory amino acid transporter 1-like
5283.8663	F-box/SPRY domain-containing protein 1
5283.8664	F-box/SPRY domain-containing protein 1
5283.8665	F-box/SPRY domain-containing protein 1-like
5283.9038	gamma-aminobutyric acid receptor alpha-like
3318.0	gamma-aminobutyric acid receptor alpha-like
5283.9039	gamma-aminobutyric acid receptor alpha-like
3050.4	glutamate-gated chloride channel isoform X5
5283.11461	glutamate receptor
602.0	glutamate receptor-like
5283.11461	glutamate receptor-like
5283.4759	glutamate receptor-like
5283.8285	glutamate receptor 1-like
5283.4759	glutamate receptor 1-like isoform X1
5283.4759	glutamate receptor 2-like
5283.4759	glutamate receptor 2 isoform X2
5283.4759	glutamate receptor 3 isoform X1
5283.8285	glutamate receptor 4
5283.3373	glutamate receptor ionotropic, kainate 2-like
5283.3372	glutamate receptor ionotropic, kainate 2-like
5283.4614	glutamate receptor ionotropic, kainate 2-like
5283.4105	glutamate receptor ionotropic, kainate 2-like
5283.5622	glutamate receptor ionotropic, kainate 2-like
7335.0	glutamate receptor ionotropic, kainate 2-like isoform X1
7335.3	glutamate receptor ionotropic, kainate 2-like isoform X1
2901.0	glutamate receptor ionotropic, kainate 2-like isoform X1
6740.0	glutamate receptor ionotropic, kainate 2-like isoform X1
7335.4	glutamate receptor ionotropic, kainate 2-like isoform X1
7335.5	glutamate receptor ionotropic, kainate 2-like isoform X1
6740.1	glutamate receptor ionotropic, kainate 2-like isoform X1
6740.2	glutamate receptor ionotropic, kainate 2-like isoform X1
7335.6	glutamate receptor ionotropic, kainate 2-like isoform X1
7335.7	glutamate receptor ionotropic, kainate 2-like isoform X1
7335.1	glutamate receptor ionotropic, kainate 2-like isoform X1
6740.4	glutamate receptor ionotropic, kainate 2-like isoform X2
6740.3	glutamate receptor ionotropic, kainate 2 isoform X1
1742.0	glutamate receptor ionotropic, NMDA 2B-like
4067.1	glutamate receptor ionotropic, NMDA 3A-like
3215.0	glutamate receptor ionotropic, NMDA 3A-like
3215.1	glutamate receptor ionotropic, NMDA 3A-like
4067.0	glutamate receptor ionotropic, NMDA 3A-like
3215.2	glutamate receptor ionotropic, NMDA 3A-like isoform X2
7335.2	glutamate receptor subunit protein GluR6
3050.1	glycine receptor subunit alpha-3 isoform X2
3050.0	glycine receptor subunit alphaZ1-like
2499.0	glycine receptor subunit alphaZ1-like
5283.8285	GRIA4 protein
5283.2592	HCN channel protein
5283.2593	HCN channel protein
5283.9211	Hypothetical predicted protein
5283.9212	Hypothetical predicted protein
7253.3	Hypothetical predicted protein
7211.2	Hypothetical predicted protein
5283.1960	inositol 1,4,5-trisphosphate receptor type 1-like isoform X2
7360.0	intersectin-1-like isoform X1
7360.0	intersectin-1-like isoform X2
7360.0	intersectin-1-like isoform X3
7360.0	intersectin-1 isoform X10
7360.0	intersectin-1 isoform X13
7360.0	intersectin-1 isoform X8
7211.2	liprin-alpha-1-like isoform X2
7211.2	liprin-alpha-1-like isoform X3
7211.2	liprin-alpha-1-like isoform X6
7211.2	liprin-alpha-1-like isoform X9
7211.2	liprin-alpha-1 isoform X3
7211.2	liprin-alpha-1 isoform X4
7211.3	liprin-alpha-1 isoform X8
7211.2	liprin-alpha-1 isoform X8
6774.1	muscarinic acetylcholine receptor M1-like
6774.2	muscarinic acetylcholine receptor M2-like protein
5283.11306	Na+/K+ ATPase beta subunit
5283.5376	neuronal acetylcholine receptor subunit alpha-10-like
5283.5377	neuronal acetylcholine receptor subunit alpha-10-like
6426.1	neuronal acetylcholine receptor subunit alpha-10-like
6426.4	neuronal acetylcholine receptor subunit alpha-10-like
6426.7	neuronal acetylcholine receptor subunit alpha-10-like
6426.8	neuronal acetylcholine receptor subunit alpha-10-like
5283.7784	neuronal acetylcholine receptor subunit alpha-10-like
5283.3847	neuronal acetylcholine receptor subunit alpha-10-like
5505.0	neuronal acetylcholine receptor subunit alpha-10-like
6426.10	neuronal acetylcholine receptor subunit alpha-10-like
6426.3	neuronal acetylcholine receptor subunit alpha-10-like
3136.0	neuronal acetylcholine receptor subunit alpha-10-like
6426.6	neuronal acetylcholine receptor subunit alpha-10-like
6426.5	neuronal acetylcholine receptor subunit alpha-10-like
6423.0	neuronal acetylcholine receptor subunit alpha-10-like
5283.3849	neuronal acetylcholine receptor subunit alpha-10-like
5283.4563	neuronal acetylcholine receptor subunit alpha-10-like
5283.4564	neuronal acetylcholine receptor subunit alpha-10-like
5283.4565	neuronal acetylcholine receptor subunit alpha-10-like
6426.0	neuronal acetylcholine receptor subunit alpha-10-like
6426.9	neuronal acetylcholine receptor subunit alpha-10-like
5283.4560	neuronal acetylcholine receptor subunit alpha-10-like
5283.10464	neuronal acetylcholine receptor subunit alpha-10-like
5283.4562	neuronal acetylcholine receptor subunit alpha-10-like
6426.2	neuronal acetylcholine receptor subunit alpha-10-like
6423.1	neuronal acetylcholine receptor subunit alpha-10-like
5505.0	neuronal acetylcholine receptor subunit alpha-10-like isoform X1
5283.997	neuronal acetylcholine receptor subunit alpha-10-like isoform X1
5283.2323	neuronal acetylcholine receptor subunit alpha-10-like isoform X3
5283.997	neuronal acetylcholine receptor subunit alpha-10 isoform X1
5283.8846	neuronal acetylcholine receptor subunit alpha-3-like
5283.8847	neuronal acetylcholine receptor subunit alpha-3-like
5283.8850	neuronal acetylcholine receptor subunit alpha-3-like
5283.8849	neuronal acetylcholine receptor subunit alpha-3-like
5283.8848	neuronal acetylcholine receptor subunit alpha-3-like
5283.8851	neuronal acetylcholine receptor subunit alpha-3-like
878.0	neuronal acetylcholine receptor subunit alpha-5-like
5283.10289	neuronal acetylcholine receptor subunit alpha-5-like isoform X1
5283.10287	neuronal acetylcholine receptor subunit alpha-5-like isoform X2
5283.10291	neuronal acetylcholine receptor subunit alpha-5-like isoform X2
5283.10286	neuronal acetylcholine receptor subunit alpha-5-like isoform X2
5283.10288	neuronal acetylcholine receptor subunit alpha-5-like isoform X2
5283.10467	neuronal acetylcholine receptor subunit alpha-6-like
5283.10753	neuronal acetylcholine receptor subunit alpha-7 isoform X1
5283.9321	PDZ domain-containing protein 11
2261.1	phosphatidylinositol 3,4,5-trisphosphate 3-phosphatase TPTE2-like isoform X3
4605.3	piezo-type mechanosensitive ion channel component 1-like isoform X1
1571.0	potassium voltage-gated channel subfamily H member 6 isoform X1
5283.2593	potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 2-like
5283.2593	potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X2
5283.2593	potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X4
5283.11306	probable sodium/potassium-transporting ATPase subunit beta-3
5283.7180	Protein unc-13 B
5283.7180	protein unc-13 homolog A-like
5283.7180	protein unc-13 homolog B-like isoform X1
5283.7180	protein unc-13 homolog B-like isoform X2
5283.7177	protein unc-13 homolog B-like isoform X3
5283.7180	protein unc-13 homolog B-like isoform X4
5283.7180	protein unc-13 homolog B isoform X1
5283.7476	Receptor-type tyrosine-protein phosphatase N2
5283.7474	Receptor-type tyrosine-protein phosphatase N2
5283.7476	receptor-type tyrosine-protein phosphatase N2-like
6010.2	sideroflexin-1
6010.1	sideroflexin-1
6165.7	sideroflexin-2
4048.1	sideroflexin-5 isoform X1
5283.9214	sodium/potassium/calcium exchanger 2-like isoform X2
5283.9214	sodium/potassium/calcium exchanger Nckx30C
5283.9214	sodium/potassium/calcium exchanger Nckx30C-like
5283.9211	sodium/potassium/calcium exchanger Nckx30C isoform X2
5899.6	synaptosomal-associated protein 25
5899.7	synaptosomal-associated protein 25
5899.5	synaptosomal-associated protein 25
5899.1	synaptosomal-associated protein 25
6236.5	synaptosomal-associated protein 25
6236.6	synaptosomal-associated protein 25
5899.3	synaptosomal-associated protein 25
5899.0	synaptosomal-associated protein 25-like isoform X2
6236.6	synaptosomal-associated protein 25 isoform X1
5283.6899	syntenin-1
5283.6896	syntenin-1
5283.4554	toll-like receptor Tollo
5283.3487	toll-like receptor Tollo
5283.4187	toll-like receptor Tollo
5283.1236	transient receptor potential cation channel subfamily M member 3-like
3379.0	transient receptor potential cation channel subfamily V member 5-like
4897.2	Translational regulator orb2
5359.2	Two pore calcium channel protein 1
5359.2	two pore calcium channel protein 1-like
4307.0	tyrosine-protein kinase transmembrane receptor ROR1-like isoform X1
5747.0	tyrosine-protein kinase transmembrane receptor Ror2-like
5283.10289	uncharacterized protein LOC118470684 isoform X6
5283.9217	vesicle-associated membrane protein/synaptobrevin-binding protein
5283.9217	vesicle-associated membrane protein/synaptobrevin-binding protein-like isoform X1
5283.2593	voltage-activated ion channel, putative

Eyes

All core enrichment genes from the cluster of GO terms related to ion channels that were found significant with gene set enrichment analysis in the eyes.
This cluster of GO terms includes the following GO terms:
- ion transmembrane transport
- ion channel activity

ion_channel_cluster_eyes %>%
  select(gene, Sequence.Description) %>%
  kable(caption = 'Core enrichment genes within significantly enriched GO terms of the ion channel cluster in the eyes')

Core enrichment genes within significantly enriched GO terms of the ion channel cluster in the eyes
gene	Sequence.Description
5283.9460	—NA—
5283.7784	—NA—
3318.0	—NA—
5661.1	acetylcholine receptor subunit alpha-like 1 isoform X1
5661.0	acetylcholine receptor subunit alpha-like isoform X2
5283.4488	acetylcholine receptor subunit beta-like 1 isoform X2
5283.4489	acetylcholine receptor subunit beta-like 1 isoform X2
5283.7592	cyclic nucleotide-gated cation channel alpha-3
5283.7592	cyclic nucleotide-gated cation channel alpha-3-like
6610.0	cyclic nucleotide-gated cation channel beta-3-like isoform X1
6610.1	cyclic nucleotide-gated cation channel beta-3-like isoform X1
6610.2	cyclic nucleotide-gated cation channel beta-3-like isoform X2
6610.0	cyclic nucleotide-gated cation channel beta-3-like isoform X7
6610.1	cyclic nucleotide-gated cation channel beta-3-like isoform X7
5283.9460	cyclic nucleotide-gated olfactory channel-like
5283.9458	cyclic nucleotide-gated olfactory channel-like
7253.3	Cys-loop ligand-gated ion channel
7253.3	cys-loop ligand-gated ion channel-like isoform X2
7253.4	cys-loop ligand-gated ion channel-like isoform X2
7253.0	cys-loop ligand-gated ion channel-like isoform X2
7253.6	cys-loop ligand-gated ion channel-like isoform X3
5283.9038	gamma-aminobutyric acid receptor alpha-like
5283.9040	gamma-aminobutyric acid receptor alpha-like
3318.0	gamma-aminobutyric acid receptor alpha-like
4411.0	glutamate [NMDA] receptor subunit 1-like isoform X1
5283.8285	glutamate receptor 1-like
5283.8285	glutamate receptor 4
6740.0	glutamate receptor ionotropic, kainate 2-like isoform X1
7335.0	glutamate receptor ionotropic, kainate 2-like isoform X1
7335.6	glutamate receptor ionotropic, kainate 2-like isoform X1
7335.1	glutamate receptor ionotropic, kainate 2-like isoform X1
7335.3	glutamate receptor ionotropic, kainate 2-like isoform X1
2901.0	glutamate receptor ionotropic, kainate 2-like isoform X1
6740.2	glutamate receptor ionotropic, kainate 2-like isoform X1
6740.3	glutamate receptor ionotropic, kainate 2 isoform X1
1742.0	glutamate receptor ionotropic, NMDA 2B-like
4067.0	glutamate receptor ionotropic, NMDA 3A-like
3215.2	glutamate receptor ionotropic, NMDA 3A-like isoform X2
2499.0	glycine receptor subunit alphaZ1-like
5283.8285	GRIA4 protein
5283.2593	HCN channel protein
5283.2592	HCN channel protein
7253.3	Hypothetical predicted protein
5496.1	muscle calcium channel subunit alpha-1-like isoform X1
6426.1	neuronal acetylcholine receptor subunit alpha-10-like
6426.10	neuronal acetylcholine receptor subunit alpha-10-like
6423.3	neuronal acetylcholine receptor subunit alpha-10-like
6426.7	neuronal acetylcholine receptor subunit alpha-10-like
6426.5	neuronal acetylcholine receptor subunit alpha-10-like
3136.0	neuronal acetylcholine receptor subunit alpha-10-like
5283.5377	neuronal acetylcholine receptor subunit alpha-10-like
5283.7784	neuronal acetylcholine receptor subunit alpha-10-like
5283.3848	neuronal acetylcholine receptor subunit alpha-10-like
6426.6	neuronal acetylcholine receptor subunit alpha-10-like
5283.10464	neuronal acetylcholine receptor subunit alpha-10-like
5283.995	neuronal acetylcholine receptor subunit alpha-10-like isoform X1
5283.996	neuronal acetylcholine receptor subunit alpha-10-like isoform X1
5283.994	neuronal acetylcholine receptor subunit alpha-10-like isoform X1
5283.2323	neuronal acetylcholine receptor subunit alpha-10-like isoform X3
5283.8846	neuronal acetylcholine receptor subunit alpha-3-like
5283.8847	neuronal acetylcholine receptor subunit alpha-3-like
878.0	neuronal acetylcholine receptor subunit alpha-5-like
5283.10290	neuronal acetylcholine receptor subunit alpha-5-like isoform X1
5283.10467	neuronal acetylcholine receptor subunit alpha-6-like
2261.2	phosphatidylinositol 3,4,5-trisphosphate 3-phosphatase TPTE2-like isoform X3
4605.3	piezo-type mechanosensitive ion channel component 1-like isoform X1
5283.2593	potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 2-like
5283.2593	potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X2
5283.2593	potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X4
5283.9459	serine-rich adhesin for platelets-like
6010.2	sideroflexin-1
6010.0	sideroflexin-1
6165.9	sideroflexin-2
5283.8067	transient receptor potential-gamma protein-like
5283.8067	transient receptor potential-gamma protein-like isoform X1
5283.2593	voltage-activated ion channel, putative

Both tissues

All core enrichment genes from the cluster of GO terms related to ion channels, that were found significant with gene set enrichment analysis in both the CNS and eyes.

ion_channel_cluster_CNS_eyes %>%
  select(gene, Sequence.Description) %>%
  kable(caption = 'Core enrichment genes within significantly enriched GO terms of the ion channel cluster in both the CNS and eyes')

Core enrichment genes within significantly enriched GO terms of the ion channel cluster in both the CNS and eyes
gene	Sequence.Description
5283.7784	—NA—
3318.0	—NA—
5283.9460	—NA—
5661.1	acetylcholine receptor subunit alpha-like 1 isoform X1
5661.0	acetylcholine receptor subunit alpha-like isoform X2
5283.4488	acetylcholine receptor subunit beta-like 1 isoform X2
5283.4489	acetylcholine receptor subunit beta-like 1 isoform X2
5283.7592	cyclic nucleotide-gated cation channel alpha-3
5283.7592	cyclic nucleotide-gated cation channel alpha-3-like
6610.0	cyclic nucleotide-gated cation channel beta-3-like isoform X1
6610.2	cyclic nucleotide-gated cation channel beta-3-like isoform X2
6610.0	cyclic nucleotide-gated cation channel beta-3-like isoform X7
5283.9458	cyclic nucleotide-gated olfactory channel-like
5283.9460	cyclic nucleotide-gated olfactory channel-like
7253.3	Cys-loop ligand-gated ion channel
7253.4	cys-loop ligand-gated ion channel-like isoform X2
7253.0	cys-loop ligand-gated ion channel-like isoform X2
7253.3	cys-loop ligand-gated ion channel-like isoform X2
7253.6	cys-loop ligand-gated ion channel-like isoform X3
5283.9038	gamma-aminobutyric acid receptor alpha-like
3318.0	gamma-aminobutyric acid receptor alpha-like
5283.8285	glutamate receptor 1-like
5283.8285	glutamate receptor 4
7335.0	glutamate receptor ionotropic, kainate 2-like isoform X1
7335.3	glutamate receptor ionotropic, kainate 2-like isoform X1
2901.0	glutamate receptor ionotropic, kainate 2-like isoform X1
6740.0	glutamate receptor ionotropic, kainate 2-like isoform X1
6740.2	glutamate receptor ionotropic, kainate 2-like isoform X1
7335.6	glutamate receptor ionotropic, kainate 2-like isoform X1
7335.1	glutamate receptor ionotropic, kainate 2-like isoform X1
6740.3	glutamate receptor ionotropic, kainate 2 isoform X1
1742.0	glutamate receptor ionotropic, NMDA 2B-like
4067.0	glutamate receptor ionotropic, NMDA 3A-like
3215.2	glutamate receptor ionotropic, NMDA 3A-like isoform X2
2499.0	glycine receptor subunit alphaZ1-like
5283.8285	GRIA4 protein
5283.2592	HCN channel protein
5283.2593	HCN channel protein
7253.3	Hypothetical predicted protein
5283.5377	neuronal acetylcholine receptor subunit alpha-10-like
6426.1	neuronal acetylcholine receptor subunit alpha-10-like
6426.7	neuronal acetylcholine receptor subunit alpha-10-like
5283.7784	neuronal acetylcholine receptor subunit alpha-10-like
6426.10	neuronal acetylcholine receptor subunit alpha-10-like
3136.0	neuronal acetylcholine receptor subunit alpha-10-like
6426.6	neuronal acetylcholine receptor subunit alpha-10-like
6426.5	neuronal acetylcholine receptor subunit alpha-10-like
5283.10464	neuronal acetylcholine receptor subunit alpha-10-like
5283.2323	neuronal acetylcholine receptor subunit alpha-10-like isoform X3
5283.8846	neuronal acetylcholine receptor subunit alpha-3-like
5283.8847	neuronal acetylcholine receptor subunit alpha-3-like
878.0	neuronal acetylcholine receptor subunit alpha-5-like
5283.10467	neuronal acetylcholine receptor subunit alpha-6-like
4605.3	piezo-type mechanosensitive ion channel component 1-like isoform X1
5283.2593	potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 2-like
5283.2593	potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X2
5283.2593	potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X4
6010.2	sideroflexin-1
5283.2593	voltage-activated ion channel, putative

Ion transport cluster of GO terms

All core enrichment genes from the cluster of GO terms related to ion transport that were found significant with gene set enrichment analysis in the CNS (no ion transport GO terms found significant in the eyes)
This cluster of GO terms includes the following GO terms:
- potassium channel activity
- potassium ion transmembrane transport
- voltage-gated potassium channel activity
- regulation of ion transmembrane transport
- voltage-gated calcium channel complex
- calcium ion transmembrane transport

# Pull out genes in the ion transport, voltage cluster of GO terms
ion_transport_voltage_cluster_CNS <- gsea_exp0_transcripts_GO_CNS %>%
  select(gene, transcript, Sequence.Description, Description_gsea) %>%
  filter(Description_gsea %in% c('potassium ion transport', 'potassium channel activity', 'potassium ion transmembrane transport', 'voltage-gated potassium channel activity', 'regulation of ion transmembrane transport', 'voltage-gated calcium channel complex', 'calcium ion transmembrane transport')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

save(ion_transport_voltage_cluster_CNS, file = 'results/ion_transport_voltage_cluster.RData')

ion_transport_voltage_cluster_CNS %>%
  select(gene, Sequence.Description) %>%
  kable(caption = 'Core enrichment genes within significantly enriched GO terms of the ion transport, voltage cluster in the CNS')

Core enrichment genes within significantly enriched GO terms of the ion transport, voltage cluster in the CNS
gene	Sequence.Description
6427.3	—NA—
5283.9460	—NA—
6093.6	ATP-binding cassette sub-family C member 9-like isoform X1
6093.4	ATP-binding cassette sub-family C member 9-like isoform X1
6093.4	ATP-binding cassette sub-family C member 9-like isoform X2
6093.4	ATP-binding cassette sub-family C member 9-like isoform X3
6093.1	ATP-binding cassette sub-family C member 9 isoform X1
6093.0	ATP-binding cassette sub-family C member 9 isoform X1
5283.3940	CACNA1G
5283.7244	CACNA1G
1506.0	calcium-activated potassium channel slowpoke isoform X15
1506.1	calcium-activated potassium channel subunit alpha-1 isoform X10
1506.1	calcium-activated potassium channel subunit alpha-1 isoform X8
1506.0	calcium-activated potassium channel subunit alpha-1 isoform X8
7412.0	calcium channel beta subunit
5283.10202	calcium uniporter protein, mitochondrial-like
5283.7592	cyclic nucleotide-gated cation channel alpha-3
5283.7592	cyclic nucleotide-gated cation channel alpha-3-like
5283.9460	cyclic nucleotide-gated olfactory channel-like
1329.0	G protein-activated inward rectifier potassium channel 2-like
5187.3	G protein-activated inward rectifier potassium channel 3-like
5283.7191	G protein-activated inward rectifier potassium channel 3-like
5283.7192	G protein-activated inward rectifier potassium channel 3-like
5187.7	G protein-activated inward rectifier potassium channel 3-like
5283.2593	HCN channel protein
5496.1	muscle calcium channel subunit alpha-1-like isoform X1
5496.0	muscle calcium channel subunit alpha-1-like isoform X3
1933.0	muscle calcium channel subunit alpha-1-like isoform X4
5283.11306	Na+/K+ ATPase beta subunit
206.0	neuronal calcium sensor 1
7325.1	plasma membrane calcium-transporting ATPase 2-like isoform X1
7325.6	plasma membrane calcium-transporting ATPase 2-like isoform X1
7325.1	plasma membrane calcium-transporting ATPase 2 isoform X1
7325.5	plasma membrane calcium-transporting ATPase 2 isoform X1
7325.2	plasma membrane calcium-transporting ATPase 2 isoform X1
7107.0	potassium channel Kv1
5283.9860	potassium channel subfamily K member 1-like
5283.4027	potassium channel subfamily K member 4-like
5283.1287	potassium channel subfamily T member 2-like isoform X1
5283.1286	potassium channel subfamily T member 2-like isoform X1
5283.5242	potassium channel subfamily T member 2-like isoform X1
5283.1287	potassium channel subfamily T member 2-like isoform X2
5283.5240	potassium channel subfamily T member 2-like isoform X2
5283.5241	potassium channel subfamily T member 2-like isoform X2
5283.5239	potassium channel subfamily T member 2-like isoform X3
5283.5241	potassium channel subfamily T member 2-like isoform X5
7032.0	potassium intermediate/small conductance calcium-activated channel subfamily N member 3
7032.1	potassium intermediate/small conductance calcium-activated channel subfamily N member 3
7109.1	potassium voltage-gated channel protein Shab-like
5283.2059	potassium voltage-gated channel protein Shaker-like
5283.2058	potassium voltage-gated channel protein Shaker-like
5283.2060	potassium voltage-gated channel protein Shaker-like
5283.2159	potassium voltage-gated channel protein Shal-like
5283.2158	potassium voltage-gated channel protein Shal-like
5283.2160	potassium voltage-gated channel protein Shal-like isoform X1
5283.2157	potassium voltage-gated channel protein Shal-like isoform X1
7066.1	potassium voltage-gated channel protein Shaw-like
7066.0	potassium voltage-gated channel protein Shaw-like
6752.2	potassium voltage-gated channel protein Shaw-like isoform X1
6752.0	potassium voltage-gated channel protein Shaw-like isoform X1
6752.1	potassium voltage-gated channel protein Shaw-like isoform X1
995.0	potassium voltage-gated channel subfamily A member 1-like
7109.0	potassium voltage-gated channel subfamily B member 2 isoform X6
7109.1	potassium voltage-gated channel subfamily B member 2 isoform X6
5283.1038	potassium voltage-gated channel subfamily H member 4
5283.1038	potassium voltage-gated channel subfamily H member 4 isoform X1
1571.0	potassium voltage-gated channel subfamily H member 6 isoform X1
1768.0	potassium voltage-gated channel subfamily H member 7-like isoform X1
5283.1038	potassium voltage-gated channel subfamily H member 8-like
3824.0	potassium voltage-gated channel subfamily KQT member 1-like isoform X1
3824.1	potassium voltage-gated channel subfamily KQT member 1-like isoform X1
5283.2593	potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 2-like
5283.2593	potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X2
5283.2593	potassium/sodium hyperpolarization-activated cyclic nucleotide-gated channel 3-like isoform X4
7109.1	probable serine/threonine-protein kinase DDB_G0282963
5283.11306	probable sodium/potassium-transporting ATPase subunit beta-3
6905.0	putative Na+/K+-ATPase alpha subunit
5283.4923	riboflavin-binding protein-like
5283.324	ryanodine receptor-like isoform X11
5283.324	ryanodine receptor-like isoform X12
5283.324	ryanodine receptor-like isoform X15
5283.324	ryanodine receptor-like isoform X17
5283.324	ryanodine receptor-like isoform X2
5283.324	ryanodine receptor-like isoform X7
5283.324	ryanodine receptor 2
5283.324	ryanodine receptor 2-like isoform X10
1483.0	short transient receptor potential channel 3-like
1418.0	short transient receptor potential channel 3-like isoform X1
7032.1	small conductance calcium-activated potassium channel protein-like
7032.2	small conductance calcium-activated potassium channel protein 2 isoform X1
7032.1	small conductance calcium-activated potassium channel protein 2 isoform X1
7381.7	sodium channel
5283.2924	sodium channel protein 1 brain
5283.2924	sodium channel protein 1 brain-like
5283.2922	sodium channel protein 1 brain-like
5283.2924	sodium channel protein 1 brain-like isoform X1
5283.2924	sodium channel protein 1 brain isoform X1
5283.2924	sodium channel protein para isoform X2
5283.2922	sodium channel protein para isoform X2
7381.6	sodium channel protein type 4 subunit alpha A-like isoform X1
5283.2924	sodium channel protein type 4 subunit alpha B-like
7381.2	sodium channel protein type 4 subunit alpha B-like isoform X1
7381.3	sodium channel protein type 4 subunit alpha B-like isoform X1
7381.1	sodium channel protein type 4 subunit alpha B-like isoform X1
7381.5	sodium channel protein type 4 subunit alpha B-like isoform X1
7381.4	sodium channel protein type 4 subunit alpha B-like isoform X1
5283.9598	sodium/potassium-transporting ATPase subunit beta-like
5283.9595	sodium/potassium-transporting ATPase subunit beta-like
5283.9596	sodium/potassium-transporting ATPase subunit beta-like
5283.9600	sodium/potassium-transporting ATPase subunit beta-like
4619.0	sodium/potassium-transporting ATPase subunit beta-like
5283.9599	sodium/potassium-transporting ATPase subunit beta-like
4619.1	sodium/potassium-transporting ATPase subunit beta-like
5283.9594	sodium/potassium-transporting ATPase subunit beta-like
5283.9214	sodium/potassium/calcium exchanger 2-like isoform X2
7433.2	sodium/potassium/calcium exchanger 4-like
7433.0	sodium/potassium/calcium exchanger 4-like
7433.3	sodium/potassium/calcium exchanger 4-like
7433.1	sodium/potassium/calcium exchanger 4-like
5283.9214	sodium/potassium/calcium exchanger Nckx30C
5283.9214	sodium/potassium/calcium exchanger Nckx30C-like
5283.9213	sodium/potassium/calcium exchanger Nckx30C-like
6396.2	solute carrier family 12 member 4-like isoform X1
6396.3	solute carrier family 12 member 4-like isoform X1
6396.1	solute carrier family 12 member 4-like isoform X2
6396.9	solute carrier family 12 member 4-like isoform X2
3790.0	transient-receptor-potential-like protein
3790.0	transient-receptor-potential-like protein isoform X1
3790.1	transient-receptor-potential-like protein isoform X1
5283.3687	transient receptor potential-gamma protein-like
5811.9	transient receptor potential cation channel subfamily M member-like 2 isoform X1
6627.0	trimeric intracellular cation channel type 1B.1-like
7122.2	TWiK family of potassium channels protein 18-like
7122.1	TWiK family of potassium channels protein 18-like
7122.0	TWiK family of potassium channels protein 18-like
6244.0	two pore potassium channel protein sup-9-like
5283.2593	voltage-activated ion channel, putative
6427.3	voltage-dependent calcium channel
6427.2	voltage-dependent calcium channel
1684.1	voltage-dependent calcium channel
6427.3	voltage-dependent calcium channel type A subunit alpha-1-like
6427.0	voltage-dependent calcium channel type A subunit alpha-1-like
6427.3	voltage-dependent calcium channel type A subunit alpha-1-like isoform X6
1684.0	voltage-dependent calcium channel type A subunit alpha-1 isoform X2
5283.3939	voltage-dependent T-type calcium channel subunit alpha-1G-like
5283.3940	voltage-dependent T-type calcium channel subunit alpha-1G-like isoform X2
5283.7246	voltage-dependent T-type calcium channel subunit alpha-1G-like isoform X2
5283.7243	voltage-dependent T-type calcium channel subunit alpha-1H-like
5283.7245	voltage-dependent T-type calcium channel subunit alpha-1H-like
5283.7242	voltage-dependent T-type calcium channel subunit alpha-1I-like
5283.7244	voltage-dependent T-type calcium channel subunit alpha-1I-like
5283.10568	voltage-gated potassium channel subunit beta-2-like isoform X1
5283.10569	voltage-gated potassium channel subunit beta-2-like isoform X1
7381.5	voltage-gated sodium channel invertebrate type 1

GPCR cluster of GO terms

All core enrichment genes from the cluster of GO terms related to GPCRs that were found significant with gene set enrichment analysis in the CNS (no ion transport GO terms found significant in the eyes)
This cluster of GO terms includes the following GO terms:
- G protein-coupled receptor activity
- G protein-coupled receptor signaling pathway

# Pull out genes in the GPCR cluster of GO terms
GPCR_cluster_CNS <- gsea_exp0_transcripts_GO_CNS %>%
  select(gene, transcript, Sequence.Description, Description_gsea) %>%
  filter(Description_gsea %in% c('G protein-coupled receptor activity', 'G protein-coupled receptor signaling pathway')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)


save(GPCR_cluster_CNS, file = 'results/GPCR_cluster.RData')

GPCR_cluster_CNS %>%
  select(gene, Sequence.Description) %>%
  kable(caption = 'Core enrichment genes within significantly enriched GO terms of the GPCR cluster in the CNS')

Core enrichment genes within significantly enriched GO terms of the GPCR cluster in the CNS
gene	Sequence.Description
5283.11228	—NA—
6174.0	—NA—
5283.7636	—NA—
5283.4060	—NA—
3985.0	5-hydroxytryptamine receptor-like
396.0	5-hydroxytryptamine receptor 1D-like
5283.11786	5-hydroxytryptamine receptor 2C-like
5283.11785	5-hydroxytryptamine receptor 2C-like
5283.1508	5-hydroxytryptamine receptor 4-like
5283.9162	adhesion G-protein coupled receptor D1-like
5283.9162	adhesion G-protein coupled receptor D2-like
252.1	adhesion G protein-coupled receptor A3-like
6594.0	adhesion G protein-coupled receptor L2-like isoform X1
6594.0	adhesion G protein-coupled receptor L2 isoform X1
2554.0	allatostatin-A receptor
1650.0	alpha-2A adrenergic receptor
5283.10427	cadherin EGF LAG seven-pass G-type receptor 1-like
5283.10430	cadherin EGF LAG seven-pass G-type receptor 1-like
5283.10428	cadherin EGF LAG seven-pass G-type receptor 1-like
5596.4	cadherin EGF LAG seven-pass G-type receptor 1 isoform X3
5596.1	cadherin EGF LAG seven-pass G-type receptor 1 isoform X3
5596.3	cadherin EGF LAG seven-pass G-type receptor 2 isoform X1
5596.2	cadherin EGF LAG seven-pass G-type receptor 2 isoform X5
5283.11833	calcitonin gene-related peptide type 1 receptor-like isoform X1
5283.2398	cAMP-dependent protein kinase catalytic subunit beta
5283.2398	cAMP-dependent protein kinase catalytic subunit isoform X5
5283.2397	cAMP-dependent protein kinase catalytic subunit isoform X5
5283.9030	cardioacceleratory peptide receptor-like isoform X1
5283.5948	catalytic subunit of protein kinase A
2171.0	cholecystokinin receptor type A-like
3084.0	cholecystokinin receptor type A-like
5283.8548	corticotropin-releasing factor receptor 2-like
6072.0	CX3C chemokine receptor 1 isoform X1
5283.11354	dopamine receptor 1-like
5283.11353	dopamine receptor 1-like
3912.1	dopamine receptor 1-like
3716.0	FMRFamide receptor-like
1567.0	FMRFamide receptor-like
3871.0	FMRFamide receptor-like
5415.1	FMRFamide receptor-like
6899.1	frizzled-9
6899.1	frizzled-9-like
6899.1	Frizzled 10A
6899.0	Frizzled 10A
6899.1	frizzled 9/10
5283.9573	G-protein coupled receptor 143-like
5283.9574	G-protein coupled receptor 143-like
1827.0	G-protein coupled receptor 161-like
4382.0	G-protein coupled receptor Mth-like
5283.11232	GABR1 protein
5283.11232	gamma-aminobutyric acid type B receptor subunit 1-like
5283.11229	gamma-aminobutyric acid type B receptor subunit 1-like
5283.8308	Gamma-aminobutyric acid type B receptor subunit 2
5283.10141	gonadotropin-releasing hormone receptor
5283.2535	growth hormone secretagogue receptor type 1-like
5283.732	guanine nucleotide-binding protein subunit beta-5
5283.2425	Hypothetical predicted protein
5283.4222	leucine-rich repeat-containing G-protein coupled receptor 5-like
5283.4293	leucine-rich repeat-containing G-protein coupled receptor 5-like
5283.4294	leucine-rich repeat-containing G-protein coupled receptor 5-like
5283.10762	lissencephaly-1 homolog
5283.10762	lissencephaly-1 homolog B
5283.4221	lutropin-choriogonadotropic hormone receptor isoform X3
5283.11295	melatonin receptor type 1A-like
5283.11297	melatonin receptor type 1A-like
5283.7577	metabotropic glutamate receptor 1-like
5283.7570	metabotropic glutamate receptor 1-like
5283.7573	metabotropic glutamate receptor 1-like
5283.7569	metabotropic glutamate receptor 1-like
5283.7574	metabotropic glutamate receptor 1-like
1651.0	metabotropic glutamate receptor 3
5283.7578	metabotropic glutamate receptor 5-like
5283.7575	metabotropic glutamate receptor 5-like
6075.0	neuropeptide FF receptor 1-like
6075.1	neuropeptide FF receptor 1-like
1357.0	octopamine receptor Oamb-like
1357.1	octopamine receptor Oamb-like
2184.1	octopamine receptor Oamb isoform X1
2664.0	orexin receptor type 2-like
5283.9529	orexin receptor type 2-like
3932.1	parathyroid hormone/parathyroid hormone-related peptide receptor isoform X3
6072.0	PREDICTED: uncharacterized protein LOC106880461
4782.0	prolactin-releasing peptide receptor-like
5283.2398	protein kinase A
5283.2397	protein kinase A
1909.1	pyrokinin-1 receptor-like
5283.1602	regulator of G-protein signaling 4-like
5283.1599	regulator of G-protein signaling 4-like
5283.1146	retinochrome
5283.8098	retinochrome
5283.1143	retinochrome
5283.1145	retinochrome
5283.1142	retinochrome
5283.7855	rhodopsin
5283.7636	S-antigen protein-like
3932.0	secretin receptor-like isoform X1
6876.2	somatostatin receptor type 2-like
5629.0	somatostatin receptor type 3-like
5984.0	thyrotropin-releasing hormone receptor-like
5629.0	thyrotropin-releasing hormone receptor-like
5283.1199	type-1 angiotensin II receptor isoform X1
5283.1602	unnamed protein product

Export functional enrichment results for Cytoscape

Enrichmentmap within Cytoscape was used to visualise the GSEA results
In order to do this, the GSEA results need to be exported from R in the correct format to be used within Cytoscape.

GSEA results file

Format exactly the same as the ovarian example (https://bioinformaticsdotca.github.io/Pathways_2019_Module3_Lab-EM_GSEA)
But use .txt not .xls

gsea_result_CNS_ashr_pos <- gsea_CNS_ashr_annot_exp0_summary %>%
  filter(NES > 0) %>%
  select(ID, Description, setSize, enrichmentScore, NES, pvalue, qvalues, p.adjust, rank, leading_edge) %>%
  rename(NAME = ID) %>%
  mutate('GS<br> follow link to MSigDB' = '') %>%
  rename('GS DETAILS' = Description,
         SIZE = setSize,
         ES = enrichmentScore,
         'NOM p-val' = pvalue,
         'FDR q-val' = qvalues,
         'FWER p-val' = p.adjust,
         'RANK AT MAX' = rank,
         'LEADING EDGE' = leading_edge) %>%
  relocate('GS<br> follow link to MSigDB', .after = NAME)

write.table(gsea_result_CNS_ashr_pos, file = 'gsea_cytoscape/gsea_result_CNS_ashr_pos.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)  


gsea_result_CNS_ashr_neg <- gsea_CNS_ashr_annot_exp0_summary %>%
  filter(NES < 0) %>%
  select(ID, Description, setSize, enrichmentScore, NES, pvalue, qvalues, p.adjust, rank, leading_edge) %>%
  rename(NAME = ID) %>%
  mutate('GS<br> follow link to MSigDB' = '') %>%
  rename('GS DETAILS' = Description,
         SIZE = setSize,
         ES = enrichmentScore,
         'NOM p-val' = pvalue,
         'FDR q-val' = qvalues,
         'FWER p-val' = p.adjust,
         'RANK AT MAX' = rank,
         'LEADING EDGE' = leading_edge) %>%
  relocate('GS<br> follow link to MSigDB', .after = NAME)

write.table(gsea_result_CNS_ashr_neg, file = 'gsea_cytoscape/gsea_result_CNS_ashr_neg.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)

gsea_result_eyes_ashr_pos <- gsea_eyes_ashr_annot_exp0_summary %>%
  filter(NES > 0) %>%
  select(ID, Description, setSize, enrichmentScore, NES, pvalue, qvalues, p.adjust, rank, leading_edge) %>%
  rename(NAME = ID) %>%
  mutate('GS<br> follow link to MSigDB' = '') %>%
  rename('GS DETAILS' = Description,
         SIZE = setSize,
         ES = enrichmentScore,
         'NOM p-val' = pvalue,
         'FDR q-val' = qvalues,
         'FWER p-val' = p.adjust,
         'RANK AT MAX' = rank,
         'LEADING EDGE' = leading_edge) %>%
  relocate('GS<br> follow link to MSigDB', .after = NAME)

write.table(gsea_result_eyes_ashr_pos, file = 'gsea_cytoscape/gsea_result_eyes_ashr_pos.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)  


gsea_result_eyes_ashr_neg <- gsea_eyes_ashr_annot_exp0_summary %>%
  filter(NES < 0) %>%
  select(ID, Description, setSize, enrichmentScore, NES, pvalue, qvalues, p.adjust, rank, leading_edge) %>%
  rename(NAME = ID) %>%
  mutate('GS<br> follow link to MSigDB' = '') %>%
  rename('GS DETAILS' = Description,
         SIZE = setSize,
         ES = enrichmentScore,
         'NOM p-val' = pvalue,
         'FDR q-val' = qvalues,
         'FWER p-val' = p.adjust,
         'RANK AT MAX' = rank,
         'LEADING EDGE' = leading_edge) %>%
  relocate('GS<br> follow link to MSigDB', .after = NAME)

write.table(gsea_result_eyes_ashr_neg, file = 'gsea_cytoscape/gsea_result_eyes_ashr_neg.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)

Rank file

column 1 = gene, column 2 = log2FoldChange, ordered

res_CNS_shrunken_ashr_tb_rename <- res_CNS_shrunken_ashr_tb %>%
  mutate(gene = gsub('Cluster-', '', gene))

res_CNS <- res_CNS_shrunken_ashr_tb_rename %>%
  select(gene, log2FoldChange) %>% 
  as.data.frame()

write.table(res_CNS, file = 'gsea_cytoscape/gsea_CNS_ashr_rank.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)

res_eyes_shrunken_ashr_tb_rename <- res_eyes_shrunken_ashr_tb %>%
  mutate(gene = gsub('Cluster-', '', gene))

res_eyes <- res_eyes_shrunken_ashr_tb_rename %>%
  select(gene, log2FoldChange) %>% 
  as.data.frame()

write.table(res_eyes, file = 'gsea_cytoscape/gsea_eyes_ashr_rank.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)

Then manually change .txt to .rnk

Expression data file

txt file
name (–tab–) description (–tab–) sample1 name (–tab–) sample2 name etc…

normalised_counts_CNS_tb_rename <- normalised_counts_CNS_tb %>%
  mutate(gene = gsub("Cluster-", "", gene)) %>%
  rename_with(~ gsub("CNS", "CNS_ambient", .x), c("PS506_CNS", "PS508_CNS", "PS513_CNS", "PS514_CNS", "PS518_CNS", "PS529_CNS", "PS534_CNS", "PS551_CNS", "PS557_CNS", "PS561_CNS")) %>%
  rename_with(~ gsub("CNS", "CNS_elevated", .x), c("PS509_CNS", "PS510_CNS", "PS522_CNS", "PS527_CNS", "PS528_CNS", "PS539_CNS", "PS547_CNS", "PS584_CNS", "PS626_CNS", "PS636_CNS")) %>%
  relocate(ends_with("ambient"), .before = ends_with("elevated"))

normalised_counts_CNS_annotated <- normalised_counts_CNS_tb_rename %>%
  full_join(corset_clusters_2_transcripts_renamed) %>%
  full_join(annotated_transcriptome_rename) %>%
  select(gene, Sequence.Description, PS506_CNS_ambient:PS636_CNS_elevated) %>%
  distinct(gene, .keep_all = TRUE)

write.table(normalised_counts_CNS_annotated, file = 'gsea_cytoscape/gsea_CNS_ashr_expression.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)

normalised_counts_eyes_tb_rename <- normalised_counts_eyes_tb %>%
  mutate(gene = gsub("Cluster-", "", gene)) %>%
  rename_with(~ gsub("eyes", "eyes_ambient", .x), c("PS506_eyes", "PS508_eyes", "PS513_eyes", "PS514_eyes", "PS518_eyes", "PS529_eyes", "PS534_eyes", "PS551_eyes", "PS557_eyes", "PS561_eyes")) %>%
  rename_with(~ gsub("eyes", "eyes_elevated", .x), c("PS509_eyes", "PS510_eyes", "PS522_eyes", "PS527_eyes", "PS528_eyes", "PS539_eyes", "PS547_eyes", "PS584_eyes", "PS626_eyes", "PS636_eyes")) %>%
  relocate(ends_with("ambient"), .before = ends_with("elevated"))

normalised_counts_eyes_annotated <- normalised_counts_eyes_tb_rename %>%
  full_join(corset_clusters_2_transcripts_renamed) %>%
  full_join(annotated_transcriptome_rename) %>%
  select(gene, Sequence.Description, PS506_eyes_ambient:PS636_eyes_elevated) %>%
  distinct(gene, .keep_all = TRUE)

write.table(normalised_counts_eyes_annotated, file = 'gsea_cytoscape/gsea_eyes_ashr_expression.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = TRUE)

Gene Sets file (GMT file)

geneset name (–tab–) description (–tab–) a list of tab-delimited genes
Open annotated transcriptome in OmicsBox > File > Export > Export annotations > Export Sequences per GO (Gene Sets): Gives GO.ID column 1 and list of transcripts column 2

gmt <- read.delim(file = 'data/blast2go_gene_sets.txt')
gmt <- gmt %>%
  as_tibble()

# need corresponding genes for the transcript list
gmt_split <- cSplit(gmt, splitCols = 'Sequences', ',', direction = "long") 
gmt_genes <- gmt_split %>%
  rename(transcript = Sequences) %>% 
  mutate(transcript = gsub("transcript/", '', transcript)) %>%
  full_join(corset_clusters_2_transcripts_renamed) %>%
  select(GO.ID, gene)
#collapse so a list of tab delimited genes for each GO term - should have 5731 rows
gmt_genes_list <- gmt_genes %>%
  drop_na() %>%
  group_by(GO.ID) %>%
  dplyr::summarise(gene = paste(unique(gene), collapse='\t'))

# Add GO term/description to gmt_genes
# Used Annotation.GO.ID for GSEA in ClusterProfiler
# So extract all Annotation.GO.ID and corresponding Annotation.GO.Term and add to gmt_genes and should have all the GO terms with their description that were use din the analysis

GO <- annotated_transcriptome %>%
  select(Annotation.GO.ID, Annotation.GO.Term) %>%
  rename(GO.ID = Annotation.GO.ID,
         GO.Term = Annotation.GO.Term)
GO_split <- cSplit(GO, c('GO.ID', 'GO.Term'), ';', direction = "long") 

GO_split <- GO_split %>%
  distinct(GO.ID, .keep_all = TRUE) # transcriptome contains genes annotated with same GO terms so pull out only one of each GO

gmt_genes_GO <- gmt_genes_list %>%
  left_join(GO_split) %>%
  select(GO.ID, GO.Term, gene)

write.table(gmt_genes_GO, file = 'gsea_cytoscape/genesets.txt', append = FALSE, quote = FALSE, sep = "\t",row.names = FALSE, col.names = FALSE)

Then manually change .txt to .gmt

Annotation Statistics

Calculate some statistics regarding annotation of the transcriptome

Percentage of transcripts with a blast hit

No._no_blast_hits <- sum(is.na(annotated_transcriptome$Blast.Top.Hit.Score))
No._transcripts <- nrow(annotated_transcriptome)
(No._transcripts - No._no_blast_hits)/No._transcripts*100

93% transcripts had a blast hit

Percentage of transcripts succesfully GO mapped

# Replace blank cells with 'NA'
annotated_transcriptome$Mapping.GO.ID[annotated_transcriptome$Mapping.GO.ID == ""] <- NA


No._no_GO_mapping <- sum(is.na(annotated_transcriptome$Mapping.GO.ID))
No._transcripts <- nrow(annotated_transcriptome)
(No._transcripts - No._no_GO_mapping)/No._transcripts*100

73% of transcripts succesfully GO mapped

Percentage of transcripts completely annotated

# Replace blank cells with 'NA'
annotated_transcriptome$Annotation.GO.ID[annotated_transcriptome$Annotation.GO.ID == ""] <- NA

No._no_complete_annotation <- sum(is.na(annotated_transcriptome$Annotation.GO.ID))
No._transcripts <- nrow(annotated_transcriptome)
(No._transcripts - No._no_complete_annotation)/No._transcripts*100

69% of transcripts completely annotated

Publication Figures

Figure: DE and GSEA Results

Make PCA

rld$sampletype <- factor(rld$sampletype, levels = c("Ambient_CNS", "Elevated_CNS", "Ambient_eyes", "Elevated_eyes"))

pcaData <- plotPCA(rld, intgroup = "sampletype", ntop = 50000000, returnData = TRUE)
    
percentVar <- round(100 * attr(pcaData, "percentVar")) 
    

( figure_PCA <- ggplot(pcaData, aes(x = PC1, y = PC2,
                                         fill = factor(sampletype),
                                         shape = factor(sampletype)),
                            colour = "black") + 
    geom_point(size = 2) + 
    scale_shape_manual(values=c(21, 22, 23, 24),
                       name = "",
                       labels = c(bquote("CNS: Current-day CO"[2]),
                                  bquote("CNS: Elevated CO"[2]),
                                  bquote("Eyes: Current-day CO"[2]),
                                  bquote("Eyes: Elevated CO"[2]))) + 
    scale_fill_manual(values = c('#FFFFFF', '#FDE725', '#FFFFFF', '#FDE725'),
                      name = "",
                      labels = c(bquote("CNS: Current-day CO"[2]),
                                 bquote("CNS: Elevated CO"[2]),
                                 bquote("Eyes: Current-day CO"[2]),
                                 bquote("Eyes: Elevated CO"[2]))) +
    xlab(paste0("PC1: ", percentVar[1], "% variance")) + 
    ylab(paste0("PC2: ", percentVar[2], "% variance")) + 
    coord_cartesian(ylim = c(-85, 85), xlim = c(-152, 152)) +
    scale_x_continuous(breaks = c(-100, 0, 100),
                       labels = c(-100, 0, 100)) +
    scale_y_continuous(breaks = c(-80, -40, 0, 40),
                       labels = c(-80, -40, 0, 40)) +
    theme_classic() +
    theme(axis.title = element_text(size = 10),
          axis.text.x = element_text(size = 8),
          axis.text.y = element_text(size = 8),
          legend.text = element_text(size = 8),
          legend.position = c(0.5,1.1)) +
    guides(fill = guide_legend(nrow = 2, byrow = FALSE),
           shape = guide_legend(nrow = 2, byrow = FALSE))
)

save(figure_PCA, file = 'publication_figures/figure_PCA.RData')

Make Venn diagrams

DE_CNS <- sig_DE_CNS_ashr %>%
  distinct(gene)

DE_eyes <- sig_DE_eyes_ashr_final %>%
  distinct(gene)

x <- c(list(DE_CNS$gene), list(DE_eyes$gene))
names(x) <- c('CNS', 'Eyes')
(venn_de <- ggvenn(x,
               show_percentage = FALSE,
               text_size = 0,
               set_name_size = 4,
               fill_color = c('#4040FF', '#BFBFFF'),
               fill_alpha = 0.8) +
  annotate('text', label = '23', x = -1, y = 0.25, size = 4) +
  annotate("rect", xmin = -1.2, xmax = -0.8, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
  annotate('text', label = '2', x = 0, y = 0.25, size = 4) +
  annotate("rect", xmin = -0.2, xmax = 0.2, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
  annotate('text', label = '6', x = 1, y = 0.25, size = 4) +
  annotate("rect", xmin = 0.8, xmax = 1.2, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
  annotate('text', label = expression('12 ' *symbol('\255')), x = -1, y = -0.25) +
  annotate('text', label = expression('11 ' *symbol('\257')), x = -1, y = -0.55) +
  annotate('text', label = expression('3 ' *symbol('\255')), x = 1, y = -0.25) +
  annotate('text', label = expression('3 ' *symbol('\257')), x = 1, y = -0.55) +
  annotate('text', label = expression('2 ' *symbol('\255')), x = 0, y = -0.25) +
    theme(plot.margin = unit(c(0,0,0,0), "pt"))
  )

save(venn_de, file = 'publication_figures/venn_de.RData')

GSEA_CNS <- gsea_CNS_ashr_annot_exp0_summary %>%
  distinct(ID)

GSEA_eyes <- gsea_eyes_ashr_annot_exp0_summary %>%
  distinct(ID)


x <- c(list(GSEA_CNS$ID), list(GSEA_eyes$ID))
names(x) <- c('CNS', 'Eyes')
(venn_gsea <- ggvenn(x,
                show_percentage = FALSE,
                text_size = 0,
                set_name_size = 4,
                fill_color = c('#4040FF', '#BFBFFF'),
                fill_alpha = 0.8) +
    annotate('text', label = '89', x = -1, y = 0.25, size = 4) +
    annotate("rect", xmin = -1.2, xmax = -0.8, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
    annotate('text', label = '10', x = 0, y = 0.35, size = 4) +
    annotate("rect", xmin = -0.15, xmax = 0.15, ymin = 0.22, ymax = 0.45, fill = NA, colour = 'black', size = 0.5) +
    annotate('text', label = '7', x = 1, y = 0.25, size = 4) +
    annotate("rect", xmin = 0.8, xmax = 1.2, ymin = 0.1, ymax = 0.4, fill = NA, colour = 'black', size = 0.5) +
    annotate('text', label = expression('69 ' *symbol('\255')), x = -1, y = -0.25) +
    annotate('text', label = expression('20 ' *symbol('\257')), x = -1, y = -0.55) +
    annotate('text', label = expression('5 ' *symbol('\255')), x = 1, y = -0.25) +
    annotate('text', label = expression('2 ' *symbol('\257')), x = 1, y = -0.55) +
    annotate('text', label = expression('5 ' *symbol('\255'), '           / ' *symbol('\255')), x = -0.1, y = 0.1) +
    annotate('text', label = expression('2 ' *symbol('\255'), '           / ' *symbol('\257')), x = -0.1, y = -0.05) +
    annotate('text', label = expression('1 ' *symbol('\257'), '           / ' *symbol('\257')), x = -0.1, y = -0.2) +
    annotate('text', label = expression('2 ' *symbol('\257'), '           / ' *symbol('\255')), x = -0.1, y = -0.35) +
    theme(plot.margin = unit(c(0,0,0,0), "pt"))
)

save(venn_gsea, file = 'publication_figures/venn_gsea.RData')

Combine into one figure

(figure_DE_GSEA <- (
  (
  plot_spacer() 
  |
    (figure_PCA + theme(plot.margin = unit(c(30,50,30,50), "pt")))
  |
    plot_spacer()
) +
  plot_layout(widths = c(0.3,1,0.3))
)
         /
           (
             (
             (venn_de + theme(plot.margin = unit(c(0,0,0,0), "pt"))) 
             | 
              (venn_gsea + theme(plot.margin = unit(c(0,0,0,0), "pt"))) 
           ) +
             plot_layout(widths = c(1,1))
           ) +
   plot_layout(heights = c(1,1)) +
   plot_annotation(tag_levels = 'A') &
   theme(plot.tag = element_text(face = 'bold'))
   
 )

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

Supplementary figure: Top hit species distribution

The top blast hit species distribution .txt file is exported from OmicsBox: First run BLAST statistics, then go to tab ‘Chart: Top-hit species distribution’ and in the toolbar click ‘Data as text’.

species <- read.delim(file = 'data/top_hit_species_distribution_annotated_transcriptome_orfs.txt')
save(species, file = 'publication_figures/species.RData')

# Read in .txt file output from OmicsBox


a <- species %>%
  rename(No._top_hits = X.BLAST.Top.Hits) %>%
  arrange(-No._top_hits) %>%
  slice_head(n = 20) %>%
  mutate(order = as.factor(row_number()))

b <- species %>%
  rename(No._top_hits = X.BLAST.Top.Hits) %>%
  arrange(No._top_hits) %>%
  slice_head(n = nrow(species) - 20) %>%
  summarize(No._top_hits = sum(No._top_hits)) %>%
  mutate(Species = 'others') 

species_data <- a %>%
  full_join(b) %>%
  arrange(order)


(species_distr <- ggplot(species_data, aes(x = No._top_hits, y = rev(order))) +
    geom_col(colour = 'black', size = 0.05, fill = '#FF8080') +
    scale_y_discrete(
      breaks = species_data$order,
      labels = rev(c('squid: *Sepia pharaonis*', 'octopus: *Octopus sinensis*', 'octopus: *Octopus bimaculoides*', 'squid: *Doryteuthis opalescens*', 'squid: *Idiosepius paradoxus*', 'squid: *Doryteuthis pealeii*', 'aphid: *Acyrthosiphon pisum*', 'starfish: *Acanthaster planci*', 'scallop: *Mizuhopecten yessoensis*', 'scallop: *Pecten maximus*', 'freshwater snail: *Pomacea canaliculata*', 'mussel: *Mytilus galloprovincialis*', 'limpet: *Lottia gigantea*', 'butterfly: *Danaus plexippus plexippus*', 'sea slug: *Aplysia californica*', 'cuttlefish: *Sepia officinalis*', 'squid: *Nototodarus sloanii*', 'sea anemone: *Exaiptasia diaphana*', 'squid: *Euprymna scolopes*', 'oyster: *Cassostrea virginica*', 'others'))) +
    scale_x_continuous(breaks = c(0, 2500, 5000, 7500, 10000, 12500, 15000, 17500),
                       labels = c(0, 2500, 5000, 7500, 10000, 12500, 15000, 17500),
                       expand = expansion(mult = c(0.005, 0.02))) +
    labs(x = "Number of BLAST top hits", y = "") +
    coord_cartesian(xlim = c(0, 17800)) +
    theme_classic() +
    theme(axis.text.y = element_markdown(size = 8), 
          axis.title = element_text(size = 10),
          axis.text.x = element_text(size = 8)) )

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Supplementary figure: GSEA Dotplot

# Plot all GO terms for CNS and eyes on same graph

# CNS and eyes share same y axis on dotplot - want to order by NES of CNS, and show overlaps of GO terms found in CNS and eyes
# Below method so GO terms in both tissues are not twice on the y axis
CNS <- gsea_CNS_ashr_annot_exp0_summary %>%
  mutate(tissue = "CNS")
eyes <- gsea_eyes_ashr_annot_exp0_summary %>%
  mutate(tissue = "eyes")

CNS_NES <- CNS %>%
  dplyr::select(ID, Description, NES) %>%
  rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_NES <- eyes %>%
  dplyr::select(ID, Description, NES) %>% 
  rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_NES <- CNS_NES %>%
  full_join(eyes_NES) %>%
  ungroup() %>% 
  arrange(NES_CNS, NES_eyes) %>%
  dplyr::mutate(order_NES = as.factor(row_number())) %>%
  pivot_longer(c(NES_CNS, NES_eyes), names_to = "tissue", values_to = "NES") %>%
  mutate(tissue = gsub("NES_", "", tissue)) %>%
  arrange(NES)


CNS_p.adjust <- CNS %>%
  dplyr::select(ID, Description, p.adjust) %>%
  rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_p.adjust <- eyes %>%
  dplyr::select(ID, Description, p.adjust) %>% 
  rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_p.adjust <- CNS_p.adjust %>%
  full_join(eyes_p.adjust) %>% 
  ungroup() %>% 
  arrange(p.adjust_CNS) %>%
  dplyr::mutate(order_p.adjust = as.factor(row_number())) %>% 
  pivot_longer(c(p.adjust_CNS, p.adjust_eyes), names_to = "tissue", values_to = "p.adjust") %>%
  dplyr::mutate(tissue = gsub("p.adjust_", "", tissue)) %>%
  arrange(Description)

CNS_Count <- CNS %>%
  dplyr::select(ID, Description, Count) %>%
  rename_with(~paste0(., "_CNS"), -c("ID", "Description"))
eyes_Count <- eyes %>%
  dplyr::select(ID, Description, Count) %>% 
  rename_with(~paste0(., "_eyes"), -c("ID", "Description"))
df_Count <- CNS_Count %>%
  full_join(eyes_Count) %>% 
  ungroup() %>% 
  arrange(Count_CNS) %>%
  dplyr::mutate(order_Count = as.factor(row_number())) %>% 
  pivot_longer(c(Count_CNS, Count_eyes), names_to = "tissue", values_to = "Count") %>%
  dplyr::mutate(tissue = gsub("Count_", "", tissue))

df_all <- df_NES %>%
  full_join(df_p.adjust) %>%
  full_join(df_Count) %>%
  dplyr::mutate(Description_wrapped = str_wrap(Description, width = 50))

figure_GSEA <- (ggplot(df_all, aes(x = NES, y = order_NES, colour = p.adjust, size = Count)) +
  geom_point() +
  scale_y_discrete(
    breaks = df_all$order_NES,
    labels = df_all$Description_wrapped) +
  ggplot2::facet_grid(. ~ tissue) +
  scale_colour_gradient(low = '#4040FF', high = '#E3E3E3',
                        name = "padj",
                        guide = guide_colourbar(barwidth = 0.5,
                                                barheight = 8,
                                                ticks = FALSE)) +
  scale_size(name = "Count") +
  geom_vline(xintercept = 0) +
  labs(x = "Normalised enrichment score", y = "") +
  theme(panel.border = element_rect(fill = NA, size = 0.5),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(colour = 'grey', size = 0.2),
        panel.background = element_blank(),
        strip.background = element_rect(fill = NA, size = 0.5, colour = 'black'),
        axis.title = element_text(size = 8),
        axis.text.x = element_text(size = 6),
        axis.text.y = element_text(size = 4),
        legend.title = element_text(size = 8),
        legend.text = element_text(size = 6),
        legend.key = element_blank(),
        strip.text = element_text(size = 8, face = 'bold')) )

## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

figure_GSEA

## Warning: Removed 96 rows containing missing values (`geom_point()`).

SessionInfo

R version 4.0.4 (2021-02-15) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 10 x64 (build 18363)

Matrix products: default

locale: [1] LC_COLLATE=English_Australia.1252 LC_CTYPE=English_Australia.1252
[3] LC_MONETARY=English_Australia.1252 LC_NUMERIC=C
[5] LC_TIME=English_Australia.1252

attached base packages: [1] grid parallel stats4 stats graphics grDevices utils datasets methods
[10] base

other attached packages: [1] ggupset_0.3.0 ggnewscale_0.4.5 enrichplot_1.10.2
[4] clusterProfiler_3.18.1 pathview_1.30.1 DOSE_3.16.0
[7] ggvenn_0.1.8 splitstackshape_1.4.8 knitr_1.31
[10] ashr_2.2-47 apeglm_1.12.0 factoextra_1.0.7
[13] ggrepel_0.9.1 DEGreport_1.26.0 pheatmap_1.0.12
[16] patchwork_1.1.1 RColorBrewer_1.1-2 viridis_0.5.1
[19] viridisLite_0.3.0 forcats_0.5.1 stringr_1.4.0
[22] dplyr_1.0.5 purrr_0.3.4 readr_1.4.0
[25] tidyr_1.1.3 tibble_3.1.0 ggplot2_3.3.3
[28] tidyverse_1.3.0 DESeq2_1.30.1 SummarizedExperiment_1.20.0 [31] Biobase_2.50.0 MatrixGenerics_1.2.1 matrixStats_0.58.0
[34] GenomicRanges_1.42.0 GenomeInfoDb_1.26.4 IRanges_2.24.1
[37] S4Vectors_0.28.1 BiocGenerics_0.36.0

loaded via a namespace (and not attached): [1] utf8_1.2.1 tidyselect_1.1.0 RSQLite_2.2.4
[4] AnnotationDbi_1.52.0 BiocParallel_1.24.1 scatterpie_0.1.5
[7] munsell_0.5.0 withr_2.4.1 colorspace_2.0-1
[10] GOSemSim_2.16.1 rstudioapi_0.13 KEGGgraph_1.50.0
[13] lasso2_1.2-21.1 bbmle_1.0.23.1 GenomeInfoDbData_1.2.4
[16] mixsqp_0.3-43 mnormt_2.0.2 polyclip_1.10-0
[19] farver_2.1.0 bit64_4.0.5 downloader_0.4
[22] coda_0.19-4 vctrs_0.3.6 generics_0.1.0
[25] xfun_0.22 R6_2.5.0 graphlayouts_0.7.1
[28] clue_0.3-58 invgamma_1.1 locfit_1.5-9.4
[31] bitops_1.0-6 cachem_1.0.4 reshape_0.8.8
[34] fgsea_1.16.0 DelayedArray_0.16.2 assertthat_0.2.1
[37] scales_1.1.1 ggraph_2.0.5 gtable_0.3.0
[40] Cairo_1.5-12.2 tidygraph_1.2.0 rlang_0.4.11
[43] genefilter_1.72.1 GlobalOptions_0.1.2 splines_4.0.4
[46] broom_0.7.5 BiocManager_1.30.10 yaml_2.2.1
[49] reshape2_1.4.4 modelr_0.1.8 backports_1.2.1
[52] qvalue_2.22.0 tools_4.0.4 psych_2.0.12
[55] logging_0.10-108 ellipsis_0.3.1 ggdendro_0.1.22
[58] Rcpp_1.0.6 plyr_1.8.6 zlibbioc_1.36.0
[61] RCurl_1.98-1.2 GetoptLong_1.0.5 cowplot_1.1.1
[64] haven_2.3.1 cluster_2.1.0 fs_1.5.0
[67] magrittr_2.0.1 data.table_1.14.0 DO.db_2.9
[70] circlize_0.4.12 reprex_1.0.0 truncnorm_1.0-8
[73] tmvnsim_1.0-2 mvtnorm_1.1-1 SQUAREM_2021.1
[76] hms_1.0.0 evaluate_0.14 xtable_1.8-4
[79] XML_3.99-0.5 emdbook_1.3.12 readxl_1.3.1
[82] gridExtra_2.3 shape_1.4.5 compiler_4.0.4
[85] bdsmatrix_1.3-4 shadowtext_0.0.7 crayon_1.4.1
[88] htmltools_0.5.1.1 geneplotter_1.68.0 lubridate_1.7.10
[91] DBI_1.1.1 tweenr_1.0.1 dbplyr_2.1.0
[94] ComplexHeatmap_2.6.2 MASS_7.3-53 Matrix_1.3-2
[97] cli_2.3.1 igraph_1.2.6 pkgconfig_2.0.3
[100] rvcheck_0.1.8 numDeriv_2016.8-1.1 xml2_1.3.2
[103] annotate_1.68.0 XVector_0.30.0 rvest_1.0.0
[106] digest_0.6.27 ConsensusClusterPlus_1.54.0 graph_1.68.0
[109] Biostrings_2.58.0 rmarkdown_2.7 cellranger_1.1.0
[112] fastmatch_1.1-0 edgeR_3.32.1 rjson_0.2.20
[115] lifecycle_1.0.0 nlme_3.1-152 jsonlite_1.7.2
[118] limma_3.46.0 fansi_0.4.2 pillar_1.5.1
[121] lattice_0.20-41 Nozzle.R1_1.1-1 KEGGREST_1.30.1
[124] fastmap_1.1.0 httr_1.4.2 survival_3.2-7
[127] GO.db_3.12.1 glue_1.4.2 png_0.1-7
[130] bit_4.0.4 Rgraphviz_2.34.0 ggforce_0.3.2
[133] stringi_1.5.3 blob_1.2.1 org.Hs.eg.db_3.12.0
[136] memoise_2.0.0 irlba_2.3.3

R code to analyse the transcriptomic response of the two-toned pygmy squid (Idiosepius pygmaeus) nervous system to elevated CO2 (differential expression and gene set enrichment analyses)

Jodi T Thomas, Roger Huerlimann, Celia Schunter, Sue-Ann Watson, Philip L Munday, Timothy Ravasi

18 May, 2023

Preamble

Packages

Set up data

Explore Raw Data

Total raw counts per Individual

Raw counts across genes within an individual

Mean vs Variance

Run DE Analysis (Walds Test)

Create DESeq dataset and run analysis

Save raw count data

Save normalised count data

Save rlog transformed data

Extract DESeq results (unshrunken)

Extract DESeq results (shrunken with ashr)

Sample level Quality Control

PCA Plots

All data from both tissues

CNS

Eyes

Heatmaps

All data from both tissues

CNS

Eyes

Explore DESeq2 Data

Total normalised Counts per individual

CNS

Eyes

BaseMean level of expression across genes

CNS

Eyes

BaseMean level of expression across samples

CNS

Eyes

Dispersion Estimates

CNS

Eyes

DE Results

Annotate DESeq2 results

Significantly DE genes

CNS

Eyes

MA Plots

CNS

Eyes

Volcano Plots

CNS

Eyes

Heatmaps

CNS

Eyes

Plot gene expression

CNS

Eyes

Final DE results

Venn diagram

Heatmap

CNS

Eyes

Table of significantly DE genes

CNS

Eyes

Significantly DE in both tissues

Functional Enrichment Analysis

Prepare data

Gene set enrichment analysis

CNS

Eyes

Dot plot

Core enrichment genes

Ion channel cluster of GO terms

CNS

Eyes

Both tissues

Ion transport cluster of GO terms

GPCR cluster of GO terms

Export functional enrichment results for Cytoscape

GSEA results file

R code to analyse the transcriptomic response of the two-toned pygmy squid (Idiosepius pygmaeus) nervous system to elevated CO₂ (differential expression and gene set enrichment analyses)