Preamble

library(WGCNA) # for WGCNA
library(clusterProfiler) # for enrichment analyses
library(knitr) # for kable tables
library(DESeq2) # for variance stabilising transformation
library(ggplot2) # for plots
library(ggpubr) # for R value on correlation plot
library(GGally) # extension to ggplot2
library(CCA)# for canonical correlation analyses
library(ggbiplot) # for PCA plots
library(splitstackshape) # for splitting concatenated values
library(tidyverse)

options(stringsAsFactors = FALSE)

Central nervous system (CNS)

Set up data

Expression Data

# Count data
count_data <- read.delim("data/corset-counts.txt", row.names = 1)
count_data_CNS <- count_data %>%
  select(contains('CNS'))

# Meta data
meta_data <- read_csv("data/metadata.csv", trim_ws = TRUE)

# use only squid with count data
meta_data <- meta_data %>%
  filter(Squid == "PS506" | Squid == "PS508" | Squid == "PS509" | Squid == "PS510" | Squid == "PS513" | Squid == "PS514" | Squid == "PS518" | Squid == "PS522" | Squid == "PS527" | Squid == "PS528" | Squid == "PS529" | Squid == "PS534" | Squid == "PS539" | Squid == "PS547" | Squid == "PS551" | Squid == "PS557" | Squid == "PS561" | Squid == "PS584" | Squid == "PS626" | Squid == "PS636") %>%
  mutate(CO2 = gsub('Ambient', '0', CO2),
         CO2 = gsub('Elevated', '1', CO2),
         CO2 = as.numeric(CO2))
str(meta_data)

meta_data$Squid = paste0(meta_data$Squid, '_CNS')

meta_data_CNS <- meta_data %>%
  arrange(Squid) %>%
  column_to_rownames(var = "Squid")

# Check they match
all(colnames(count_data_CNS) %in% rownames(meta_data_CNS))
all(colnames(count_data_CNS) == rownames(meta_data_CNS))

# Make DESeq dataset
dds_CNS <- DESeqDataSetFromMatrix(count_data_CNS, colData = meta_data_CNS, design = ~ CO2)
dds_CNS <- estimateSizeFactors(dds_CNS)

# Filter out genes with low counts: remove all genes with a count of less than 10 in more than 90% of the samples (= 18 samples).
keep <- rowSums(counts(dds_CNS) >= 10 ) >= 18 # 18 or more samples with counts of 10 or more
dds_CNS_filtered <- dds_CNS[keep,]

# Variance stabilising transformation from DEseq2
VST_counts_CNS <- varianceStabilizingTransformation(dds_CNS_filtered)

VST_counts_CNS_tb <- assay(VST_counts_CNS) %>% 
  as.data.frame() %>% 
  rownames_to_column(var="gene") %>% 
  as_tibble()

datExpr_CNS0 = as.data.frame(t(VST_counts_CNS_tb[, -1])) 
names(datExpr_CNS0) = VST_counts_CNS_tb$gene

save(datExpr_CNS0, file = 'data/expression_data_pre_outliers_removed_CNS.RData')
save(dds_CNS_filtered, file = 'data/dds_CNS_filtered.RData')

Check for outliers

Gene Outliers

# Check for genes with too many misssing values
gsg = goodSamplesGenes(datExpr_CNS0, verbose = 0)
gsg$allOK

## [1] TRUE

No gene outliers

Sample Outliers

# Check for sample outliers
sampleTree = hclust(dist(datExpr_CNS0), method = "average")
# sizeGrWindow(12,9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
     cex.axis = 1.5, cex.main = 2)

Remove sample outliers PS510_CNS, PS506_CNS, PS518_CNS

abline(h = 155, col = "red")
clust = cutreeStatic(sampleTree, cutHeight = 155, minSize = 10)
table(clust)
# clust 1 contains the samples we want to keep.
keepSamples = (clust==1)
datExpr_CNS = datExpr_CNS0[keepSamples, ]
nGenes = ncol(datExpr_CNS)
nSamples = nrow(datExpr_CNS)

sampleTree = hclust(dist(datExpr_CNS), method = "average")
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering after outlier removal", sub="", xlab="", cex.lab = 1.5,
     cex.axis = 1.5, cex.main = 2)

Traits data

# create squid as a column
allTraits = meta_data_CNS %>%
  rownames_to_column(var = "Squid")

# Form a data frame analogous to expression data that will hold the traits.
CNSSamples = rownames(datExpr_CNS)
traitRows = match(CNSSamples, allTraits$Squid)
datTraits_CNS = allTraits[traitRows, -1]
rownames(datTraits_CNS) = allTraits[traitRows, 1]
collectGarbage()

Save data

save(datExpr_CNS, datTraits_CNS, file = "data/WGCNA_CNS_01_dataInput.RData")

Check for any global drivers of expression

pca <- prcomp(datExpr_CNS)
#summary(pca) 
ggscreeplot(pca) +
  scale_x_continuous(breaks = c(1, 2, 3, 4, 5, 6, 7, 8, 9))

datTraits_CNS1 <- datTraits_CNS %>%
  mutate(CO2 = as.factor(CO2))
ggbiplot(pca,
         labels = rownames(datTraits_CNS1),
         ellipse = TRUE,
         var.axes = FALSE,
         groups = datTraits_CNS1$CO2,
         choices = c(1, 2), # PC axes to include
) +
  theme_classic() +
  coord_cartesian(xlim = c(-2, 2), ylim = c(-2, 3)) +
  ggtitle('PCA of expression data')

0 = current-day CO₂, 1 = elevated CO₂
PC1 only explains 18.4% of variance which suggests there’s not a main driver
Slightly groups by CO₂, but we’re interested in this so want to keep CO₂

Soft Thresholding Power

Choose soft thresholding power

powers = c(c(1:10), seq(from = 12, to = 20, by = 2))
sft_powers_CNS = pickSoftThreshold(datExpr_CNS,
                                 powerVector = powers,
                                 verbose = 1,
                                 networkType = 'signed',
                                 corFnc = "cor")
save(sft_powers_CNS, file = 'results/soft_thresholding_powers_CNS.RData')

powers = c(c(1:20))
# Call the network topology analysis function
sft_powers_all_CNS = pickSoftThreshold(datExpr_CNS,
                                     powerVector = powers,
                                     verbose = 1,
                                     networkType = 'signed',
                                     corFnc = "cor")
save(sft_powers_all_CNS, file = 'results/soft_thresholding_powers_all_CNS.RData')

sft_powers_all_CNS[[2]] %>% kable()

Power	SFT.R.sq	slope	truncated.R.sq	mean.k.	median.k.	max.k.
1	0.0402095	-67.688209	0.9814769	12942.96374	12943.09779	13078.1759
2	0.4862635	-17.722759	0.7337170	7021.99904	6962.03873	7692.6342
3	0.8113725	-9.895274	0.9123072	4063.62552	3967.83849	5095.7115
4	0.8749529	-6.550859	0.9524439	2479.29243	2372.68771	3633.7195
5	0.8909474	-4.967528	0.9623227	1581.29455	1476.01411	2731.6514
6	0.8991374	-4.126454	0.9655260	1047.51410	948.98385	2136.6708
7	0.9101993	-3.574302	0.9711171	717.06684	627.69783	1723.4619
8	0.9122715	-3.227687	0.9705286	505.16030	425.79480	1424.4125
9	0.9179160	-2.977824	0.9750905	365.00645	295.06096	1200.5613
10	0.9223976	-2.781527	0.9782868	269.73963	208.37198	1028.2366
11	0.9223809	-2.635500	0.9784148	203.38524	149.41123	892.4121
12	0.9252505	-2.518028	0.9805367	156.14586	108.73794	783.4290
13	0.9273523	-2.425553	0.9830224	121.84397	80.22217	695.6729
14	0.9275649	-2.349229	0.9845817	96.48622	59.88433	622.8722
15	0.9250048	-2.293843	0.9849420	77.43230	45.18614	561.6742
16	0.9233672	-2.240420	0.9862656	62.90023	34.51574	509.6304
17	0.9224295	-2.195162	0.9868124	51.66456	26.60069	464.9179
18	0.9201277	-2.156841	0.9861020	42.86789	20.67750	426.1557
19	0.9168200	-2.123342	0.9844186	35.90071	16.21797	392.2811
20	0.9190664	-2.084805	0.9877374	30.32332	12.83714	362.4648

powers = c(c(1:10), seq(from = 12, to = 20, by = 2))
par(mfrow = c(1,2))
cex1 = 0.9
plot(sft_powers_CNS$fitIndices[,1], -sign(sft_powers_CNS$fitIndices[,3])*sft_powers_CNS$fitIndices[,2],
     xlab = "Soft Threshold (power)", ylab="Scale Free Topology Model Fit,signed R^2", type="n",
     main = paste("Scale independence"))
text(sft_powers_CNS$fitIndices[,1], -sign(sft_powers_CNS$fitIndices[,3])*sft_powers_CNS$fitIndices[,2],
     labels = powers, cex = cex1, col = "red")
# R^2 cut-off of 0.9
abline(h = 0.9, col = "red")

# Mean connectivity as a function of the soft-thresholding power
plot(sft_powers_CNS$fitIndices[,1], sft_powers_CNS$fitIndices[,5],
     xlab = "Soft Threshold (power)", ylab = "Mean Connectivity", type = "n",
     main = paste("Mean connectivity"))
text(sft_powers_CNS$fitIndices[,1], sft_powers_CNS$fitIndices[,5], labels = powers, cex = cex1, col = "red")
# Mean connectivity cut-off of 100
abline(h = 100, col="red")

Power 7 first to cross threshold of R^2 > 0.9
But mean connectivity is still high which can result in all genes in the network being highly connected and resulting in few clusters of large genes
Thus, use power 14 when mean connectivity < 100

Check soft thresholding power

k_14_CNS = softConnectivity(datE = datExpr_CNS, power = 14, corFnc = "cor", type = "signed")
save(k_14_CNS, file = 'results/soft_power_14_CNS.RData')

hist(k_14_CNS)

plot <- scaleFreePlot(k_14_CNS, main="Check scale free topology when power = 14\n")

Power 14 looks good to approximate a scale free topology: high R^2 > 0.9 and slope < -2

Network construction and module detection

Network construction and module detection was run using the High Performance Computing (HPC) at Okinawa Institute of Science and Technology Graduate University (OIST), Japan, allowing multiple threads to be enabled and the data to be analysed in one block.

## R CODE ##
.libPaths(c('/home/j/jodi-thomas/Rlibs_2', .libPaths()))
library(WGCNA)

options(stringsAsFactors = FALSE)
enableWGCNAThreads()
lnames = load(file = "/home/j/jodi-thomas/WGCNA/WGCNA_final/WGCNA_CNS_01_dataInput.RData")
lnames

## ADJACENCIES ##
adjacency_signedpearson_power14_CNS = adjacency(datExpr_CNS,
                                                type = 'signed',
                                                power = 14,
                                                corFnc = 'cor')
save(adjacency_signedpearson_power14_CNS, file = '/flash/RavasiU/Jodi/adjacency_signedpearson_power14_CNS.RData')

## TOM ##
TOM_signedpearson_power14_CNS = TOMsimilarity(adjacency_signedpearson_power14_CNS,
                                              TOMType = 'signed')
dissTOM_signedpearson_power14_CNS = 1-TOM_signedpearson_power14_CNS
save(dissTOM_signedpearson_power14_CNS, file = '/flash/RavasiU/Jodi/dissTOM_signedpearson_power14_CNS.RData')

## CLUSTERING ##
geneTree_signedpearson_power14_CNS = hclust(as.dist(dissTOM_signedpearson_power14_CNS), method = "average")
save(geneTree_signedpearson_power14_CNS, file = '/flash/RavasiU/Jodi/geneTree_signedpearson_power14_CNS.RData')

## MODULES ##
dynamicMods_signedpearson_power14_CNS = cutreeDynamic(dendro = geneTree_signedpearson_power14_CNS,
                                                      minClusterSize = 30,
                                                      distM = dissTOM_signedpearson_power14_CNS,
                                                      deepSplit = 2,
                                                      pamRespectsDendro = FALSE)
save(dynamicMods_signedpearson_power14_CNS, file = '/flash/RavasiU/Jodi/dynamicMods_signedpearson_power14_CNS.RData')

dynamicColors_signedpearson_power14_CNS = labels2colors(dynamicMods_signedpearson_power14_CNS)
save(dynamicColors_signedpearson_power14_CNS, file = '/flash/RavasiU/Jodi/dynamicColors_signedpearson_power14_CNS.RData')

Merge Modules

Load modules that were detected using the HPC

load(file = 'results/geneTree_signedpearson_power14_CNS.RData')
load(file = 'results/dynamicMods_signedpearson_power14_CNS.RData')
load(file = 'results/dynamicColors_signedpearson_power14_CNS.RData')

Modules pre-merge

table(dynamicMods_signedpearson_power14_CNS) %>%
  kable()

42 modules

Merge modules

MEList = moduleEigengenes(datExpr_CNS, colors = dynamicColors_signedpearson_power14_CNS)
MEs_CNS_premerge = MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss_CNS_premerge = 1-cor(MEs_CNS_premerge)
# Cluster module eigengenes
METree_CNS = hclust(as.dist(MEDiss_CNS_premerge), method = "average")

plot(METree_CNS, main = "Clustering of module eigengenes",
     xlab = "", sub = "")
abline(h=0.3, col = "red")

Use height cut-off of 0.3, which corresponds to a correlation of 0.7 for merging

merge_0.3_CNS = mergeCloseModules(datExpr_CNS,
                                  dynamicColors_signedpearson_power14_CNS,
                                  corFnc = 'cor',
                                  cutHeight = 0.30,
                                  verbose = 3)
mergedColors_signedpearson_power14_CNS = merge_0.3_CNS$colors
save(mergedColors_signedpearson_power14_CNS, file = 'results/mergedColors_signedpearson_power14_CNS.RData')

table(mergedColors_signedpearson_power14_CNS) %>%
  kable()

mergedColors_signedpearson_power14_CNS	Freq
black	3917
blue	3346
brown	2839
cyan	475
darkgreen	896
darkgrey	211
darkolivegreen	108
darkorange	177
darkred	249
green	3099
grey60	508
lightcyan	500
lightcyan1	50
lightsteelblue1	395
lightyellow	266
mediumpurple3	62
orange	738
orangered4	70
paleturquoise	128
pink	728
plum1	73
purple	597
saddlebrown	151
salmon	423
skyblue3	85
steelblue	139
turquoise	5654

27 modules after merging

Dendrogram before and after merging modules

plotDendroAndColors(geneTree_signedpearson_power14_CNS, cbind(dynamicColors_signedpearson_power14_CNS, mergedColors_signedpearson_power14_CNS),
                    c("Dynamic Tree Cut", "Merged dynamic"),
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)

Save final merged modules

# Module colours as the final merged module colours
moduleColors_CNS = mergedColors_signedpearson_power14_CNS
# Construct numerical labels corresponding to the colors
colorOrder = c("grey", standardColors(50))
moduleLabels_CNS = match(moduleColors_CNS, colorOrder)-1
# Module eigengenes from final merged modules
MEs_CNS = merge_0.3_CNS$newMEs
# Rename geneTree
geneTree_CNS <- geneTree_signedpearson_power14_CNS

save(moduleColors_CNS, moduleLabels_CNS, MEs_CNS, geneTree_CNS, file = "results/WGCNA_CNS_02_Network_Modules.RData")

Determining Interesting Modules

Canonical Correlation Analysis

Run regularised CCA

x <- as.matrix(scale(datTraits_CNS)) # center and scale
y <- as.matrix(scale(MEs_CNS))

# Calculate CCA (regularised as have more variables than observations)
res.reg <- CCA::estim.regul(x, y)
rcca_CNS <- CCA::rcc(x, y, lambda1 = res.reg$lambda1, lambda2 = res.reg$lambda2)
save(rcca_CNS, file = 'results/rcca_CNS.RData')

x <- as.matrix(scale(datTraits_CNS))
y <- as.matrix(scale(MEs_CNS))

# Display the canonical correlations
bp <- barplot(rcca_CNS$cor,
              xlab="Canonical Function",
              ylab="Canonical Correlation (Rc)",
              names.arg = paste0(1:min(ncol(x), ncol(y))), ylim=c(0,1))
text(bp, rcca_CNS$cor-0.05, labels = paste(round(rcca_CNS$cor, 2)))

Loadings/cross loadings heatmaps

## LOADINGS
# canonical loadings of x variate (traits set) = correlation of each trait variable to the entire x variate/traits set
can.load.x_CNS <- rcca_CNS$scores$corr.X.xscores %>% 
  as.data.frame() %>%
  rownames_to_column(var = 'Trait')
can.load.x_CNS %>% kable(caption = 'Canonical loadings for traits set')

# canonical loadings of y variate (MEs set) = correlation of each ME variable to the entire y variate/ME set
can.load.y_CNS <- rcca_CNS$scores$corr.Y.yscores %>% 
  as.data.frame() %>%
  rownames_to_column(var = 'Module_Eigengene')
can.load.y_CNS %>% kable(caption = 'Canonical loadings for module eigengenes set')

## CROSS LOADINGS
# canonical cross laodings of x variate (traits set) = correlation of each trait variable to the entire y variate/MEs set
can.crossload.x_CNS <- rcca_CNS$scores$corr.X.yscores %>% 
  as.data.frame() %>%
  rownames_to_column(var = 'Trait')
can.crossload.x_CNS %>% kable(caption = 'Canonical cross loadings for traits set')

# canonical cross laodings of y variate (MEs set) = correlation of each ME variable to the entire x variate/traits set
can.crossload.y_CNS <- rcca_CNS$scores$corr.Y.xscores %>% 
  as.data.frame() %>%
  rownames_to_column(var = 'Module_Eigengene')
can.crossload.y_CNS %>% kable(caption = 'Canonical cross loadings for module eigengenes set')


save(can.load.x_CNS, can.crossload.x_CNS, can.load.y_CNS, can.crossload.y_CNS, file = 'results/CCA_loads_crossloads_CNS_power14_out.RData')

# Heatmap of loadings and cross-loadings, coloured by loadings and loadings printed above (cross loadings)
can.load.x.long <- can.load.x_CNS %>%
  pivot_longer(cols = 2:5, names_to = 'CF', values_to = 'loadings') %>%
  select(Trait, CF, loadings)

can.crossload.x.long <- can.crossload.x_CNS %>%
  pivot_longer(cols = 2:5, names_to = 'CF', values_to = 'crossloadings')%>%
  select(Trait, CF, crossloadings)

load.crossload.x <- can.load.x.long %>% full_join(can.crossload.x.long)

ggplot(load.crossload.x, aes(x = Trait, y = CF)) +
  geom_tile(aes(fill = loadings)) +
  geom_text(aes(label = round(loadings, 2)), vjust = -1, size = 6) +
  geom_text(aes(label = paste("(", round(crossloadings, 2), ")", sep = '')), vjust = 1, size = 4) +
  scale_fill_gradient2(low="blue", mid = "white", high="red") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust=1),
        axis.text = element_text(size = 16)) +
  scale_x_discrete(limits = c('CO2', 'Active_time_s', 'Dist', 'Vel', 'Time_A_s', 'Soft_touch', 'Aggressive_touch', 'Soft_touch_no.', 'Aggressive_touch_no.'), name = '') +
  scale_y_discrete(labels = c('V1' = 'CF 1', 'V2' = 'CF 2', 'V3' = 'CF 3', 'V4' = 'CF 4'), name = '')

can.load.y.long <- can.load.y_CNS %>%
  pivot_longer(cols = 2:5, names_to = 'CF', values_to = 'loadings') %>%
  select(Module_Eigengene, CF, loadings)

can.crossload.y.long <- can.crossload.y_CNS %>%
  pivot_longer(cols = 2:5, names_to = 'CF', values_to = 'crossloadings')%>%
  select(Module_Eigengene, CF, crossloadings)

load.crossload.y <- can.load.y.long %>% full_join(can.crossload.y.long)

ggplot(load.crossload.y, aes(x = Module_Eigengene, y = CF)) +
  geom_tile(aes(fill = loadings)) +
  geom_text(aes(label = round(loadings, 2)), vjust = -1, size = 5) +
  geom_text(aes(label = paste("(", round(crossloadings, 2), ")", sep = '')), vjust = 1, size = 4) +
  scale_fill_gradient2(low="blue", mid = "white", high="red") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust=1),
        axis.text = element_text(size = 16)) +
  scale_x_discrete(name = '') +
  scale_y_discrete(labels = c('V1' = 'CF 1', 'V2' = 'CF 2', 'V3' = 'CF 3', 'V4' = 'CF 4'), name = '')

Biplots

# Biplots
plt.cc(rcca_CNS,
       d1 = 1,
       d2 = 2,
       var.label = T,
       Xnames = colnames(datTraits_CNS),
       Ynames = colnames(MEs_CNS),
       type = 'v') # i for individuals - plots all squid, or b for both

plt.cc(rcca_CNS,
       d1 = 1,
       d2 = 3,
       var.label = TRUE,
       Xnames = colnames(datTraits_CNS),
       Ynames = colnames(MEs_CNS),
       type = 'v')

plt.cc(rcca_CNS,
       d1 = 2,
       d2 = 3,
       var.label = TRUE,
       Xnames = colnames(datTraits_CNS),
       Ynames = colnames(MEs_CNS),
       type = 'v')

plt.cc(rcca_CNS,
       d1 = 1,
       d2 = 4,
       var.label = TRUE,
       Xnames = colnames(datTraits_CNS),
       Ynames = colnames(MEs_CNS),
       type = 'v')

plt.cc(rcca_CNS,
       d1 = 2,
       d2 = 4,
       var.label = TRUE,
       Xnames = colnames(datTraits_CNS),
       Ynames = colnames(MEs_CNS),
       type = 'v')

plt.cc(rcca_CNS,
       d1 = 3,
       d2 = 4,
       var.label = TRUE,
       Xnames = colnames(datTraits_CNS),
       Ynames = colnames(MEs_CNS),
       type = 'v')

Modules of interest identified with CCA:

Canonical loading and cross-loading of a given variable from both the ME set and traits set were ≥ 0.3 for the same CF, this ME and trait were considered correlated
MEs and traits within the same or opposite quarters of the biplot, and sitting on or outside the biplot inner ring with a radius of 0.5, were considered positively or negatively correlated, respectively
If MEs and traits were considered correlated by both the canonical loadings/cross-loadings heatmap AND biplots they were identified as modules of interest for the given trait(s) by CCA.

Module Membership vs Gene Significance

Module membership (MM) = pearson correlation between gene expression and module eigengene
Gene significance (GS) = pearson correlation between gene expression and trait
All modules of interest identified by CCA, plot MM vs GS

# Creat a list of MM and GS
GS_CNS = list()
MM_CNS = list()
GS_CNS = corAndPvalue(datExpr_CNS, datTraits_CNS)
MM_CNS = corAndPvalue(datExpr_CNS, MEs_CNS)
# Put all info from two lists into one dataframe
GSmat = rbind(GS_CNS$cor, GS_CNS$p, GS_CNS$Z, GS_CNS$t)
nTraits = ncol(datTraits_CNS)
traitNames = colnames(datTraits_CNS)
nGenes = ncol(datExpr_CNS)
dim(GSmat) = c(nGenes, 4*nTraits)
rownames(GSmat) = colnames(datExpr_CNS)
colnames(GSmat) = spaste(
  c("GS_", "p_GS_", "Z_GS_", "t_GS_"),
  rep(traitNames, rep(4, nTraits)))


MMmat = rbind(MM_CNS$cor, MM_CNS$p, MM_CNS$Z, MM_CNS$t)
MEnames = colnames(MEs_CNS)
nMEs = ncol(MEs_CNS)
dim(MMmat) = c(nGenes, 4*nMEs)
rownames(MMmat) = colnames(datExpr_CNS)
colnames(MMmat) = spaste(
  c("MM_", "p_MM_", "Z_MM_", "t_MM_"),
  rep(MEnames, rep(4, nMEs)))

MM_GS_CNS = data.frame(gene = colnames(datExpr_CNS),
                                      ModuleLabel = moduleLabels_CNS,
                                      ModuleColor = moduleColors_CNS,
                                      GSmat,
                                      MMmat)

modNames_CNS = substring(names(MEs_CNS), 3)
traitNames = colnames(datTraits_CNS)

save(GS_CNS, MM_CNS, modNames_CNS, traitNames, MM_GS_CNS, file = 'results/WGCNA_CNS_03_MM_GS.RData')

CO2

for (i in c('steelblue', 'skyblue3', 'darkgreen', 'green', 'darkolivegreen', 'brown', 'black'))
{
  module = i
  trait = "CO2"
  column_module = match(module, modNames_CNS)
  column_trait = match(trait, traitNames)
  moduleGenes = moduleColors_CNS==module
  verboseScatterplot(abs(MM_CNS$cor[moduleGenes, column_module]),
                     abs(GS_CNS$cor[moduleGenes, column_trait]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = paste("Gene significance for", trait),
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
}

Activity Traits

for (j in c('Active_time_s', 'Dist', 'Vel'))
{
  for (i in c('darkgreen', 'green', 'orange', 'pink', 'brown', 'black', 'grey60', 'steelblue', 'lightcyan1'))
  {
    module = i
    trait = j
    column_module = match(module, modNames_CNS)
    column_trait = match(trait, traitNames)
    moduleGenes = moduleColors_CNS==module
    verboseScatterplot(abs(MM_CNS$cor[moduleGenes, column_module]),
                       abs(GS_CNS$cor[moduleGenes, column_trait]),
                       xlab = paste("Module Membership in", module, "module"),
                       ylab = paste("Gene significance for", trait),
                       main = paste("Module membership vs. gene significance\n"),
                       cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
  }
}

Time in A

for (i in c('brown', 'orange', 'darkgreen', 'grey60', 'lightcyan1', 'skyblue3', 'steelblue', 'pink'))
{
  module = i
  trait = "Time_A_s"
  column_module = match(module, modNames_CNS)
  column_trait = match(trait, traitNames)
  moduleGenes = moduleColors_CNS==module
  verboseScatterplot(abs(MM_CNS$cor[moduleGenes, column_module]),
                     abs(GS_CNS$cor[moduleGenes, column_trait]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = paste("Gene significance for", trait),
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
}

Exploratory interaction?

for (i in c('orange', 'brown', 'lightcyan1', 'steelblue', 'grey60', 'skyblue3', 'pink'))
{
  module = i
  trait = 'Soft_touch'
  column_module = match(module, modNames_CNS)
  column_trait = match(trait, traitNames)
  moduleGenes = moduleColors_CNS==module
  verboseScatterplot(abs(MM_CNS$cor[moduleGenes, column_module]),
                     abs(GS_CNS$cor[moduleGenes, column_trait]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = paste("Gene significance for", trait),
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
}

Aggressive interaction?

for (i in c('orange', 'brown', 'lightcyan1', 'steelblue', 'grey60', 'pink'))
{
  module = i
  trait = 'Aggressive_touch'
  column_module = match(module, modNames_CNS)
  column_trait = match(trait, traitNames)
  moduleGenes = moduleColors_CNS==module
  verboseScatterplot(abs(MM_CNS$cor[moduleGenes, column_module]),
                     abs(GS_CNS$cor[moduleGenes, column_trait]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = paste("Gene significance for", trait),
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
}

No. exploratory interactions

for (i in c('orange', 'brown', 'lightcyan1', 'steelblue', 'grey60', 'skyblue3', 'lightcyan', 'paleturquoise'))
{
  module = i
  trait = 'Soft_touch_no.'
  column_module = match(module, modNames_CNS)
  column_trait = match(trait, traitNames)
  moduleGenes = moduleColors_CNS==module
  verboseScatterplot(abs(MM_CNS$cor[moduleGenes, column_module]),
                     abs(GS_CNS$cor[moduleGenes, column_trait]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = paste("Gene significance for", trait),
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
}

No. aggressive interactions

for (i in c('orange', 'brown', 'lightcyan1', 'steelblue', 'skyblue3', 'pink', 'darkorange', 'paleturquoise'))
{
  module = i
  trait = 'Aggressive_touch_no.'
  column_module = match(module, modNames_CNS)
  column_trait = match(trait, traitNames)
  moduleGenes = moduleColors_CNS==module
  verboseScatterplot(abs(MM_CNS$cor[moduleGenes, column_module]),
                     abs(GS_CNS$cor[moduleGenes, column_trait]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = paste("Gene significance for", trait),
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
}

Final Modules of Interest

All modules of interest initially identified by CCA that had a MM vs GS correlation (R-value) > 0.2 were chosen as the final modules of interest

trait <- c('CO2 treatment', 'Active time', 'Distance', 'Speed', 'Exploratory interaction?', 'Aggressive interaction?', 'No. exploratory interactions', 'No. aggressive interactions', 'Time in Zone A')
darkolivegreen <- c('1', '0', '0', '0', '0', '0', '0', '0', '0')
green <- c('1', '1', '1', '1', '0', '0', '0', '0', '0')
steelblue <- c('1', '-1', '-1', '0', '0', '0', '0', '0', '0')
darkgreen <- c('-1', '-1', '-1', '-1', '0', '0', '0', '0', '0')
skyblue3 <- c('-1', '0', '0', '0', '1', '0', '1', '0', '0')
black <- c('0', '1', '1', '0', '0', '0', '0', '0', '0')
grey60 <- c('0', '0', '-1', '0', '0', '0', '0', '0', '0')
pink <- c('0', '-1', '-1', '-1', '1', '1', '0', '0', '0')
lightcyan1 <- c('0', '0', '0', '1', '1', '1', '1', '1', '0')
brown <- c('0', '0', '0', '0', '1', '1', '1', '1', '0')
orange <- c('0', '0', '0', '0', '-1', '-1', '-1', '0', '0')
paleturquoise <- c('0', '0', '0', '0', '0', '0', '0', '1', '0')
darkorange <- c('0', '0', '0', '0', '0', '0', '0', '-1', '0')

final_modules <- data.frame(trait, darkolivegreen, green, steelblue, darkgreen, skyblue3, black, grey60, pink, lightcyan1, brown, orange, paleturquoise, darkorange)

final_modules_long_CNS <- final_modules %>%
  pivot_longer(cols = 2:14, names_to = 'modules', values_to = 'relationship')

save(final_modules_long_CNS, file = 'results/final_modules_CNS.RData')

final_modules_labels <- final_modules_long_CNS %>%
  dplyr::mutate(relationship = gsub('-1', '-', relationship),
                relationship = gsub('1', '+', relationship),
                relationship = gsub('0', '', relationship))


(plot_final_modules_CNS <- ggplot(final_modules_long_CNS, aes(x = modules, y = trait)) +
  geom_tile(aes(fill = relationship), colour = 'black') +
  scale_y_discrete(limits = rev(c('CO2 treatment', 'Active time', 'Distance', 'Speed', 'Exploratory interaction?', 'Aggressive interaction?', 'No. exploratory interactions', 'No. aggressive interactions', 'Time in Zone A')),
                   labels = rev(c(expression('CO'[2]~'treatment'), 'Active time', 'Distance', 'Speed', 'Exploratory interaction?', 'Aggressive interaction?', 'No. exploratory interactions', 'No. aggressive interactions', 'Time in Zone A')),
                   name = '') +
  scale_x_discrete(limits = c('darkolivegreen', 'green', 'steelblue', 'darkgreen', 'skyblue3', 'black', 'grey60', 'pink', 'lightcyan1', 'brown', 'orange', 'paleturquoise', 'darkorange'), name = '') +
  scale_fill_manual(values = c("#4040FF", "#FFFFFF", "#FF4040"),
                    name = 'Correlation',
                    breaks = c(-1, 0, 1), labels = c('Negative', 'None', 'Positive')) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust=1),
        axis.text = element_text(size = 10, colour = 'black'),
        axis.title = element_text(size = 10, colour = 'black'),
        legend.text = element_text(size = 10, colour = 'black'),
        legend.title = element_text(size = 10, colour = 'black')))

save(plot_final_modules_CNS, file = 'results/plot_final_modules_CNS.RData')

Hub genes in modules of interest

Define hub genes as module membership > 0.8 and trait gene significance > 0.4
Then calculate the correlation (R) between the normalised expression of the hub gene and the trait. Any hub genes with R < 0.2 excluded.

annotated_transcriptome <- read.csv(file = 'data/annotated_transcriptome_ORFs.csv')

corset_clusters_2_transcripts <- read.delim('data/corset-clusters.txt')

normalised_counts_CNS <- counts(dds_CNS_filtered, normalized = TRUE)

reorder <- match(rownames(datExpr_CNS), colnames(normalised_counts_CNS))
normalised_counts_CNS_ordered  <- normalised_counts_CNS[ , reorder]
normalised_counts_CNS_tb <- normalised_counts_CNS_ordered %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

save(normalised_counts_CNS_tb, file = 'data/normalised_counts_CNS_tb.RData')

# Hub genes for CO2 modules
for (i in c('darkolivegreen', 'green', 'steelblue', 'darkgreen', 'skyblue3'))
{
  module = i
  trait = "CO2"
  
  hub_genes <- MM_GS_CNS %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_CNS_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_CNS %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_CNS_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_CNS", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}


# Hub genes for active time
for (i in c("green", 'steelblue', "darkgreen", 'black', "pink"))
    { 
    module = i
    trait = "Active_time_s"
    
    hub_genes <- MM_GS_CNS %>%
      dplyr::filter(ModuleColor == module) %>%
      dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
      dplyr::filter_at(2, all_vars(. > 0.8)) %>%
      dplyr::filter_at(3, all_vars(abs(.) > 0.4))
    
    df <- normalised_counts_CNS_tb %>%
      dplyr::filter(gene %in% hub_genes$gene) %>%
      gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
      full_join(datTraits_CNS %>% rownames_to_column(var = "Squid"))
    
    rvalues <- df %>%
      group_by(gene) %>% 
      dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
    
    hub_genes_r_values <- hub_genes %>%
      full_join(rvalues) %>%
      dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
    
    name = paste("hub_genes_CNS_annotated", module, trait, sep = "_")
    assign(name, annotated_transcriptome %>%
             full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
             dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
             full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
             drop_na() %>%
             distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
             arrange(Sequence.Description)
    )
    
    name = paste("hub_genes_CNS", module, trait, sep = "_")
    assign(name, hub_genes_r_values)
}

# Hub genes for distance
for (i in c("green", 'steelblue', "darkgreen", 'black','grey60', "pink"))
{ 
  module = i
  trait = "Dist"
  
  hub_genes <- MM_GS_CNS %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_CNS_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_CNS %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_CNS_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_CNS", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}
 
# Hub genes for speed
for (i in c("green", "darkgreen", "pink", "lightcyan1"))
{ 
  module = i
  trait = "Vel"
  
  hub_genes <- MM_GS_CNS %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_CNS_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_CNS %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_CNS_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_CNS", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}

# Hub genes for softly touch mirror?
for (i in c("skyblue3", "pink", "lightcyan1", "brown", "orange"))
{ 
  module = i
  trait = "Soft_touch"
  
  hub_genes <- MM_GS_CNS %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_CNS_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_CNS %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_CNS_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_CNS", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}

# Hub genes for aggressively touch mirror?
for (i in c("pink", "lightcyan1", "brown", "orange"))
{ 
  module = i
  trait = "Aggressive_touch"
  
  hub_genes <- MM_GS_CNS %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_CNS_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_CNS %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_CNS_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_CNS", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}

# Hub genes for no. soft mirror touches
for (i in c("skyblue3", "lightcyan1", "brown", "orange", "steelblue"))
{ 
  module = i
  trait = "Soft_touch_no."
  
  hub_genes <- MM_GS_CNS %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_CNS_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_CNS %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_CNS_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_CNS", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}


# Hub genes for no. aggressive mirror touches
for (i in c("lightcyan1", "brown", "paleturquoise", "darkorange"))
{ 
  module = i
  trait = "Aggressive_touch_no."
  
  hub_genes <- MM_GS_CNS %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_CNS_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_CNS %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_CNS_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_CNS", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}

save(list = ls()[grepl("^hub_genes_", ls())], file = 'results/hub_genes_interesting_modules_CNS.RData')

Eyes

Set up data

Expression data

# Count data
count_data <- read.delim("data/corset-counts.txt", row.names = 1)
count_data_eyes <- count_data %>%
  select(contains('eyes'))

# Meta data
meta_data <- read_csv("data/metadata.csv", trim_ws = TRUE)

# use only squid with count data
meta_data <- meta_data %>%
  filter(Squid == "PS506" | Squid == "PS508" | Squid == "PS509" | Squid == "PS510" | Squid == "PS513" | Squid == "PS514" | Squid == "PS518" | Squid == "PS522" | Squid == "PS527" | Squid == "PS528" | Squid == "PS529" | Squid == "PS534" | Squid == "PS539" | Squid == "PS547" | Squid == "PS551" | Squid == "PS557" | Squid == "PS561" | Squid == "PS584" | Squid == "PS626" | Squid == "PS636") %>%
  mutate(CO2 = gsub('Ambient', '0', CO2),
         CO2 = gsub('Elevated', '1', CO2),
         CO2 = as.numeric(CO2))
str(meta_data)

meta_data$Squid = paste0(meta_data$Squid, '_eyes')

meta_data_eyes <- meta_data %>%
  arrange(Squid) %>%
  column_to_rownames(var = "Squid")

# Check they match
all(colnames(count_data_eyes) %in% rownames(meta_data_eyes))
all(colnames(count_data_eyes) == rownames(meta_data_eyes))

# Make DESeq dataset
dds_eyes <- DESeqDataSetFromMatrix(count_data_eyes, colData = meta_data_eyes, design = ~ CO2)
dds_eyes <- estimateSizeFactors(dds_eyes)

# Filter out genes with low counts: remove all genes with a count of less than 10 in more than 90% of the samples (= 18 samples).
keep <- rowSums(counts(dds_eyes) >= 10 ) >= 18 # 18 or more samples with counts of 10 or more
dds_eyes_filtered <- dds_eyes[keep,]

# Variance stabilising transformation from DEseq2
VST_counts_eyes <- varianceStabilizingTransformation(dds_eyes_filtered)

VST_counts_eyes_tb <- assay(VST_counts_eyes) %>% 
  as.data.frame() %>% 
  rownames_to_column(var="gene") %>% 
  as_tibble()

datExpr_eyes0 = as.data.frame(t(VST_counts_eyes_tb[, -1])) 
names(datExpr_eyes0) = VST_counts_eyes_tb$gene

save(datExpr_eyes0, file = 'data/expression_data_pre_outliers_removed_eyes.RData')
save(dds_eyes_filtered, file = 'data/dds_eyes_filtered.RData')

Check for outliers

Gene Outliers

# Check for genes with too many misssing values
gsg = goodSamplesGenes(datExpr_eyes0, verbose = 0)
gsg$allOK

## [1] TRUE

No gene outliers

Sample Outliers

# Check for sample outliers
sampleTree = hclust(dist(datExpr_eyes0), method = "average")
# sizeGrWindow(12,9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
     cex.axis = 1.5, cex.main = 2)

Remove sample outliers PS510_eyes, PS506_eyes

abline(h = 198, col = "red")
clust = cutreeStatic(sampleTree, cutHeight = 198, minSize = 10)
table(clust)
# clust 1 contains the samples we want to keep.
keepSamples = (clust==1)
datExpr_eyes = datExpr_eyes0[keepSamples, ]
nGenes = ncol(datExpr_eyes)
nSamples = nrow(datExpr_eyes)

Check plot after outliers removed

sampleTree = hclust(dist(datExpr_eyes), method = "average")
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering after outlier removal", sub="", xlab="", cex.lab = 1.5,
     cex.axis = 1.5, cex.main = 2)

Traits data

# create squid as a column
allTraits = meta_data_eyes %>%
  rownames_to_column(var = "Squid")

# Form a data frame analogous to expression data that will hold the traits.
eyesSamples = rownames(datExpr_eyes)
traitRows = match(eyesSamples, allTraits$Squid)
datTraits_eyes = allTraits[traitRows, -1]
rownames(datTraits_eyes) = allTraits[traitRows, 1]
collectGarbage()

Save data

save(datExpr_eyes, datTraits_eyes, file = "data/WGCNA_eyes_01_dataInput.RData")

Check for any global drivers of expression

pca <- prcomp(datExpr_eyes)
#summary(pca) 
ggscreeplot(pca) +
  scale_x_continuous(breaks = c(1, 2, 3, 4, 5, 6, 7, 8, 9))

datTraits_eyes1 <- datTraits_eyes %>%
  mutate(CO2 = as.factor(CO2))
ggbiplot(pca,
         labels = rownames(datTraits_eyes1),
         ellipse = TRUE,
         var.axes = FALSE,
         groups = datTraits_eyes1$CO2,
         choices = c(1, 2), # PC axes to include
) +
  theme_classic() +
  coord_cartesian(xlim = c(-2, 2), ylim = c(-2, 3)) +
  ggtitle('PCA of expression data')

0 = current-day CO₂, 1 = elevated CO₂
PC1 only explains 17.6% of variance which suggests there’s not a main driver
Doesn’t really group by CO2 treatment

Soft Thresholding Power

Choose soft thresholding power

powers = c(c(1:10), seq(from = 12, to = 20, by = 2))
sft_powers_eyes = pickSoftThreshold(datExpr_eyes,
                                   powerVector = powers,
                                   verbose = 1,
                                   networkType = 'signed',
                                   corFnc = "cor")
save(sft_powers_eyes, file = 'results/soft_thresholding_powers_eyes.RData')

powers = c(c(1:20))
# Call the network topology analysis function
sft_powers_all_eyes = pickSoftThreshold(datExpr_eyes,
                                       powerVector = powers,
                                       verbose = 1,
                                       networkType = 'signed',
                                       corFnc = "cor")
save(sft_powers_all_eyes, file = 'results/soft_thresholding_powers_all_eyes.RData')

sft_powers_all_eyes[[2]] %>% kable()

Power	SFT.R.sq	slope	truncated.R.sq	mean.k.	median.k.	max.k.
1	0.0717878	46.439778	0.9634079	11583.95834	11583.37815	11808.6603
2	0.2669664	-14.916834	0.7860847	6247.94359	6213.58509	6878.9162
3	0.6638415	-10.460603	0.8747264	3580.88175	3518.78114	4510.8390
4	0.8149305	-7.376349	0.9397487	2158.17527	2089.32855	3185.3325
5	0.8567488	-5.526249	0.9574333	1357.16483	1290.70465	2372.9327
6	0.8936651	-4.557931	0.9724515	885.11790	824.01113	1839.7831
7	0.9201775	-3.973087	0.9829440	595.81110	541.00574	1470.9414
8	0.9374830	-3.579827	0.9889335	412.35205	363.97971	1204.9337
9	0.9494223	-3.289299	0.9935070	292.47674	250.67178	1006.5677
10	0.9563949	-3.081118	0.9935265	212.04066	175.83536	854.5419
11	0.9607237	-2.919567	0.9930025	156.77348	125.66472	735.3513
12	0.9644223	-2.782440	0.9929498	117.98211	91.05638	640.0946
13	0.9691231	-2.671251	0.9934783	90.22570	66.99600	562.7077
14	0.9710324	-2.578811	0.9943580	70.01488	49.86466	498.9408
15	0.9735871	-2.490989	0.9941180	55.06178	37.51995	445.7421
16	0.9767255	-2.415113	0.9951590	43.83585	28.53992	400.8730
17	0.9752146	-2.362317	0.9937789	35.29415	21.90392	362.6602
18	0.9749441	-2.305349	0.9918102	28.71388	16.98087	329.8325
19	0.9762823	-2.253728	0.9939154	23.58623	13.27208	301.4097
20	0.9770029	-2.201482	0.9939926	19.54786	10.45445	276.6259

powers = c(c(1:10), seq(from = 12, to = 20, by = 2))
par(mfrow = c(1,2))
cex1 = 0.9
plot(sft_powers_eyes$fitIndices[,1], -sign(sft_powers_eyes$fitIndices[,3])*sft_powers_eyes$fitIndices[,2],
     xlab = "Soft Threshold (power)", ylab="Scale Free Topology Model Fit,signed R^2", type="n",
     main = paste("Scale independence"))
text(sft_powers_eyes$fitIndices[,1], -sign(sft_powers_eyes$fitIndices[,3])*sft_powers_eyes$fitIndices[,2],
     labels = powers, cex = cex1, col = "red")
# R^2 cut-off of 0.9
abline(h = 0.9, col = "red")

# Mean connectivity as a function of the soft-thresholding power
plot(sft_powers_eyes$fitIndices[,1], sft_powers_eyes$fitIndices[,5],
     xlab = "Soft Threshold (power)", ylab = "Mean Connectivity", type = "n",
     main = paste("Mean connectivity"))
text(sft_powers_eyes$fitIndices[,1], sft_powers_eyes$fitIndices[,5], labels = powers, cex = cex1, col = "red")
# Mean connectivity cut-off of 100
abline(h = 100, col="red")

Power 7 first to cross threshold of R^2 > 0.9
But mean connectivity is still high which can result in all genes in the network being highly connected and resulting in few clusters of large genes
Thus, use power 13 when mean connectivity < 100

Check soft thresholding power

k_13_eyes = softConnectivity(datE = datExpr_eyes, power = 13, corFnc = "cor", type = "signed")
save(k_13_eyes, file = 'results/soft_power_13_eyes.RData')

hist(k_13_eyes)

plot <- scaleFreePlot(k_13_eyes, main="Check scale free topology when power = 13\n")

Power 13 looks good to approximate a scale free topology: high R^2 > 0.9 and slope < -2

Network construction and module detection

## R CODE ##
.libPaths(c('/home/j/jodi-thomas/Rlibs_2', .libPaths()))
library(WGCNA)

options(stringsAsFactors = FALSE)
enableWGCNAThreads()
lnames = load(file = "/home/j/jodi-thomas/WGCNA/WGCNA_final/WGCNA_eyes_01_dataInput.RData")
lnames

## ADJACENCIES ##
adjacency_signedpearson_power13_eyes = adjacency(datExpr_eyes,
                                                 type = 'signed',
                                                 power = 13,
                                                 corFnc = 'cor')
save(adjacency_signedpearson_power13_eyes, file = '/flash/RavasiU/Jodi/adjacency_signedpearson_power13_eyes.RData')

## TOM ##
TOM_signedpearson_power13_eyes = TOMsimilarity(adjacency_signedpearson_power13_eyes,
                                               TOMType = 'signed')
dissTOM_signedpearson_power13_eyes = 1-TOM_signedpearson_power13_eyes
save(dissTOM_signedpearson_power13_eyes, file = '/flash/RavasiU/Jodi/dissTOM_signedpearson_power13_eyes.RData')

## CLUSTERING ##
geneTree_signedpearson_power13_eyes = hclust(as.dist(dissTOM_signedpearson_power13_eyes), method = "average")
save(geneTree_signedpearson_power13_eyes, file = '/flash/RavasiU/Jodi/geneTree_signedpearson_power13_eyes.RData')

## MODULES ##
dynamicMods_signedpearson_power13_eyes = cutreeDynamic(dendro = geneTree_signedpearson_power13_eyes,
                                                       minClusterSize = 30,
                                                       distM = dissTOM_signedpearson_power13_eyes,
                                                       deepSplit = 2,
                                                       pamRespectsDendro = FALSE)
save(dynamicMods_signedpearson_power13_eyes, file = '/flash/RavasiU/Jodi/dynamicMods_signedpearson_power13_eyes.RData')

dynamicColors_signedpearson_power13_eyes = labels2colors(dynamicMods_signedpearson_power13_eyes)
save(dynamicColors_signedpearson_power13_eyes, file = '/flash/RavasiU/Jodi/dynamicColors_signedpearson_power13_eyes.RData')

Merge Modules

Load modules that were detected using the HPC

load(file = 'results/geneTree_signedpearson_power13_eyes.RData')
load(file = 'results/dynamicMods_signedpearson_power13_eyes.RData')
load(file = 'results/dynamicColors_signedpearson_power13_eyes.RData')

Modules pre-merge

table(dynamicMods_signedpearson_power13_eyes) %>%
  kable()

63 modules

Merge modules

MEList = moduleEigengenes(datExpr_eyes, colors = dynamicColors_signedpearson_power13_eyes)
MEs_eyes_premerge = MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss_eyes_premerge = 1-cor(MEs_eyes_premerge)
# Cluster module eigengenes
METree_eyes = hclust(as.dist(MEDiss_eyes_premerge), method = "average")

plot(METree_eyes, main = "Clustering of module eigengenes",
     xlab = "", sub = "")
abline(h=0.3, col = "red")

Use height cut-off of 0.3, which corresponds to a correlation of 0.7 for merging

merge_0.3_eyes = mergeCloseModules(datExpr_eyes,
                                  dynamicColors_signedpearson_power13_eyes,
                                  corFnc = 'cor',
                                  cutHeight = 0.30,
                                  verbose = 3)
mergedColors_signedpearson_power13_eyes = merge_0.3_eyes$colors
save(mergedColors_signedpearson_power13_eyes, file = 'results/mergedColors_signedpearson_power13_eyes.RData')

table(mergedColors_signedpearson_power13_eyes) %>%
  kable()

mergedColors_signedpearson_power13_eyes	Freq
antiquewhite4	52
bisque4	108
blue	3442
brown	1470
brown4	112
coral2	1916
cyan	327
darkgreen	682
darkgrey	1078
darkmagenta	448
darkolivegreen	1155
darkorange	688
darkorange2	116
darkred	304
darkseagreen4	55
darkturquoise	236
floralwhite	119
green	1017
honeydew1	60
lavenderblush3	63
lightcyan	301
lightsteelblue1	127
maroon	78
midnightblue	1248
navajowhite2	81
orange	214
orangered4	151
paleturquoise	188
palevioletred3	90
plum1	929
plum2	632
salmon	4278
sienna3	540
skyblue	202
steelblue	196
thistle2	94
violet	186
yellowgreen	173

38 modules after merging

Dendrogram before and after merging modules

plotDendroAndColors(geneTree_signedpearson_power13_eyes, cbind(dynamicColors_signedpearson_power13_eyes, mergedColors_signedpearson_power13_eyes),
                    c("Dynamic Tree Cut", "Merged dynamic"),
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)

Save final merged modules

# Module colours as the final merged module colours
moduleColors_eyes = mergedColors_signedpearson_power13_eyes
# Construct numerical labels corresponding to the colors
colorOrder = c("grey", standardColors(50))
moduleLabels_eyes = match(moduleColors_eyes, colorOrder)-1
# Module eigengenes from final merged modules
MEs_eyes = merge_0.3_eyes$newMEs
# Rename geneTree
geneTree_eyes <- geneTree_signedpearson_power13_eyes

save(moduleColors_eyes, moduleLabels_eyes, MEs_eyes, geneTree_eyes, file = "results/WGCNA_eyes_02_Network_Modules.RData")

Determining Interesting Modules

Canonical Correlation Analysis

Run regularised CCA

x <- as.matrix(scale(datTraits_eyes)) # center and scale
y <- as.matrix(scale(MEs_eyes))

# Calculate CCA (regularised as have more variables than observations)
res.reg <- CCA::estim.regul(x, y)
rcca_eyes <- CCA::rcc(x, y, lambda1 = res.reg$lambda1, lambda2 = res.reg$lambda2)
save(rcca_eyes, file = 'results/rcca_eyes.RData')

x <- as.matrix(scale(datTraits_eyes))
y <- as.matrix(scale(MEs_eyes))

# Display the canonical correlations
bp <- barplot(rcca_eyes$cor,
              xlab="Canonical Function",
              ylab="Canonical Correlation (Rc)",
              names.arg = paste0(1:min(ncol(x), ncol(y))), ylim=c(0,1))
text(bp, rcca_eyes$cor-0.05, labels = paste(round(rcca_eyes$cor, 2)))

Loadings/cross loadings heatmaps

## LOADINGS
# canonical loadings of x variate (traits set) = correlation of each trait variable to the entire x variate/traits set
can.load.x_eyes <- rcca_eyes$scores$corr.X.xscores %>% 
  as.data.frame() %>%
  rownames_to_column(var = 'Trait')
can.load.x_eyes %>% kable(caption = 'Canonical loadings for traits set')

# canonical loadings of y variate (MEs set) = correlation of each ME variable to the entire y variate/ME set
can.load.y_eyes <- rcca_eyes$scores$corr.Y.yscores %>% 
  as.data.frame() %>%
  rownames_to_column(var = 'Module_Eigengene')
can.load.y_eyes %>% kable(caption = 'Canonical loadings for module eigengenes set')

## CROSS LOADINGS
# canonical cross laodings of x variate (traits set) = correlation of each trait variable to the entire y variate/MEs set
can.crossload.x_eyes <- rcca_eyes$scores$corr.X.yscores %>% 
  as.data.frame() %>%
  rownames_to_column(var = 'Trait')
can.crossload.x_eyes %>% kable(caption = 'Canonical cross loadings for traits set')

# canonical cross laodings of y variate (MEs set) = correlation of each ME variable to the entire x variate/traits set
can.crossload.y_eyes <- rcca_eyes$scores$corr.Y.xscores %>% 
  as.data.frame() %>%
  rownames_to_column(var = 'Module_Eigengene')
can.crossload.y_eyes %>% kable(caption = 'Canonical cross loadings for module eigengenes set')

save(can.load.x_eyes, can.crossload.x_eyes, can.load.y_eyes, can.crossload.y_eyes, file = 'results/CCA_loads_crossloads_eyes.RData')

# Heatmap of loadings and cross-loadings, coloured by loadings and loadings printed above (cross loadings)
can.load.x.long <- can.load.x_eyes %>%
  pivot_longer(cols = 2:5, names_to = 'CF', values_to = 'loadings') %>%
  select(Trait, CF, loadings)

can.crossload.x.long <- can.crossload.x_eyes %>%
  pivot_longer(cols = 2:5, names_to = 'CF', values_to = 'crossloadings')%>%
  select(Trait, CF, crossloadings)

load.crossload.x <- can.load.x.long %>% full_join(can.crossload.x.long)

ggplot(load.crossload.x, aes(x = Trait, y = CF)) +
  geom_tile(aes(fill = loadings)) +
  geom_text(aes(label = round(loadings, 2)), vjust = -1, size = 6) +
  geom_text(aes(label = paste("(", round(crossloadings, 2), ")", sep = '')), vjust = 1, size = 4) +
  scale_fill_gradient2(low="blue", mid = "white", high="red") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust=1),
        axis.text = element_text(size = 16)) +
  scale_x_discrete(limits = c('CO2', 'Active_time_s', 'Dist', 'Vel', 'Time_A_s', 'Soft_touch', 'Aggressive_touch', 'Soft_touch_no.', 'Aggressive_touch_no.'), name = '') +
  scale_y_discrete(labels = c('V1' = 'CF 1', 'V2' = 'CF 2', 'V3' = 'CF 3', 'V4' = 'CF 4'), name = '')

can.load.y.long <- can.load.y_eyes %>%
  pivot_longer(cols = 2:5, names_to = 'CF', values_to = 'loadings') %>%
  select(Module_Eigengene, CF, loadings)

can.crossload.y.long <- can.crossload.y_eyes %>%
  pivot_longer(cols = 2:5, names_to = 'CF', values_to = 'crossloadings')%>%
  select(Module_Eigengene, CF, crossloadings)

load.crossload.y <- can.load.y.long %>% full_join(can.crossload.y.long)

ggplot(load.crossload.y, aes(x = Module_Eigengene, y = CF)) +
  geom_tile(aes(fill = loadings)) +
  geom_text(aes(label = round(loadings, 2)), vjust = -1, size = 5) +
  geom_text(aes(label = paste("(", round(crossloadings, 2), ")", sep = '')), vjust = 1, size = 4) +
  scale_fill_gradient2(low="blue", mid = "white", high="red") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust=1),
        axis.text = element_text(size = 16)) +
  scale_x_discrete(name = '') +
  scale_y_discrete(labels = c('V1' = 'CF 1', 'V2' = 'CF 2', 'V3' = 'CF 3', 'V4' = 'CF 4'), name = '')

Biplots

# Biplots
plt.cc(rcca_eyes,
       d1 = 1,
       d2 = 2,
       var.label = T,
       Xnames = colnames(datTraits_eyes),
       Ynames = colnames(MEs_eyes),
       type = 'v') # i for individuals - plots all squid, or b for both

plt.cc(rcca_eyes,
       d1 = 1,
       d2 = 3,
       var.label = TRUE,
       Xnames = colnames(datTraits_eyes),
       Ynames = colnames(MEs_eyes),
       type = 'v')

plt.cc(rcca_eyes,
       d1 = 2,
       d2 = 3,
       var.label = TRUE,
       Xnames = colnames(datTraits_eyes),
       Ynames = colnames(MEs_eyes),
       type = 'v')

plt.cc(rcca_eyes,
       d1 = 1,
       d2 = 4,
       var.label = TRUE,
       Xnames = colnames(datTraits_eyes),
       Ynames = colnames(MEs_eyes),
       type = 'v')

plt.cc(rcca_eyes,
       d1 = 2,
       d2 = 4,
       var.label = TRUE,
       Xnames = colnames(datTraits_eyes),
       Ynames = colnames(MEs_eyes),
       type = 'v')

plt.cc(rcca_eyes,
       d1 = 3,
       d2 = 4,
       var.label = TRUE,
       Xnames = colnames(datTraits_eyes),
       Ynames = colnames(MEs_eyes),
       type = 'v')

Modules of interest identified with CCA:

Canonical loading and cross-loading of a given variable from both the ME set and traits set were ≥ 0.3 for the same CF, this ME and trait were considered correlated
MEs and traits within the same or opposite quarters of the biplot, and sitting on or outside the biplot inner ring with a radius of 0.5, were considered positively or negatively correlated, respectively
If MEs and traits were considered correlated by both the canonical loadings/cross-loadings heatmap AND biplots they were identified as modules of interest for the given trait(s) by CCA.

Module Membership vs Gene Significance

Module membership (MM) = pearson correlation between gene expression and module eigengene
Gene significance (GS) = pearson correlation between gene expression and trait
All modules of interest identified by CCA, plot MM vs GS

# Creat a list of MM and GS
GS_eyes = list()
MM_eyes = list()
GS_eyes = corAndPvalue(datExpr_eyes, datTraits_eyes)
MM_eyes = corAndPvalue(datExpr_eyes, MEs_eyes)
# Put all info from two lists into one dataframe
GSmat = rbind(GS_eyes$cor, GS_eyes$p, GS_eyes$Z, GS_eyes$t)
nTraits = ncol(datTraits_eyes)
traitNames = colnames(datTraits_eyes)
nGenes = ncol(datExpr_eyes)
dim(GSmat) = c(nGenes, 4*nTraits)
rownames(GSmat) = colnames(datExpr_eyes)
colnames(GSmat) = spaste(
  c("GS_", "p_GS_", "Z_GS_", "t_GS_"),
  rep(traitNames, rep(4, nTraits)))


MMmat = rbind(MM_eyes$cor, MM_eyes$p, MM_eyes$Z, MM_eyes$t)
MEnames = colnames(MEs_eyes)
nMEs = ncol(MEs_eyes)
dim(MMmat) = c(nGenes, 4*nMEs)
rownames(MMmat) = colnames(datExpr_eyes)
colnames(MMmat) = spaste(
  c("MM_", "p_MM_", "Z_MM_", "t_MM_"),
  rep(MEnames, rep(4, nMEs)))

MM_GS_eyes = data.frame(gene = colnames(datExpr_eyes),
                       ModuleLabel = moduleLabels_eyes,
                       ModuleColor = moduleColors_eyes,
                       GSmat,
                       MMmat)

modNames_eyes = substring(names(MEs_eyes), 3)
traitNames = colnames(datTraits_eyes)

save(GS_eyes, MM_eyes, modNames_eyes, traitNames, MM_GS_eyes, file = 'results/WGCNA_eyes_03_MM_GS.RData')

CO2

for (i in c('antiquewhite4', 'yellowgreen'))
{
  module = i
  trait = "CO2"
  column_module = match(module, modNames_eyes)
  column_trait = match(trait, traitNames)
  moduleGenes = moduleColors_eyes==module
  verboseScatterplot(abs(MM_eyes$cor[moduleGenes, column_module]),
                     abs(GS_eyes$cor[moduleGenes, column_trait]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = paste("Gene significance for", trait),
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
}

Activity Traits

for (j in c('Active_time_s', 'Dist', 'Vel'))
{
  for (i in c('bisque4', 'cyan', 'darkgreen', 'darkorange', 'darkseagreen4', 'floralwhite', 'green', 'navajowhite2', 'yellowgreen'))
  {
    module = i
    trait = j
    column_module = match(module, modNames_eyes)
    column_trait = match(trait, traitNames)
    moduleGenes = moduleColors_eyes==module
    verboseScatterplot(abs(MM_eyes$cor[moduleGenes, column_module]),
                       abs(GS_eyes$cor[moduleGenes, column_trait]),
                       xlab = paste("Module Membership in", module, "module"),
                       ylab = paste("Gene significance for", trait),
                       main = paste("Module membership vs. gene significance\n"),
                       cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
  }
}

Time in A

for (i in c('antiquewhite4', 'darkmagenta', 'palevioletred3'))
{
  module = i
  trait = "Time_A_s"
  column_module = match(module, modNames_eyes)
  column_trait = match(trait, traitNames)
  moduleGenes = moduleColors_eyes==module
  verboseScatterplot(abs(MM_eyes$cor[moduleGenes, column_module]),
                     abs(GS_eyes$cor[moduleGenes, column_trait]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = paste("Gene significance for", trait),
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
}

Exploratory interaction?

for (i in c('cyan', 'antiquewhite4', 'darkmagenta', 'lightsteelblue1', 'yellowgreen'))
{
  module = i
  trait = "Soft_touch"
  column_module = match(module, modNames_eyes)
  column_trait = match(trait, traitNames)
  moduleGenes = moduleColors_eyes==module
  verboseScatterplot(abs(MM_eyes$cor[moduleGenes, column_module]),
                     abs(GS_eyes$cor[moduleGenes, column_trait]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = paste("Gene significance for", trait),
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
}

Aggressive interaction?

for (i in c('cyan', 'antiquewhite4', 'darkmagenta', 'lightsteelblue1', 'yellowgreen'))
{
  module = i
  trait = "Aggressive_touch"
  column_module = match(module, modNames_eyes)
  column_trait = match(trait, traitNames)
  moduleGenes = moduleColors_eyes==module
  verboseScatterplot(abs(MM_eyes$cor[moduleGenes, column_module]),
                     abs(GS_eyes$cor[moduleGenes, column_trait]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = paste("Gene significance for", trait),
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
}

No. exploratory interactions

for (i in c('cyan', 'antiquewhite4', 'darkmagenta', 'lightsteelblue1', 'yellowgreen', 'midnightblue', 'paleturquoise', 'steelblue'))
{
  module = i
  trait = "Soft_touch_no."
  column_module = match(module, modNames_eyes)
  column_trait = match(trait, traitNames)
  moduleGenes = moduleColors_eyes==module
  verboseScatterplot(abs(MM_eyes$cor[moduleGenes, column_module]),
                     abs(GS_eyes$cor[moduleGenes, column_trait]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = paste("Gene significance for", trait),
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
}

No. aggressive interactions

for (i in c('cyan', 'antiquewhite4', 'darkmagenta', 'yellowgreen', 'midnightblue', 'paleturquoise', 'steelblue'))
{
  module = i
  trait = "Aggressive_touch_no."
  column_module = match(module, modNames_eyes)
  column_trait = match(trait, traitNames)
  moduleGenes = moduleColors_eyes==module
  verboseScatterplot(abs(MM_eyes$cor[moduleGenes, column_module]),
                     abs(GS_eyes$cor[moduleGenes, column_trait]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = paste("Gene significance for", trait),
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1, cex.lab = 1, cex.axis = 1, col = module)
}

Final Modules of Interest

All modules of interest initially identified by CCA that had a MM vs GS correlation (R-value) > 0.2 were chosen as the final modules of interest

trait <- c('CO2 treatment', 'Active time', 'Distance', 'Speed', 'Exploratory interaction?', 'Aggressive interaction?', 'No. exploratory interactions', 'No. aggressive interactions', 'Time in Zone A')
yellowgreen <- c('1', '-1', '-1', '-1', '0', '0', '0', '0', '0')
antiquewhite4 <- c('-1', '0', '0', '0', '1', '0', '1', '0', '0')
green <- c('0', '1', '1', '1', '0', '0', '0', '0', '0')
bisque4 <- c('0', '1', '1', '1', '0', '0', '0', '0', '0')
darkgreen <- c('0', '1', '1', '0', '0', '0', '0', '0', '0')
floralwhite <- c('0', '1', '1', '0', '0', '0', '0', '0', '0')
darkseagreen4 <- c('0', '0', '1', '0', '0', '0', '0', '0', '0')
navajowhite2 <- c('0', '-1', '-1', '-1', '0', '0', '0', '0', '0')
darkorange <- c('0', '-1', '-1', '0', '0', '0', '0', '0', '0')
cyan <- c('0', '-1', '-1', '0', '1', '1', '1', '1', '0')
midnightblue <- c('0', '0', '0', '0', '0', '0', '1', '0', '0')
darkmagenta <- c('0', '0', '0', '0', '0', '0', '-1', '-1', '0')
lightsteelblue1 <- c('0', '0', '0', '0', '0', '0', '-1', '0', '0')
paleturquoise <- c('0', '0', '0', '0', '0', '0', '0', '-1', '0')


final_modules <- data.frame(trait, yellowgreen, antiquewhite4, green, bisque4, darkgreen, floralwhite, darkseagreen4, navajowhite2, darkorange, cyan, midnightblue, darkmagenta, lightsteelblue1, paleturquoise)

final_modules_long_eyes <- final_modules %>%
  pivot_longer(cols = 2:15, names_to = 'modules', values_to = 'relationship')

save(final_modules_long_eyes, file = 'results/final_modules_eyes.RData')

final_modules_labels <- final_modules_long_eyes %>%
  dplyr::mutate(relationship = gsub('-1', '-', relationship),
                relationship = gsub('1', '+', relationship),
                relationship = gsub('0', '', relationship))


(plot_final_modules_eyes <- ggplot(final_modules_long_eyes, aes(x = modules, y = trait)) +
  geom_tile(aes(fill = relationship), colour = 'black') +
  scale_y_discrete(limits = rev(c('CO2 treatment', 'Active time', 'Distance', 'Speed', 'Exploratory interaction?', 'Aggressive interaction?', 'No. exploratory interactions', 'No. aggressive interactions', 'Time in Zone A')),
                   labels = rev(c(expression('CO'[2]~'treatment'), 'Active time', 'Distance', 'Speed', 'Exploratory interaction?', 'Aggressive interaction?', 'No. exploratory interactions', 'No. aggressive interactions', 'Time in Zone A')),
                   name = '') +
  scale_x_discrete(limits = c('yellowgreen', 'antiquewhite4', 'green', 'bisque4', 'darkgreen', 'floralwhite', 'darkseagreen4', 'navajowhite2', 'darkorange', 'cyan', 'midnightblue', 'darkmagenta', 'lightsteelblue1', 'paleturquoise'), 
                   name = '') +
  scale_fill_manual(values = c("#4040FF", "#FFFFFF", "#FF4040"),
                    name = 'Correlation',
                    breaks = c(-1, 0, 1), labels = c('Negative', 'None', 'Positive')) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust=1),
        axis.text = element_text(size = 10, colour = 'black'),
        axis.title = element_text(size = 10, colour = 'black'),
        legend.text = element_text(size = 10, colour = 'black'),
        legend.title = element_text(size = 10, colour = 'black')))

save(plot_final_modules_eyes, file = 'results/plot_final_modules_eyes.RData')

Hub genes in modules of interest

Define hub genes as module membership > 0.8 and trait gene significance > 0.4
Then calculate the correlation (R) between the normalised expression of the hub gene and the trait. Any hub genes with R < 0.2 excluded.

normalised_counts_eyes <- counts(dds_eyes_filtered, normalized = TRUE)

reorder <- match(rownames(datExpr_eyes), colnames(normalised_counts_eyes))
normalised_counts_eyes_ordered  <- normalised_counts_eyes[ , reorder]
normalised_counts_eyes_tb <- normalised_counts_eyes_ordered %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

save(normalised_counts_eyes_tb, file = 'data/normalised_counts_eyes_tb.RData')

# Hub genes for CO2 modules
for (i in c("yellowgreen", "antiquewhite4"))
{
  module = i
  trait = "CO2"
  
  hub_genes <- MM_GS_eyes %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_eyes_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_eyes %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_eyes_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_eyes", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}

# Hub genes for active time
for (i in c("yellowgreen", 'green', "bisque4", 'darkgreen', "floralwhite", "navajowhite2", "darkorange", "cyan"))
{ 
  module = i
  trait = "Active_time_s"
  
  hub_genes <- MM_GS_eyes %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_eyes_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_eyes %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_eyes_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_eyes", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}

# Hub genes for distance 
for (i in c("yellowgreen", 'green', "bisque4", 'darkgreen', "floralwhite", "darkseagreen4", "navajowhite2", "darkorange", "cyan"))
{ 
  module = i
  trait = "Dist"
  
  hub_genes <- MM_GS_eyes %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_eyes_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_eyes %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_eyes_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_eyes", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}

# Hub genes for speed
for (i in c("yellowgreen", 'green', "bisque4", "navajowhite2"))
{ 
  module = i
  trait = "Vel"
  
  hub_genes <- MM_GS_eyes %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_eyes_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_eyes %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_eyes_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_eyes", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}

# Hub genes for soft touch?
for (j in c("antiquewhite4", "cyan"))
{ 
  module = j
  trait = "Soft_touch"
  
  hub_genes <- MM_GS_eyes %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_eyes_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_eyes %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_eyes_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_eyes", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}

# Hub genes for aggressive touch?
for (j in c("cyan"))
{
  module = j
  trait = "Aggressive_touch"
  
  hub_genes <- MM_GS_eyes %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_eyes_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_eyes %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_eyes_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_eyes", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}

# Hub genes for no. soft mirror touches
for (j in c("antiquewhite4", "cyan", "midnightblue", "darkmagenta", "lightsteelblue1"))
{
  module = j
  trait = "Soft_touch_no."
  
  hub_genes <- MM_GS_eyes %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_eyes_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_eyes %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_eyes_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_eyes", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}


# Hub genes for no. aggressive mirror touches
for (j in c("cyan", "darkmagenta", "paleturquoise"))
{ 
  module = j
  trait = "Aggressive_touch_no."
  
  hub_genes <- MM_GS_eyes %>%
    dplyr::filter(ModuleColor == module) %>%
    dplyr::select(gene, paste('MM_ME', module, sep = ''), paste('GS_', trait, sep = '')) %>%
    dplyr::filter_at(2, all_vars(. > 0.8)) %>%
    dplyr::filter_at(3, all_vars(abs(.) > 0.4))
  
  df <- normalised_counts_eyes_tb %>%
    dplyr::filter(gene %in% hub_genes$gene) %>%
    gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts") %>%
    full_join(datTraits_eyes %>% rownames_to_column(var = "Squid"))
  
  rvalues <- df %>%
    group_by(gene) %>% 
    dplyr::summarise(r_normexpr_vs_trait = (signif(stats::cor(normalised_counts, .data[[trait]]), 2)))
  
  hub_genes_r_values <- hub_genes %>%
    full_join(rvalues) %>%
    dplyr::filter(abs(r_normexpr_vs_trait) > 0.2)
  
  name = paste("hub_genes_eyes_annotated", module, trait, sep = "_")
  assign(name, annotated_transcriptome %>%
           full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
           dplyr::select(Sequence.Name, gene, Sequence.Description) %>%
           full_join(hub_genes_r_values, by = c('gene' = 'gene')) %>%
           drop_na() %>%
           distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
           arrange(Sequence.Description)
  )
  
  name = paste("hub_genes_eyes", module, trait, sep = "_")
  assign(name, hub_genes_r_values)
}

save(list = ls()[grepl("^hub_genes_", ls())], file = 'results/hub_genes_interesting_modules_eyes.RData')

Comparison of CNS and Eyes Results

Compare CO2 hub genes across tissues

annotations <- annotated_transcriptome %>%
  full_join(corset_clusters_2_transcripts, by = c('Sequence.Name' = 'transcript')) %>%
  dplyr::select(Sequence.Name, gene, Sequence.Description)

save(annotations, file = "data/annotations.RData")

### CO2 hub genes found in both CNS and eyes ###

# Modules correlated with CO2 CNS: darkolivegreen, green, steelblue, darkgreen, skyblue3
# All CO2 hub genes in CNS
hub_genes_CNS_CO2 <-  hub_genes_CNS_darkolivegreen_CO2 %>%
  full_join(hub_genes_CNS_green_CO2) %>%
  full_join(hub_genes_CNS_steelblue_CO2) %>%
  full_join(hub_genes_CNS_darkgreen_CO2) %>%
  full_join(hub_genes_CNS_skyblue3_CO2) %>%
  rename(GS_CO2_CNS = GS_CO2) %>%
  select(gene, GS_CO2_CNS)
# Modules correlated with CO2 eyes: yellowgreen, antiquewhite4
# All CO2 hub genes in eyes
hub_genes_eyes_CO2 <- hub_genes_eyes_yellowgreen_CO2 %>%
  full_join(hub_genes_eyes_antiquewhite4_CO2) %>%
  rename(GS_CO2_eyes = GS_CO2) %>%
  select(gene, GS_CO2_eyes)

### CO2 hub genes shared by CNS and eyes ###
hub_genes_CNS_eyes_CO2 <- hub_genes_CNS_CO2 %>%
  inner_join(hub_genes_eyes_CO2)
nrow(hub_genes_CNS_eyes_CO2)

hub_genes_CNS_eyes_annotated_CO2 <- hub_genes_CNS_eyes_CO2 %>% 
  inner_join(annotations) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(gene)


### CNS-specific CO2 hub genes ###
hub_genes_CNS_specific_CO2 <- hub_genes_CNS_CO2 %>%
  filter(!gene %in% hub_genes_eyes_CO2$gene)
nrow(hub_genes_CNS_specific_CO2)

hub_genes_CNS_specific_annotated_CO2 <- hub_genes_CNS_specific_CO2 %>%
  inner_join((annotations)) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

### Eyes-specific CO2 hub genes
hub_genes_eyes_specific_CO2 <- hub_genes_eyes_CO2 %>%
  filter(!gene %in% hub_genes_CNS_CO2$gene)
nrow(hub_genes_eyes_specific_CO2)

hub_genes_eyes_specific_annotated_CO2 <- hub_genes_eyes_specific_CO2 %>%
  inner_join((annotations)) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

save(hub_genes_CNS_CO2, hub_genes_eyes_CO2, hub_genes_CNS_eyes_CO2, hub_genes_CNS_eyes_annotated_CO2, hub_genes_CNS_specific_CO2, hub_genes_CNS_specific_annotated_CO2, hub_genes_eyes_specific_CO2,  hub_genes_eyes_specific_annotated_CO2, file = 'results/Compare_hub_genes_CO2.RData')

CO2 hub genes in both tissues

hub_genes_CNS_eyes_annotated_CO2 %>%
  dplyr::select(gene, Sequence.Description, contains('GS')) %>%
  kable(caption = "Hub genes for CO2 in both the CNS and eyes")

Hub genes for CO2 in both the CNS and eyes
gene	Sequence.Description	GS_CO2_CNS	GS_CO2_eyes
Cluster-1798.0	von Hippel-Lindau disease tumor suppressor-like	0.5197694	0.4275397
Cluster-1798.1	von Hippel-Lindau disease tumor suppressor-like	-0.5068472	-0.4988114
Cluster-4644.0	glucose-induced degradation protein 4 homolog	0.5326470	0.4958090
Cluster-5283.11447	derlin-1-like	0.4213809	0.4252181
Cluster-5283.11447	derlin-1-like isoform X1	0.4213809	0.4252181
Cluster-5283.11591	signal recognition particle subunit SRP72-like	0.6009590	0.5305654
Cluster-5283.11592	signal recognition particle subunit SRP72-like	-0.5403115	-0.5298213
Cluster-5283.5934	ubiquitin thioesterase zranb1-B	-0.5830062	-0.5241672
Cluster-5283.7600	cyclin-dependent kinase 10	-0.5580496	-0.5808814
Cluster-5283.9395	protein transport protein Sec16A isoform X2	0.5669365	0.5648262
Cluster-5283.9395	acyl carrier protein, mitochondrial	0.5669365	0.5648262
Cluster-5283.9397	acyl carrier protein, mitochondrial	-0.5502659	-0.5267482
Cluster-5283.9788	acyl-coenzyme A thioesterase 8-like	-0.5745791	-0.6212306
Cluster-5283.997	neuronal acetylcholine receptor subunit alpha-10-like isoform X1	-0.6804924	-0.6469036
Cluster-5283.997	neuronal acetylcholine receptor subunit alpha-10 isoform X1	-0.6804924	-0.6469036
Cluster-6136.0	mannose-1-phosphate guanyltransferase alpha-A-like isoform X1	0.5648902	0.5365811
Cluster-6136.0	mannose-1-phosphate guanyltransferase alpha-A-like isoform X2	0.5648902	0.5365811
Cluster-6136.1	mannose-1-phosphate guanyltransferase alpha-A-like isoform X2	-0.6553945	-0.6958523
Cluster-6136.1	mannose-1-phosphate guanyltransferase alpha-A-like isoform X1	-0.6553945	-0.6958523

14 CO2 hub genes found in both tissues
11 different genes
- von Hippel-Lindau disease tumor suppressor-like (2)
- glucose-induced degradation protein 4 homolog
- derlin-1-like
- signal recognition particle subunit SRP72-like (2)
- ubiquitin thioesterase zranb1-B
- cyclin-dependent kinase 10
- protein transport protein Sec16A isoform X2 / acyl carrier protein, mitochondrial
- acyl carrier protein, mitochondrial
- acyl-coenzyme A thioesterase 8-like
- neuronal acetylcholine receptor subunit alpha-10
- mannose-1-phosphate guanyltransferase alpha-A-like (2)

CNS-specific CO2 hub genes

hub_genes_CNS_specific_annotated_CO2 %>%
  dplyr::select(gene, Sequence.Description, contains('GS')) %>%
  kable(caption = "CNS-specific CO2 hub genes")

CNS-specific CO2 hub genes
gene	Sequence.Description	GS_CO2_CNS
Cluster-5283.8383	—NA—	0.4838917
Cluster-5283.9687	—NA—	0.4692527
Cluster-6824.0	—NA—	0.4213050
Cluster-5283.5399	—NA—	-0.4132802
Cluster-5283.5400	—NA—	-0.5855474
Cluster-5283.5401	—NA—	-0.4694759
Cluster-5283.5402	—NA—	-0.5019657
Cluster-5283.5403	—NA—	-0.5705385
Cluster-5283.5404	—NA—	-0.5702464
Cluster-5283.5405	—NA—	-0.4401043
Cluster-5283.5406	—NA—	-0.4891360
Cluster-5283.5407	—NA—	-0.5426610
Cluster-5283.5409	—NA—	-0.6199384
Cluster-5283.5410	—NA—	-0.6299001
Cluster-5283.5411	—NA—	-0.5875239
Cluster-5283.5412	—NA—	-0.5696314
Cluster-5283.5413	—NA—	-0.6316107
Cluster-5283.5414	—NA—	-0.5847724
Cluster-5283.11225	—NA—	-0.5621948
Cluster-5283.11862	—NA—	-0.6092525
Cluster-4203.2	2-(3-amino-3-carboxypropyl)histidine synthase subunit 2-like	-0.5093102
Cluster-5063.0	60S ribosomal protein L23a	0.4602487
Cluster-2968.1	60S ribosomal protein L29	0.5701237
Cluster-6260.0	60S ribosomal protein L4	0.4284396
Cluster-1573.0	60S ribosomal protein L7-like	0.4486509
Cluster-5283.9398	78 kDa glucose-regulated protein	0.4004436
Cluster-5283.7703	Acidic leucine-rich nuclear phosphoprotein 32 family member A	0.4129792
Cluster-5283.8057	actin-related protein 2/3 complex subunit 5-like	0.5253153
Cluster-7117.3	ADP-ribosylation factor-binding protein GGA	-0.4191597
Cluster-7117.3	ADP-ribosylation factor-binding protein GGA1-like	-0.4191597
Cluster-4907.1	alpha-catulin isoform X1	-0.5008865
Cluster-2928.0	alpha 1,3-glucosidase	0.4187676
Cluster-4731.0	amyloid beta A4 precursor protein-binding family B member 1-interacting protein-like isoform X10	0.4733935
Cluster-4670.0	anillin isoform X2	0.4540275
Cluster-6415.0	ANKL2 protein	0.4197708
Cluster-6415.0	Ankyrin repeat and LEM domain-containing protein 2	0.4197708
Cluster-3047.0	apoptosis inhibitor 5-like	0.4046946
Cluster-5124.1	ATP-binding cassette sub-family D member 2-like	-0.4157442
Cluster-7226.0	bifunctional purine biosynthesis protein PURH	0.4124972
Cluster-324.0	brain-specific angiogenesis inhibitor 3	0.4452138
Cluster-4273.2	breast cancer anti-estrogen resistance protein 3 homolog isoform X10	-0.4164102
Cluster-4869.1	BTG1 protein	-0.4434387
Cluster-5283.11440	Calmodulin	-0.4624930
Cluster-5283.7470	cell wall protein RBR3 isoform X3	-0.5315182
Cluster-1052.0	cholesterol 7-desaturase-like	0.4498531
Cluster-3427.0	cleavage stimulation factor subunit 1-like	0.4166065
Cluster-7099.0	collagen alpha-1(I) chain-like isoform X4	0.4061226
Cluster-7099.0	collagen alpha-1(I) chain-like isoform X5	0.4061226
Cluster-5171.0	condensin complex subunit 2-like	0.4391482
Cluster-5283.3285	cyclin-dependent kinase 10	0.4848457
Cluster-3761.3	deaminated glutathione amidase-like	-0.4359909
Cluster-5283.11448	derlin-1-like	-0.4397138
Cluster-5283.11449	derlin-1-like isoform X1	-0.5712288
Cluster-6523.3	diacylglycerol kinase theta-like isoform X1	-0.4810414
Cluster-4663.0	DNA-directed RNA polymerase I subunit RPA1-like	0.4621146
Cluster-6841.1	DNA replication complex GINS protein PSF2-like	0.4152517
Cluster-351.0	DNA replication factor Cdt1-like	0.4029504
Cluster-5283.10091	DNA replication licensing factor mcm5-like	0.4104516
Cluster-5283.10092	DNA replication licensing factor mcm5-like	0.4395230
Cluster-1917.0	dolichyl-diphosphooligosaccharide–protein glycosyltransferase subunit 1	0.4407760
Cluster-1917.0	dolichyl-diphosphooligosaccharide–protein glycosyltransferase subunit 1-like	0.4407760
Cluster-7415.2	dolichyl-diphosphooligosaccharide–protein glycosyltransferase subunit 2	0.4322243
Cluster-5467.0	dolichyl-diphosphooligosaccharide–protein glycosyltransferase subunit STT3A	0.4361419
Cluster-4583.0	DONS protein	0.5138341
Cluster-5283.776	dual specificity protein kinase Ttk-like	0.4272533
Cluster-5283.9043	dual specificity protein kinase TTK-like	0.4190020
Cluster-5283.11346	dystroglycan-like	-0.4129503
Cluster-5303.2	E3 SUMO-protein ligase RanBP2-like isoform X1	0.4056205
Cluster-6635.0	E3 ubiquitin-protein ligase CBL-B-like isoform X2	0.5129386
Cluster-4436.0	E3 ubiquitin-protein ligase TTC3	0.5676908
Cluster-4064.0	E3 ubiquitin-protein ligase UHRF1-like	0.4985406
Cluster-5283.2557	ectonucleoside triphosphate diphosphohydrolase 8 isoform X4	0.4057635
Cluster-7185.0	elongation factor 1-alpha	0.4187221
Cluster-7185.0	elongation factor 1-alpha 1	0.4187221
Cluster-7185.0	Elongation factor 1-alpha 1	0.4187221
Cluster-7185.0	elongation factor 1-alpha, oocyte form-like	0.4187221
Cluster-2941.0	elongation factor 1-gamma-like	0.4612368
Cluster-5283.9398	endoplasmic reticulum chaperone BiP	0.4004436
Cluster-6260.0	EOG090X0822	0.4284396
Cluster-6766.1	ETS domain-containing protein Elk-3-like isoform X1	0.5481240
Cluster-6766.2	ETS domain-containing protein Elk-3-like isoform X1	0.5444661
Cluster-5283.5477	eukaryotic elongation factor 2 kinase-like	0.5820170
Cluster-4300.0	eukaryotic translation initiation factor 3 subunit B-like	0.4736163
Cluster-825.0	eukaryotic translation initiation factor 3 subunit D	0.4332197
Cluster-4038.0	exosome component 10-like	0.4429619
Cluster-6718.1	FACT complex subunit SPT16-like	0.4105340
Cluster-5283.4049	Forkhead box protein M1	0.5075015
Cluster-5283.4049	FOXM1 protein	0.5075015
Cluster-5283.7253	FOXM1 protein	0.4390796
Cluster-1876.1	G2/mitotic-specific cyclin-B3-like	0.4046854
Cluster-5739.0	glycine N-acyltransferase-like	0.5365564
Cluster-5283.5083	GTP-binding nuclear protein Ran	0.5674211
Cluster-5283.127	heparan sulfate glucosamine 3-O-sulfotransferase 5	0.4396789
Cluster-5283.8911	Histone-lysine N-methyltransferase ASHH2,Putative histone-lysine N-methyltransferase ASHH4,Histone-lysine N-methyltransferase ASHH3,Probable histone-lysine N-methyltransferase Mes-4,Histone-lysine N-methyltransferase SETD2,Histone-lysine N-methyltransferase ASH1L,Histone-lysine N-methyltransferase, H3 lysine-36 specific,Histone-lysine N-methyltransferase ASHH1	0.5815156
Cluster-2996.0	Hypothetical predicted protein	0.5397028
Cluster-5283.4623	Hypothetical predicted protein	0.4339888
Cluster-5283.4625	Hypothetical predicted protein	0.5982087
Cluster-5283.8329	Hypothetical predicted protein	-0.4949699
Cluster-5883.0	Hypothetical predicted protein	-0.4622240
Cluster-5283.2729	hypothetical protein OCBIM_22018450mg	0.4002355
Cluster-5283.2953	hypothetical protein OCBIM_22018450mg	0.4880647
Cluster-5283.6317	hypothetical protein OCBIM_22018450mg	0.4029723
Cluster-5738.0	inhibitor of growth protein 3-like	-0.4072543
Cluster-5738.4	inhibitor of growth protein 3-like	-0.4141309
Cluster-5283.9055	inner centromere protein A isoform X1	0.4139061
Cluster-5283.765	integrin alpha-4	0.4070706
Cluster-5283.6821	integrin alpha-9	0.4229773
Cluster-5283.6821	integrin alpha-9-like	0.4229773
Cluster-5283.12040	kelch-like protein 2	-0.4467670
Cluster-5283.12041	kelch-like protein 2	-0.4059537
Cluster-5434.0	Krueppel-like factor 10 isoform X2	0.4667081
Cluster-6083.1	lactadherin-like isoform X1	-0.4276027
Cluster-1716.2	major egg antigen	0.4098704
Cluster-6547.1	maternal embryonic leucine zipper kinase-like isoform X1	0.4212175
Cluster-1712.0	mdm2-binding protein-like	0.4419845
Cluster-4552.1	mesoderm-specific transcript homolog protein	0.4211081
Cluster-4098.2	microtubule-associated protein futsch	-0.4632491
Cluster-3137.3	mitochondrial folate transporter/carrier	0.5368605
Cluster-3137.2	Mitochondrial folate transporter/carrier	0.4427928
Cluster-6037.1	mitochondrial mRNA pseudouridine synthase Trub2-like	0.5336372
Cluster-5222.0	mitotic checkpoint protein BUB3-like	0.4472736
Cluster-3693.0	mitotic checkpoint serine/threonine-protein kinase BUB1-like isoform X1	0.4814614
Cluster-5283.1674	monoacylglycerol lipase ABHD12-like	0.4239891
Cluster-1187.0	MPN domain-containing protein	0.4770145
Cluster-5283.3723	myophilin	0.4469886
Cluster-5283.4518	myophilin-like	0.4531665
Cluster-5283.7141	myosin heavy chain, embryonic smooth muscle isoform-like	0.4424952
Cluster-5283.7142	myosin heavy chain, non-muscle-like isoform X1	0.4297195
Cluster-5283.7141	myosin heavy chain, non-muscle-like isoform X2	0.4424952
Cluster-5283.7141	myosin heavy chain, non-muscle-like isoform X3	0.4424952
Cluster-5283.7142	myosin heavy chain, non-muscle-like isoform X3	0.4297195
Cluster-5283.7142	myosin heavy chain, non-muscle-like isoform X5	0.4297195
Cluster-5283.7138	myosin heavy chain, non-muscle isoform X1	0.4088282
Cluster-5283.7142	myosin heavy chain, non-muscle isoform X1	0.4297195
Cluster-5283.7142	myosin heavy chain, non-muscle isoform X3	0.4297195
Cluster-5283.7138	myosin heavy chain, non-muscle isoform X4	0.4088282
Cluster-5283.7142	myosin heavy chain, non-muscle isoform X4	0.4297195
Cluster-5593.1	myosin regulatory light polypeptide 9	0.4509710
Cluster-5593.1	myosin regulatory light polypeptide 9 isoform X2	0.4509710
Cluster-4358.0	nascent polypeptide-associated complex subunit alpha, muscle-specific form	0.4522862
Cluster-2928.0	neutral alpha-glucosidase AB-like isoform X1	0.4187676
Cluster-5283.7142	non-muscle myosin II heavy chain	0.4297195
Cluster-5815.1	nuclear pore complex protein Nup155	0.4605414
Cluster-2220.0	nuclear pore complex protein Nup160-like	0.4757864
Cluster-1082.0	nuclear pore complex protein Nup205	0.4280466
Cluster-386.0	nucleolar protein 58	0.4785102
Cluster-6908.0	nucleosome-remodeling factor subunit BPTF	0.4537115
Cluster-6908.0	nucleosome-remodeling factor subunit BPTF-like	0.4537115
Cluster-6908.0	nucleosome-remodeling factor subunit BPTF isoform X2	0.4537115
Cluster-6908.0	nucleosome-remodeling factor subunit NURF301-like isoform X1	0.4537115
Cluster-3207.0	origin recognition complex subunit 1-like isoform X1	0.5468322
Cluster-2667.0	peptidyl-prolyl cis-trans isomerase B	0.4143032
Cluster-6662.1	PHD finger protein 24-like	0.4091448
Cluster-5780.0	piwi-like protein 1	0.4480475
Cluster-7336.0	plasminogen activator inhibitor 1 RNA-binding protein-like	0.4309512
Cluster-7336.0	plasminogen activator inhibitor 1 RNA-binding protein-like isoform X2	0.4309512
Cluster-7336.0	plasminogen activator inhibitor 1 RNA-binding protein isoform X2	0.4309512
Cluster-4600.0	pre-mRNA-processing factor 40 homolog A-like isoform X1	0.4552494
Cluster-281.0	predicted protein	0.4079243
Cluster-2189.0	PREDICTED: uncharacterized protein LOC106871855	0.4796581
Cluster-5283.9773	PREDICTED: uncharacterized protein LOC106873754	0.4551946
Cluster-4472.0	PREDICTED: uncharacterized protein LOC106877732 isoform X22	0.4396003
Cluster-5283.4185	proactivator polypeptide-like	0.7106963
Cluster-5283.4457	proactivator polypeptide-like	-0.4730449
Cluster-5283.1948	probable 2-oxoglutarate dehydrogenase E1 component DHKTD1, mitochondrial	0.4086804
Cluster-5283.1947	probable 2-oxoglutarate dehydrogenase E1 component DHKTD1, mitochondrial	-0.4384041
Cluster-1629.0	probable ATP-dependent RNA helicase DDX4 isoform X2	0.4135452
Cluster-6037.1	probable tRNA pseudouridine synthase 2	0.5336372
Cluster-4213.0	proliferation-associated protein 2G4-like	0.4259426
Cluster-5283.4185	prosaposin-like isoform X1	0.7106963
Cluster-5283.4457	prosaposin-like isoform X1	-0.4730449
Cluster-5283.4185	prosaposin-like isoform X2	0.7106963
Cluster-5283.4457	prosaposin-like isoform X2	-0.4730449
Cluster-811.0	proteasome subunit alpha type-5	0.4536093
Cluster-5845.1	protein ABHD13-like	-0.4327278
Cluster-1789.0	protein DBF4 homolog A-like	0.4131291
Cluster-2500.9	protein disulfide-isomerase A3-like	0.4290375
Cluster-5293.0	protein disulfide-isomerase A4	0.4149667
Cluster-5293.0	protein disulfide-isomerase A4-like	0.4149667
Cluster-2344.0	protein disulfide-isomerase A5-like	0.5035412
Cluster-4583.0	protein downstream neighbor of son homolog	0.5138341
Cluster-5283.350	protein ecdysoneless homolog	0.4501941
Cluster-377.0	protein kintoun-like	0.4254137
Cluster-6699.0	protein lin-37 homolog	-0.4157659
Cluster-5283.262	protein phosphatase 1 regulatory subunit 37-like isoform X1	0.4257566
Cluster-5283.267	protein phosphatase 1 regulatory subunit 37-like isoform X1	-0.4450159
Cluster-5283.262	protein phosphatase 1 regulatory subunit 37-like isoform X2	0.4257566
Cluster-5283.9489	protocadherin-1 isoform X1	-0.4121199
Cluster-5283.11220	putative uncharacterized transposon-derived protein F52C9.6	-0.5290094
Cluster-5283.11221	putative uncharacterized transposon-derived protein F52C9.6	-0.6296988
Cluster-5283.8790	ras-related C3 botulinum toxin substrate 1	0.4205143
Cluster-5283.1756	receptor-type tyrosine-protein phosphatase C isoform X1	0.4067967
Cluster-5283.1760	receptor-type tyrosine-protein phosphatase C isoform X1	0.4736964
Cluster-5283.1759	Receptor-type tyrosine-protein phosphatase mu,Receptor-type tyrosine-protein phosphatase F,Receptor-type tyrosine-protein phosphatase H,Receptor-type tyrosine-protein phosphatase S,Receptor-type tyrosine-protein phosphatase eta,Receptor-type tyrosine-protein phosphatase gamma,Receptor-type tyrosine-protein phosphatase O,Receptor-type tyrosine-protein phosphatase delta	0.4185260
Cluster-7340.0	rhotekin-like isoform X2	0.4028565
Cluster-6628.2	RNA-binding protein 25	0.4041850
Cluster-6628.2	RNA-binding protein 25-like isoform X1	0.4041850
Cluster-4049.0	RNA cytidine acetyltransferase	0.4334360
Cluster-4049.0	RNA cytidine acetyltransferase-like isoform X1	0.4334360
Cluster-4324.0	RS27A protein	0.4725817
Cluster-5640.3	serine-rich adhesin for platelets-like	-0.4576066
Cluster-4210.0	serine hydroxymethyltransferase, cytosolic-like	0.4061178
Cluster-5283.9550	serine/threonine-protein kinase 10-like isoform X2	-0.4635737
Cluster-3336.1	serrate RNA effector molecule homolog isoform X1	0.5201193
Cluster-2682.0	SH3 domain-binding protein 5-like	-0.4181318
Cluster-5873.0	slit homolog 3 protein isoform X1	0.4868951
Cluster-5873.0	slit homolog 3 protein isoform X3	0.4868951
Cluster-2779.1	small integral membrane protein 29-like isoform X1	-0.4493128
Cluster-5283.7294	sodium-dependent phosphate transporter 1-A-like isoform X1	-0.4167145
Cluster-5283.7294	Sodium-dependent phosphate transporter 1-B	-0.4167145
Cluster-7336.0	sodium bicarbonate transporter-like protein 11	0.4309512
Cluster-5043.1	splicing factor 45-like	0.4999898
Cluster-939.0	staphylococcal nuclease domain-containing protein 1-like	0.4160267
Cluster-1589.1	structural maintenance of chromosomes protein 4-like	0.5943608
Cluster-1589.2	structural maintenance of chromosomes protein 4-like	0.4803711
Cluster-4508.1	succinate-semialdehyde dehydrogenase, mitochondrial isoform X2	-0.5185147
Cluster-6960.5	suppressor of tumorigenicity 7 protein homolog isoform X1	0.4455554
Cluster-6960.3	suppressor of tumorigenicity 7 protein homolog isoform X3	0.6010888
Cluster-6960.5	suppressor of tumorigenicity 7 protein homolog isoform X3	0.4455554
Cluster-5407.0	TAF6-like RNA polymerase II p300/CBP-associated factor-associated factor 65 kDa subunit 6L	0.4951831
Cluster-5407.2	TAF6-like RNA polymerase II p300/CBP-associated factor-associated factor 65 kDa subunit 6L	-0.4407740
Cluster-5283.8458	TBC1 domain family member 1 isoform X1	0.5502460
Cluster-5283.8459	TBC1 domain family member 1 isoform X1	0.4874911
Cluster-5655.2	TBC1 domain family member 10A-like	0.4257123
Cluster-4279.0	TBC1 domain family member 22B-like isoform X2	0.4556684
Cluster-5283.8458	TBC1 domain family member 4-like	0.5502460
Cluster-2471.0	Thymidine kinase, cytosolic	0.4111023
Cluster-513.0	transcription factor AP-4-like	0.4130607
Cluster-5283.11735	transcription factor E2F5-like	0.5619094
Cluster-5595.2	transcriptional repressor scratch 2-like	-0.4305969
Cluster-5595.3	transcriptional repressor scratch 2-like	-0.4492102
Cluster-5590.1	transferrin-like protein	0.4650926
Cluster-5283.8587	transforming growth factor beta-1-induced transcript 1 protein isoform X5	0.4133377
Cluster-1556.0	translocon-associated protein subunit alpha-like	0.4666831
Cluster-2193.0	transmembrane 6 superfamily member 1-like	0.5408441
Cluster-7069.2	transmembrane and coiled-coil domains protein 1-like isoform X9	0.4382419
Cluster-7069.2	Transmembrane and coiled-coil domains protein 1,Transmembrane and coiled-coil domains protein 2	0.4382419
Cluster-7069.0	Transmembrane and coiled-coil domains protein 1,Transmembrane and coiled-coil domains protein 2	-0.4438194
Cluster-7069.2	transmembrane and coiled-coil domains protein 2-like isoform X5	0.4382419
Cluster-5283.8974	transmembrane emp24 domain-containing protein 2-like	0.4562876
Cluster-5842.0	transmembrane protein 214-B-like	0.4256480
Cluster-4896.0	tRNA pseudouridine(38/39) synthase-like	0.4454004
Cluster-4826.0	tubulin gamma-1 chain	0.4609642
Cluster-543.0	U1 small nuclear ribonucleoprotein A	0.4662582
Cluster-7041.5	U5 small nuclear ribonucleoprotein 200 kDa helicase	0.4254674
Cluster-7041.4	U5 small nuclear ribonucleoprotein 200 kDa helicase-like	-0.6455209
Cluster-4324.0	ubiquitin-40S ribosomal protein S27a	0.4725817
Cluster-5283.12004	uncharacterized protein LOC115215064 isoform X1	0.4728251
Cluster-5283.3155	uncharacterized protein LOC115216338 isoform X2	0.5341755
Cluster-5283.8328	universal stress protein A-like protein isoform X5	-0.5894106
Cluster-5283.8330	universal stress protein A-like protein isoform X5	-0.6338910
Cluster-5283.3645	unnamed protein product	0.4171142
Cluster-5283.9503	unnamed protein product	0.5956725
Cluster-5283.8643	UPF0687 protein C20orf27 homolog	0.5115440
Cluster-3640.1	vacuolar protein sorting-associated protein 11 homolog	0.4792737
Cluster-3640.0	vacuolar protein sorting-associated protein 11 homolog	-0.4168875
Cluster-6926.0	vinculin-like isoform X11	0.4559213
Cluster-6926.0	vinculin isoform X11	0.4559213
Cluster-1798.2	von Hippel-Lindau disease tumor suppressor-like	0.4350425
Cluster-1616.0	WD repeat-containing protein 75	0.4112103
Cluster-1816.1	zinc finger protein 40 isoform X1	-0.4580972
Cluster-5283.1831	zinc finger protein 704-like	-0.4140061

216 CO2 hub genes are found in CNS and not eyes

Eyes-specific CO2 hub genes

hub_genes_eyes_specific_annotated_CO2 %>%
  dplyr::select(gene, Sequence.Description, contains('GS')) %>%
  kable(caption = "Eyes-specific CO2 hub genes")

Eyes-specific CO2 hub genes
gene	Sequence.Description	GS_CO2_eyes
Cluster-7432.3	flotillin-1 isoform X1	0.4666175
Cluster-7432.3	flotillin-1 isoform X2	0.4666175
Cluster-7432.3	flotillin-1 isoform X4	0.4666175
Cluster-5283.4627	Hypothetical predicted protein	-0.6905153
Cluster-5283.994	neuronal acetylcholine receptor subunit alpha-10-like isoform X1	0.4227888
Cluster-5283.995	neuronal acetylcholine receptor subunit alpha-10-like isoform X1	0.4846243
Cluster-5283.659	protein crumbs-like isoform X1	0.5507875
Cluster-5283.660	protein crumbs-like isoform X1	0.5597179
Cluster-5283.661	protein crumbs-like isoform X2	0.5282035
Cluster-5283.8076	protein D2-like	0.4540652
Cluster-5283.8078	protein D2-like	0.5614226
Cluster-5283.3666	protein D2-like	-0.4700909
Cluster-5283.8075	protein D2-like	-0.5421762

11 CO2 hub genes are found in eyes and not CNS
5 different genes
- flotillin-1
- Hypothetical predicted protein
- neuronal acetylcholine receptor subunit alpha-10-like (2)
- protein crumbs-like (3)
- protein D2-like (4)

Compare hub genes shared by CO2 and a 2nd trait across tissues

Determine which hub genes are shared by CO2 and 1 or more other traits for each tissue

# CNS: modules shared by CO2 and 2nd trait:
# Modules shared by CO2, active time, dist, vel= green, darkgreen
# Modules shared by CO2, active time, dist, soft no. = steelblue
# Module shared by CO2, soft no., soft? = skyblue3

# GREEN
# Hub genes shared by CO2 and active_time_s: green
hub_genes_CNS_annotated_green_CO2_Active_time_s <- hub_genes_CNS_annotated_green_CO2 %>%
  full_join(hub_genes_CNS_annotated_green_Active_time_s, by = c('gene', 'MM_MEgreen', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# Hub genes shared by CO2 and dist: green
hub_genes_CNS_annotated_green_CO2_Dist <- hub_genes_CNS_annotated_green_CO2 %>%
  full_join(hub_genes_CNS_annotated_green_Dist, by = c('gene', 'MM_MEgreen', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# Hub genes shared by CO2 and vel: green
hub_genes_CNS_annotated_green_CO2_Vel <- hub_genes_CNS_annotated_green_CO2 %>%
  full_join(hub_genes_CNS_annotated_green_Vel, by = c('gene', 'MM_MEgreen', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# DARKGREEN
# Hub genes shared by CO2 and active_time_s: darkgreen
hub_genes_CNS_annotated_darkgreen_CO2_Active_time_s <- hub_genes_CNS_annotated_darkgreen_CO2 %>%
  full_join(hub_genes_CNS_annotated_darkgreen_Active_time_s, by = c('gene', 'MM_MEdarkgreen', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# Hub genes shared by CO2 and dist: darkgreen
hub_genes_CNS_annotated_darkgreen_CO2_Dist <- hub_genes_CNS_annotated_darkgreen_CO2 %>%
  full_join(hub_genes_CNS_annotated_darkgreen_Dist, by = c('gene', 'MM_MEdarkgreen', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# Hub genes shared by CO2 and vel: darkgreen
hub_genes_CNS_annotated_darkgreen_CO2_Vel <- hub_genes_CNS_annotated_darkgreen_CO2 %>%
  full_join(hub_genes_CNS_annotated_darkgreen_Vel, by = c('gene', 'MM_MEdarkgreen', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# STEELBLUE
# Hub genes shared by CO2 and active_time_s: steelblue
hub_genes_CNS_annotated_steelblue_CO2_Active_time_s <- hub_genes_CNS_annotated_steelblue_CO2 %>%
  full_join(hub_genes_CNS_annotated_steelblue_Active_time_s, by = c('gene', 'MM_MEsteelblue', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# Hub genes shared by CO2 and dist: steelblue
hub_genes_CNS_annotated_steelblue_CO2_Dist <- hub_genes_CNS_annotated_steelblue_CO2 %>%
  full_join(hub_genes_CNS_annotated_steelblue_Dist, by = c('gene', 'MM_MEsteelblue', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# Hub genes shared by CO2 and no. soft touches: steelblue
hub_genes_CNS_annotated_steelblue_CO2_Soft_touch_no. <- hub_genes_CNS_annotated_steelblue_CO2 %>%
  full_join(hub_genes_CNS_annotated_steelblue_Soft_touch_no., by = c('gene', 'MM_MEsteelblue', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# SKYBLUE3
# Hub genes shared by CO2 and softly touch?: skyblue3
hub_genes_CNS_annotated_skyblue3_CO2_Soft_touch <- hub_genes_CNS_annotated_skyblue3_CO2 %>%
  full_join(hub_genes_CNS_annotated_skyblue3_Soft_touch, by = c('gene', 'MM_MEskyblue3', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# Hub genes shared by CO2 and no. soft touches: skyblue3
hub_genes_CNS_annotated_skyblue3_CO2_Soft_touch_no. <- hub_genes_CNS_annotated_skyblue3_CO2 %>%
  full_join(hub_genes_CNS_annotated_skyblue3_Soft_touch_no., by = c('gene', 'MM_MEskyblue3', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)



# List of all hub genes shared by CO2 and 1 or more other traits, in the CNS
hub_genes_CNS_annotated_CO2_2ndtrait <- hub_genes_CNS_annotated_green_CO2_Active_time_s %>%
  full_join(hub_genes_CNS_annotated_green_CO2_Dist) %>%
  full_join(hub_genes_CNS_annotated_green_CO2_Vel) %>%
  full_join(hub_genes_CNS_annotated_darkgreen_CO2_Active_time_s) %>%
  full_join(hub_genes_CNS_annotated_darkgreen_CO2_Dist) %>%
  full_join(hub_genes_CNS_annotated_darkgreen_CO2_Vel) %>%
  full_join(hub_genes_CNS_annotated_steelblue_CO2_Active_time_s) %>%
  full_join(hub_genes_CNS_annotated_steelblue_CO2_Dist) %>%
  full_join(hub_genes_CNS_annotated_steelblue_CO2_Soft_touch_no.) %>%
  full_join(hub_genes_CNS_annotated_skyblue3_CO2_Soft_touch) %>%
  full_join(hub_genes_CNS_annotated_skyblue3_CO2_Soft_touch_no.) %>%
  rename(GS_CO2_CNS = GS_CO2,
         GS_Active_time_s_CNS = GS_Active_time_s,
         GS_Dist_CNS = GS_Dist,
         GS_Vel_CNS = GS_Vel,
         GS_Soft_touch_CNS = GS_Soft_touch,
         GS_Soft_touch_no._CNS = GS_Soft_touch_no.) %>%
        distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
        arrange(Sequence.Description)


# Eyes: modules shared by CO2 and 2nd trait:
# Modules shared by CO2, active time, dist, vel = yellowgreen
# Module shared by CO2, soft no., soft? = antiquewhite4

# YELLOWGREEN
# Hub genes shared by CO2 and active_time_s: yellowgreen
hub_genes_eyes_annotated_yellowgreen_CO2_Active_time_s <- hub_genes_eyes_annotated_yellowgreen_CO2 %>%
  full_join(hub_genes_eyes_annotated_yellowgreen_Active_time_s, by = c('gene', 'MM_MEyellowgreen', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# Hub genes shared by CO2 and dist: yellowgreen
hub_genes_eyes_annotated_yellowgreen_CO2_Dist <- hub_genes_eyes_annotated_yellowgreen_CO2 %>%
  full_join(hub_genes_eyes_annotated_yellowgreen_Dist, by = c('gene', 'MM_MEyellowgreen', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# Hub genes shared by CO2 and vel: yellowgreen
hub_genes_eyes_annotated_yellowgreen_CO2_Vel <- hub_genes_eyes_annotated_yellowgreen_CO2 %>%
  full_join(hub_genes_eyes_annotated_yellowgreen_Vel, by = c('gene', 'MM_MEyellowgreen', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# ANTIQUEWHITE4
# Hub genes shared by CO2 and soft_touch: antiquewhite4
hub_genes_eyes_annotated_antiquewhite4_CO2_Soft_touch <- hub_genes_eyes_annotated_antiquewhite4_CO2 %>%
  full_join(hub_genes_eyes_annotated_antiquewhite4_Soft_touch, by = c('gene', 'MM_MEantiquewhite4', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# Hub genes shared by CO2 and soft_touch_no.: antiquewhite4
hub_genes_eyes_annotated_antiquewhite4_CO2_Soft_touch_no. <- hub_genes_eyes_annotated_antiquewhite4_CO2 %>%
  full_join(hub_genes_eyes_annotated_antiquewhite4_Soft_touch_no., by = c('gene', 'MM_MEantiquewhite4', 'Sequence.Description', 'Sequence.Name')) %>%
  drop_na() %>% 
  dplyr::select(-contains('r_normexpr_vs_trait')) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)

# List of all hub genes shared by CO2 and 1 or more other traits, in the eyes
hub_genes_eyes_annotated_CO2_2ndtrait <- hub_genes_eyes_annotated_yellowgreen_CO2_Active_time_s %>%
  full_join(hub_genes_eyes_annotated_yellowgreen_CO2_Dist) %>%
  full_join(hub_genes_eyes_annotated_yellowgreen_CO2_Vel) %>%
  full_join(hub_genes_eyes_annotated_antiquewhite4_CO2_Soft_touch) %>%
  full_join(hub_genes_eyes_annotated_antiquewhite4_CO2_Soft_touch_no.) %>%
  rename(GS_CO2_eyes = GS_CO2,
         GS_Active_time_s_eyes = GS_Active_time_s,
         GS_Dist_eyes = GS_Dist,
         GS_Vel_eyes = GS_Vel,
         GS_Soft_touch_eyes = GS_Soft_touch,
         GS_Soft_touch_no._eyes = GS_Soft_touch_no.) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description)



# Are any CO2 hub genes found in both tissues also a hub gene for a 2nd trait in one or both tissues?
# Filter by gene name and not exact Cluster- match

# CO2 hub genes in both tissues that are also a hub gene for a 2nd trait in the CNS (filter by gene name and not exact Cluster- match)
a <- hub_genes_CNS_eyes_annotated_CO2 %>%
  filter(gene %in% hub_genes_CNS_annotated_CO2_2ndtrait$gene) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) 
b <- a %>%
  left_join(hub_genes_CNS_annotated_CO2_2ndtrait)

# CO2 hub genes in both tissues that are also a hub gene for a 2nd trait in the eyes (filter by gene name and not exact Cluster- match)
c <- hub_genes_CNS_eyes_annotated_CO2 %>%
  filter(gene %in% hub_genes_eyes_annotated_CO2_2ndtrait$gene) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) 
d <- c %>%
  left_join(hub_genes_eyes_annotated_CO2_2ndtrait)

# Combine to have full list of genes that are hub genes for CO2 treatment in both tissues, and also a hub gene for a 2nd trait in one or both tissues
hub_genes_CNS_eyes_annotated_CO2_2ndtrait <- b %>%
  full_join(d) %>%
  select(Sequence.Name, gene, Sequence.Description, contains('GS')) %>%
  arrange(Sequence.Description, gene)
nrow(hub_genes_CNS_eyes_annotated_CO2_2ndtrait %>%
       distinct(gene))

# All hub genes for CO2 in either tissue plus showing which genes are also hub genes for another trait in either tissue
hub_genes_CNS_eyes_annotated_all_CO2_2ndtrait <- hub_genes_CNS_eyes_annotated_CO2_2ndtrait %>%
  full_join(hub_genes_CNS_eyes_annotated_CO2)



# CNS-specific hub genes for CO2 and a 2nd trait in the CNS
hub_genes_CNS_specific_annotated_CO2_2ndtrait <- hub_genes_CNS_annotated_CO2_2ndtrait %>%
  filter(!gene %in% hub_genes_CNS_eyes_annotated_CO2_2ndtrait$gene) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description, gene)
nrow(hub_genes_CNS_specific_annotated_CO2_2ndtrait %>% 
       distinct(gene))
# All CNS-specific hub genes for CO2 plus showing which genes are also hub genes for another trait in the CNS
hub_genes_CNS_specific_annotated_all_CO2_2ndtrait <- hub_genes_CNS_specific_annotated_CO2 %>%
  left_join(hub_genes_CNS_specific_annotated_CO2_2ndtrait)

# Split full table to look at only those genes whose 2nd trait is soft touch no. or soft_touch (14 genes)
hub_genes_CNS_specific_CO2_Soft_touch_no. <- hub_genes_CNS_specific_annotated_CO2_2ndtrait %>%
  arrange(GS_Soft_touch_no._CNS, GS_Soft_touch_CNS) %>%
  slice_head(n = 14)
nrow(hub_genes_CNS_specific_CO2_Soft_touch_no. %>%
       distinct(gene))
# Split full table to look at only those genes whose 2nd trait is one or more of the activity traits (active time, distance, speed)
hub_genes_CNS_specific_CO2_1activity <- hub_genes_CNS_specific_annotated_CO2_2ndtrait %>%
  arrange(GS_Soft_touch_no._CNS, GS_Soft_touch_CNS) %>%
  slice_tail(n = (nrow(hub_genes_CNS_specific_annotated_CO2_2ndtrait) - 14))
nrow(hub_genes_CNS_specific_CO2_1activity %>%
       distinct(gene))

# Split full table to look at only those genes whose 2nd trait is all 3 activity traits - i.e. these hub genes are shared by CO2, active time, distance and speed in the CNS only
hub_genes_CNS_specific_CO2_activity <- hub_genes_CNS_specific_CO2_1activity %>%
  select(-GS_Soft_touch_no._CNS, -GS_Soft_touch_CNS, -contains('MM')) %>%
  drop_na()
nrow(hub_genes_CNS_specific_CO2_activity %>%
       distinct(gene))



# Eyes-specific hub genes for CO2 and a 2nd trait in the eyes
hub_genes_eyes_specific_annotated_CO2_2ndtrait <- hub_genes_eyes_annotated_CO2_2ndtrait %>%
  filter(!gene %in% hub_genes_CNS_eyes_annotated_CO2_2ndtrait$gene) %>%
  distinct(gene, Sequence.Description, .keep_all = TRUE) %>%
  arrange(Sequence.Description, gene)
nrow(hub_genes_eyes_specific_annotated_CO2_2ndtrait %>%
       distinct(gene))

# All eyes-specific hub genes for CO2 plus showing which genes are also hub genes for another trait in the eyes
hub_genes_eyes_specific_annotated_all_CO2_2ndtrait <- hub_genes_eyes_specific_annotated_CO2 %>%
  left_join(hub_genes_eyes_specific_annotated_CO2_2ndtrait)


save(hub_genes_CNS_annotated_CO2_2ndtrait, hub_genes_eyes_annotated_CO2_2ndtrait, hub_genes_CNS_eyes_annotated_CO2_2ndtrait, hub_genes_CNS_eyes_annotated_all_CO2_2ndtrait, hub_genes_CNS_specific_annotated_CO2_2ndtrait, hub_genes_eyes_specific_annotated_CO2_2ndtrait, hub_genes_CNS_specific_annotated_all_CO2_2ndtrait, hub_genes_eyes_specific_annotated_all_CO2_2ndtrait, hub_genes_CNS_specific_CO2_Soft_touch_no., hub_genes_CNS_specific_CO2_1activity, hub_genes_CNS_specific_CO2_activity, file = 'results/Compare_hub_genes_CO2_2ndtrait.RData')

Hub genes for CO2 in both tissues that are also a hub gene for another trait in one or both tissues

hub_genes_CNS_eyes_annotated_all_CO2_2ndtrait %>%
  dplyr::select(gene, Sequence.Description, contains('GS')) %>%
  arrange(-GS_CO2_CNS, -GS_CO2_eyes, -GS_Active_time_s_CNS, -GS_Active_time_s_eyes, -GS_Dist_CNS, -GS_Dist_eyes, -GS_Vel_CNS, -GS_Vel_eyes, -GS_Soft_touch_no._CNS, -GS_Soft_touch_no._eyes) %>%
  kable(caption = "All hub genes for CO2 in both tissues plus showing any that are also a hub gene for another trait in one or both tissues")

All hub genes for CO2 in both tissues plus showing any that are also a hub gene for another trait in one or both tissues
gene	Sequence.Description	GS_CO2_CNS	GS_CO2_eyes	GS_Active_time_s_CNS	GS_Dist_CNS	GS_Vel_CNS	GS_Soft_touch_no._CNS	GS_Soft_touch_CNS	GS_Active_time_s_eyes	GS_Dist_eyes	GS_Vel_eyes	GS_Soft_touch_eyes	GS_Soft_touch_no._eyes
Cluster-5283.11591	signal recognition particle subunit SRP72-like	0.6009590	0.5305654	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA
Cluster-5283.9395	acyl carrier protein, mitochondrial	0.5669365	0.5648262	-0.4102501	NA	NA	NA	NA	NA	NA	NA	NA	NA
Cluster-5283.9395	protein transport protein Sec16A isoform X2	0.5669365	0.5648262	-0.4102501	NA	NA	NA	NA	NA	NA	NA	NA	NA
Cluster-6136.0	mannose-1-phosphate guanyltransferase alpha-A-like isoform X1	0.5648902	0.5365811	NA	NA	NA	-0.5424273	NA	NA	NA	NA	NA	NA
Cluster-6136.0	mannose-1-phosphate guanyltransferase alpha-A-like isoform X2	0.5648902	0.5365811	NA	NA	NA	-0.5424273	NA	NA	NA	NA	NA	NA
Cluster-4644.0	glucose-induced degradation protein 4 homolog	0.5326470	0.4958090	NA	NA	NA	NA	NA	-0.4455365	NA	NA	NA	NA
Cluster-1798.0	von Hippel-Lindau disease tumor suppressor-like	0.5197694	0.4275397	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA
Cluster-5283.11447	derlin-1-like	0.4213809	0.4252181	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA
Cluster-5283.11447	derlin-1-like isoform X1	0.4213809	0.4252181	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA
Cluster-1798.1	von Hippel-Lindau disease tumor suppressor-like	-0.5068472	-0.4988114	NA	NA	NA	0.4457227	NA	NA	NA	NA	NA	NA
Cluster-5283.11592	signal recognition particle subunit SRP72-like	-0.5403115	-0.5298213	NA	NA	NA	NA	NA	NA	NA	NA	NA	0.4223930
Cluster-5283.9397	acyl carrier protein, mitochondrial	-0.5502659	-0.5267482	NA	NA	NA	0.4305405	NA	NA	NA	NA	NA	NA
Cluster-5283.7600	cyclin-dependent kinase 10	-0.5580496	-0.5808814	NA	NA	NA	NA	NA	NA	NA	NA	NA	0.4917453
Cluster-5283.9788	acyl-coenzyme A thioesterase 8-like	-0.5745791	-0.6212306	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA
Cluster-5283.5934	ubiquitin thioesterase zranb1-B	-0.5830062	-0.5241672	NA	NA	NA	0.4747209	NA	NA	NA	NA	NA	0.4607134
Cluster-6136.1	mannose-1-phosphate guanyltransferase alpha-A-like isoform X1	-0.6553945	-0.6958523	NA	NA	NA	0.4593150	NA	NA	NA	NA	NA	NA
Cluster-6136.1	mannose-1-phosphate guanyltransferase alpha-A-like isoform X2	-0.6553945	-0.6958523	NA	NA	NA	0.4593150	NA	NA	NA	NA	NA	NA
Cluster-5283.997	neuronal acetylcholine receptor subunit alpha-10-like isoform X1	-0.6804924	-0.6469036	NA	NA	NA	0.5047080	NA	NA	NA	NA	NA	NA
Cluster-5283.997	neuronal acetylcholine receptor subunit alpha-10 isoform X1	-0.6804924	-0.6469036	NA	NA	NA	0.5047080	NA	NA	NA	NA	NA	NA

All CNS-specific CO2 hub genes, showing which are also hub genes for another trait in the CNS

hub_genes_CNS_specific_annotated_all_CO2_2ndtrait %>%
  dplyr::select(gene, Sequence.Description, contains('GS')) %>%
  arrange(-GS_CO2_CNS, -GS_Active_time_s_CNS, -GS_Dist_CNS, -GS_Vel_CNS, -GS_Soft_touch_no._CNS) %>%
  kable(caption = "All hub genes for CO2 only in the CNS, showing any genes that are also a hub gene for another trait in the CNS")

All hub genes for CO2 only in the CNS, showing any genes that are also a hub gene for another trait in the CNS
gene	Sequence.Description	GS_CO2_CNS	GS_Active_time_s_CNS	GS_Dist_CNS	GS_Vel_CNS	GS_Soft_touch_no._CNS	GS_Soft_touch_CNS
Cluster-5283.4185	proactivator polypeptide-like	0.7106963	NA	NA	NA	NA	NA
Cluster-5283.4185	prosaposin-like isoform X1	0.7106963	NA	NA	NA	NA	NA
Cluster-5283.4185	prosaposin-like isoform X2	0.7106963	NA	NA	NA	NA	NA
Cluster-6960.3	suppressor of tumorigenicity 7 protein homolog isoform X3	0.6010888	NA	NA	NA	NA	NA
Cluster-5283.4625	Hypothetical predicted protein	0.5982087	NA	NA	NA	NA	NA
Cluster-5283.9503	unnamed protein product	0.5956725	NA	NA	NA	-0.4459484	NA
Cluster-1589.1	structural maintenance of chromosomes protein 4-like	0.5943608	0.4145229	0.5019827	0.5170001	NA	NA
Cluster-5283.5477	eukaryotic elongation factor 2 kinase-like	0.5820170	NA	NA	NA	NA	NA
Cluster-5283.8911	Histone-lysine N-methyltransferase ASHH2,Putative histone-lysine N-methyltransferase ASHH4,Histone-lysine N-methyltransferase ASHH3,Probable histone-lysine N-methyltransferase Mes-4,Histone-lysine N-methyltransferase SETD2,Histone-lysine N-methyltransferase ASH1L,Histone-lysine N-methyltransferase, H3 lysine-36 specific,Histone-lysine N-methyltransferase ASHH1	0.5815156	0.6158873	0.6814359	0.6430162	NA	NA
Cluster-2968.1	60S ribosomal protein L29	0.5701237	NA	NA	NA	NA	NA
Cluster-4436.0	E3 ubiquitin-protein ligase TTC3	0.5676908	NA	0.4146534	0.4033603	NA	NA
Cluster-5283.5083	GTP-binding nuclear protein Ran	0.5674211	NA	NA	NA	NA	NA
Cluster-5283.11735	transcription factor E2F5-like	0.5619094	NA	NA	NA	NA	NA
Cluster-5283.8458	TBC1 domain family member 1 isoform X1	0.5502460	0.5357103	0.6111004	0.5440863	NA	NA
Cluster-5283.8458	TBC1 domain family member 4-like	0.5502460	0.5357103	0.6111004	0.5440863	NA	NA
Cluster-6766.1	ETS domain-containing protein Elk-3-like isoform X1	0.5481240	0.4308333	0.5060292	0.4953842	NA	NA
Cluster-3207.0	origin recognition complex subunit 1-like isoform X1	0.5468322	NA	NA	NA	NA	NA
Cluster-6766.2	ETS domain-containing protein Elk-3-like isoform X1	0.5444661	NA	0.4876224	0.4448396	NA	NA
Cluster-2193.0	transmembrane 6 superfamily member 1-like	0.5408441	0.4889102	0.5644748	0.5346213	NA	NA
Cluster-2996.0	Hypothetical predicted protein	0.5397028	NA	NA	NA	NA	NA
Cluster-3137.3	mitochondrial folate transporter/carrier	0.5368605	0.5607357	0.6150422	0.5411519	NA	NA
Cluster-5739.0	glycine N-acyltransferase-like	0.5365564	NA	0.4433379	0.4065594	NA	NA
Cluster-5283.3155	uncharacterized protein LOC115216338 isoform X2	0.5341755	NA	NA	NA	NA	NA
Cluster-6037.1	mitochondrial mRNA pseudouridine synthase Trub2-like	0.5336372	NA	NA	NA	-0.5248529	NA
Cluster-6037.1	probable tRNA pseudouridine synthase 2	0.5336372	NA	NA	NA	-0.5248529	NA
Cluster-5283.8057	actin-related protein 2/3 complex subunit 5-like	0.5253153	NA	0.4470360	0.4088042	NA	NA
Cluster-3336.1	serrate RNA effector molecule homolog isoform X1	0.5201193	0.4468640	0.5221397	0.4668425	NA	NA
Cluster-4583.0	DONS protein	0.5138341	0.4575650	0.5130570	0.4270464	NA	NA
Cluster-4583.0	protein downstream neighbor of son homolog	0.5138341	0.4575650	0.5130570	0.4270464	NA	NA
Cluster-6635.0	E3 ubiquitin-protein ligase CBL-B-like isoform X2	0.5129386	NA	NA	0.4404907	NA	NA
Cluster-5283.8643	UPF0687 protein C20orf27 homolog	0.5115440	NA	NA	NA	NA	NA
Cluster-5283.4049	Forkhead box protein M1	0.5075015	0.4048226	0.4899203	0.4504801	NA	NA
Cluster-5283.4049	FOXM1 protein	0.5075015	0.4048226	0.4899203	0.4504801	NA	NA
Cluster-2344.0	protein disulfide-isomerase A5-like	0.5035412	0.5510129	0.6187048	0.5004431	NA	NA
Cluster-5043.1	splicing factor 45-like	0.4999898	NA	NA	NA	NA	NA
Cluster-4064.0	E3 ubiquitin-protein ligase UHRF1-like	0.4985406	NA	0.4734848	0.5389602	NA	NA
Cluster-5407.0	TAF6-like RNA polymerase II p300/CBP-associated factor-associated factor 65 kDa subunit 6L	0.4951831	NA	NA	NA	NA	NA
Cluster-5283.2953	hypothetical protein OCBIM_22018450mg	0.4880647	0.4613747	0.5271165	0.4834484	NA	NA
Cluster-5283.8459	TBC1 domain family member 1 isoform X1	0.4874911	0.4542003	0.5228744	0.4358728	NA	NA
Cluster-5873.0	slit homolog 3 protein isoform X1	0.4868951	NA	0.4264991	NA	NA	NA
Cluster-5873.0	slit homolog 3 protein isoform X3	0.4868951	NA	0.4264991	NA	NA	NA
Cluster-5283.3285	cyclin-dependent kinase 10	0.4848457	NA	NA	NA	-0.4835698	NA
Cluster-5283.8383	—NA—	0.4838917	0.4394363	0.5266279	0.6377481	NA	NA
Cluster-3693.0	mitotic checkpoint serine/threonine-protein kinase BUB1-like isoform X1	0.4814614	0.5366176	0.6056960	0.5449362	NA	NA
Cluster-1589.2	structural maintenance of chromosomes protein 4-like	0.4803711	0.4412973	0.5082565	0.4263741	NA	NA
Cluster-2189.0	PREDICTED: uncharacterized protein LOC106871855	0.4796581	0.5426931	0.6241615	0.6333080	NA	NA
Cluster-3640.1	vacuolar protein sorting-associated protein 11 homolog	0.4792737	NA	NA	NA	NA	NA
Cluster-386.0	nucleolar protein 58	0.4785102	0.5690514	0.6351542	0.5867639	NA	NA
Cluster-1187.0	MPN domain-containing protein	0.4770145	NA	NA	NA	NA	NA
Cluster-2220.0	nuclear pore complex protein Nup160-like	0.4757864	0.4363580	0.5080370	NA	NA	NA
Cluster-5283.1760	receptor-type tyrosine-protein phosphatase C isoform X1	0.4736964	0.5563567	0.6171082	0.5326798	NA	NA
Cluster-4300.0	eukaryotic translation initiation factor 3 subunit B-like	0.4736163	0.4726097	0.5475163	0.4133434	NA	NA
Cluster-4731.0	amyloid beta A4 precursor protein-binding family B member 1-interacting protein-like isoform X10	0.4733935	0.6477663	0.7165739	0.6440861	NA	NA
Cluster-5283.12004	uncharacterized protein LOC115215064 isoform X1	0.4728251	0.5437884	0.6119825	0.5211616	NA	NA
Cluster-4324.0	RS27A protein	0.4725817	NA	NA	NA	NA	NA
Cluster-4324.0	ubiquitin-40S ribosomal protein S27a	0.4725817	NA	NA	NA	NA	NA
Cluster-5283.9687	—NA—	0.4692527	0.5518659	0.6126426	0.5892631	NA	NA
Cluster-5434.0	Krueppel-like factor 10 isoform X2	0.4667081	0.6873601	0.7187858	0.5122383	NA	NA
Cluster-1556.0	translocon-associated protein subunit alpha-like	0.4666831	0.4872717	0.5481953	0.4271813	NA	NA
Cluster-543.0	U1 small nuclear ribonucleoprotein A	0.4662582	0.5563076	0.6085967	0.5428685	NA	NA
Cluster-5590.1	transferrin-like protein	0.4650926	0.4677857	0.5310557	NA	NA	NA
Cluster-4663.0	DNA-directed RNA polymerase I subunit RPA1-like	0.4621146	0.4989963	0.5711303	0.4952457	NA	NA
Cluster-2941.0	elongation factor 1-gamma-like	0.4612368	0.5194887	0.5801571	0.4741960	NA	NA
Cluster-4826.0	tubulin gamma-1 chain	0.4609642	NA	NA	0.4066123	NA	NA
Cluster-5815.1	nuclear pore complex protein Nup155	0.4605414	0.6403703	0.7022137	0.6565980	NA	NA
Cluster-5063.0	60S ribosomal protein L23a	0.4602487	0.4345528	0.4889867	0.4649673	NA	NA
Cluster-5283.8974	transmembrane emp24 domain-containing protein 2-like	0.4562876	0.4458391	0.5006172	NA	NA	NA
Cluster-6926.0	vinculin-like isoform X11	0.4559213	0.6645853	0.7219131	0.5763072	NA	NA
Cluster-6926.0	vinculin isoform X11	0.4559213	0.6645853	0.7219131	0.5763072	NA	NA
Cluster-4279.0	TBC1 domain family member 22B-like isoform X2	0.4556684	0.4878803	0.5727117	0.4852384	NA	NA
Cluster-4600.0	pre-mRNA-processing factor 40 homolog A-like isoform X1	0.4552494	NA	0.4311468	NA	NA	NA
Cluster-5283.9773	PREDICTED: uncharacterized protein LOC106873754	0.4551946	0.5880532	0.6541798	0.6075964	NA	NA
Cluster-4670.0	anillin isoform X2	0.4540275	NA	0.4053864	NA	NA	NA
Cluster-6908.0	nucleosome-remodeling factor subunit BPTF	0.4537115	0.4521841	0.5255662	NA	NA	NA
Cluster-6908.0	nucleosome-remodeling factor subunit BPTF-like	0.4537115	0.4521841	0.5255662	NA	NA	NA
Cluster-6908.0	nucleosome-remodeling factor subunit BPTF isoform X2	0.4537115	0.4521841	0.5255662	NA	NA	NA
Cluster-6908.0	nucleosome-remodeling factor subunit NURF301-like isoform X1	0.4537115	0.4521841	0.5255662	NA	NA	NA
Cluster-811.0	proteasome subunit alpha type-5	0.4536093	NA	NA	NA	NA	NA
Cluster-5283.4518	myophilin-like	0.4531665	0.5189168	0.5963496	0.5999028	NA	NA
Cluster-4358.0	nascent polypeptide-associated complex subunit alpha, muscle-specific form	0.4522862	0.5265049	0.6089686	0.5295673	NA	NA
Cluster-5593.1	myosin regulatory light polypeptide 9	0.4509710	NA	NA	0.4161821	NA	NA
Cluster-5593.1	myosin regulatory light polypeptide 9 isoform X2	0.4509710	NA	NA	0.4161821	NA	NA
Cluster-5283.350	protein ecdysoneless homolog	0.4501941	0.4702083	0.5275103	NA	NA	NA
Cluster-1052.0	cholesterol 7-desaturase-like	0.4498531	0.5259601	0.6104803	0.5527711	NA	NA
Cluster-1573.0	60S ribosomal protein L7-like	0.4486509	0.4062193	0.4674642	NA	NA	NA
Cluster-5780.0	piwi-like protein 1	0.4480475	0.5119447	0.5952261	0.5033515	NA	NA
Cluster-5222.0	mitotic checkpoint protein BUB3-like	0.4472736	NA	0.4156521	NA	NA	NA
Cluster-5283.3723	myophilin	0.4469886	0.4525416	0.5315464	0.5096110	NA	NA
Cluster-6960.5	suppressor of tumorigenicity 7 protein homolog isoform X1	0.4455554	NA	NA	NA	NA	NA
Cluster-6960.5	suppressor of tumorigenicity 7 protein homolog isoform X3	0.4455554	NA	NA	NA	NA	NA
Cluster-4896.0	tRNA pseudouridine(38/39) synthase-like	0.4454004	0.4955366	0.5711006	0.5054832	NA	NA
Cluster-324.0	brain-specific angiogenesis inhibitor 3	0.4452138	0.4129137	0.4823173	NA	NA	NA
Cluster-4038.0	exosome component 10-like	0.4429619	NA	0.4913831	0.4881418	NA	NA
Cluster-3137.2	Mitochondrial folate transporter/carrier	0.4427928	NA	NA	NA	NA	NA
Cluster-5283.7141	myosin heavy chain, embryonic smooth muscle isoform-like	0.4424952	0.5756057	0.6480807	0.5724905	NA	NA
Cluster-5283.7141	myosin heavy chain, non-muscle-like isoform X2	0.4424952	0.5756057	0.6480807	0.5724905	NA	NA
Cluster-5283.7141	myosin heavy chain, non-muscle-like isoform X3	0.4424952	0.5756057	0.6480807	0.5724905	NA	NA
Cluster-1712.0	mdm2-binding protein-like	0.4419845	0.5447745	0.5929601	0.5223902	NA	NA
Cluster-1917.0	dolichyl-diphosphooligosaccharide–protein glycosyltransferase subunit 1	0.4407760	0.4581178	0.5151159	NA	NA	NA
Cluster-1917.0	dolichyl-diphosphooligosaccharide–protein glycosyltransferase subunit 1-like	0.4407760	0.4581178	0.5151159	NA	NA	NA
Cluster-5283.127	heparan sulfate glucosamine 3-O-sulfotransferase 5	0.4396789	NA	NA	NA	NA	NA
Cluster-4472.0	PREDICTED: uncharacterized protein LOC106877732 isoform X22	0.4396003	-0.5708133	NA	NA	-0.4931514	NA
Cluster-5283.10092	DNA replication licensing factor mcm5-like	0.4395230	NA	NA	NA	NA	NA
Cluster-5171.0	condensin complex subunit 2-like	0.4391482	0.4930385	0.5645805	0.4940832	NA	NA
Cluster-5283.7253	FOXM1 protein	0.4390796	0.4587317	0.5197212	0.4467482	NA	NA
Cluster-7069.2	transmembrane and coiled-coil domains protein 1-like isoform X9	0.4382419	NA	NA	NA	NA	NA
Cluster-7069.2	Transmembrane and coiled-coil domains protein 1,Transmembrane and coiled-coil domains protein 2	0.4382419	NA	NA	NA	NA	NA
Cluster-7069.2	transmembrane and coiled-coil domains protein 2-like isoform X5	0.4382419	NA	NA	NA	NA	NA
Cluster-5467.0	dolichyl-diphosphooligosaccharide–protein glycosyltransferase subunit STT3A	0.4361419	0.5230774	0.5877986	0.4665402	NA	NA
Cluster-1798.2	von Hippel-Lindau disease tumor suppressor-like	0.4350425	NA	NA	NA	NA	NA
Cluster-5283.4623	Hypothetical predicted protein	0.4339888	NA	NA	NA	-0.4363499	NA
Cluster-4049.0	RNA cytidine acetyltransferase	0.4334360	0.4743763	0.5481965	0.5254321	NA	NA
Cluster-4049.0	RNA cytidine acetyltransferase-like isoform X1	0.4334360	0.4743763	0.5481965	0.5254321	NA	NA
Cluster-825.0	eukaryotic translation initiation factor 3 subunit D	0.4332197	0.4593105	0.5096982	NA	NA	NA
Cluster-7415.2	dolichyl-diphosphooligosaccharide–protein glycosyltransferase subunit 2	0.4322243	0.4305076	0.4864740	NA	NA	NA
Cluster-7336.0	plasminogen activator inhibitor 1 RNA-binding protein-like	0.4309512	0.4587919	0.5260941	NA	NA	NA
Cluster-7336.0	plasminogen activator inhibitor 1 RNA-binding protein-like isoform X2	0.4309512	0.4587919	0.5260941	NA	NA	NA
Cluster-7336.0	plasminogen activator inhibitor 1 RNA-binding protein isoform X2	0.4309512	0.4587919	0.5260941	NA	NA	NA
Cluster-7336.0	sodium bicarbonate transporter-like protein 11	0.4309512	0.4587919	0.5260941	NA	NA	NA
Cluster-5283.7142	myosin heavy chain, non-muscle-like isoform X1	0.4297195	0.5474446	0.6262804	0.5555448	NA	NA
Cluster-5283.7142	myosin heavy chain, non-muscle-like isoform X3	0.4297195	0.5474446	0.6262804	0.5555448	NA	NA
Cluster-5283.7142	myosin heavy chain, non-muscle-like isoform X5	0.4297195	0.5474446	0.6262804	0.5555448	NA	NA
Cluster-5283.7142	myosin heavy chain, non-muscle isoform X1	0.4297195	0.5474446	0.6262804	0.5555448	NA	NA
Cluster-5283.7142	myosin heavy chain, non-muscle isoform X3	0.4297195	0.5474446	0.6262804	0.5555448	NA	NA
Cluster-5283.7142	myosin heavy chain, non-muscle isoform X4	0.4297195	0.5474446	0.6262804	0.5555448	NA	NA
Cluster-5283.7142	non-muscle myosin II heavy chain	0.4297195	0.5474446	0.6262804	0.5555448	NA	NA
Cluster-2500.9	protein disulfide-isomerase A3-like	0.4290375	0.5488578	0.5873160	0.4097081	NA	NA
Cluster-6260.0	60S ribosomal protein L4	0.4284396	0.5469604	0.5999999	0.5059476	NA	NA
Cluster-6260.0	EOG090X0822	0.4284396	0.5469604	0.5999999	0.5059476	NA	NA
Cluster-1082.0	nuclear pore complex protein Nup205	0.4280466	0.4056541	0.5042148	0.4544634	NA	NA
Cluster-5283.776	dual specificity protein kinase Ttk-like	0.4272533	0.6460198	0.7074168	0.6251784	NA	NA
Cluster-4213.0	proliferation-associated protein 2G4-like	0.4259426	0.4620948	0.5319588	NA	NA	NA
Cluster-5283.262	protein phosphatase 1 regulatory subunit 37-like isoform X1	0.4257566	NA	NA	NA	NA	NA
Cluster-5283.262	protein phosphatase 1 regulatory subunit 37-like isoform X2	0.4257566	NA	NA	NA	NA	NA
Cluster-5655.2	TBC1 domain family member 10A-like	0.4257123	0.5671433	0.6411173	0.6769426	NA	NA
Cluster-5842.0	transmembrane protein 214-B-like	0.4256480	0.4922950	0.5574602	0.4593846	NA	NA
Cluster-7041.5	U5 small nuclear ribonucleoprotein 200 kDa helicase	0.4254674	NA	NA	NA	-0.4449092	NA
Cluster-377.0	protein kintoun-like	0.4254137	0.5499403	0.6297460	0.5736690	NA	NA
Cluster-5283.1674	monoacylglycerol lipase ABHD12-like	0.4239891	0.5310818	0.5730769	0.5200302	NA	NA
Cluster-5283.6821	integrin alpha-9	0.4229773	0.4595080	0.5336641	0.4999438	NA	NA
Cluster-5283.6821	integrin alpha-9-like	0.4229773	0.4595080	0.5336641	0.4999438	NA	NA
Cluster-6824.0	—NA—	0.4213050	0.4136889	0.4838990	0.4636983	NA	NA
Cluster-6547.1	maternal embryonic leucine zipper kinase-like isoform X1	0.4212175	0.5019134	0.5800106	0.4887123	NA	NA
Cluster-4552.1	mesoderm-specific transcript homolog protein	0.4211081	0.4239405	0.4720126	NA	NA	NA
Cluster-5283.8790	ras-related C3 botulinum toxin substrate 1	0.4205143	0.5532699	0.6213737	0.5522195	NA	NA
Cluster-6415.0	ANKL2 protein	0.4197708	NA	NA	NA	NA	NA
Cluster-6415.0	Ankyrin repeat and LEM domain-containing protein 2	0.4197708	NA	NA	NA	NA	NA
Cluster-5283.9043	dual specificity protein kinase TTK-like	0.4190020	0.5200373	0.5863534	0.4081272	NA	NA
Cluster-2928.0	alpha 1,3-glucosidase	0.4187676	0.4067997	0.4683816	NA	NA	NA
Cluster-2928.0	neutral alpha-glucosidase AB-like isoform X1	0.4187676	0.4067997	0.4683816	NA	NA	NA
Cluster-7185.0	elongation factor 1-alpha	0.4187221	0.5391999	0.5877397	0.4760490	NA	NA
Cluster-7185.0	elongation factor 1-alpha 1	0.4187221	0.5391999	0.5877397	0.4760490	NA	NA
Cluster-7185.0	Elongation factor 1-alpha 1	0.4187221	0.5391999	0.5877397	0.4760490	NA	NA
Cluster-7185.0	elongation factor 1-alpha, oocyte form-like	0.4187221	0.5391999	0.5877397	0.4760490	NA	NA
Cluster-5283.1759	Receptor-type tyrosine-protein phosphatase mu,Receptor-type tyrosine-protein phosphatase F,Receptor-type tyrosine-protein phosphatase H,Receptor-type tyrosine-protein phosphatase S,Receptor-type tyrosine-protein phosphatase eta,Receptor-type tyrosine-protein phosphatase gamma,Receptor-type tyrosine-protein phosphatase O,Receptor-type tyrosine-protein phosphatase delta	0.4185260	0.5501010	0.6093177	0.5678264	NA	NA
Cluster-5283.3645	unnamed protein product	0.4171142	NA	NA	0.4059121	NA	NA
Cluster-3427.0	cleavage stimulation factor subunit 1-like	0.4166065	0.4734487	0.5131751	NA	NA	NA
Cluster-939.0	staphylococcal nuclease domain-containing protein 1-like	0.4160267	0.5439668	0.6242923	0.5176595	NA	NA
Cluster-6841.1	DNA replication complex GINS protein PSF2-like	0.4152517	0.4509917	0.5075637	0.4646441	NA	NA
Cluster-5293.0	protein disulfide-isomerase A4	0.4149667	0.5005162	0.5638310	0.4922633	NA	NA
Cluster-5293.0	protein disulfide-isomerase A4-like	0.4149667	0.5005162	0.5638310	0.4922633	NA	NA
Cluster-2667.0	peptidyl-prolyl cis-trans isomerase B	0.4143032	0.4044762	0.4614062	NA	NA	NA
Cluster-5283.9055	inner centromere protein A isoform X1	0.4139061	0.5360396	0.6026661	0.5533933	NA	NA
Cluster-1629.0	probable ATP-dependent RNA helicase DDX4 isoform X2	0.4135452	0.5798278	0.6494416	0.6085173	NA	NA
Cluster-5283.8587	transforming growth factor beta-1-induced transcript 1 protein isoform X5	0.4133377	0.5304703	0.6020447	0.5024574	NA	NA
Cluster-1789.0	protein DBF4 homolog A-like	0.4131291	0.5137073	0.5706035	0.4312890	NA	NA
Cluster-513.0	transcription factor AP-4-like	0.4130607	0.4226689	0.5022077	0.4150948	NA	NA
Cluster-5283.7703	Acidic leucine-rich nuclear phosphoprotein 32 family member A	0.4129792	0.4737791	0.5645247	0.4854038	NA	NA
Cluster-7226.0	bifunctional purine biosynthesis protein PURH	0.4124972	0.5800309	0.6303639	0.6403693	NA	NA
Cluster-1616.0	WD repeat-containing protein 75	0.4112103	NA	NA	NA	NA	NA
Cluster-2471.0	Thymidine kinase, cytosolic	0.4111023	0.4367639	0.5117047	0.4909734	NA	NA
Cluster-6718.1	FACT complex subunit SPT16-like	0.4105340	0.4558954	0.5295690	0.4657071	NA	NA
Cluster-5283.10091	DNA replication licensing factor mcm5-like	0.4104516	0.5633576	0.6365702	0.5656444	NA	NA
Cluster-1716.2	major egg antigen	0.4098704	NA	NA	NA	NA	NA
Cluster-6662.1	PHD finger protein 24-like	0.4091448	0.5851486	0.6616664	0.6240618	NA	NA
Cluster-5283.7138	myosin heavy chain, non-muscle isoform X1	0.4088282	0.4945993	0.5714918	0.4977869	NA	NA
Cluster-5283.7138	myosin heavy chain, non-muscle isoform X4	0.4088282	0.4945993	0.5714918	0.4977869	NA	NA
Cluster-5283.1948	probable 2-oxoglutarate dehydrogenase E1 component DHKTD1, mitochondrial	0.4086804	NA	NA	NA	NA	NA
Cluster-281.0	predicted protein	0.4079243	0.5545656	0.6036338	0.5978387	NA	NA
Cluster-5283.765	integrin alpha-4	0.4070706	0.4194893	0.4696509	NA	NA	NA
Cluster-5283.1756	receptor-type tyrosine-protein phosphatase C isoform X1	0.4067967	0.5335603	0.5894543	0.4643350	NA	NA
Cluster-7099.0	collagen alpha-1(I) chain-like isoform X4	0.4061226	0.5064063	0.5732697	0.5542891	NA	NA
Cluster-7099.0	collagen alpha-1(I) chain-like isoform X5	0.4061226	0.5064063	0.5732697	0.5542891	NA	NA
Cluster-4210.0	serine hydroxymethyltransferase, cytosolic-like	0.4061178	0.5542600	0.6041619	0.5474605	NA	NA
Cluster-5283.2557	ectonucleoside triphosphate diphosphohydrolase 8 isoform X4	0.4057635	0.4121116	0.4823309	0.4917850	NA	NA
Cluster-5303.2	E3 SUMO-protein ligase RanBP2-like isoform X1	0.4056205	0.6210427	0.6878701	0.6062516	NA	NA
Cluster-3047.0	apoptosis inhibitor 5-like	0.4046946	0.4332673	0.5022241	NA	NA	NA
Cluster-1876.1	G2/mitotic-specific cyclin-B3-like	0.4046854	0.4847092	0.5597549	0.5200708	NA	NA
Cluster-6628.2	RNA-binding protein 25	0.4041850	NA	NA	NA	NA	NA
Cluster-6628.2	RNA-binding protein 25-like isoform X1	0.4041850	NA	NA	NA	NA	NA
Cluster-5283.6317	hypothetical protein OCBIM_22018450mg	0.4029723	0.4067886	0.4950878	0.5543326	NA	NA
Cluster-351.0	DNA replication factor Cdt1-like	0.4029504	0.6261815	0.6916195	0.6260415	NA	NA
Cluster-7340.0	rhotekin-like isoform X2	0.4028565	0.4749544	0.5585382	0.5372194	NA	NA
Cluster-5283.9398	78 kDa glucose-regulated protein	0.4004436	0.5901399	0.6544263	0.5480038	NA	NA
Cluster-5283.9398	endoplasmic reticulum chaperone BiP	0.4004436	0.5901399	0.6544263	0.5480038	NA	NA
Cluster-5283.2729	hypothetical protein OCBIM_22018450mg	0.4002355	0.5781054	0.6445750	0.6352411	NA	NA
Cluster-5283.12041	kelch-like protein 2	-0.4059537	NA	NA	NA	NA	NA
Cluster-5738.0	inhibitor of growth protein 3-like	-0.4072543	-0.4790698	NA	NA	NA	NA
Cluster-5283.9489	protocadherin-1 isoform X1	-0.4121199	-0.5113639	NA	NA	NA	NA
Cluster-5283.11346	dystroglycan-like	-0.4129503	-0.4445432	NA	NA	NA	NA
Cluster-5283.5399	—NA—	-0.4132802	NA	-0.4019131	NA	NA	NA
Cluster-5283.1831	zinc finger protein 704-like	-0.4140061	NA	NA	NA	NA	NA
Cluster-5738.4	inhibitor of growth protein 3-like	-0.4141309	-0.4882775	NA	NA	NA	NA
Cluster-5124.1	ATP-binding cassette sub-family D member 2-like	-0.4157442	-0.4541987	NA	NA	NA	NA
Cluster-6699.0	protein lin-37 homolog	-0.4157659	-0.4665888	NA	NA	NA	NA
Cluster-4273.2	breast cancer anti-estrogen resistance protein 3 homolog isoform X10	-0.4164102	NA	NA	NA	0.4550852	NA
Cluster-5283.7294	sodium-dependent phosphate transporter 1-A-like isoform X1	-0.4167145	-0.4051805	NA	NA	NA	NA
Cluster-5283.7294	Sodium-dependent phosphate transporter 1-B	-0.4167145	-0.4051805	NA	NA	NA	NA
Cluster-3640.0	vacuolar protein sorting-associated protein 11 homolog	-0.4168875	NA	NA	NA	NA	NA
Cluster-2682.0	SH3 domain-binding protein 5-like	-0.4181318	-0.5613283	NA	NA	NA	NA
Cluster-7117.3	ADP-ribosylation factor-binding protein GGA	-0.4191597	NA	NA	NA	NA	NA
Cluster-7117.3	ADP-ribosylation factor-binding protein GGA1-like	-0.4191597	NA	NA	NA	NA	NA
Cluster-6083.1	lactadherin-like isoform X1	-0.4276027	-0.4863402	NA	NA	NA	NA
Cluster-5595.2	transcriptional repressor scratch 2-like	-0.4305969	-0.5514365	NA	NA	NA	NA
Cluster-5845.1	protein ABHD13-like	-0.4327278	-0.4100081	NA	NA	NA	NA
Cluster-3761.3	deaminated glutathione amidase-like	-0.4359909	-0.4838543	NA	NA	NA	NA
Cluster-5283.1947	probable 2-oxoglutarate dehydrogenase E1 component DHKTD1, mitochondrial	-0.4384041	-0.4161433	NA	NA	NA	NA
Cluster-5283.11448	derlin-1-like	-0.4397138	NA	NA	NA	0.4699032	NA
Cluster-5283.5405	—NA—	-0.4401043	NA	NA	NA	NA	NA
Cluster-5407.2	TAF6-like RNA polymerase II p300/CBP-associated factor-associated factor 65 kDa subunit 6L	-0.4407740	-0.4302824	NA	NA	NA	NA
Cluster-4869.1	BTG1 protein	-0.4434387	-0.4234482	NA	NA	NA	NA
Cluster-7069.0	Transmembrane and coiled-coil domains protein 1,Transmembrane and coiled-coil domains protein 2	-0.4438194	-0.4001737	NA	NA	NA	NA
Cluster-5283.267	protein phosphatase 1 regulatory subunit 37-like isoform X1	-0.4450159	NA	NA	NA	NA	NA
Cluster-5283.12040	kelch-like protein 2	-0.4467670	NA	NA	NA	NA	NA
Cluster-5595.3	transcriptional repressor scratch 2-like	-0.4492102	-0.4646579	NA	NA	NA	NA
Cluster-2779.1	small integral membrane protein 29-like isoform X1	-0.4493128	NA	-0.4090608	NA	NA	NA
Cluster-5640.3	serine-rich adhesin for platelets-like	-0.4576066	-0.4923835	NA	NA	NA	NA
Cluster-1816.1	zinc finger protein 40 isoform X1	-0.4580972	NA	NA	NA	NA	NA
Cluster-5883.0	Hypothetical predicted protein	-0.4622240	-0.4586667	NA	NA	NA	NA
Cluster-5283.11440	Calmodulin	-0.4624930	-0.4774220	NA	NA	NA	NA
Cluster-4098.2	microtubule-associated protein futsch	-0.4632491	NA	-0.4023118	NA	NA	NA
Cluster-5283.9550	serine/threonine-protein kinase 10-like isoform X2	-0.4635737	-0.4789373	NA	NA	NA	NA
Cluster-5283.5401	—NA—	-0.4694759	NA	NA	NA	NA	NA
Cluster-5283.4457	proactivator polypeptide-like	-0.4730449	NA	NA	NA	0.4391044	NA
Cluster-5283.4457	prosaposin-like isoform X1	-0.4730449	NA	NA	NA	0.4391044	NA
Cluster-5283.4457	prosaposin-like isoform X2	-0.4730449	NA	NA	NA	0.4391044	NA
Cluster-6523.3	diacylglycerol kinase theta-like isoform X1	-0.4810414	-0.4174170	NA	NA	NA	NA
Cluster-5283.5406	—NA—	-0.4891360	NA	NA	NA	NA	NA
Cluster-5283.8329	Hypothetical predicted protein	-0.4949699	NA	NA	NA	NA	NA
Cluster-4907.1	alpha-catulin isoform X1	-0.5008865	NA	NA	NA	NA	NA
Cluster-5283.5402	—NA—	-0.5019657	NA	NA	NA	NA	NA
Cluster-4203.2	2-(3-amino-3-carboxypropyl)histidine synthase subunit 2-like	-0.5093102	-0.4282713	NA	NA	NA	NA
Cluster-4508.1	succinate-semialdehyde dehydrogenase, mitochondrial isoform X2	-0.5185147	-0.4798472	NA	NA	NA	NA
Cluster-5283.11220	putative uncharacterized transposon-derived protein F52C9.6	-0.5290094	-0.4325657	NA	NA	NA	NA
Cluster-5283.7470	cell wall protein RBR3 isoform X3	-0.5315182	NA	NA	NA	NA	NA
Cluster-5283.5407	—NA—	-0.5426610	-0.4095644	NA	NA	NA	NA
Cluster-5283.11225	—NA—	-0.5621948	NA	NA	NA	NA	NA
Cluster-5283.5412	—NA—	-0.5696314	NA	-0.4362320	NA	NA	NA
Cluster-5283.5404	—NA—	-0.5702464	NA	NA	NA	NA	NA
Cluster-5283.5403	—NA—	-0.5705385	NA	NA	NA	NA	NA
Cluster-5283.11449	derlin-1-like isoform X1	-0.5712288	NA	NA	NA	NA	0.4653392
Cluster-5283.5414	—NA—	-0.5847724	-0.4372945	NA	NA	NA	NA
Cluster-5283.5400	—NA—	-0.5855474	NA	-0.4262392	NA	NA	NA
Cluster-5283.5411	—NA—	-0.5875239	NA	NA	NA	NA	NA
Cluster-5283.8328	universal stress protein A-like protein isoform X5	-0.5894106	-0.4495092	NA	NA	NA	NA
Cluster-5283.11862	—NA—	-0.6092525	-0.4458292	NA	NA	NA	NA
Cluster-5283.5409	—NA—	-0.6199384	-0.4241966	NA	NA	NA	NA
Cluster-5283.11221	putative uncharacterized transposon-derived protein F52C9.6	-0.6296988	-0.4236826	NA	NA	NA	NA
Cluster-5283.5410	—NA—	-0.6299001	-0.4185640	NA	NA	NA	NA
Cluster-5283.5413	—NA—	-0.6316107	-0.4270238	NA	NA	NA	NA
Cluster-5283.8330	universal stress protein A-like protein isoform X5	-0.6338910	-0.4117872	NA	NA	NA	NA
Cluster-7041.4	U5 small nuclear ribonucleoprotein 200 kDa helicase-like	-0.6455209	NA	NA	NA	0.4930253	NA

All eyes-specific hub genes for CO2 showing which are also hub genes for a 2nd trait

hub_genes_eyes_specific_annotated_all_CO2_2ndtrait %>%
  dplyr::select(gene, Sequence.Description, contains('GS')) %>%
  arrange(-GS_CO2_eyes, -GS_Active_time_s_eyes, -GS_Dist_eyes, -GS_Vel_eyes, -GS_Soft_touch_no._eyes) %>%
  kable(caption = "All hub genes for CO2 only in the eyes, showing any genes that are also a hub gene for another trait in the eyes")

All hub genes for CO2 only in the eyes, showing any genes that are also a hub gene for another trait in the eyes
gene	Sequence.Description	GS_CO2_eyes	GS_Active_time_s_eyes	GS_Dist_eyes	GS_Vel_eyes	GS_Soft_touch_eyes	GS_Soft_touch_no._eyes
Cluster-5283.8078	protein D2-like	0.5614226	NA	NA	NA	NA	NA
Cluster-5283.660	protein crumbs-like isoform X1	0.5597179	-0.4049612	NA	NA	NA	NA
Cluster-5283.659	protein crumbs-like isoform X1	0.5507875	-0.5400349	-0.4735232	NA	NA	NA
Cluster-5283.661	protein crumbs-like isoform X2	0.5282035	-0.4396015	NA	NA	NA	NA
Cluster-5283.995	neuronal acetylcholine receptor subunit alpha-10-like isoform X1	0.4846243	-0.5622183	-0.5003011	-0.4577785	NA	NA
Cluster-7432.3	flotillin-1 isoform X1	0.4666175	NA	NA	NA	NA	NA
Cluster-7432.3	flotillin-1 isoform X2	0.4666175	NA	NA	NA	NA	NA
Cluster-7432.3	flotillin-1 isoform X4	0.4666175	NA	NA	NA	NA	NA
Cluster-5283.8076	protein D2-like	0.4540652	-0.4722269	-0.4156060	NA	NA	NA
Cluster-5283.994	neuronal acetylcholine receptor subunit alpha-10-like isoform X1	0.4227888	-0.5064105	-0.4338126	NA	NA	NA
Cluster-5283.3666	protein D2-like	-0.4700909	NA	NA	NA	NA	NA
Cluster-5283.8075	protein D2-like	-0.5421762	NA	NA	NA	NA	NA
Cluster-5283.4627	Hypothetical predicted protein	-0.6905153	NA	NA	NA	NA	NA

Functional enrichment analysis of hub genes shared by CO2 and a 2nd trait, specific to the CNS

# All genes included in WGCNA, remove 'Cluster-' as have problems with this
datExpr_renamed <- datExpr_CNS %>%
  rename_with(~ gsub("Cluster-", "", .x))
CNS_all_genes <- colnames(datExpr_renamed)

# CO2 hub genes that are also a hub gene for a 2nd trait, specific to CNS
hub_genes_CNS_specific_CO2_2ndtrait_list <- hub_genes_CNS_specific_annotated_CO2_2ndtrait %>%
  select(gene) %>%
  distinct(gene) %>%
  mutate(gene = gsub("Cluster-", "", gene))
hub_genes_CNS_specific_CO2_2ndtrait_list <- as.character(hub_genes_CNS_specific_CO2_2ndtrait_list$gene)

# Hub genes shared by CO2 and no. soft mirror touches only in the CNS
hub_genes_CNS_specific_CO2_Soft_touch_no._list <- hub_genes_CNS_specific_CO2_Soft_touch_no. %>%
  select(gene) %>%
  distinct(gene) %>%
  mutate(gene = gsub("Cluster-", "", gene))
hub_genes_CNS_specific_CO2_Soft_touch_no._list <- as.character(hub_genes_CNS_specific_CO2_Soft_touch_no._list$gene)
# Hub genes shared by CO2 and at least one of the activity traits only in the CNS
hub_genes_CNS_specific_CO2_1activity_list <- hub_genes_CNS_specific_CO2_1activity %>%
  select(gene) %>%
  distinct(gene) %>%
  mutate(gene = gsub("Cluster-", "", gene))
hub_genes_CNS_specific_CO2_1activity_list <- as.character(hub_genes_CNS_specific_CO2_1activity_list$gene)
# Hub genes shared by CO2 and all 3 of the activity traits only in the CNS
hub_genes_CNS_specific_CO2_activity_list <- hub_genes_CNS_specific_CO2_activity %>%
  select(gene) %>%
  distinct(gene) %>%
  mutate(gene = gsub("Cluster-", "", gene))
hub_genes_CNS_specific_CO2_activity_list <- as.character(hub_genes_CNS_specific_CO2_activity_list$gene)

# All
ORA_hub_genes_CNS_specific_CO2_2ndtrait <- enricher(
  gene = hub_genes_CNS_specific_CO2_2ndtrait_list,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  universe = CNS_all_genes,
  TERM2GENE = term2gene,
  TERM2NAME = term2name)

ORA_hub_genes_CNS_specific_CO2_2ndtrait_summary <- fortify(
  ORA_hub_genes_CNS_specific_CO2_2ndtrait,
  showCategory = 300,
  by = "Count")

# Don't do ORA on CO2 shared with soft touch no. in CNS as only 6 genes

# Hub genes shared by CO2 and at least one of the activity traits only in the CNS
ORA_hub_genes_CNS_specific_CO2_1activity <- enricher(
  gene = hub_genes_CNS_specific_CO2_1activity_list,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  universe = CNS_all_genes,
  TERM2GENE = term2gene,
  TERM2NAME = term2name)

ORA_hub_genes_CNS_specific_CO2_1activity_summary <- fortify(
  ORA_hub_genes_CNS_specific_CO2_1activity,
  showCategory = 300,
  by = "Count")

# Hub genes shared by CO2 and all 3 of the activity traits only in the CNS
ORA_hub_genes_CNS_specific_CO2_activity <- enricher(
  gene = hub_genes_CNS_specific_CO2_activity_list,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  universe = CNS_all_genes,
  TERM2GENE = term2gene,
  TERM2NAME = term2name)

ORA_hub_genes_CNS_specific_CO2_activity_summary <- fortify(
  ORA_hub_genes_CNS_specific_CO2_activity,
  showCategory = 300,
  by = "Count")


save(ORA_hub_genes_CNS_specific_CO2_2ndtrait, ORA_hub_genes_CNS_specific_CO2_2ndtrait_summary, ORA_hub_genes_CNS_specific_CO2_activity, ORA_hub_genes_CNS_specific_CO2_activity_summary, ORA_hub_genes_CNS_specific_CO2_1activity, ORA_hub_genes_CNS_specific_CO2_1activity_summary, file = 'results/ORA_compare_hub_genes_CO2_2ndtrait.RData')

List of significant GO terms

ORA_hub_genes_CNS_specific_CO2_2ndtrait_summary %>%
  dplyr::select(Description, p.adjust) %>%
  kable(caption = "Significant GOs in hub genes shared by CO2 and a 2nd trait and specific to the CNS")

Significant GOs in hub genes shared by CO2 and a 2nd trait and specific to the CNS
	Description	p.adjust
GO:0003756	protein disulfide isomerase activity	0.014175
GO:0030016	myofibril	0.015382
GO:0005788	endoplasmic reticulum lumen	0.017574
GO:0019901	protein kinase binding	0.017574

ORA_hub_genes_CNS_specific_CO2_1activity_summary %>%
  dplyr::select(Description, p.adjust) %>%
  kable(caption = "Significant GOs in hub genes shared by CO2 and one or more activity traits (active time, distance, speed) and specific to the CNS")

Significant GOs in hub genes shared by CO2 and one or more activity traits (active time, distance, speed) and specific to the CNS
	Description	p.adjust
GO:0003756	protein disulfide isomerase activity	0.0107071
GO:0030016	myofibril	0.0116344
GO:0005788	endoplasmic reticulum lumen	0.0123758
GO:0019901	protein kinase binding	0.0123758

ORA_hub_genes_CNS_specific_CO2_activity_summary %>%
  dplyr::select(Description, p.adjust) %>%
  kable(caption = "Significant GOs in hub genes shared by CO2, active time, distance and speed, and specific to the CNS")

Significant GOs in hub genes shared by CO2, active time, distance and speed, and specific to the CNS
	Description	p.adjust
GO:0003756	protein disulfide isomerase activity	0.0011841
GO:0030016	myofibril	0.0012955
GO:0005788	endoplasmic reticulum lumen	0.0016003
GO:0051015	actin filament binding	0.0062756
GO:0019901	protein kinase binding	0.0125231
GO:0032587	ruffle membrane	0.0125231
GO:0043138	3’-5’ DNA helicase activity	0.0125231
GO:0031047	gene silencing by RNA	0.0133598
GO:0005694	chromosome	0.0140569
GO:0016459	myosin complex	0.0204783
GO:0003774	motor activity	0.0342939
GO:0009986	cell surface	0.0364108
GO:0005643	nuclear pore	0.0367208

Dot plot of significant GO terms

# Plot all GO terms for CNS-specific hub genes shared by CO2 and 1 or more activity traits, and hub genes shared by CO2 and all three activity traits on one graph.

# CO2 + 1 or more and CO2 + all three share same y axis on dotplot - want to order by p.adjust of CO2 + 1 or more, and show overlaps of GO terms 
# Below method so GO terms are not twice on the y axis
activity_1 <- ORA_hub_genes_CNS_specific_CO2_1activity_summary %>%
  mutate(hub_genes = "activity_1")
activity_3 <- ORA_hub_genes_CNS_specific_CO2_activity_summary %>%
  mutate(hub_genes = "activity_3")

activity_1_p.adjust <- activity_1 %>%
  dplyr::select(ID, Description, p.adjust) %>%
  rename_with(~paste0(., "_activity_1"), -c("ID", "Description"))
activity_3_p.adjust <- activity_3 %>%
  dplyr::select(ID, Description, p.adjust) %>% 
  rename_with(~paste0(., "_activity_3"), -c("ID", "Description"))
df_p.adjust <- activity_1_p.adjust %>%
  full_join(activity_3_p.adjust) %>%
  ungroup() %>% 
  arrange(p.adjust_activity_3, p.adjust_activity_1) %>%
  dplyr::mutate(order_p.adjust = as.factor(row_number())) %>% 
  pivot_longer(c(p.adjust_activity_3, p.adjust_activity_1), names_to = "hub_genes", values_to = "p.adjust") %>% 
  mutate(hub_genes = gsub("p.adjust_", "", hub_genes))
  #arrange(p.adjust)

activity_1_Count <- activity_1 %>%
  dplyr::select(ID, Description, Count) %>%
  rename_with(~paste0(., "_activity_1"), -c("ID", "Description"))
activity_3_Count <- activity_3 %>%
  dplyr::select(ID, Description, Count) %>% 
  rename_with(~paste0(., "_activity_3"), -c("ID", "Description"))
df_Count <- activity_1_Count %>%
  full_join(activity_3_Count) %>%
  ungroup() %>% 
  arrange(Count_activity_3, Count_activity_1) %>%
  dplyr::mutate(order_Count = as.factor(row_number())) %>% 
  pivot_longer(c(Count_activity_3, Count_activity_1), names_to = "hub_genes", values_to = "Count") %>% 
  mutate(hub_genes = gsub("Count_", "", hub_genes))

activity_1_GeneRatio <- activity_1 %>%
  dplyr::select(ID, Description, GeneRatio) %>%
  rename_with(~paste0(., "_activity_1"), -c("ID", "Description"))
activity_3_GeneRatio <- activity_3 %>%
  dplyr::select(ID, Description, GeneRatio) %>% 
  rename_with(~paste0(., "_activity_3"), -c("ID", "Description"))
df_GeneRatio <- activity_1_GeneRatio %>%
  full_join(activity_3_GeneRatio) %>%
  ungroup() %>% 
  arrange(GeneRatio_activity_3, GeneRatio_activity_1) %>%
  dplyr::mutate(order_GeneRatio = as.factor(row_number())) %>% 
  pivot_longer(c(GeneRatio_activity_3, GeneRatio_activity_1), names_to = "hub_genes", values_to = "GeneRatio") %>% 
  mutate(hub_genes = gsub("GeneRatio_", "", hub_genes))

df_all <- df_p.adjust %>%
  full_join(df_Count) %>%
  full_join(df_GeneRatio) %>%
  dplyr::mutate(Description_wrapped = str_wrap(Description, width = 50))


(figure_ORA <- (ggplot(df_all, aes(x = hub_genes, y = order_p.adjust, colour = p.adjust, size = GeneRatio)) +
                  geom_point() +
                  scale_y_discrete(
                    breaks = df_all$order_p.adjust,
                    labels = df_all$Description_wrapped) +
                  scale_colour_gradient(low = '#4040FF', high = '#E3E3E3',
                                        name = "padj",
                                        guide = guide_colourbar(barwidth = 0.5,
                                                                barheight = 8,
                                                                ticks = FALSE)) +
                  scale_size(name = "Gene Ratio") +
                  labs(x = "", y = "") +
                  scale_x_discrete(labels = c(expression('CO'[2]*' + 1 or more activity traits'), expression('CO'[2]*' + all 3 activity traits'))) +
                  theme(panel.border = element_rect(fill = NA, size = 0.5),
                        panel.grid.major.x = element_blank(),
                        panel.grid.major.y = element_line(colour = 'grey', size = 0.2),
                        panel.background = element_blank(),
                        strip.background = element_rect(fill = NA, size = 0.5, colour = 'black'),
                        axis.text.x = element_text(size = 8, colour = 'black', 
                                                   family = "Arial"),
                        axis.text.y = element_text(size = 8, colour = 'black', 
                                                   family = "Arial"),
                        legend.title = element_text(size = 10, colour = 'black', 
                                                    family = "Arial"),
                        legend.text = element_text(size = 8, colour = 'black', 
                                                   family = "Arial"),
                        legend.key = element_blank()) )
)

Plot expression of hub genes

metadata_CNS <- datTraits_CNS %>%
  mutate(CO2 = gsub('0', 'Ambient', CO2),
         CO2 = gsub('1', 'Elevated', CO2),
         CO2 = as.factor(CO2))
save(metadata_CNS, file = 'data/metadata_CNS.RData')

metadata_eyes <- datTraits_eyes %>%
  mutate(CO2 = gsub('0', 'Ambient', CO2),
         CO2 = gsub('1', 'Elevated', CO2),
         CO2 = as.factor(CO2))
save(metadata_eyes, file = 'data/metadata_eyes.RData')

CNS-specific hub genes

CO₂ treatment

a <- hub_genes_CNS_specific_annotated_CO2 %>%
  distinct(gene, .keep_all = TRUE)

df <- normalised_counts_CNS_tb %>%
  full_join(a, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_CNS %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

(plot <- ggplot(df) +
  geom_boxplot(aes(x = Sequence.Description, y = normalised_counts, color = CO2)) +
  scale_y_log10() +
  xlab(paste("CNS-specific hub genes for CO2 treatment")) +
  ylab("log10 Normalized Counts") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        axis.text = element_text(size = 12)) +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_x_discrete(labels = wrap_format(30)) +
  coord_flip()
)

Shared by CO₂ treatment and active time

a <- hub_genes_CNS_specific_annotated_all_CO2_2ndtrait %>%
  select(gene, Sequence.Name, Sequence.Description, GS_Active_time_s_CNS) %>%
  drop_na() %>%
  distinct(gene, .keep_all = TRUE)

df1 <- a %>%
  dplyr::arrange(-across(starts_with("GS"))) %>%
  mutate(Sequence.Description_wrapped = str_wrap(Sequence.Description, width = 30))

df <- normalised_counts_CNS_tb %>%
  full_join(df1, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_CNS %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

gene_names = list()
gene_names = df1$Sequence.Description_wrapped
names(gene_names) = df1$gene

(plot <- ggplot(df, aes(x = Active_time_s, y = normalised_counts)) +
  geom_point() +
  xlab("Active time (s)") +
  ylab("log10 Normalized Counts") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.text = element_text(size = 12)) +
  theme(plot.title = element_text(hjust = 0.5)) +
  facet_wrap( ~ gene, scales = "free_y", ncol = 5,
              labeller = as_labeller(gene_names)) +
  geom_smooth(method='lm', se = FALSE, formula = y ~ x, aes(x = Active_time_s, y = normalised_counts)) +
  stat_cor(label.y.npc="top", label.x.npc = "left",
           aes(label = paste(..r.label..)))
)

## Warning: The dot-dot notation (`..r.label..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(r.label)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Shared by CO₂ treatment and distance

a <- hub_genes_CNS_specific_annotated_all_CO2_2ndtrait %>%
  select(gene, Sequence.Name, Sequence.Description, GS_Dist_CNS) %>%
  drop_na() %>%
  distinct(gene, .keep_all = TRUE)

df1 <- a %>%
  dplyr::arrange(-across(starts_with("GS"))) %>%
  mutate(Sequence.Description_wrapped = str_wrap(Sequence.Description, width = 30))

df <- normalised_counts_CNS_tb %>%
  full_join(df1, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_CNS %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

gene_names = list()
gene_names = df1$Sequence.Description_wrapped
names(gene_names) = df1$gene

(plot <- ggplot(df, aes(x = Dist, y = normalised_counts)) +
    geom_point() +
    xlab("Distance moved (cm)") +
    ylab("log10 Normalized Counts") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    facet_wrap( ~ gene, scales = "free_y", ncol = 5,
                labeller = as_labeller(gene_names)) +
    geom_smooth(method='lm', se = FALSE, formula = y ~ x, aes(x = Dist, y = normalised_counts)) +
    stat_cor(label.y.npc="top", label.x.npc = "left",
             aes(label = paste(..r.label..)))
)

Shared by CO₂ treatment and speed

a <- hub_genes_CNS_specific_annotated_all_CO2_2ndtrait %>%
  select(gene, Sequence.Name, Sequence.Description, GS_Vel_CNS) %>%
  drop_na() %>%
  distinct(gene, .keep_all = TRUE)

df1 <- a %>%
  dplyr::arrange(-across(starts_with("GS"))) %>%
  mutate(Sequence.Description_wrapped = str_wrap(Sequence.Description, width = 30))

df <- normalised_counts_CNS_tb %>%
  full_join(df1, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_CNS %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

gene_names = list()
gene_names = df1$Sequence.Description_wrapped
names(gene_names) = df1$gene

(plot <- ggplot(df, aes(x = Vel, y = normalised_counts)) +
    geom_point() +
    xlab("Average speed (cm/s)") +
    ylab("log10 Normalized Counts") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    facet_wrap( ~ gene, scales = "free_y", ncol = 5,
                labeller = as_labeller(gene_names)) +
    geom_smooth(method='lm', se = FALSE, formula = y ~ x, aes(x = Vel, y = normalised_counts)) +
    stat_cor(label.y.npc="top", label.x.npc = "left",
             aes(label = paste(..r.label..)))
)

Shared by CO₂ treatment and no. of exploratory interactions

a <- hub_genes_CNS_specific_annotated_all_CO2_2ndtrait %>%
  select(gene, Sequence.Name, Sequence.Description, GS_Soft_touch_no._CNS) %>%
  drop_na() %>%
  distinct(gene, .keep_all = TRUE)

df1 <- a %>%
  dplyr::arrange(-across(starts_with("GS"))) %>%
  mutate(Sequence.Description_wrapped = str_wrap(Sequence.Description, width = 30))

df <- normalised_counts_CNS_tb %>%
  full_join(df1, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_CNS %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

gene_names = list()
gene_names = df1$Sequence.Description_wrapped
names(gene_names) = df1$gene

(plot <- ggplot(df, aes(x = Soft_touch_no., y = normalised_counts)) +
    geom_point() +
    xlab("No. soft mirror touches") +
    ylab("log10 Normalized Counts") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    facet_wrap( ~ gene, scales = "free_y", ncol = 5,
                labeller = as_labeller(gene_names)) +
    geom_smooth(method='lm', se = FALSE, formula = y ~ x, aes(x = Soft_touch_no., y = normalised_counts)) +
    stat_cor(label.y.npc="top", label.x.npc = "left",
             aes(label = paste(..r.label..)))
)

Shared by CO₂ treatment and exploratory interaction?

a <- hub_genes_CNS_specific_annotated_all_CO2_2ndtrait %>%
  select(gene, Sequence.Name, Sequence.Description, GS_Soft_touch_CNS) %>%
  drop_na() %>%
  distinct(gene, .keep_all = TRUE)

df <- normalised_counts_CNS_tb %>%
  full_join(a, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_CNS %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df) %>%
  mutate(Soft_touch = as.factor(Soft_touch))

(plot <- ggplot(df) +
    geom_boxplot(aes(x = Sequence.Description, y = normalised_counts, color = Soft_touch)) +
    scale_y_log10() +
    xlab(paste("CNS-specific hub genes for CO2 treatment and whether squid touches mirror softly")) +
    ylab("log10 Normalized Counts") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    scale_x_discrete(labels = wrap_format(30)) +
    coord_flip()
)

Eyes-specific hub genes

CO₂ treatment

a <- hub_genes_eyes_specific_annotated_CO2 %>%
  distinct(gene, .keep_all = TRUE)

df <- normalised_counts_eyes_tb %>%
  full_join(a, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_eyes %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

(plot <- ggplot(df) +
    geom_boxplot(aes(x = Sequence.Description, y = normalised_counts, color = CO2)) +
    scale_y_log10() +
    xlab(paste("eyes-specific hub genes for CO2 treatment")) +
    ylab("log10 Normalized Counts") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    scale_x_discrete(labels = wrap_format(30)) +
    coord_flip()
)

Shared by CO₂ treatment and active time

a <- hub_genes_eyes_specific_annotated_all_CO2_2ndtrait %>%
  select(gene, Sequence.Name, Sequence.Description, GS_Active_time_s_eyes) %>%
  drop_na() %>%
  distinct(gene, .keep_all = TRUE)

df1 <- a %>%
  dplyr::arrange(-across(starts_with("GS"))) %>%
  mutate(Sequence.Description_wrapped = str_wrap(Sequence.Description, width = 30))

df <- normalised_counts_eyes_tb %>%
  full_join(df1, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_eyes %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

gene_names = list()
gene_names = df1$Sequence.Description_wrapped
names(gene_names) = df1$gene

(plot <- ggplot(df, aes(x = Active_time_s, y = normalised_counts)) +
    geom_point() +
    xlab("Distance moved (cm)") +
    ylab("log10 Normalized Counts") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    facet_wrap( ~ gene, scales = "free_y", ncol = 5,
                labeller = as_labeller(gene_names)) +
    geom_smooth(method='lm', se = FALSE, formula = y ~ x, aes(x = Active_time_s, y = normalised_counts)) +
    stat_cor(label.y.npc="top", label.x.npc = "left",
             aes(label = paste(..r.label..)))
)

Shared by CO₂ treatment and distance

a <- hub_genes_eyes_specific_annotated_all_CO2_2ndtrait %>%
  select(gene, Sequence.Name, Sequence.Description, GS_Dist_eyes) %>%
  drop_na() %>%
  distinct(gene, .keep_all = TRUE)

df1 <- a %>%
  dplyr::arrange(-across(starts_with("GS"))) %>%
  mutate(Sequence.Description_wrapped = str_wrap(Sequence.Description, width = 30))

df <- normalised_counts_eyes_tb %>%
  full_join(df1, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_eyes %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

gene_names = list()
gene_names = df1$Sequence.Description_wrapped
names(gene_names) = df1$gene

(plot <- ggplot(df, aes(x = Dist, y = normalised_counts)) +
    geom_point() +
    xlab("Distance moved (cm)") +
    ylab("log10 Normalized Counts") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    facet_wrap( ~ gene, scales = "free_y", ncol = 5,
                labeller = as_labeller(gene_names)) +
    geom_smooth(method='lm', se = FALSE, formula = y ~ x, aes(x = Dist, y = normalised_counts)) +
    stat_cor(label.y.npc="top", label.x.npc = "left",
             aes(label = paste(..r.label..)))
)

Shared by CO₂ treatment and speed

a <- hub_genes_eyes_specific_annotated_all_CO2_2ndtrait %>%
  select(gene, Sequence.Name, Sequence.Description, GS_Vel_eyes) %>%
  drop_na() %>%
  distinct(gene, .keep_all = TRUE)

df1 <- a %>%
  dplyr::arrange(-across(starts_with("GS"))) %>%
  mutate(Sequence.Description_wrapped = str_wrap(Sequence.Description, width = 30))

df <- normalised_counts_eyes_tb %>%
  full_join(df1, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_eyes %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

gene_names = list()
gene_names = df1$Sequence.Description_wrapped
names(gene_names) = df1$gene

(plot <- ggplot(df, aes(x = Vel, y = normalised_counts)) +
    geom_point() +
    xlab("Average speed (cm/s)") +
    ylab("log10 Normalized Counts") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    facet_wrap( ~ gene, scales = "free_y", ncol = 5,
                labeller = as_labeller(gene_names)) +
    geom_smooth(method='lm', se = FALSE, formula = y ~ x, aes(x = Vel, y = normalised_counts)) +
    stat_cor(label.y.npc="top", label.x.npc = "left",
             aes(label = paste(..r.label..)))
)

Hub genes in both tissues

CO₂ treatment

# Expression in CNS
a <- hub_genes_CNS_eyes_annotated_all_CO2_2ndtrait %>%
  distinct(gene, .keep_all = TRUE) %>%
  select(Sequence.Name, gene, Sequence.Description, GS_CO2_CNS)

df <- normalised_counts_CNS_tb %>%
  full_join(a, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_CNS %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

(plot <- ggplot(df) +
    geom_boxplot(aes(x = Sequence.Description, y = normalised_counts, color = CO2)) +
    scale_y_log10() +
    xlab(paste("Hub genes for CO2 treatment in both tissues")) +
    ylab("log10 Normalized Counts in CNS") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    scale_x_discrete(labels = wrap_format(30)) +
    coord_flip()
)

# Expression in eyes
a <- hub_genes_CNS_eyes_annotated_all_CO2_2ndtrait %>%
  distinct(gene, .keep_all = TRUE) %>%
  select(Sequence.Name, gene, Sequence.Description, GS_CO2_eyes)

df <- normalised_counts_eyes_tb %>%
  full_join(a, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_eyes %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

(plot <- ggplot(df) +
    geom_boxplot(aes(x = Sequence.Description, y = normalised_counts, color = CO2)) +
    scale_y_log10() +
    xlab(paste("Hub genes for CO2 treatment in both tissues")) +
    ylab("log10 Normalized Counts in eyes") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    scale_x_discrete(labels = wrap_format(30)) +
    coord_flip()
)

Shared by CO₂ treatment and active time

# Expression in CNS
a <- hub_genes_CNS_eyes_annotated_all_CO2_2ndtrait %>%
  select(gene, Sequence.Name, Sequence.Description, GS_Active_time_s_CNS) %>%
  drop_na() %>%
  distinct(gene, .keep_all = TRUE)

df1 <- a %>%
  dplyr::arrange(-across(starts_with("GS"))) %>%
  mutate(Sequence.Description_wrapped = str_wrap(Sequence.Description, width = 30))

df <- normalised_counts_CNS_tb %>%
  full_join(df1, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_CNS %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

gene_names = list()
gene_names = df1$Sequence.Description_wrapped
names(gene_names) = df1$gene

(plot <- ggplot(df, aes(x = Active_time_s, y = normalised_counts)) +
    geom_point() +
    xlab("Active time (s)") +
    ylab("log10 Normalized Counts in CNS") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    facet_wrap( ~ gene, scales = "free_y", ncol = 5,
                labeller = as_labeller(gene_names)) +
    geom_smooth(method='lm', se = FALSE, formula = y ~ x, aes(x = Active_time_s, y = normalised_counts)) +
    stat_cor(label.y.npc="top", label.x.npc = "left",
             aes(label = paste(..r.label..)))
)

# Expression in eyes
a <- hub_genes_CNS_eyes_annotated_all_CO2_2ndtrait %>%
  select(gene, Sequence.Name, Sequence.Description, GS_Active_time_s_eyes) %>%
  drop_na() %>%
  distinct(gene, .keep_all = TRUE)

df1 <- a %>%
  dplyr::arrange(-across(starts_with("GS"))) %>%
  mutate(Sequence.Description_wrapped = str_wrap(Sequence.Description, width = 30))

df <- normalised_counts_eyes_tb %>%
  full_join(df1, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_eyes %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

gene_names = list()
gene_names = df1$Sequence.Description_wrapped
names(gene_names) = df1$gene

(plot <- ggplot(df, aes(x = Active_time_s, y = normalised_counts)) +
    geom_point() +
    xlab("Active time (s)") +
    ylab("log10 Normalized Counts in eyes") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    facet_wrap( ~ gene, scales = "free_y", ncol = 5,
                labeller = as_labeller(gene_names)) +
    geom_smooth(method='lm', se = FALSE, formula = y ~ x, aes(x = Active_time_s, y = normalised_counts)) +
    stat_cor(label.y.npc="top", label.x.npc = "left",
             aes(label = paste(..r.label..)))
)

Shared by CO₂ treatment and no. exploratory interactions

# Expression in CNS
a <- hub_genes_CNS_eyes_annotated_all_CO2_2ndtrait %>%
  select(gene, Sequence.Name, Sequence.Description, GS_Soft_touch_no._CNS) %>%
  drop_na() %>%
  distinct(gene, .keep_all = TRUE)
           

df1 <- a %>%
  dplyr::arrange(-across(starts_with("GS"))) %>%
  mutate(Sequence.Description_wrapped = str_wrap(Sequence.Description, width = 30))

df <- normalised_counts_CNS_tb %>%
  full_join(df1, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_CNS_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_CNS %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

gene_names = list()
gene_names = df1$Sequence.Description_wrapped
names(gene_names) = df1$gene

(plot <- ggplot(df, aes(x = Soft_touch_no., y = normalised_counts)) +
    geom_point() +
    xlab("No. soft mirror touches") +
    ylab("log10 Normalized Counts in CNS") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    facet_wrap( ~ gene, scales = "free_y", ncol = 5,
                labeller = as_labeller(gene_names)) +
    geom_smooth(method='lm', se = FALSE, formula = y ~ x, aes(x = Soft_touch_no., y = normalised_counts)) +
    stat_cor(label.y.npc="top", label.x.npc = "left",
             aes(label = paste(..r.label..)))
)

# Expression in eyes
a <- hub_genes_CNS_eyes_annotated_all_CO2_2ndtrait %>%
  select(gene, Sequence.Name, Sequence.Description, GS_Soft_touch_no._eyes) %>%
  drop_na() %>%
  distinct(gene, .keep_all = TRUE)

df1 <- a %>%
  dplyr::arrange(-across(starts_with("GS"))) %>%
  mutate(Sequence.Description_wrapped = str_wrap(Sequence.Description, width = 30))

df <- normalised_counts_eyes_tb %>%
  full_join(df1, by = 'gene') %>%
  drop_na() %>%
  gather(colnames(normalised_counts_eyes_tb)[2:18], key = "Squid", value = "normalised_counts")

df <- metadata_eyes %>%
  rownames_to_column(var = 'Squid') %>%
  inner_join(df)

gene_names = list()
gene_names = df1$Sequence.Description_wrapped
names(gene_names) = df1$gene

(plot <- ggplot(df, aes(x = Soft_touch_no., y = normalised_counts)) +
    geom_point() +
    xlab("No. soft mirror touches") +
    ylab("log10 Normalized Counts in eyes") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.text = element_text(size = 12)) +
    theme(plot.title = element_text(hjust = 0.5)) +
    facet_wrap( ~ gene, scales = "free_y", ncol = 5,
                labeller = as_labeller(gene_names)) +
    geom_smooth(method='lm', se = FALSE, formula = y ~ x, aes(x = Soft_touch_no., y = normalised_counts)) +
    stat_cor(label.y.npc="top", label.x.npc = "left",
             aes(label = paste(..r.label..)))
)

Publication Figures

Figure: Dot plot enriched functions of CNS-specific genes correlated with CO2 treatment and all 3 activity traits

# Plot all GO terms for CNS-specific hub genes positively correlated with CO2 and all three activity traits on one graph.

WGCNA_ORA_results <- ORA_hub_genes_CNS_specific_CO2_activity_summary %>% 
  dplyr::mutate(Description_wrapped = str_wrap(Description, width = 50)) %>%
  dplyr::mutate(X = 'WGCNA') %>% 
  arrange(p.adjust) %>%
  dplyr::mutate(order_p.adjust = as.factor(row_number()))

(figure_WGCNA_ORA_dotplot <- (ggplot(WGCNA_ORA_results, aes(x = X, y = order_p.adjust, colour = p.adjust, size = GeneRatio)) +
                  geom_point() +
                    scale_y_discrete(
                      breaks = WGCNA_ORA_results$order_p.adjust,
                      labels = WGCNA_ORA_results$Description_wrapped) +
                  scale_colour_gradient(low = '#4040FF', high = '#E3E3E3',
                                        name = "padj",
                                        guide = guide_colourbar(barwidth = 0.5,
                                                                barheight = 8,
                                                                ticks = FALSE)) +
                    scale_size(name = "Gene Ratio",
                               limits = c(0.03, 0.063)) +
                  labs(x = "", y = "") +
                    xlab(bquote('Positive correlation with'~CO[2]~'treatment and activity traits')) +
                    scale_x_discrete(labels = '') +
                  theme(panel.border = element_rect(fill = NA, size = 0.5),
                        panel.grid.major.x = element_blank(),
                        panel.grid.major.y = element_line(colour = 'grey', size = 0.2),
                        panel.background = element_blank(),
                        strip.background = element_rect(fill = NA, size = 0.5, colour = 'black'),
                        axis.text.x = element_text(size = 8, colour = 'black'),
                        axis.text.y = element_text(size = 8, colour = 'black'),
                        axis.title.x = element_text(size = 8, colour = 'black'),
                        legend.title = element_text(size = 10, colour = 'black'),
                        legend.text = element_text(size = 8, colour = 'black'),
                        legend.key = element_blank()) )
)

Session Info

Laptop

R version 4.0.4 (2021-02-15) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 10 x64 (build 18363)

Matrix products: default

locale: [1] LC_COLLATE=English_Australia.1252 LC_CTYPE=English_Australia.1252
[3] LC_MONETARY=English_Australia.1252 LC_NUMERIC=C
[5] LC_TIME=English_Australia.1252

attached base packages: [1] grid splines parallel stats4 stats graphics grDevices utils datasets [10] methods base

other attached packages: [1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.5
[4] purrr_0.3.4 readr_1.4.0 tidyr_1.1.3
[7] tibble_3.1.0 tidyverse_1.3.0 splitstackshape_1.4.8
[10] ggbiplot_0.55 scales_1.1.1 plyr_1.8.6
[13] CCA_1.2.1 fields_12.3 viridis_0.5.1
[16] viridisLite_0.3.0 spam_2.6-0 dotCall64_1.0-1
[19] fda_5.1.9 fds_1.8 RCurl_1.98-1.2
[22] rainbow_3.6 pcaPP_1.9-73 MASS_7.3-53
[25] Matrix_1.3-2 GGally_2.1.1 ggpubr_0.4.0
[28] ggplot2_3.3.3 DESeq2_1.30.1 SummarizedExperiment_1.20.0 [31] Biobase_2.50.0 MatrixGenerics_1.2.1 matrixStats_0.58.0
[34] GenomicRanges_1.42.0 GenomeInfoDb_1.26.4 IRanges_2.24.1
[37] S4Vectors_0.28.1 BiocGenerics_0.36.0 knitr_1.31
[40] clusterProfiler_3.18.1 WGCNA_1.70-3 fastcluster_1.1.25
[43] dynamicTreeCut_1.63-1

loaded via a namespace (and not attached): [1] utf8_1.2.1 ks_1.12.0 tidyselect_1.1.0
[4] RSQLite_2.2.4 AnnotationDbi_1.52.0 htmlwidgets_1.5.3
[7] BiocParallel_1.24.1 scatterpie_0.1.5 munsell_0.5.0
[10] codetools_0.2-18 preprocessCore_1.52.1 withr_2.4.1
[13] colorspace_2.0-1 GOSemSim_2.16.1 highr_0.8
[16] rstudioapi_0.13 ggsignif_0.6.1 DOSE_3.16.0
[19] GenomeInfoDbData_1.2.4 polyclip_1.10-0 bit64_4.0.5
[22] farver_2.1.0 downloader_0.4 vctrs_0.3.6
[25] generics_0.1.0 xfun_0.22 R6_2.5.0
[28] doParallel_1.0.16 graphlayouts_0.7.1 locfit_1.5-9.4
[31] bitops_1.0-6 cachem_1.0.4 reshape_0.8.8
[34] fgsea_1.16.0 DelayedArray_0.16.2 assertthat_0.2.1
[37] ggraph_2.0.5 nnet_7.3-15 enrichplot_1.10.2
[40] gtable_0.3.0 tidygraph_1.2.0 rlang_0.4.11
[43] genefilter_1.72.1 rstatix_0.7.0 impute_1.64.0
[46] broom_0.7.5 checkmate_2.0.0 modelr_0.1.8
[49] BiocManager_1.30.10 yaml_2.2.1 reshape2_1.4.4
[52] abind_1.4-5 backports_1.2.1 rsconnect_0.8.16
[55] qvalue_2.22.0 Hmisc_4.5-0 tools_4.0.4
[58] ellipsis_0.3.1 RColorBrewer_1.1-2 Rcpp_1.0.6
[61] base64enc_0.1-3 zlibbioc_1.36.0 rpart_4.1-15
[64] cowplot_1.1.1 haven_2.3.1 ggrepel_0.9.1
[67] cluster_2.1.0 fs_1.5.0 magrittr_2.0.1
[70] data.table_1.14.0 hdrcde_3.4 DO.db_2.9
[73] openxlsx_4.2.3 reprex_1.0.0 mvtnorm_1.1-1
[76] hms_1.0.0 evaluate_0.14 xtable_1.8-4
[79] XML_3.99-0.5 rio_0.5.26 jpeg_0.1-8.1
[82] mclust_5.4.7 readxl_1.3.1 gridExtra_2.3
[85] compiler_4.0.4 maps_3.3.0 KernSmooth_2.23-18
[88] crayon_1.4.1 shadowtext_0.0.7 htmltools_0.5.1.1
[91] Formula_1.2-4 geneplotter_1.68.0 lubridate_1.7.10
[94] DBI_1.1.1 tweenr_1.0.1 dbplyr_2.1.0
[97] car_3.0-10 cli_2.3.1 igraph_1.2.6
[100] pkgconfig_2.0.3 rvcheck_0.1.8 foreign_0.8-81
[103] xml2_1.3.2 foreach_1.5.1 annotate_1.68.0
[106] XVector_0.30.0 rvest_1.0.0 digest_0.6.27
[109] rmarkdown_2.7 cellranger_1.1.0 fastmatch_1.1-0
[112] htmlTable_2.1.0 curl_4.3 jsonlite_1.7.2
[115] lifecycle_1.0.0 carData_3.0-4 fansi_0.4.2
[118] pillar_1.5.1 lattice_0.20-41 fastmap_1.1.0
[121] httr_1.4.2 survival_3.2-7 GO.db_3.12.1
[124] glue_1.4.2 zip_2.1.1 png_0.1-7
[127] iterators_1.0.13 bit_4.0.4 ggforce_0.3.2
[130] stringi_1.5.3 blob_1.2.1 latticeExtra_0.6-29
[133] memoise_2.0.0

HPC

R version 4.0.4 (2021-02-15) Platform: x86_64-pc-linux-gnu (64-bit) Running under: CentOS Linux 8 (Core)

Matrix products: default BLAS/LAPACK: /apps/free81/OpenBLAS.gcc/0.3.9/lib/libopenblasp-r0.3.9.so

locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=en_US.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C

attached base packages: [1] stats graphics grDevices utils datasets methods base

other attached packages: [1] WGCNA_1.70-3 fastcluster_1.1.25 dynamicTreeCut_1.63-1

loaded via a namespace (and not attached): [1] Rcpp_1.0.6 lattice_0.20-41 GO.db_3.12.1 [4] png_0.1-7 digest_0.6.27 foreach_1.5.1 [7] utf8_1.2.1 R6_2.5.0 backports_1.2.1 [10] stats4_4.0.4 RSQLite_2.2.7 ggplot2_3.3.3 [13] pillar_1.6.0 rlang_0.4.11 data.table_1.14.0 [16] rstudioapi_0.13 blob_1.2.1 S4Vectors_0.28.1 [19] rpart_4.1-15 Matrix_1.3-2 preprocessCore_1.52.1 [22] checkmate_2.0.0 splines_4.0.4 stringr_1.4.0 [25] foreign_0.8-81 htmlwidgets_1.5.3 bit_4.0.4 [28] munsell_0.5.0 xfun_0.22 compiler_4.0.4 [31] pkgconfig_2.0.3 BiocGenerics_0.36.1 base64enc_0.1-3 [34] htmltools_0.5.1.1 nnet_7.3-15 tidyselect_1.1.1 [37] tibble_3.1.1 gridExtra_2.3 htmlTable_2.1.0 [40] Hmisc_4.5-0 IRanges_2.24.1 codetools_0.2-18 [43] matrixStats_0.58.0 fansi_0.4.2 crayon_1.4.1 [46] dplyr_1.0.5 grid_4.0.4 gtable_0.3.0 [49] lifecycle_1.0.0 DBI_1.1.1 magrittr_2.0.1 [52] scales_1.1.1 impute_1.64.0 stringi_1.5.3 [55] cachem_1.0.4 doParallel_1.0.16 latticeExtra_0.6-29 [58] ellipsis_0.3.2 generics_0.1.0 vctrs_0.3.8 [61] Formula_1.2-4 RColorBrewer_1.1-2 tools_4.0.4 [64] iterators_1.0.13 bit64_4.0.5 Biobase_2.50.0 [67] glue_1.4.2 purrr_0.3.4 jpeg_0.1-8.1 [70] parallel_4.0.4 fastmap_1.1.0 survival_3.2-7 [73] AnnotationDbi_1.52.0 colorspace_2.0-0 cluster_2.1.0 [76] memoise_2.0.0 knitr_1.33

R code to correlate gene expression with CO2 treatment level and behaviour in the two-toned pygmy squid (Idiosepius pygmaeus) nervous system

Jodi T Thomas

24 May, 2023

Preamble

Central nervous system (CNS)

Set up data

Expression Data

Check for outliers

Gene Outliers

Sample Outliers

Traits data

Save data

Check for any global drivers of expression

Soft Thresholding Power

Choose soft thresholding power

Check soft thresholding power

Network construction and module detection

Merge Modules

Modules pre-merge

Merge modules

Dendrogram before and after merging modules

Save final merged modules

Determining Interesting Modules

Canonical Correlation Analysis

Loadings/cross loadings heatmaps

Biplots

Module Membership vs Gene Significance

CO2

Activity Traits

Time in A

Exploratory interaction?

Aggressive interaction?

No. exploratory interactions

No. aggressive interactions

Final Modules of Interest

Hub genes in modules of interest

Eyes

Set up data

Expression data

Check for outliers

Gene Outliers

Sample Outliers

Traits data

Save data

Check for any global drivers of expression

Soft Thresholding Power

Choose soft thresholding power

Check soft thresholding power

Network construction and module detection

Merge Modules

Modules pre-merge

Merge modules

Dendrogram before and after merging modules

Save final merged modules

Determining Interesting Modules

Canonical Correlation Analysis

Loadings/cross loadings heatmaps

Biplots

Module Membership vs Gene Significance

CO2

Activity Traits

Time in A

Exploratory interaction?

Aggressive interaction?

No. exploratory interactions

No. aggressive interactions

Final Modules of Interest

Hub genes in modules of interest

Comparison of CNS and Eyes Results

Compare CO2 hub genes across tissues

CO2 hub genes in both tissues

CNS-specific CO2 hub genes

Eyes-specific CO2 hub genes

Compare hub genes shared by CO2 and a 2nd trait across tissues

Hub genes for CO2 in both tissues that are also a hub gene for another trait in one or both tissues

All CNS-specific CO2 hub genes, showing which are also hub genes for another trait in the CNS

All eyes-specific hub genes for CO2 showing which are also hub genes for a 2nd trait

Functional enrichment analysis of hub genes shared by CO2 and a 2nd trait, specific to the CNS

List of significant GO terms

Dot plot of significant GO terms

R code to correlate gene expression with CO₂ treatment level and behaviour in the two-toned pygmy squid (Idiosepius pygmaeus) nervous system

CO₂ treatment

Shared by CO₂ treatment and active time

Shared by CO₂ treatment and distance

Shared by CO₂ treatment and speed

Shared by CO₂ treatment and no. of exploratory interactions

Shared by CO₂ treatment and exploratory interaction?

CO₂ treatment

Shared by CO₂ treatment and active time

Shared by CO₂ treatment and distance

Shared by CO₂ treatment and speed

CO₂ treatment

Shared by CO₂ treatment and active time

Shared by CO₂ treatment and no. exploratory interactions