#----- Loading packages -----#
library(ape) #Version 5.4.1
library(phytools) #Version 0.7.70
library(scales) #Version 1.1.1
library(diversitree) #Version 0.9.15
library(coda) #Version 0.19.4
library(dplyr) #Version 1.0.2
library(HDInterval) #Version 0.2.2
library(tidyverse) #Version 1.3.0


#----- Setting WD -----#
# All file paths used later in the script are relative, so they are resolved
# against this directory.
wd <- 'XXX' #Insert your working directory here
setwd(wd)

# Named palette: one hex colour per feeding guild, keyed by the guild's name.
colours <- c("Generalized carnivores"  = "#C74221",
             "Herbivores/Detritivores" = "#8BA370",
             "Mobile invertivores"     = "#FFC857",
             "Omnivores"               = "#C6AE80",
             "Planktivores"            = "#084C61",
             "Sessile invertivores"    = "#32221F")


##### Run simmaps on the consensus family trees, comparing the transitions to
##### planktivory within and outside the IAA

#----- Reading data -----#
# Species-level trait table. Rows without a Trophic_ID are discarded and the
# remaining rows are keyed by species name.
db <- read.csv("Data/Data_final_Siqueira_etal_2020.csv", header = TRUE,
               sep = ",", stringsAsFactors = TRUE)
db <- db[!is.na(db$Trophic_ID), ]
rownames(db) <- db$Species

# Families retained for the analysis.
reef_families <- c("Holocentridae", "Labridae", "Acanthuridae",
                   "Chaetodontidae", "Pomacanthidae", "Lutjanidae",
                   "Serranidae", "Carangidae", "Pomacentridae", "Blenniidae",
                   "Mullidae", "Apogonidae", "Gobiidae")

db_fam <- db[db$Family %in% reef_families, , drop = FALSE]

# Species whose trophic code is "PK".
is_pk <- db_fam$Trophic_ID == "PK"
db_PK <- db_fam[is_pk, ]

# Region table: keyed by species name, after which the first column is dropped.
# NOTE(review): assumes the first column of Data_biogeo.csv is Species - confirm.
db_biogeo <- read.csv("Data/Data_biogeo.csv", header = TRUE, sep = ",",
                      stringsAsFactors = TRUE)
rownames(db_biogeo) <- db_biogeo$Species
db_biogeo <- db_biogeo[, -1]

tree <- read.tree("Data/Reef_fish_all.tacted.tre")

pk_netdiv <- read.csv("Data/NetDiv_PerCell_pk.csv", header = TRUE, sep = ",",
                      stringsAsFactors = TRUE)


#----- Sourcing simmap.musse function -----#
source("Scripts/make.simmap.musse.R")

# Result container: 1000 rows, one NA-filled column per region, to be
# overwritten by the simmap loop.
empty_col <- rep(NA, 1000)
prop_trans_PK <- data.frame(IAA = empty_col, CP = empty_col, WI = empty_col,
                            TEP = empty_col, WA = empty_col, EA = empty_col)


#----- Running simmaps -----#
#Note - This analysis might take a long time to run

# For each of the first six columns of db_biogeo, build 1000 stochastic maps
# for the species flagged 1 in that column and store, per map, the share of
# all mapped transitions that end in planktivory ("PK").
# NOTE(review): results are written to prop_trans_PK by column position, so the
# first six columns of db_biogeo are assumed to be ordered like prop_trans_PK
# (IAA, CP, WI, TEP, WA, EA) - confirm against Data_biogeo.csv.
for (i in 1:6) {
  sub_bg <- db_biogeo[db_biogeo[,i] == 1,]
  sub_troph <- db_fam[rownames(sub_bg),'Trophic_ID']
  
  sub_tree <- keep.tip(tree, rownames(sub_bg))
  
  #----- Preparing data for analysis -----#
  # Guilds absent from this region are dropped before the numeric recoding, so
  # state k corresponds to the k-th level of the *dropped* factor.
  troph_present <- droplevels(sub_troph)
  fish.troph <- as.numeric(troph_present); names(fish.troph) <- rownames(sub_bg)
  
  # State ids as they appear in the mapped trees, and the guild label of each.
  # Both come from the same dropped factor, so they always have equal length
  # (labelling with levels(sub_troph) fails when a guild is missing).
  state_ids <- as.character(sort(unique(fish.troph)))
  state_labels <- levels(troph_present)
  
  # Locate planktivory by name rather than assuming it is the 5th state.
  pk_col <- match("PK", state_labels)
  if (is.na(pk_col)) {
    warning("No 'PK' species in region column ", colnames(db_biogeo)[i],
            "; proportion of transitions to planktivory set to 0", call. = FALSE)
  }
  
  # NOTE(review): sampling.f is the same ratio for every region because both
  # row counts are loop-invariant - confirm this is the intended fraction.
  mtree <- make.simmap.musse(sub_tree,fish.troph,nsim = 1000, sampling.f = nrow(db_biogeo)/nrow(db_fam), Q = "musse") 

  #----- Extracting mean transition values -----#
  # A tree is plotted before markChanges() is called and the device is closed
  # again afterwards.
  cols <- setNames(c("#C74221","#8BA370","#FFC857","#C6AE80","#084C61","#32221F"),sort(unique(fish.troph)))
  plot(mtree[[1]],cols,ftype="i",lwd=1,mar=c(4.1,1.1,1.1,1.1), fsize = 0.001, type = "fan", part = 0.98)
  # lapply() always returns a list; sapply() would collapse the results into a
  # matrix if every simulation happened to have the same number of changes.
  trans_total <- lapply(mtree, markChanges, plot = FALSE)
  dev.off()
  
  lis <- vector("list", length(trans_total))
  prop_PK <- numeric(length(trans_total))
  for (x in seq_along(trans_total)) {
    # Row names have the form "<from>-><to>"; split them into the two states.
    change_names <- rownames(trans_total[[x]])
    from <- factor(sub('-.*', '', change_names), levels = state_ids)
    to <- factor(sub('.*>', '', change_names), levels = state_ids)
    
    # from-by-to transition counts (all zero when a map has no changes).
    Total_matrix <- matrix(as.numeric(table(from, to)), nrow = length(state_ids),
                           dimnames = list(state_labels, state_labels))
    lis[[x]] <- Total_matrix
    
    # Share of all transitions that end in planktivory.
    prop_PK[x] <- if (is.na(pk_col)) 0 else sum(Total_matrix[, pk_col]) / sum(Total_matrix)
  }
  
  prop_trans_PK[,i] <- prop_PK
  
}

#write.csv(prop_trans_PK,'simmap.musse_results.csv')

# Column order used on the x axis of the province plot.
prop_trans_PK <- prop_trans_PK[,c('IAA','CP','WI','TEP','EA','WA')]


#----- Running GeoSSE -----#
#Note - This analysis might take a long time to run

# Region rows for the "PK" species only.
db_biogeo_PK <- db_biogeo[rownames(db_biogeo) %in% rownames(db_PK), ]

# State coding passed to make.geosse():
#   0 = flagged in IAA and in at least one of CP / WI / TEP / WA / EA
#   1 = flagged in IAA but in none of the others
#   2 = everything else
in_iaa <- db_biogeo_PK$IAA == 1
in_other <- db_biogeo_PK$CP == 1 | db_biogeo_PK$WI == 1 |
  db_biogeo_PK$TEP == 1 | db_biogeo_PK$WA == 1 | db_biogeo_PK$EA == 1
data.geosse <- ifelse(in_iaa & in_other, 0, ifelse(in_iaa, 1, 2))
names(data.geosse) <- rownames(db_biogeo_PK)

tree.sub <- keep.tip(tree, names(data.geosse))

# Sampling fraction: tips kept in the tree over the number of rows in db_PK.
frac_sampled <- Ntip(tree.sub) / nrow(db_PK)
mg <- make.geosse(tree.sub, data.geosse, sampling.f = frac_sampled)
pc <- starting.point.geosse(tree.sub, eps = 0.5)

#----- Fitting the model -----#
fit <- find.mle(mg, pc, control = list(maxit = 100000))

p <- coef(fit)

#----- MCMC -----#

prior <- make.prior.exponential(1 / 2)

# Short preliminary chain (100 steps, w = 1), started at the ML estimates.
prelim <- diversitree::mcmc(mg, coef(fit, full = TRUE), nsteps = 100,
                            prior = prior, w = 1, print.every = 10)

# Tuning widths for the main chain: the 5%-95% quantile range of every column
# of the preliminary chain except the first and the last.
par_cols <- 2:(ncol(prelim) - 1)
w <- diff(sapply(prelim[par_cols], quantile, c(0.05, 0.95)))

# Main chain. The original author notes that roughly 5000 steps may be needed
# to reach an effective sample size of 200.
samples <- diversitree::mcmc(mg, coef(fit, full = TRUE), nsteps = 4000,
                             prior = prior, w = w, print.every = 50)

# Removing burnin (the first 400 of the 4000 steps)
burnin <- 400
samples <- samples[(burnin + 1):4000, ]

#write.csv(samples,'samples_geosse.csv')

# Checking convergence: effective sample size and trace of column p
effectiveSize(samples$p)
plot(samples$i, samples$p, type = "l", xlab = "generation", ylab = "log(L)")


#----- Plotting all together - Figure 4 -----#

# Palette generator: cols(n) returns n colours interpolated along this ramp.
cols <- colorRampPalette(c("#67001f", "#b2182b", "#d6604d", "#f4a582",
                           "#fddbc7", "#f7f7f7", "#d1e5f0", "#92c5de",
                           "#4393c3", "#2166ac", "#053061"))

## Plot 1
# Three panels side by side; this first one shows net diversification against
# distance to the IAA.
par(mfrow = c(1, 3), oma = c(0, 0.5, 0, 0))

dist_ticks <- seq(0, 20000, 5000)
div_ticks <- seq(0.06, 0.13, 0.02)

# Empty canvas; axes, box and labels are drawn by hand below.
plot(1, type = "n", xlab = "", ylab = "", xlim = c(0, 20000),
     ylim = c(0.06, 0.13), tcl = -0.3, mgp = c(3, 0.5, 0), las = 1,
     yaxt = "n", xaxt = "n", bty = "n", lwd = 1.2)
box(lwd = 1.4)
axis(1, at = dist_ticks, labels = as.character(dist_ticks), tcl = -0.3,
     mgp = c(3, 0.5, 0), cex.axis = 1.2, las = 1, lwd = 1.4)
mtext(side = 1, text = "Distance to the Indo-Australian Archipelago (km)",
      line = 1.8, cex = 1)
axis(2, at = div_ticks, labels = as.character(div_ticks), tcl = -0.3,
     mgp = c(3, 0.5, 0), cex.axis = 1.2, las = 1, lwd = 1.4)
mtext(side = 2,
      text = bquote('Net diversification (lineages '*Myr^-1*' 150'*km^-2*')'),
      line = 2.6, cex = 1)


# Gamma GLM with log link: netdiv as a function of log10 distance.
mod <- glm(netdiv ~ log10(DistIAA), family = Gamma(link = "log"),
           data = pk_netdiv)

# Deviance-based pseudo R-squared: 1 - residual deviance / null deviance.
summ <- summary(mod)
r_sqr <- 1 - summ$deviance / summ$null.deviance

# Predictions on the link scale over a 200-point distance grid, then
# back-transformed with exp(); LL / UL are fit -/+ 1.96 standard errors.
pred <- data.frame(DistIAA = seq(300, 20000, length.out = 200))
link_pred <- predict(mod, pred, se.fit = TRUE, type = "link")
pred <- cbind(pred, link_pred)
pred <- within(pred, {
  Occ <- exp(fit)
  LL <- exp(fit - 1.96 * se.fit)
  UL <- exp(fit + 1.96 * se.fit)
})

# NOTE(review): colours follow the row order of pk_netdiv, so they track
# distance only if the table is sorted by DistIAA - confirm.
point_cols <- alpha(cols(nrow(pk_netdiv)), 0.3)
points(pk_netdiv$DistIAA, pk_netdiv$netdiv, pch = 16, col = point_cols)

lines(pred$DistIAA, pred[, "Occ"], col = alpha("black", 0.8), lwd = 4)

# Label shows R^2 with two decimals and no leading zero (e.g. ".42").
r_sqr_label <- paste0(".", strsplit(sprintf("%.2f", r_sqr), "\\.")[[1]][2])
text(2500, 0.129, bquote(italic(R)^2 == .(r_sqr_label)), cex = 1.2)

## Plot2
# Per-province distribution of the proportion of transitions to planktivory.
province_pos <- seq(1, 6, 1)
prop_ticks <- seq(0.1, 0.4, 0.1)

# Empty canvas; axes, box and labels are drawn by hand below.
plot(1, type = "n", xlab = "", ylab = "", xlim = c(0.8, 6.3),
     ylim = c(0.1, 0.4), tcl = -0.3, mgp = c(3, 0.5, 0), las = 1,
     yaxt = "n", xaxt = "n", bty = "n", lwd = 1.2)
box(lwd = 1.4)
axis(1, at = province_pos, labels = colnames(prop_trans_PK), tcl = -0.3,
     mgp = c(3, 0.5, 0), cex.axis = 1.2, las = 1, lwd = 1.4)
mtext(side = 1, text = "Biogeographic province", line = 1.8, cex = 1)
axis(2, at = prop_ticks, labels = prop_ticks, tcl = -0.3,
     mgp = c(3, 0.5, 0), cex.axis = 1.2, las = 1, lwd = 1.4)
mtext(side = 2, text = "Proportion of transitions to planktivory",
      line = 2.2, cex = 1)

for (i in seq_len(6)) {
  region_props <- prop_trans_PK[, i]

  # Raw values, jittered horizontally around the province position.
  x_jittered <- jitter(rep(i, length(region_props)), amount = 0.1)
  points(x_jittered, region_props, pch = 16, col = alpha(cols(6)[i], 0.2))

  # Thick grey bar spanning the interquartile range.
  iqr_bounds <- quantile(region_props, probs = c(0.25, 0.75))
  lines(c(i, i), c(iqr_bounds[1], iqr_bounds[2]),
        col = alpha("grey89", 0.7), lwd = 15)

  # Mean marker on top.
  points(i, mean(region_props), pch = 21, bg = "black", cex = 2,
         col = "white")
}


## Plot 3
# Location of the highest point of the kernel density estimate of `s`.
# s: numeric vector passed to density() with its default settings.
# Returns the x grid value at which the estimated density is largest.
# Note: this definition masks base::mode() for the rest of the script.
mode <- function(s) {
  kde <- density(s)
  peak <- which.max(kde$y)
  kde$x[peak]
}

# Posterior densities of the two dispersal parameters sampled by the MCMC.
cols_2 <- c('#CC8828', '#481938')

dens_dA <- density(samples[, 'dA'])

# Empty frame sized for both densities; the outline itself is transparent.
plot(dens_dA, bty = "n", xlim = c(0, 2), ylim = c(0, 6), col = 'transparent',
     xlab = "", ylab = "", yaxt = "n", xaxt = "n", main = "", lwd = 1,
     zero.line = FALSE)
box(lwd = 1.4)
axis(side = 1, at = seq(0, 2, 0.5), lwd = 1.4, cex.axis = 1.2, tcl = -0.3,
     mgp = c(3, 0.5, 0))
mtext(side = 1, text = bquote('Dispersal rate (lineages '*Myr^-1*')'),
      line = 1.9, cex = 1)
mtext(side = 2, text = "Probability density", line = 0.2, cex = 1)

polygon(dens_dA$x, dens_dA$y, col = cols_2[1], lty = 0)

# Vertical positions for the interval bars: 1/3, 2/3 and 3/3 of the bottom
# edge of the plotting region (par("usr")[3]).
z <- seq(0, 1, length.out = 4)[-1] * par("usr")[3]

# dA: highest-density interval as a thick bar, with the mode marked on it.
hdi_dA <- hdi(samples[, 'dA'])
arrows(hdi_dA["lower"], z[1], hdi_dA["upper"], z[1], code = 0, angle = 90,
       col = cols_2[1], lwd = 7)
points(mode(samples[, 'dA']), z[1], col = 'white', pch = 21, bg = 'black',
       cex = 1.5)


# dB: same treatment, drawn over the first density.
dens_dB <- density(samples[, 'dB'])
lines(dens_dB, lwd = 1, col = 'transparent')

polygon(dens_dB$x, dens_dB$y, col = cols_2[2], lty = 0)
hdi_dB <- hdi(samples[, 'dB'])
arrows(hdi_dB["lower"], z[2], hdi_dB["upper"], z[2], code = 0, angle = 90,
       col = cols_2[2], lwd = 7)
points(mode(samples[, 'dB']), z[2], col = 'white', pch = 21, bg = 'black',
       cex = 1.5)

legend('topright', legend = c('From IAA', 'Into IAA'), fill = cols_2,
       border = 'transparent', bty = 'n', cex = 1.2)

###### End of script
