# -------------------------------------------------------------
# -- Example code -- read in empirical data, use it in a metacommunity simulation
# -- updated 2016-03-13
# -- contact Eric R. Sokol with questions

# -------------------------------------------------------------
# -- Read in empirical data, calculate niches
# --------------------------
library(ade4)

# -- Make sure you have a "DATA_IN" subdirectory containing these files in your working directory
# -- read the four input tables; the first column of each file supplies the
# row names
d.comm <- read.csv('DATA_IN/d.comm.densities.csv', row.names = 1)
d.env <- read.csv('DATA_IN/d.env.scaled.csv', row.names = 1)
d.geo <- read.csv('DATA_IN/d.geo.utm.csv', row.names = 1)
d.siteinfo <- read.csv('DATA_IN/d.siteinfo.csv', row.names = 1)

# -- every row needs a positive, non-missing total; otherwise the division
# below produces NaN/NA rows and the column filter fails with an unhelpful
# "undefined columns selected" error
if (!isTRUE(all(rowSums(d.comm) > 0))) {
  stop(
    'each row of d.comm must have a positive, non-missing total ',
    'to compute relative abundances',
    call. = FALSE
  )
}

# -- calculate RAs from densities (each row is divided by its row total)
d.comm.ra <- d.comm / rowSums(d.comm)

# -- remove species with 0 abundances; drop = FALSE keeps a data frame even
# if only one species column remains
d.comm.ra <- d.comm.ra[, colSums(d.comm.ra) > 0, drop = FALSE]

# -- calculate niches for species
# -- PCA of the environmental table, keeping a single axis (nf = 1);
# scannf is spelled out in full rather than relying on partial argument
# matching
dudi.pca.env <- dudi.pca(d.env, scale = TRUE, scannf = FALSE, nf = 1)

# -- niche analysis of the relative abundances against that ordination
niche.species <- niche(dudi.pca.env, Y = d.comm.ra, scannf = FALSE)

# -- one row per species: niche position plus the niche.param() summary.
# NOTE(review): downstream code reads the position as d.niche$Axis1, i.e. the
# single column of niche.species$li keeps its own name and the `niche.pos`
# label is not applied -- confirm before changing nf or renaming.
d.niche <- data.frame(
  niche.pos = niche.species$li,
  as.data.frame(niche.param(niche.species))
)

# -- calculate niche position of sites: site metadata plus the site scores
# on the environmental PCA axis
d.site.niche <- data.frame(
  d.siteinfo,
  dudi.pca.env$li
)

# -- make 1D axes for arranging sites in a plot
# -- principal components of the site coordinates; the first component is
# used as a single axis along which sites are ordered
mod.pca.geo <- princomp(d.geo)

# -- per-site lookup table: labels from the site metadata, plus the score
# and the rank of each site on the first geographic component
d.site.1D <- data.frame(
  site.name = as.character(d.siteinfo$Site.code),
  region    = as.character(d.siteinfo$Region),
  pca.score = mod.pca.geo$scores[, 1],
  pca.rank  = rank(mod.pca.geo$scores[, 1])
)

# -------------------------------------------------------------
# -- MCSim
# --------------------------

# -- this example uses MCSim V0.4.1.9001
# -- install the pinned MCSim release only when it is missing or a different
# version is installed, so re-running the script does not reinstall from
# GitHub every time. system.file() and packageVersion() are used because
# they check the installed package without loading it.
# NOTE(review): assumes the DESCRIPTION version of tag v0.4.1.9001 is
# '0.4.1.9001'; if it is not, this simply reinstalls on each run as before.
if (!nzchar(system.file(package = 'MCSim')) ||
    utils::packageVersion('MCSim') != '0.4.1.9001') {
  devtools::install_github('sokole/MCSim@v0.4.1.9001')
}
library(MCSim)

# JL = 10000 (min RA is 0.00023, therefore, need at least 10000 individuals at a site to observe an RA that small)
# using a JM of 1e6 gives 45000 individuals / site on average

# -- take commas out of pond areas, make numeric
# (gsub() already returns a character vector, so no extra as.character()
# is needed; the scenarios below pass area.m2 = 1 rather than this column)
d.siteinfo$area.m2 <- as.numeric(gsub(',', '', d.siteinfo$Area))

# -- set metacommunity size
JM.init <- 1e6

# -------------------------------------
# -- best fit neutral scenario
# -------------
# -- fixed seed so the run is reproducible
set.seed(1)

# -- run the simulation; the result is kept both under the scenario-specific
# name and under the generic `simoutput` name.
# Species niche widths are sqrt(Tol) scaled by 1000 here (the species-sorting
# scenario uses a factor of 1) -- presumably so broad that the environment
# barely filters species; confirm against the MCSim documentation.
sim.neutral.model <- simoutput <- fn.metaSIM(
  output.dir.path = 'SIM_OUTPUT_MCM-diatom-examples',
  scenario.ID = 'pond-diatoms-MCSim-example',
  sim.ID = 'neutral.model',
  landscape = fn.make.landscape(
    site.coords = d.geo,
    Ef = d.site.niche$Axis1,
    I.rate.m2 = 0.0005 * JM.init / nrow(d.geo),
    area.m2 = 1,
    JM = JM.init
  ),
  trait.Ef = d.niche$Axis1,
  trait.Ef.sd = 1000 * sqrt(d.niche$Tol),
  J.t0 = d.comm.ra,
  n.timestep = 100,
  W.r = 5e6,
  nu = 0,
  speciation.limit = 0,
  save.sim = TRUE
)

# -- export the long-format species counts next to the saved simulation
write.csv(
  x = sim.neutral.model$J.long,
  file = 'SIM_OUTPUT_MCM-diatom-examples/sim.neutral.model-species.counts.csv',
  row.names = FALSE
)

# -------------------------------------
# -- baas-becking type species sorting
# -- everything everywhere, but, environment selects
# -------------
# -- fixed seed so the run is reproducible
set.seed(1)

# -- run the simulation; the result is kept both under the scenario-specific
# name and under the generic `simoutput` name.
# Relative to the neutral scenario this one uses a higher immigration rate
# factor (0.01), unscaled niche widths (sqrt(Tol) * 1) and W.r = 0.
sim.species.sorting <- simoutput <- fn.metaSIM(
  output.dir.path = 'SIM_OUTPUT_MCM-diatom-examples',
  scenario.ID = 'pond-diatoms-MCSim-example',
  sim.ID = 'species.sorting',
  landscape = fn.make.landscape(
    site.coords = d.geo,
    Ef = d.site.niche$Axis1,
    I.rate.m2 = 0.01 * JM.init / nrow(d.geo),
    area.m2 = 1,
    JM = JM.init
  ),
  trait.Ef = d.niche$Axis1,
  trait.Ef.sd = 1 * sqrt(d.niche$Tol),
  J.t0 = d.comm.ra,
  n.timestep = 100,
  W.r = 0,
  nu = 0,
  speciation.limit = 0,
  save.sim = TRUE
)

# -- export the long-format species counts next to the saved simulation
write.csv(
  x = sim.species.sorting$J.long,
  file = 'SIM_OUTPUT_MCM-diatom-examples/sim.species.sorting-species.counts.csv',
  row.names = FALSE
)
# -----------------------------------------------------
# Plot dot plots of metacommunity composition through time

# -- using the species sorting outcomes
# -- the plots below are drawn from the species-sorting run
simoutput <- sim.species.sorting

# -----------------------------------------------------
# -- pull the initial regional species pool out of the simulation output and
# rank the species by their trait.Ef value (used later as the species axis)
dat.gamma.t0 <- simoutput$dat.gamma.t0
dat.gamma.t0[['env.rank']] <- rank(dat.gamma.t0$trait.Ef)

# -----------------------------------------------------
# -- make some dot plots
# ----------------------------
# -- close any open graphics devices and open a fresh 6 x 3 inch one.
# dev.new() is used instead of windows(), which only exists on Windows builds
# of R, so the script also runs on macOS and Linux.
graphics.off()
dev.new(width = 6, height = 3)

library(ggplot2)
library(dplyr)

# -- extract timesteps to plot: 1, 2, 10 and the last one
J.long <- filter(simoutput$J.long, timestep %in% c(1, 2, 10, max(timestep)))

# -- group spp counts by time and site to calculate site count totals;
# the summary is ungrouped so no grouping leaks into later steps
J.time.site <- group_by(J.long, timestep, site)
JLs <- ungroup(summarise(J.time.site, site.totals = sum(count)))
J.JLs <- full_join(J.long, JLs, by = c('timestep', 'site'))

# -- calculate relative abundances (RAs) from counts and site totals, remove
# observations with RAs of 0
J.RAs.long <- J.JLs
J.RAs.long$RA <- J.RAs.long$count / J.RAs.long$site.totals
J.RAs.long <- filter(J.RAs.long, RA > 0)

# -- add environmental data and species names for plotting
J.RAs.long <- mutate(J.RAs.long, spp.no = as.numeric(as.factor(spp)))

# NOTE(review): these lookups index the rows of d.site.1D / dat.gamma.t0 by
# the `site` and `spp` values directly -- confirm that those values match the
# row positions or row names of the lookup tables.
J.RAs.long$region <- d.site.1D[J.RAs.long$site, 'region']
J.RAs.long$pca.rank <- d.site.1D[J.RAs.long$site, 'pca.rank']
J.RAs.long$pca.score <- d.site.1D[J.RAs.long$site, 'pca.score']
J.RAs.long$env.rank <- dat.gamma.t0[J.RAs.long$spp, 'env.rank']

# -- make a species characteristic data frame for labeling the spp axis
d.spp.RAs <- as.data.frame(
  J.RAs.long %>%
    group_by(spp) %>%
    summarise(
      max.RA = max(RA),
      spp.no = spp.no[1],
      env.rank = env.rank[1]
    )
)

# -- only species that reach an RA above 0.4 somewhere get an axis label
spp.labels <- filter(d.spp.RAs, max.RA > .4)

# -- make plot: sites ordered by geographic rank on x, species ordered by
# environmental rank on y, point size = RA, colour = region, one panel per
# timestep
p <- ggplot(
  J.RAs.long,
  aes(pca.rank, env.rank, size = RA, color = region)
)

p + geom_point() +
  facet_grid(. ~ timestep) +
  theme(
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 6)
  ) +
  scale_size('Relative\nAbundance', range = c(.5, 4)) +
  scale_color_discrete('Region') +
  ylab('Species ID') +
  xlab('Site ID') +
  scale_y_continuous(
    breaks = spp.labels$env.rank,
    labels = spp.labels$spp
  )

