<a href="https://colab.research.google.com/github/zboraon/bayesian_temperature_reconstruction/blob/main/temperature_reconstruction_bayesian_hierarch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Necessary libraries

Install the necessary libraries

In [None]:
# Install the system-level apt packages used by this notebook (`jags` and
# `r-base`), one `apt install -y <package>` shell call per package.
for (aptPackage in c("jags", "r-base")) {
  system(paste("apt install -y", aptPackage))
}

In [None]:
# R packages installed for this notebook, fetched in a single
# install.packages() call.
requiredPackages <- c(
  "rjags",
  "HDInterval",
  "car",
  "runjags",
  "compute.es",
  "RCurl",
  "filesstrings"
)
install.packages(requiredPackages)

In [None]:
rm(list = ls())
graphics.off()
library("rjags")
library("knitr")
library("HDInterval")
library("runjags")
library("compute.es")
library("parallel")
library("RCurl")
library("filesstrings")

# Functions


In [None]:

# Fit the hierarchical robust (Student-t) linear regression in JAGS through
# run.jags() and return the posterior draws.
#
# Arguments:
#   data           Data frame containing the columns named by xName, yName and
#                  sName.
#   priordata      Object with elements t1, t2, y1 and y2 (accessed with `$`).
#                  They are standardized in the JAGS data block.
#   predictdata    Value handed to JAGS as `xpredict`. The model divides it by
#                  1000 before applying the regression line, so it is
#                  presumably expressed in units 1000 times those of the x
#                  column -- TODO confirm against the caller.
#   xName, yName   Names of the predictor and response columns of `data`.
#   sName          Name of the grouping column; its values are converted to
#                  consecutive integers.
#   numSavedSteps  Total number of draws to keep; each chain samples
#                  ceiling(numSavedSteps / nChains).
#   thinSteps      Thinning interval passed to run.jags().
#   saveName       If not NULL, the draws are saved to
#                  "<saveName>Mcmc.Rdata".
#   runjagsMethod  Passed as `method` to run.jags(). The default refers to
#                  `runjagsMethodDefault`, which is not defined in this
#                  function and must exist in the calling environment.
#   nChains        Number of chains. The default refers to `nChainsDefault`,
#                  which must likewise be defined elsewhere.
#
# Returns:
#   The result of as.mcmc.list() applied to the run.jags() output, indexed as
#   codaSamples[[ chainIdx ]][ stepIdx , paramIdx ].
#
# Side effects:
#   Writes the model to "TEMPmodel.txt" in the working directory and, when
#   saveName is given, writes "<saveName>Mcmc.Rdata".
genMCMC = function(data, priordata, predictdata, xName="x", yName="y",
                    sName="s", numSavedSteps=10000, thinSteps = 1, 
                    saveName=NULL, runjagsMethod=runjagsMethodDefault , 
                    nChains=nChainsDefault) { 
  
  #-----------------------------------------------------------------------------
  # THE DATA.
  y <- data[, yName]
  x <- data[, xName]
  # Convert sName to consecutive integers:
  s <- as.numeric(factor(data[, sName]))
  t1 <- priordata$t1
  t2 <- priordata$t2
  y1 <- priordata$y1
  y2 <- priordata$y2
  # Do some checking that data make sense:
  if (any(!is.finite(y))) {stop("All y values must be finite.")}
  if (any(!is.finite(x))) {stop("All x values must be finite.")}
  # `s` indexes zbeta0/zbeta1 inside the model, so a missing group label cannot
  # be used.
  if (anyNA(s)) {stop("All s values must be non-missing.")}
  # Specify the data in a list, for later shipment to JAGS:
  dataList <- list(
    x = x,
    y = y,
    s = s,
    t1 = t1,
    t2 = t2,
    y1 = y1,
    y2 = y2,
    xpredict = predictdata,
    # Number of groups, taken from the data passed to this function rather
    # than from a variable in the calling environment.
    Nsubj = length(unique(s))
  )
  #-----------------------------------------------------------------------------
  # THE MODEL.
  # NOTE(review): JAGS dnorm() takes a precision as its second argument, so
  # despite their names zbeta0sigma and zbeta1sigma enter the model as
  # precisions of the group-level coefficients -- confirm this is intended.
  # NOTE(review): paleoviewbeta0 and paleoviewbeta1 are computed in the data
  # block but are not referenced in the model block.
  modelString <- "
  # Standardize the data:
  data {
    Ntotal <- length(y)
    xm <- mean(x)
    ym <- mean(y)
    xsd <- sd(x)
    ysd <- sd(y)
    for ( i in 1:Ntotal ) {
      zx[i] <- ( x[i] - xm ) / xsd
      zy[i] <- ( y[i] - ym ) / ysd
    }
    zt1 <- ( t1 - xm ) / xsd
    zt2 <- ( t2 - xm ) / xsd
    zy1 <- ( y1 - ym ) / ysd
    zy2 <- ( y2 - ym ) / ysd
    paleoviewbeta1 <- ( zy2 - zy1 ) / ( zt2 - zt1 )
    paleoviewbeta0 <- paleoviewbeta1 * ( 0 - zt1 ) + zy1
  }
  
  # Specify the model for standardized data:
  model {
    for ( i in 1:Ntotal ) {
      zy[i] ~ dt( zbeta0[s[i]] + zbeta1[s[i]] * zxnorm[i] , 1/zsigma^2 , nu )
      zxnorm[i] ~ dt( zx[i] , 1/(0.1)^2 , 1 ) 
    }
    for ( j in 1:Nsubj ) {
      zbeta0[j] ~ dnorm( zbeta0mu , zbeta0sigma )  
      zbeta1[j] ~ dnorm( zbeta1mu , zbeta1sigma )
    }
    # Priors on standardized scale:
    zbeta0mu ~ dnorm( 0 , 1/3^2 )
    zbeta1mu ~ dnorm( 0 , 1/2^2 ) 
    zsigma ~ dscaled.gamma( 3 , 1 ) 
    zbeta0sigma ~ dgamma( 0.01 , 0.01 ) 
    zbeta1sigma ~ dgamma( 0.01 , 0.01 )
    
    nu <- nuMinusOne+1
    nuMinusOne ~ dexp(1/29)
    
    # Transform to original scale:
    for ( j in 1:Nsubj ) {
      beta1[j] <- zbeta1[j] * ysd / xsd  
      beta0[j] <- zbeta0[j] * ysd  + ym - zbeta1[j] * xm * ysd / xsd 
    }
    beta1mu <- zbeta1mu * ysd / xsd  
    beta0mu <- zbeta0mu * ysd  + ym - zbeta1mu * xm * ysd / xsd 
    sigma <- zsigma * ysd
    
    # Posterior predictive distribution
    ypredict ~ dt( beta0mu + beta1mu * xpredict/1000 , 1/sigma^2 , nu )   
  } 
  " # close quote for modelString
  # Write out modelString to a text file
  writeLines(modelString, con="TEMPmodel.txt")
  #-----------------------------------------------------------------------------
  # INITIALIZE THE CHAINS.
  # No explicit initial values are supplied: the `inits` argument of the
  # run.jags() call below is commented out.
  #-----------------------------------------------------------------------------
  # RUN THE CHAINS
  parameters <- c("beta0",  "beta1", "beta0mu", "beta1mu", "zbeta0", "zbeta1", 
                  "zbeta0mu", "zbeta1mu", "zsigma", "sigma", "nu", 
                  "zbeta0sigma", "zbeta1sigma", "ypredict")
  adaptSteps <- 10000  # Number of steps to "tune" the samplers, 10000 if 
                      # not converged otw 2000
  burnInSteps <- 10000 # 10000 if not converged otw 3000
  
  runJagsOut <- run.jags(method = runjagsMethod,
                        model = "TEMPmodel.txt", 
                        monitor = parameters, 
                        data = dataList,  
                        #inits = initsList, 
                        n.chains = nChains,
                        adapt = adaptSteps,
                        burnin = burnInSteps, 
                        sample = ceiling(numSavedSteps/nChains),
                        thin = thinSteps,
                        summarise = FALSE,
                        plots = FALSE,
                        modules = 'glm')
  codaSamples <- as.mcmc.list(runJagsOut)
  # resulting codaSamples object has these indices: 
  # codaSamples[[ chainIdx ]][ stepIdx , paramIdx ]
  
  if (!is.null(saveName)) {
    save(codaSamples, file = paste0(saveName, "Mcmc.Rdata"))
  }
  return( codaSamples )
} # end function

#===============================================================================

# Summarize every monitored parameter of an MCMC run.
#
# Arguments:
#   codaSamples  MCMC output; it is converted with
#                as.matrix(codaSamples, chains = FALSE) and each column of the
#                result is treated as one parameter.
#   saveName     If not NULL, the table is also written to
#                "<saveName>SummaryInfo.csv".
#
# Returns:
#   A matrix with one row per parameter (row names are the parameter names);
#   each row is whatever summarizePost() returns for that parameter's draws.
#   summarizePost() is not defined in this function and must be available in
#   the calling environment.
smryMCMC <- function(codaSamples, saveName = NULL) {
  draws <- as.matrix(codaSamples, chains = FALSE)
  parameterNames <- colnames(draws)

  # One summary vector per parameter, stacked row-wise in column order.
  perParameter <- lapply(parameterNames, function(nm) {
    summarizePost(draws[, nm])
  })
  summaryTable <- do.call(rbind, perParameter)
  rownames(summaryTable) <- parameterNames

  if (!is.null(saveName)) {
    write.csv(summaryTable, file = paste0(saveName, "SummaryInfo.csv"))
  }
  summaryTable
}

#===============================================================================


# Loading and tidying the data

In [None]:
library("RCurl")

graphFileType <- "pdf"

# Output CSV collecting one prediction row per age point. The name is built
# from a single Sys.time() reading so the date and time parts always agree.
filename <- paste0(format(Sys.time(), "%Y%m%d_%H%M%S"), "ypredict.csv")
file.create(filename)

# Header row of the output file.
write.table(t(c("AgeBP", "Median", "HDIlow", "HDIhigh")),
            file = filename,
            append = TRUE,
            sep = ",",
            row.names = FALSE,
            col.names = FALSE)

# The URLs are assembled with paste0() instead of string literals broken over
# several source lines: a multi-line literal embeds the line breaks and the
# indentation in the URL itself.
repoDataUrl <- paste0("https://raw.githubusercontent.com/zboraon/",
                      "bayesian_temperature_reconstruction/main/data/")

emed.data.whole.File <- getURL(paste0(repoDataUrl,
                                      "20211105.emed.data_ascolumn.csv"))
emed.data.whole <- read.csv(text = emed.data.whole.File)

# 150 yrs resolution
trace21kaFile <- getURL(paste0(repoDataUrl,
                               "trace21ka_150yearsresolution/",
                               "trace21ka_150yrsresoltn_temperature.csv"))

trace21ka <- read.csv(text = trace21kaFile)

# Keep only the age and median columns of the TraCE21ka table.
median.trace21ka <- data.frame(agetrace21ka = trace21ka$YearBP,
                               mediantrace21ka = trace21ka$Median)
age <- emed.data.whole$age_BP

# Ages at which a prediction is made: 150 to 9900 in steps of 150.
predictionpoints <- seq(150, 9900, by = 150)


# Run the loop

In [None]:
# For every prediction age: select the records within +/- dt of that age, fit
# the hierarchical model with genMCMC(), summarize the chains with smryMCMC()
# and append the Median/HDIlow/HDIhigh of `ypredict` to the output CSV.

# Half-width of the window around each prediction age. It does not change
# between iterations, so it is set once.
dt <- 150

for (agepoint in predictionpoints) { # for all the midpoints of each interval

  graphics.off()

  # Define the end points of the interval
  t1 <- agepoint - dt
  t2 <- agepoint + dt

  # Prepare the TRaCE21ka dataset for the interval
  y2 <- median.trace21ka$mediantrace21ka[median.trace21ka$agetrace21ka == t2]
  y1 <- median.trace21ka$mediantrace21ka[median.trace21ka$agetrace21ka == t1]
  # Exactly one median is needed at each end point; fail with a clear message
  # instead of an obscure error further down.
  if (length(y1) != 1 || length(y2) != 1) {
    stop("Expected exactly one TraCE21ka median at YearBP ", t1, " and ", t2,
         "; found ", length(y1), " and ", length(y2), ".", call. = FALSE)
  }
  # The ages are divided by 1000 here, matching the rescaling of age_BP below.
  trace21kadataasprior <- data.frame(t1 = t1 / 1000,
                                     t2 = t2 / 1000,
                                     y1 = y1,
                                     y2 = y2)

  predictdata <- agepoint

  # Records whose age lies inside [t1, t2].
  datatotest.index <- which(age <= t2 & age >= t1)
  emed.data.totest <- emed.data.whole[datatotest.index, ]
  if (nrow(emed.data.totest) == 0) {
    stop("No records with age_BP between ", t1, " and ", t2, ".",
         call. = FALSE)
  }
  emed.data.totest$age_BP <- emed.data.totest$age_BP / 1000

  # Site codes in this window, kept as top-level variables in case code
  # outside this loop reads them.
  tested.locs <- emed.data.totest$Code
  tested.locs.factor <- as.numeric(factor(tested.locs))

  # Prepare the folders for the results
  foldername <- paste0("EMedtempreghierressionreconst-", agepoint)
  fileNameRoot <- paste0(foldername, "/EMedtempreghierressionreconst-",
                         agepoint)
  dir.create(foldername)

  mcmcCoda <- genMCMC(data = emed.data.totest,
                      priordata = trace21kadataasprior,
                      predictdata = predictdata, xName = "age_BP",
                      yName = "T_anomaly", sName = "Code",
                      numSavedSteps = 30000, thinSteps = 15,
                      saveName = fileNameRoot)
                      # if no convergence numSavedSteps = 60000, thinSteps = 30
  codaSamples <- as.mcmc.list(mcmcCoda)

  #-------------------------------------------------------------------------------
  # Get summary statistics of chain:
  summaryInfo <- smryMCMC(mcmcCoda, saveName = fileNameRoot)
  # show(summaryInfo)
  columnsneeded <- summaryInfo[, grep("Median|HDIlow|HDIhigh",
                                      colnames(summaryInfo))]
  ypredictrows <- columnsneeded[grep("ypredict", rownames(columnsneeded)), ]

  # Output row: the prediction age followed by the selected summary columns.
  ypredictrowswitht <- c(predictdata, ypredictrows)

  # Column names are written only if the output file does not exist at this
  # point.
  write.table(t(ypredictrowswitht),
              file = filename,
              append = TRUE,
              sep = ",",
              row.names = FALSE,
              col.names = !file.exists(filename))
}
  
  
  
  #-------------------------------------------------------------------------------  
