traceplot(m1)
trankplot(m1)
100-94.5
set.seed(835217)
post<-extract.samples(m1)
# First, plot the observed data
plot(math12 ~ smath8, data=d, pch=19, col="firebrick2", xlab="Std Math Scores Grade 8",
ylab="Math Scores Grade 12", cex.lab=1.5, cex=1.5)
round(vcov(m1), 3)
# First, plot the observed data
plot(math12 ~ smath8, data=d, pch=19, col="firebrick2", xlab="Std Math Scores Grade 8",
ylab="Math Scores Grade 12", cex.lab=1.5, cex=1.5)
# Take the mean of the "a" and "b" parameters from the posterior distribution extracted above
a_m1<-mean(post$a)
b_m1<-mean(post$b)
#Now add a line that represents the predicted values of dependent for values of ...
#predictor in graph according to the average "a" and "b" of the posterior extracted above
curve(a_map+b_map*(x), lwd=6, add=TRUE, col="firebrick4")
# First, plot the observed data
plot(math12 ~ smath8, data=d, pch=19, col="firebrick2", xlab="Std Math Scores Grade 8",
ylab="Math Scores Grade 12", cex.lab=1.5, cex=1.5)
# Take the mean of the "a" and "b" parameters from the posterior distribution extracted above
a_m1<-mean(post$a)
b_m1<-mean(post$b)
#Now add a line that represents the predicted values of dependent for values of ...
#predictor in graph according to the average "a" and "b" of the posterior extracted above
curve(a_map+b_map*(x), lwd=6, add=TRUE, col="firebrick4")
curve(a_m1+b_m1*(x), lwd=6, add=TRUE, col="firebrick4")
set.seed(835217)
post<-extract.samples(m1)
# Link function
plot(math12 ~ smath8, data=d, pch=19, col="firebrick2", xlab="Std Math Scores Grade 8",
ylab="Math Scores Grade 12", cex.lab=1.5, cex=1.5)
smath8.seq<-seq(from=-2, to=3, by=.1 )
norm <- function(x) sqrt(x%*%x)
norm(1:4)
norm <- function(x) sqrt(x%*%x)
norm <- function(x) sqrt(x%*%x)
norm(1:4)
mu.link<-function(smath8) post$a + post$b*smath8
View(mu.link)
View(mu.link)
#based on the indipdendent and the liner function parameters from the posterior
mu.link<-function(smath8) post$a + post$b*smath8
#This now calculates the value of "mu" for each value of the sequence smath8
mu<-sapply(smath8.seq, mu.link)
#now I can calculate the mean of these values
mu.mean<-apply(mu, 2, mean)
#and now I can calculate the CIs considering p=.95
mu.ci<-apply(mu,2,Pi, prob=0.95)
mu.link<-function(smath8) post$a + post$b*smath8
#This now calculates the value of "mu" for each value of the sequence smath8
mu<-sapply(smath8.seq, mu.link)
#now I can calculate the mean of these values
mu.mean<-apply(mu, 2, mean)
#and now I can calculate the CIs considering p=.95
mu.ci<-apply(mu,2,PI, prob=0.95)
# plot the observed data
plot(math12 ~ smath8, data=d, ylim=c(-2,3), pch=19, col="firebrick2", xlab="Std Math Scores Grade 8",
ylab="Math Scores Grade 12", cex.lab=1.5, cex=1.5)
#Now add a line that represents the predicted values of dependent for values of ...
#predictor in graph according to the average "a" and "b" of the posterior extracted above
lines(smath8.seq, mu.mean, lwd=6, add=TRUE, col="firebrick4")
# plot the observed data
plot(math12 ~ smath8, data=d, ylim=c(-2,3), pch=19, col="firebrick2", xlab="Std Math Scores Grade 8",
ylab="Math Scores Grade 12", cex.lab=1.5, cex=1.5)
lines(smath8.seq, mu.mean, lwd=6, col="firebrick4")
# plot the observed data
plot(math12 ~ smath8, data=d,  pch=19, col="firebrick2", xlab="Std Math Scores Grade 8",
ylab="Math Scores Grade 12", cex.lab=1.5, cex=1.5)
#Now add a line that represents the predicted values of dependent for values of ...
#predictor in graph according to the average "a" and "b" of the posterior extracted above
lines(smath8.seq, mu.mean, lwd=6, col="firebrick4")
# plot the observed data
plot(math12 ~ smath8, data=d,  pch=19, col="firebrick2", xlab="Std Math Scores Grade 8",
ylab="Math Scores Grade 12", cex.lab=1.5, cex=1.5)
#Now add a line that represents the predicted values of dependent for values of ...
#predictor in graph according to the average "a" and "b" of the posterior extracted above
#### Add information about uncertainty around the central values ####
shade(mu.ci, mw.seq, col=col.alpha("peachpuff", .75))
shade(mu.ci, smath8.seq, col=col.alpha("peachpuff", .75))
### Add the mean of mu
lines(smath8.seq, mu.mean, lwd=6, col="firebrick4")
#Simulate data from the model
sim.math12<-sim(m1, data=list(smath8=smath8.seq))
smath8.seq<-seq(from=-2, to=3, by=.1 )
sim.math12<-sim(m1, data=list(smath8=smath8.seq))
# Open the dataset
d <- read.delim("D:/Teaching/Bayes Reg/Intro to Bayes Regression OPerra/Exercises/private_school.csv")
str(d)
head(d)
summary(d)
### Before you can run these examples, you will need to download and install...
#...the packages below. These also require the installation of Tool Chain ++
# See this webpage for installation instructions:
# https://github.com/rmcelreath/rethinking#installation
library(StanHeaders)
library(cmdstanr)
library(rstan)
library(ggplot2)
library(rethinking)
library(psych)
#package "psych" allows some functions like "describe"
###########################################################################################################
#### 1. Build a linear regression model where math scores in Grade 12 are a function of math scores in Grade 8. ####
###########################################################################################################
###################################################
#1a. Consider the metric of outcome and predictors:
#would it be sensible to centre and/or standardise some of these variables?
###################################################
#describe the variables of interest
describe (d[ , c("math12",  "math8") ], fast=TRUE)
# The scores of the predictor are translated in a different metric where the mean 50-->0 and the SD=10-->1
d$smath8<-(d$math8-50)/10
cor.test(d$math12, d$smath8)
describe (d[ , c("math12",  "math8", "smath8") ], fast=TRUE)
###################################################
#1b. Create and check priors for the parameters in the model (intercept, slopes, SD).
###################################################
#The model can be described as:
#m12 ~ Norm(mu, sigma)
#mu= a + b(m8) http://127.0.0.1:38705/graphics/plot_zoom_png?width=1200&height=900
#a=Norm( Ma, SDa)
#b=Norm( Mb, SDb)
#sigma=?
#Since the math12 "m12" data have been standardised to SD=1, I may give sigma a uniform distribution
#with values 0 to 3, say.
#However, given that m12 is assumed normally distributed with SD approx 1, I will expect that values
#close to 1 will be more likely and extreme values increasingly unlikely. At the same time, the distribution
#has to be positive. One possible distribution that meets these demands is exponential with rate=1, so:
#sigma=Exp(1). See a random sample of 1k draws from a similar distribution to get a sense of what it can look like:
example.sigma<-rlnorm(1000, 2, .7)
dens(example.sigma)
describe(example.sigma)
#Given the standardisation of scores, the prior for the intercept a may be also quite simple, say mean=0, SD=1
#visualise this prior
set.seed(27416)
sample_mu<-rnorm(1000, 50, 10)
sample_sigma<-rlnorm(1000, 2, .7)
prior_m1<-rnorm(1000, sample_mu, sample_sigma)
dens(prior_m1)
quantile(prior_m1, c(0.1, 0.90))
#Change it to make it narrower, e.g. SD=5
set.seed(27416)
sample_mu<-rnorm(1000, 50, 5)
sample_sigma<-rlnorm(1000, 2, .7)
prior_m1<-rnorm(1000, sample_mu, sample_sigma)
dens(prior_m1)
quantile(prior_m1, c(0.1, 0.90))
# Assuming that the slope has mean 0 and SD=10, what would the expected slopes look like?
set.seed(27416)
N<-100
a<-rnorm(N, 50, 5 )
b<-rnorm(N, 0, 10)
#Create an empty plot
plot(NULL, xlim=c(-3,3), ylim=c(0,100), xlab="Math scores Grade 8", ylab="Math scores Grade 12")
mtext("b ~ dnorm(0,10)")
#the function below creates and plots lines using the linear model
for (i in 1:N) curve(a[i] + b[i]*(x), from=-3, to=3, add=TRUE, col=col.alpha("brown3",0.2), lwd=3)
# Assuming that the slope has mean 0 and SD=5, what would the expected slopes look like?
set.seed(4978)
N<-100
a<-rnorm(N, 50, 5)
b<-rnorm(N, 0, 5)
#Create an empty plot
plot(NULL, xlim=c(-3,3), ylim=c(0,100), xlab="Math scores Grade 8", ylab="Math scores Grade 12")
mtext("b ~ dnorm(0,5)")
#the function below creates and plots lines using the linear model
for (i in 1:N) curve(a[i] + b[i]*(x), from=-3, to=3, add=TRUE, col=col.alpha("brown3",0.2), lwd=3)
### Compile a list with only the two variables of interest. There are no missing data in those variables.
dl <- list(
math12 = d$math12,
smath8 = d$smath8 )
### Run the Model using the function "ulam"
set.seed(96760) #Used to ensure the output is replicated
m1<- ulam(
alist(
math12 ~ dnorm( mu, sigma) ,
mu<- a + b*(smath8),
a ~ dnorm(50, 5),
b ~ dnorm(0, 5),
sigma ~ dlnorm(2, .7)
), data=dl, chains=1)
#Check the run of the model
show(m1)
#Check the results in tabular form, as well as some key diagnostics
precis(m1)
#Run further diagnostics for the algorithm. See "Statistical Rethinking" book...
#...particularly Chapter 9, for more information
traceplot(m1)
### Run the Model using the function "ulam", but add more chains
set.seed(96760) #Used to ensure the output is replicated
m1<- ulam(
alist(
math12 ~ dnorm( mu, sigma) ,
mu<- a + b*(smath8),
a ~ dnorm(50, 5),
b ~ dnorm(0, 5),
sigma ~ dlnorm(2, .7)
), data=dl, chains=6, core=6)
#Check the run of the model
show(m1)
#Check the results in tabular form, as well as some key diagnostics
precis(m1)
#Run further diagnostics for the algorithm. See "Statistical Rethinking" book...
#...particularly Chapter 9, for more information
traceplot(m1)
trankplot(m1)
### Estimate variacs and covariances from posterior
round(vcov(m1), 3)
# Extract samples from the posterior distribution using "rethinking" package...
#...function "extract.samples"
set.seed(835217)
post<-extract.samples(m1)
# Create a sequence of values for the predictor
smath8.seq<-seq(from=-2, to=3, by=.1 )
# The following creates a function to calculate the expected values of dependent
#based on the indipdendent and the liner function parameters from the posterior
mu.link<-function(smath8) post$a + post$b*smath8
#This now calculates the value of "mu" for each value of the sequence smath8
mu<-sapply(smath8.seq, mu.link)
#now I can calculate the mean of these values
mu.mean<-apply(mu, 2, mean)
#and now I can calculate the CIs considering p=.95
mu.ci<-apply(mu,2,PI, prob=0.95)
# plot the observed data
plot(math12 ~ smath8, data=d,  pch=19, col="firebrick2", xlab="Std Math Scores Grade 8",
ylab="Math Scores Grade 12", cex.lab=1.5, cex=1.5)
#Now add a line that represents the predicted values of dependent for values of ...
#predictor in graph according to the average "a" and "b" of the posterior extracted above
#### Add information about uncertainty around the central values ####
shade(mu.ci, smath8.seq, col=col.alpha("peachpuff", .75))
### Add the mean of mu
lines(smath8.seq, mu.mean, lwd=6, col="firebrick4")
#Simulate data from the model
sim.math12<-sim(m1, data=list(smath8=smath8.seq))
# plot the observed data
plot(math12 ~ smath8, data=d,  pch=19, col="firebrick2", xlab="Std Math Scores Grade 8",
ylab="Math Scores Grade 12", cex.lab=1.5, cex=1.5)
#Now add a line that represents the predicted values of dependent for values of ...
#predictor in graph according to the average "a" and "b" of the posterior extracted above
#### Add information about uncertainty around the central values ####
shade(mu.ci, smath8.seq, col=col.alpha("peachpuff", .75))
### Add the mean of mu
lines(smath8.seq, mu.mean, lwd=6, col="firebrick4")
shade(mu.ci, smath8.seq, col=col.alpha("peachpuff", .75))
sim.math12<-sim(m1, data=list(smath8=smath8.seq))
sim.math12<-sim(m1 , data= list(smath8=smath8.seq))
library(StanHeaders)
library(cmdstanr)
library(rstan)
library(ggplot2)
library(rethinking)
library(psych)
sim.math12<-sim(m1 , data= list(smath8=smath8.seq))
### Before you can run these examples, you will need to download and install...
#...the packages below. These also require the installation of Tool Chain ++
# See this webpage for installation instructions:
# https://github.com/rmcelreath/rethinking#installation
library(StanHeaders)
library(cmdstanr)
library(rstan)
library(ggplot2)
library(rethinking)
#Upload the data provided. Change the according to the path on your machine.
d <- read.csv("D:/Teaching/Bayes Reg/data_birthweight.csv")
head(d)
summary(d)
### Compile a list with only the two variables of interest. There are no missing data in those variables.
dl <- list(
mw = d$mw,
bw = d$bw )
### Run the Model using the function "ulam"
set.seed(12593) #Used to ensure the output is replicated
m1<- ulam(
alist(
bw ~ dnorm( mu, sigma) ,
mu<- a + b*(mw-72.1),
a ~ dnorm(3300, 600),
b ~ dnorm(0, 25),
sigma ~ dunif(0, 1000)
), data=dl, chains=6, cores=6)
#Check the run of the model
show(m1)
#Check the results in tabular form, as well as some key diagnostics
precis(m1)
#Run further diagnostics for the algorithm. See "Statistical Rethinking" book...
#...particularly Chapter 9, for more information
traceplot(m1)
trankplot(m1)
#This command also provides information regarding the variances and covariances...
#...of parameters in the model
round(vcov(m1), 3)
#...variances and covariances can also be plotted:
pairs(m1)
#reset plots to show only one graph at a time
old.par <- par(mfrow=c(1, 1))
#### Creates Plots from posterior ####
# Set a seed number if you want to replicate this draw
set.seed(12593)
# Extract samples from the posterior distribution using "rethinking" package...
#...function "extract.samples"
post<-extract.samples(m1)
# First, plot the observed data
plot(bw ~ mw, data=d, pch=19, col="firebrick2", xlab="Maternal Weight (Kg)",
ylab="Newborn Birth Weight (g)", cex.lab=1.5, cex=1.5)
# Take the mean of the "a" and "b" parameters from the posterior distribution extracted above
a_map<-mean(post$a)
b_map<-mean(post$b)
#Now add a line that represents the predicted values of dependent for values of ...
#predictor in graph according to the average "a" and "b" of the posterior extracted above
curve(a_map+b_map*(x-72.1), lwd=6, add=TRUE, col="firebrick4")
#### Add information about uncertainty around the central values ####
#Clear the previous plot before proceeding
#Define a sequence of maternal weights to compute predictions for these.
#The sequence here is 35 to 120 kg, increasing by 1 Kg.
mw.seq<-seq(from=35, to=120, by=1)
#Now use the "link" function from "rethinking" package to compute mu ...
#...for each sample from the posterior as a function of each maternal weight...
#...in the mw.sequence created above
set.seed(12593)
mu<-link(m1, data=data.frame(mw=mw.seq))
#Summarise the distribution of mu, taking its mean and its credibility interval
mu.mean<-apply(mu, 2, mean)
mu.ci<-apply(mu, 2, PI, prob=0.89)
# Plot the observed data
plot(bw ~ mw, data=d, pch=19, col="firebrick2", xlab="Maternal Weight (Kg)",
ylab="Newborn Birth Weight (g)", cex.lab=1.5, cex=1.5)
# Plot the the Mean My for each weight
lines(mw.seq, mu.mean,lwd=3, add=TRUE, col="firebrick4")
# Plot a shaded area for the 89% CI
shade(mu.ci, mw.seq, col=col.alpha("peachpuff", .75))
### Add 89% prediction interval for actual birth weights ###
# This will incorporate the SD and its uncertainty, i.e. the spread (sigma) around mu
# To do so, the "rethinking" function "sim" allows simulating from the normal...
# ...distribution of birthweights with parameter mu corresponding to a given ...
#...maternal weight value, while considering the SD around mu, sigma, according...
#...to the posterior
sim.bw<-sim(m1, data=list(mw=mw.seq))
# Open the dataset
d <- read.delim("D:/Teaching/Bayes Reg/Intro to Bayes Regression OPerra/Exercises/private_school.csv")
str(d)
head(d)
summary(d)
### Before you can run these examples, you will need to download and install...
#...the packages below. These also require the installation of Tool Chain ++
# See this webpage for installation instructions:
# https://github.com/rmcelreath/rethinking#installation
library(StanHeaders)
library(cmdstanr)
library(rstan)
library(ggplot2)
library(rethinking)
library(psych)
#package "psych" allows some functions like "describe"
###########################################################################################################
#### 1. Build a linear regression model where math scores in Grade 12 are a function of math scores in Grade 8. ####
###########################################################################################################
###################################################
#1a. Consider the metric of outcome and predictors:
#would it be sensible to centre and/or standardise some of these variables?
###################################################
#describe the variables of interest
describe (d[ , c("math12",  "math8") ], fast=TRUE)
# The scores of the predictor are translated in a different metric where the mean 50-->0 and the SD=10-->1
d$smath8<-(d$math8-50)/10
cor.test(d$math12, d$smath8)
describe (d[ , c("math12",  "math8", "smath8") ], fast=TRUE)
###################################################
#1b. Create and check priors for the parameters in the model (intercept, slopes, SD).
###################################################
#The model can be described as:
#m12 ~ Norm(mu, sigma)
#mu= a + b(m8) http://127.0.0.1:38705/graphics/plot_zoom_png?width=1200&height=900
#a=Norm( Ma, SDa)
#b=Norm( Mb, SDb)
#sigma=?
#Since the math12 "m12" data have been standardised to SD=1, I may give sigma a uniform distribution
#with values 0 to 3, say.
#However, given that m12 is assumed normally distributed with SD approx 1, I will expect that values
#close to 1 will be more likely and extreme values increasingly unlikely. At the same time, the distribution
#has to be positive. One possible distribution that meets these demands is exponential with rate=1, so:
#sigma=Exp(1). See a random sample of 1k draws from a similar distribution to get a sense of what it can look like:
example.sigma<-rlnorm(1000, 2, .7)
dens(example.sigma)
describe(example.sigma)
#Given the standardisation of scores, the prior for the intercept a may be also quite simple, say mean=0, SD=1
#visualise this prior
set.seed(27416)
sample_mu<-rnorm(1000, 50, 10)
sample_sigma<-rlnorm(1000, 2, .7)
prior_m1<-rnorm(1000, sample_mu, sample_sigma)
dens(prior_m1)
quantile(prior_m1, c(0.1, 0.90))
#Change it to make it narrower, e.g. SD=5
set.seed(27416)
sample_mu<-rnorm(1000, 50, 5)
sample_sigma<-rlnorm(1000, 2, .7)
prior_m1<-rnorm(1000, sample_mu, sample_sigma)
dens(prior_m1)
quantile(prior_m1, c(0.1, 0.90))
# Assuming that the slope has mean 0 and SD=10, what would the expected slopes look like?
set.seed(27416)
N<-100
a<-rnorm(N, 50, 5 )
b<-rnorm(N, 0, 10)
#Create an empty plot
plot(NULL, xlim=c(-3,3), ylim=c(0,100), xlab="Math scores Grade 8", ylab="Math scores Grade 12")
mtext("b ~ dnorm(0,10)")
#the function below creates and plots lines using the linear model
for (i in 1:N) curve(a[i] + b[i]*(x), from=-3, to=3, add=TRUE, col=col.alpha("brown3",0.2), lwd=3)
# Assuming that the slope has mean 0 and SD=5, what would the expected slopes look like?
set.seed(4978)
N<-100
a<-rnorm(N, 50, 5)
b<-rnorm(N, 0, 5)
#Create an empty plot
plot(NULL, xlim=c(-3,3), ylim=c(0,100), xlab="Math scores Grade 8", ylab="Math scores Grade 12")
mtext("b ~ dnorm(0,5)")
#the function below creates and plots lines using the linear model
for (i in 1:N) curve(a[i] + b[i]*(x), from=-3, to=3, add=TRUE, col=col.alpha("brown3",0.2), lwd=3)
###################################################
#2 make sure that you use an ad-hoc list with the variables of interest
###################################################
### Compile a list with only the two variables of interest. There are no missing data in those variables.
dl <- list(
math12 = d$math12,
smath8 = d$smath8 )
###################################################
#2 Run the model using ULAM specifying only 1 chain.
###################################################
### Run the Model using the function "ulam"
set.seed(96760) #Used to ensure the output is replicated
m1<- ulam(
alist(
math12 ~ dnorm( mu, sigma) ,
mu<- a + b*(smath8),
a ~ dnorm(50, 5),
b ~ dnorm(0, 5),
sigma ~ dlnorm(2, .7)
), data=dl, chains=1)
#Check the run of the model
show(m1)
#Check the results in tabular form, as well as some key diagnostics
precis(m1)
#Run further diagnostics for the algorithm. See "Statistical Rethinking" book...
#...particularly Chapter 9, for more information
traceplot(m1)
###################################################
#3 Run the model using ULAM but specify 6 chains
###################################################
### Run the Model using the function "ulam", but add more chains
set.seed(96760) #Used to ensure the output is replicated
m1<- ulam(
alist(
math12 ~ dnorm( mu, sigma) ,
mu<- a + b*(smath8),
a ~ dnorm(50, 5),
b ~ dnorm(0, 5),
sigma ~ dlnorm(2, .7)
), data=dl, chains=6, core=6)
#Check the run of the model
show(m1)
#Check the results in tabular form, as well as some key diagnostics
precis(m1)
#Run further diagnostics for the algorithm. See "Statistical Rethinking" book...
#...particularly Chapter 9, for more information
traceplot(m1)
trankplot(m1)
### Estimate variacs and covariances from posterior
round(vcov(m1), 3)
###################################################
#4 .	After inspecting the poste marginal posterior distribution in tabular form,
#plot the observed results, the average slope,
#the 95% credibility interval around the mean, and the 95% credibility
#interval of math12 scores.
###################################################
# Extract samples from the posterior distribution using "rethinking" package...
#...function "extract.samples"
set.seed(835217)
post<-extract.samples(m1)
# Create a sequence of values for the predictor
smath8.seq<-seq(from=-2, to=3, by=.1 )
# The following creates a function to calculate the expected values of dependent
#based on the indipdendent and the liner function parameters from the posterior
mu.link<-function(smath8) post$a + post$b*smath8
#This now calculates the value of "mu" for each value of the sequence smath8
mu<-sapply(smath8.seq, mu.link)
#now I can calculate the mean of these values
mu.mean<-apply(mu, 2, mean)
#and now I can calculate the CIs considering p=.95
mu.ci<-apply(mu,2,PI, prob=0.95)
set.seed(835217)
premath12<-function(smath8) dnorm((post$a + post$b*smath8) , post$sigma)
predict.math12<-sapply(smath8.seq, premath12)
math12.ci<-apply(predict.math12,2,PI, prob=0.95)
View(math12.ci)
View(math12.ci)
View(predict.math12)
View(predict.math12)
View(premath12)
premath12<-function(smath8) rnorm(3000, (post$a + post$b*smath8) , post$sigma)
#calculate predicted math12 scores based on the function above and the sequence of math8 scores
predict.math12<-sapply(smath8.seq, premath12)
#calculate the 95% CI of these predicted scores
math12.ci<-apply(predict.math12,2,PI, prob=0.95)
View(predict.math12)
premath12<-function(smath8) dnorm((post$a + post$b*smath8) , post$sigma)
#calculate predicted math12 scores based on the function above and the sequence of math8 scores
predict.math12<-sapply(smath8.seq, premath12)
math12.ci<-apply(predict.math12,2,PI, prob=0.95)
sim<-sim(m1, data=list(smath8=smath8.seq))
