require(mvtnorm) # see http://dirk.eddelbuettel.com/blog/2012/08/05/ on its importance
require(QRM) # for fit.mst(), fit.GPD()
First, we download the data.
file <- "DAX.RAW.txt"
if(!file.exists(file))
system(paste0("wget http://www.ma.hw.ac.uk/~mcneil/ftp/", file)) # download the data (on Unix; this may or may not work for you; if it doesn't, just download the data manually)
db <- read.table(file, header=TRUE, row.names="Positions", # read the *d*ata *b*ase
encoding="UTF-8") # for 'didactical' reasons
str(db) # check the *str*ucture of 'db', what is it... ahh, a data frame!
## 'data.frame': 6147 obs. of 23 variables:
## $ ALLIANZ.HLDG. : num 156 156 161 162 164 ...
## $ COMMERZBANK : num 147 147 149 152 152 ...
## $ DRESDNER.BANK : num 18.4 18.4 18.8 18.9 18.9 ...
## $ BMW : num 104 109 110 111 109 ...
## $ SCHERING : num 36.9 37.4 37.8 37.9 37.4 ...
## $ BASF : num 15 15.4 15.6 15.8 15.8 ...
## $ BAYER : num 12.2 11.9 12.1 12.7 12.7 ...
## $ BAYERISCHE.VBK.: num 23.5 22.9 23.4 23.7 23.9 ...
## $ BAYER.HYPBK. : num 23.4 23.2 23.3 23.5 23.4 ...
## $ DEGUSSA : num 203 207 208 210 214 ...
## $ DEUTSCHE.BANK : num 22.3 22.5 22.9 23 23.3 ...
## $ CONTINENTAL : num 8.54 8.83 8.78 8.83 8.73 8.82 8.74 8.73 8.74 8.74 ...
## $ VOLKSWAGEN : num 134 140 145 144 140 ...
## $ DAIMLER.BENZ : num 17 17.6 17.8 17.8 17.7 ...
## $ HOECHST : num 13.8 13.8 14.2 14.3 14.2 ...
## $ SIEMENS : num 20.8 21.1 21.3 21.4 21.5 ...
## $ KARSTADT : num 360 360 362 369 368 ...
## $ LINDE : num 136 137 140 142 144 ...
## $ THYSSEN : num 67.5 68.4 67.5 71.6 71.2 ...
## $ MANNESMANN : num 85 86.5 87.8 88.7 88.6 ...
## $ MAN : num 118 119 125 125 127 ...
## $ RWE : num 11.7 11.9 12 11.9 12 ...
## $ INDEX : num 536 545 552 556 557 ...
str(rn <- rownames(db)) # row names contain dates
## chr [1:6147] "01/01/1973" "01/02/1973" "01/03/1973" ...
date <- as.Date(rn, format="%m/%d/%Y") # convert the dates to *proper* date objects
str(date)
## Date[1:6147], format: "1973-01-01" "1973-01-02" "1973-01-03" "1973-01-04" ...
rownames(db) <- date # use the newly formatted dates as row names
Pick out the sub-database of stock prices we work with here.
start.date <- "1985-01-02"
end.date <- "1994-12-30"
S <- db[start.date <= date & date <= end.date, c("BMW", "SIEMENS")]
str(S)
## 'data.frame': 2608 obs. of 2 variables:
## $ BMW : num 273 272 269 273 272 ...
## $ SIEMENS: num 45.9 45.7 45.9 46.1 46.6 ...
head(S) # show the beginning
## BMW SIEMENS
## 1985-01-02 273.37 45.90
## 1985-01-03 271.91 45.66
## 1985-01-04 269.36 45.85
## 1985-01-07 272.64 46.14
## 1985-01-08 271.91 46.57
## 1985-01-09 280.28 47.14
S[1:6,] # the same
## BMW SIEMENS
## 1985-01-02 273.37 45.90
## 1985-01-03 271.91 45.66
## 1985-01-04 269.36 45.85
## 1985-01-07 272.64 46.14
## 1985-01-08 271.91 46.57
## 1985-01-09 280.28 47.14
tail(S) # show the end
## BMW SIEMENS
## 1994-12-23 768.5 64.53
## 1994-12-26 768.5 64.53
## 1994-12-27 773.0 65.20
## 1994-12-28 771.5 65.03
## 1994-12-29 760.0 64.35
## 1994-12-30 770.0 64.90
Use a basic plot of each time series to check if anything is ‘suspicious’.
rns <- rownames(S) # row names of S (character)
## BMW
plot(as.Date(rns), S[,"BMW"], type="l",
main="BMW stock data", # title
xlab=expression(Date~italic(t)), # x-axis label; or simply use "Date t"
ylab=expression(Stock~price~italic(S[t]))) # y-axis label
## Siemens
plot(as.Date(rns), S[,"SIEMENS"], type="l",
main="Siemens stock data", # title
xlab=expression(Date~italic(t)), # x-axis label
ylab=expression(Stock~price~italic(S[t]))) # y-axis label
Next, compute the risk-factor changes and plot them (here: against each other).
ran <- range(X <- apply(log(S), 2, diff)) # risk-factor changes and range
plot(X, xlim=ran, ylim=ran, main="Risk-factor changes", cex=0.2)
##' @title Compute the Loss Operator
##' @param x matrix of risk-factor changes
##' @param w. weights w. = lambda_j * S_{t,j}
##' @return losses
##' @author Marius Hofert
loss_operator <- function(x, w.)
-rowSums(expm1(x) * matrix(w., nrow=nrow(x), ncol=length(w.), byrow=TRUE))
##' @title Non-parametric VaR estimator
##' @param L losses
##' @param alpha confidence level
##' @return Non-parametric estimate of VaR at level alpha
##' @author Marius Hofert
VaR_hat <- function(L, alpha) quantile(L, probs=alpha, names=FALSE)
##' @title Non-parametric ES estimator
##' @param L losses
##' @param alpha confidence level
##' @return Non-parametric estimate of ES at level alpha
##' @author Marius Hofert
ES_hat <- function(L, alpha) mean(L[L > VaR_hat(L, alpha=alpha)])
##' @title Estimate VaR and ES
##' @param S stock data, an (n, d)-matrix
##' @param lambda number of shares of each stock
##' @param alpha confidence level for VaR and ES
##' @param method a character string specifying the estimator
##' @param ... additional arguments passed to the various methods
##' @return a list containing the estimated risk measures VaR and ES, and
##' possibly other results (depending on the estimator)
##' @author Marius Hofert
risk_measure <- function(S, lambda, alpha,
method = c("var.cov", "historical", "MC.N", "MC.t", "GPD"),
...)
{
## Input checks and conversions
if(!is.matrix(S)) S <- rbind(S, deparse.level=0L) # to guarantee that ncol() works...
stopifnot(0 < alpha, alpha < 1, # check whether alpha is in (0,1)
length(lambda) == ncol(S), lambda > 0) # check length and sign of lambda
method <- match.arg(method) # match correct method if not fully provided
## Ingredients required for *all* methods
X <- apply(log(S), 2, diff) # compute risk-factor changes
if(!length(X)) stop("'S' should have more than just one line") # check
S. <- as.numeric(tail(S, n=1)) # pick out last available stock prices ("today")
w. <- lambda * S. # weights w.
## Method switch (now consider the various methods)
switch(method,
"var.cov" = { # variance-covariance method
## Estimate a multivariate normal distribution
mu.hat <- colMeans(X) # estimate the mean vector mu
Sigma.hat <- var(X) # estimate the covariance matrix Sigma
L.delta.mean <- -sum(w. * mu.hat) # mean of the approx. normal df of L^{\Delta}
L.delta.sd <- sqrt(t(w.) %*% Sigma.hat %*% w.) # standard deviation of the approx. normal df of L^{\Delta}
## Compute VaR and ES and return
qa <- qnorm(alpha)
list(VaR = L.delta.mean + L.delta.sd * qa,
ES = L.delta.mean + L.delta.sd * dnorm(qa) / (1-alpha))
## => We could just return a bivariate vector here, but
## for other methods, we might want to return additional
## auxiliary results, and we should *always* return similar
## objects (here: lists)
},
"historical" = { # historical simulation method
## Using empirically estimated risk measures
L <- loss_operator(X, w.=w.) # compute historical losses
## Compute VaR and ES and return
list(VaR = VaR_hat(L, alpha),
ES = ES_hat(L, alpha))
},
"MC.N" = { # Monte Carlo based on a fitted multivariate normal
stopifnot(hasArg(N)) # check if the number 'N' of MC replications has been provided (via '...')
N <- list(...)$N # pick out N from '...'
mu.hat <- colMeans(X) # estimate the mean vector mu
Sigma.hat <- var(X) # estimate the covariance matrix Sigma
X. <- rmvnorm(N, mean=mu.hat, sigma=Sigma.hat) # simulate risk-factor changes
L <- loss_operator(X., w.) # compute corresponding (simulated) losses
## Compute VaR and ES and return
list(VaR = VaR_hat(L, alpha), # empirically estimate VaR
ES = ES_hat(L, alpha), # empirically estimate ES
## Additional quantities returned here
mu = mu.hat, # fitted mean vector
Sigma = Sigma.hat) # fitted covariance matrix
},
"MC.t" = { # Monte Carlo based on a fitted multivariate t
stopifnot(hasArg(N)) # check if the number 'N' of MC replications has been provided (via '...')
N <- list(...)$N # pick out N from '...'
fit <- fit.mst(X, method = "BFGS") # fit a multivariate t distribution
X. <- rmvt(N, sigma=as.matrix(fit$Sigma), df=fit$df, delta=fit$mu) # simulate risk-factor changes
L <- loss_operator(X., w.) # compute corresponding (simulated) losses
## Compute VaR and ES and return
list(VaR = VaR_hat(L, alpha), # empirically estimate VaR
ES = ES_hat(L, alpha), # empirically estimate ES
## Additional quantities returned here
mu = fit$mu, # fitted location vector
sigma = fit$Sigma, # fitted dispersion matrix
Sigma = fit$covariance, # fitted covariance matrix
df = fit$df) # fitted degrees of freedom
},
"GPD" = { # simulate losses from a fitted Generalized Pareto distribution (GPD); this is underlying the Peaks-over-threshold method
stopifnot(hasArg(q)) # check if the quantile-threshold 'q' has been provided
L. <- loss_operator(X, w.=w.) # historical losses
u <- quantile(L., probs=list(...)$q, names=FALSE) # determine the threshold as the q-quantile of the historical losses
fit <- fit.GPD(L., threshold=u) # fit a GPD to the excesses
xi <- fit$par.ests[["xi"]] # fitted xi
beta <- fit$par.ests[["beta"]] # fitted beta
## warning("verify (e.g., by a mean-excess plot) that the fitted GPD model is adequate!")
if(xi <= 0) stop("Risk measures only implemented for xi > 0.")
## Now compute semi-parametric VaR and ES estimates
## G_{xi,beta}(x) = 1-(1+xi*x/beta)^{-1/xi} if xi != 0
L.. <- L.[L. > u] - u # excesses over u
Fbu <- length(L..) / length(L.) # N_u/n
VaR <- u + (beta/xi)*(((1-alpha)/Fbu)^(-xi)-1) # see McNeil, Frey, Embrechts (2005, p. 283; 2015, p. 155)
ES <- (VaR + beta-xi*u) / (1-xi)
if(xi >= 1) ES <- Inf # adjust to be Inf if xi >= 1 (i.e., ES < 0); see Coles (2001, p. 79)
## Return
list(VaR = VaR, # parametrically estimate VaR
ES = ES, # parametrically estimate ES
## Additional quantities returned here
xi = xi, # fitted xi
beta = beta, # fitted beta
converged = fit$converged, # did the fitting algorithm converge?
u = u, # threshold
excess = L..) # excesses over u
},
stop("Wrong 'method'"))
}
Specify a setup.
lambda <- c(1, 10) # (example) number of shares of the two stocks
alpha <- 0.99 # confidence levels for computing the risk measures
N <- 1e4 # Monte Carlo sample size
Estimate VaR and ES with the various methods.
set.seed(271) # set a seed so that all simulation results are reproducible; see ?set.seed
var.cov <- risk_measure(S, lambda=lambda, alpha=alpha, method="var.cov")
historical <- risk_measure(S, lambda=lambda, alpha=alpha, method="historical")
MC.N <- risk_measure(S, lambda=lambda, alpha=alpha, method="MC.N", N=N)
GPD <- risk_measure(S, lambda=lambda, alpha=alpha, method="GPD", N=N, q=0.9)
MC.t <- risk_measure(S, lambda=lambda, alpha=alpha, method="MC.t", N=N)
## Pick out VaR and ES for all methods
(rm <- rbind("Var.-cov." = unlist(var.cov),
"MC (normal)" = unlist(MC.N[c("VaR", "ES")]),
"Hist. sim." = unlist(historical),
"GPD" = unlist(GPD [c("VaR", "ES")]),
"MC (t)" = unlist(MC.t[c("VaR", "ES")])))
## VaR ES
## Var.-cov. 46.73221 53.59655
## MC (normal) 47.29791 53.66101
## Hist. sim. 54.41306 80.31111
## GPD 55.99662 82.49985
## MC (t) 56.56600 87.05234
Transform the excesses with the fitted GPD distribution function . The resulting sample should be close to one of a standard uniform distribution.
excess <- GPD$excess
xi.hat <- GPD$xi
beta.hat <- GPD$beta
z <- pGPD(excess, xi=xi.hat, beta=beta.hat) # should be U[0,1]
plot(z, ylab="Fitted GPD applied to the excesses") # looks fine
We can also consider a (more sophisticated) Q-Q plot for this task.
excess. <- sort(excess) # sorted data
qF <- function(p) qGPD(p, xi=xi.hat, beta=beta.hat)
qF. <- qF(ppoints(length(excess.))) # theoretical quantiles
plot(qF., excess., xlab="Theoretical quantiles",
ylab="Sample quantiles", main=paste0("Q-Q plot for the fitted GPD(",
round(xi.hat, 2),", ",round(beta.hat, 2),") distribution"))
qqline(y=excess., distribution=qF)
Compute historical losses (for creating a histogram); see risk.measure()
.
S. <- as.numeric(tail(S, n=1)) # pick out last available stock prices ("today")
w. <- lambda * S. # weights w.
L <- loss_operator(X, w.=w.) # historical losses
summary(L) # get important statistics about the losses
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -123.1000 -11.0000 0.0000 -0.5615 9.5360 174.5000
Plot a histogram of the losses including the VaR and ES estimates.
doPDF <- !dev.interactive(orNone=TRUE)
if(doPDF) pdf(file=(file <- "02_Risk_measure_estimators.pdf"), width=8, height=6)
hist(L, breaks="Scott", probability=TRUE, xlim=c(0, max(L, rm)),
main=substitute(bold("Histogram of losses"~italic(L)~"from"~sd~"to"~ed~
"with"~VaR[a]<=ES[a]),
list(a=alpha, sd=start.date, ed=end.date)),
xlab=expression("Losses"~italic(L)~"> 0"), col="gray90") # histogram
box() # box around histogram
col <- c("black", "royalblue3", "darkgreen", "red", "darkorange2")
for(i in seq_len(nrow(rm))) abline(v=rm[i,], col=col[i]) # colored vertical lines indicating VaR and ES
legend("topright", bty="n", inset=0.04, lty=1, col=col, legend=rownames(rm),
title=as.expression(substitute(VaR[a]~"and"~ES[a], list(a=alpha)))) # legend
if(doPDF) dev.off()
We can observe three things:
The variance-covariance method and the Monte Carlo method based on a fitted multivariate normal distribution lead to similar results (they both assume multivariate normal distributed risk-factor changes).
The historical simulation method, however, implies that the loss distribution is more heavy-tailed. This is captured quite well by the risk measure estimates based on the Monte Carlo method for multivariate t distributed risk-factor changes (\(\nu\approx 3.02\)); more adequate here.
The GPD-based EVT approach leads results comparable to the Monte Carlo method based on a fitted \(t\) distribution and the historical simulation method.