require(mvtnorm) # see http://dirk.eddelbuettel.com/blog/2012/08/05/ on its importance
require(QRM) # for fit.mst(), fit.GPD()

1 Working with the (BMW, Siemens) data

First, we download the data.

file <- "DAX.RAW.txt"
if(!file.exists(file))
    system(paste0("wget http://www.ma.hw.ac.uk/~mcneil/ftp/", file)) # download the data (on Unix; this may or may not work for you; if it doesn't, just download the data manually)
db <- read.table(file, header=TRUE, row.names="Positions", # read the *d*ata *b*ase
                 encoding="UTF-8") # for 'didactical' reasons
str(db) # check the *str*ucture of 'db', what is it... ahh, a data frame!
## 'data.frame':    6147 obs. of  23 variables:
##  $ ALLIANZ.HLDG.  : num  156 156 161 162 164 ...
##  $ COMMERZBANK    : num  147 147 149 152 152 ...
##  $ DRESDNER.BANK  : num  18.4 18.4 18.8 18.9 18.9 ...
##  $ BMW            : num  104 109 110 111 109 ...
##  $ SCHERING       : num  36.9 37.4 37.8 37.9 37.4 ...
##  $ BASF           : num  15 15.4 15.6 15.8 15.8 ...
##  $ BAYER          : num  12.2 11.9 12.1 12.7 12.7 ...
##  $ BAYERISCHE.VBK.: num  23.5 22.9 23.4 23.7 23.9 ...
##  $ BAYER.HYPBK.   : num  23.4 23.2 23.3 23.5 23.4 ...
##  $ DEGUSSA        : num  203 207 208 210 214 ...
##  $ DEUTSCHE.BANK  : num  22.3 22.5 22.9 23 23.3 ...
##  $ CONTINENTAL    : num  8.54 8.83 8.78 8.83 8.73 8.82 8.74 8.73 8.74 8.74 ...
##  $ VOLKSWAGEN     : num  134 140 145 144 140 ...
##  $ DAIMLER.BENZ   : num  17 17.6 17.8 17.8 17.7 ...
##  $ HOECHST        : num  13.8 13.8 14.2 14.3 14.2 ...
##  $ SIEMENS        : num  20.8 21.1 21.3 21.4 21.5 ...
##  $ KARSTADT       : num  360 360 362 369 368 ...
##  $ LINDE          : num  136 137 140 142 144 ...
##  $ THYSSEN        : num  67.5 68.4 67.5 71.6 71.2 ...
##  $ MANNESMANN     : num  85 86.5 87.8 88.7 88.6 ...
##  $ MAN            : num  118 119 125 125 127 ...
##  $ RWE            : num  11.7 11.9 12 11.9 12 ...
##  $ INDEX          : num  536 545 552 556 557 ...
str(rn <- rownames(db)) # row names contain dates
##  chr [1:6147] "01/01/1973" "01/02/1973" "01/03/1973" ...
date <- as.Date(rn, format="%m/%d/%Y") # convert the dates to *proper* date objects
str(date)
##  Date[1:6147], format: "1973-01-01" "1973-01-02" "1973-01-03" "1973-01-04" ...
rownames(db) <- date # use the newly formatted dates as row names

Pick out the sub-database of stock prices we work with here.

start.date <- "1985-01-02"
end.date   <- "1994-12-30"
S <- db[start.date <= date & date <= end.date, c("BMW", "SIEMENS")]
str(S)
## 'data.frame':    2608 obs. of  2 variables:
##  $ BMW    : num  273 272 269 273 272 ...
##  $ SIEMENS: num  45.9 45.7 45.9 46.1 46.6 ...
head(S) # show the beginning
##               BMW SIEMENS
## 1985-01-02 273.37   45.90
## 1985-01-03 271.91   45.66
## 1985-01-04 269.36   45.85
## 1985-01-07 272.64   46.14
## 1985-01-08 271.91   46.57
## 1985-01-09 280.28   47.14
S[1:6,] # the same
##               BMW SIEMENS
## 1985-01-02 273.37   45.90
## 1985-01-03 271.91   45.66
## 1985-01-04 269.36   45.85
## 1985-01-07 272.64   46.14
## 1985-01-08 271.91   46.57
## 1985-01-09 280.28   47.14
tail(S) # show the end
##              BMW SIEMENS
## 1994-12-23 768.5   64.53
## 1994-12-26 768.5   64.53
## 1994-12-27 773.0   65.20
## 1994-12-28 771.5   65.03
## 1994-12-29 760.0   64.35
## 1994-12-30 770.0   64.90

Use a basic plot of each time series to check if anything is ‘suspicious’.

rns <- rownames(S) # row names of S (character)
## BMW
plot(as.Date(rns), S[,"BMW"], type="l",
     main="BMW stock data", # title
     xlab=expression(Date~italic(t)), # x-axis label; or simply use "Date t"
     ylab=expression(Stock~price~italic(S[t]))) # y-axis label

## Siemens
plot(as.Date(rns), S[,"SIEMENS"], type="l",
     main="Siemens stock data", # title
     xlab=expression(Date~italic(t)), # x-axis label
     ylab=expression(Stock~price~italic(S[t]))) # y-axis label

Next, compute the risk-factor changes and plot them (here: against each other).

ran <- range(X <- apply(log(S), 2, diff)) # risk-factor changes and range
plot(X, xlim=ran, ylim=ran, main="Risk-factor changes", cex=0.2)

2 Implement (and document) some auxiliary functions

##' @title Compute the Loss Operator
##' @param x matrix of risk-factor changes
##' @param w. weights w. = lambda_j * S_{t,j}
##' @return losses
##' @author Marius Hofert
loss_operator <- function(x, w.)
    -rowSums(expm1(x) * matrix(w., nrow=nrow(x), ncol=length(w.), byrow=TRUE))
##' @title Non-parametric VaR estimator
##' @param L losses
##' @param alpha confidence level
##' @return Non-parametric estimate of VaR at level alpha
##' @author Marius Hofert
VaR_hat <- function(L, alpha) quantile(L, probs=alpha, names=FALSE)
##' @title Non-parametric ES estimator
##' @param L losses
##' @param alpha confidence level
##' @return Non-parametric estimate of ES at level alpha
##' @author Marius Hofert
ES_hat  <- function(L, alpha) mean(L[L > VaR_hat(L, alpha=alpha)])
##' @title Estimate VaR and ES
##' @param S stock data, an (n, d)-matrix
##' @param lambda number of shares of each stock
##' @param alpha confidence level for VaR and ES
##' @param method a character string specifying the estimator
##' @param ... additional arguments passed to the various methods
##' @return a list containing the estimated risk measures VaR and ES, and
##'         possibly other results (depending on the estimator)
##' @author Marius Hofert
risk_measure <- function(S, lambda, alpha,
                         method = c("var.cov", "historical", "MC.N", "MC.t", "GPD"),
                         ...)
{
    ## Input checks and conversions
    if(!is.matrix(S)) S <- rbind(S, deparse.level=0L) # to guarantee that ncol() works...
    stopifnot(0 < alpha, alpha < 1, # check whether alpha is in (0,1)
              length(lambda) == ncol(S), lambda > 0) # check length and sign of lambda
    method <- match.arg(method) # match correct method if not fully provided

    ## Ingredients required for *all* methods
    X <- apply(log(S), 2, diff) # compute risk-factor changes
    if(!length(X)) stop("'S' should have more than just one line") # check
    S. <- as.numeric(tail(S, n=1)) # pick out last available stock prices ("today")
    w. <- lambda * S. # weights w.

    ## Method switch (now consider the various methods)
    switch(method,
           "var.cov" = { # variance-covariance method
               ## Estimate a multivariate normal distribution
               mu.hat <- colMeans(X) # estimate the mean vector mu
               Sigma.hat  <- var(X) # estimate the covariance matrix Sigma
               L.delta.mean <- -sum(w. * mu.hat) # mean of the approx. normal df of L^{\Delta}
               L.delta.sd <- sqrt(t(w.) %*% Sigma.hat %*% w.) # standard deviation of the approx. normal df of L^{\Delta}
               ## Compute VaR and ES and return
               qa <- qnorm(alpha)
               list(VaR = L.delta.mean + L.delta.sd * qa,
                    ES  = L.delta.mean + L.delta.sd * dnorm(qa) / (1-alpha))
               ## => We could just return a bivariate vector here, but
               ##    for other methods, we might want to return additional
               ##    auxiliary results, and we should *always* return similar
               ##    objects (here: lists)
           },
           "historical" = { # historical simulation method
               ## Using empirically estimated risk measures
               L <- loss_operator(X, w.=w.) # compute historical losses
               ## Compute VaR and ES and return
               list(VaR = VaR_hat(L, alpha),
                    ES =   ES_hat(L, alpha))
           },
           "MC.N" = { # Monte Carlo based on a fitted multivariate normal
               stopifnot(hasArg(N)) # check if the number 'N' of MC replications has been provided (via '...')
               N <- list(...)$N # pick out N from '...'
               mu.hat <- colMeans(X) # estimate the mean vector mu
               Sigma.hat  <- var(X) # estimate the covariance matrix Sigma
               X. <- rmvnorm(N, mean=mu.hat, sigma=Sigma.hat) # simulate risk-factor changes
               L <- loss_operator(X., w.) # compute corresponding (simulated) losses
               ## Compute VaR and ES and return
               list(VaR = VaR_hat(L, alpha), # empirically estimate VaR
                    ES  =  ES_hat(L, alpha), # empirically estimate ES
                    ## Additional quantities returned here
                    mu    = mu.hat, # fitted mean vector
                    Sigma = Sigma.hat) # fitted covariance matrix
           },
           "MC.t" = { # Monte Carlo based on a fitted multivariate t
               stopifnot(hasArg(N)) # check if the number 'N' of MC replications has been provided (via '...')
               N <- list(...)$N # pick out N from '...'
               fit <- fit.mst(X, method = "BFGS") # fit a multivariate t distribution
               X. <- rmvt(N, sigma=as.matrix(fit$Sigma), df=fit$df, delta=fit$mu) # simulate risk-factor changes
               L <- loss_operator(X., w.) # compute corresponding (simulated) losses
               ## Compute VaR and ES and return
               list(VaR = VaR_hat(L, alpha), # empirically estimate VaR
                    ES =   ES_hat(L, alpha), # empirically estimate ES
                    ## Additional quantities returned here
                    mu    = fit$mu, # fitted location vector
                    sigma = fit$Sigma, # fitted dispersion matrix
                    Sigma = fit$covariance, # fitted covariance matrix
                    df    = fit$df) # fitted degrees of freedom
           },
           "GPD" = { # simulate losses from a fitted Generalized Pareto distribution (GPD); this is underlying the Peaks-over-threshold method
               stopifnot(hasArg(q)) # check if the quantile-threshold 'q' has been provided
               L. <- loss_operator(X, w.=w.) # historical losses
               u <- quantile(L., probs=list(...)$q, names=FALSE) # determine the threshold as the q-quantile of the historical losses
               fit <- fit.GPD(L., threshold=u) # fit a GPD to the excesses
               xi <- fit$par.ests[["xi"]] # fitted xi
               beta <- fit$par.ests[["beta"]] # fitted beta
               ## warning("verify (e.g., by a mean-excess plot) that the fitted GPD model is adequate!")
               if(xi <= 0) stop("Risk measures only implemented for xi > 0.")
               ## Now compute semi-parametric VaR and ES estimates
               ## G_{xi,beta}(x) = 1-(1+xi*x/beta)^{-1/xi} if xi != 0
               L.. <- L.[L. > u] - u # excesses over u
               Fbu <- length(L..) / length(L.) # N_u/n
               VaR <- u + (beta/xi)*(((1-alpha)/Fbu)^(-xi)-1) # see McNeil, Frey, Embrechts (2005, p. 283; 2015, p. 155)
               ES <- (VaR + beta-xi*u) / (1-xi)
               if(xi >= 1) ES <- Inf # adjust to be Inf if xi >= 1 (i.e., ES < 0); see Coles (2001, p. 79)
               ## Return
               list(VaR = VaR, # parametrically estimate VaR
                    ES  = ES, # parametrically estimate ES
                    ## Additional quantities returned here
                    xi     = xi, # fitted xi
                    beta   = beta, # fitted beta
                    converged = fit$converged, # did the fitting algorithm converge?
                    u      = u, # threshold
                    excess = L..) # excesses over u
           },
           stop("Wrong 'method'"))
}

3 Compute VaR and ES for the different methods

Specify a setup.

lambda <- c(1, 10) # (example) number of shares of the two stocks
alpha <- 0.99 # confidence levels for computing the risk measures
N <- 1e4 # Monte Carlo sample size

Estimate VaR and ES with the various methods.

set.seed(271) # set a seed so that all simulation results are reproducible; see ?set.seed
var.cov    <- risk_measure(S, lambda=lambda, alpha=alpha, method="var.cov")
historical <- risk_measure(S, lambda=lambda, alpha=alpha, method="historical")
MC.N       <- risk_measure(S, lambda=lambda, alpha=alpha, method="MC.N", N=N)
GPD        <- risk_measure(S, lambda=lambda, alpha=alpha, method="GPD", N=N, q=0.9)
MC.t       <- risk_measure(S, lambda=lambda, alpha=alpha, method="MC.t", N=N)

## Pick out VaR and ES for all methods
(rm <- rbind("Var.-cov."   = unlist(var.cov),
             "MC (normal)" = unlist(MC.N[c("VaR", "ES")]),
             "Hist. sim."  = unlist(historical),
             "GPD"         = unlist(GPD [c("VaR", "ES")]),
             "MC (t)"      = unlist(MC.t[c("VaR", "ES")])))
##                  VaR       ES
## Var.-cov.   46.73221 53.59655
## MC (normal) 47.29791 53.66101
## Hist. sim.  54.41306 80.31111
## GPD         55.99662 82.49985
## MC (t)      56.56600 87.05234

Graphical goodness-of-fit check for the GPD

Transform the excesses with the fitted GPD distribution function . The resulting sample should be close to one of a standard uniform distribution.

excess <- GPD$excess
xi.hat <- GPD$xi
beta.hat <- GPD$beta
z <- pGPD(excess, xi=xi.hat, beta=beta.hat) # should be U[0,1]
plot(z, ylab="Fitted GPD applied to the excesses") # looks fine

We can also consider a (more sophisticated) Q-Q plot for this task.

excess. <- sort(excess) # sorted data
qF <- function(p) qGPD(p, xi=xi.hat, beta=beta.hat)
qF. <- qF(ppoints(length(excess.))) # theoretical quantiles
plot(qF., excess., xlab="Theoretical quantiles",
     ylab="Sample quantiles", main=paste0("Q-Q plot for the fitted GPD(",
     round(xi.hat, 2),", ",round(beta.hat, 2),") distribution"))
qqline(y=excess., distribution=qF)

4 Graphical analysis

Compute historical losses (for creating a histogram); see risk.measure().

S. <- as.numeric(tail(S, n=1)) # pick out last available stock prices ("today")
w. <- lambda * S. # weights w.
L <- loss_operator(X, w.=w.) # historical losses
summary(L) # get important statistics about the losses
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -123.1000  -11.0000    0.0000   -0.5615    9.5360  174.5000

Plot a histogram of the losses including the VaR and ES estimates.

doPDF <- !dev.interactive(orNone=TRUE)
if(doPDF) pdf(file=(file <- "02_Risk_measure_estimators.pdf"), width=8, height=6)
hist(L, breaks="Scott", probability=TRUE, xlim=c(0, max(L, rm)),
     main=substitute(bold("Histogram of losses"~italic(L)~"from"~sd~"to"~ed~
                          "with"~VaR[a]<=ES[a]),
                     list(a=alpha, sd=start.date, ed=end.date)),
     xlab=expression("Losses"~italic(L)~"> 0"), col="gray90") # histogram
box() # box around histogram
col <- c("black", "royalblue3", "darkgreen", "red", "darkorange2")
for(i in seq_len(nrow(rm))) abline(v=rm[i,], col=col[i]) # colored vertical lines indicating VaR and ES
legend("topright", bty="n", inset=0.04, lty=1, col=col, legend=rownames(rm),
       title=as.expression(substitute(VaR[a]~"and"~ES[a], list(a=alpha)))) # legend
if(doPDF) dev.off()

We can observe three things:

  1. The variance-covariance method and the Monte Carlo method based on a fitted multivariate normal distribution lead to similar results (they both assume multivariate normal distributed risk-factor changes).

  2. The historical simulation method, however, implies that the loss distribution is more heavy-tailed. This is captured quite well by the risk measure estimates based on the Monte Carlo method for multivariate t distributed risk-factor changes (\(\nu\approx 3.02\)); more adequate here.

  3. The GPD-based EVT approach leads results comparable to the Monte Carlo method based on a fitted \(t\) distribution and the historical simulation method.