ggpmisc 0.2.12
Density filtering

Pedro J. Aphalo

2016-10-21

Introduction

This vignette demonstrates how the default arguments perform with artificial data sets of different sizes drawn from different theoretical distributions. It was initially used for testing, but it also shows what to expect in different situations.

Preliminaries

library(ggpmisc)
library(ggplot2)
library(tibble)

We define a function to simplify the generation of random data sets. It uses a fixed seed, so the same data are generated each time.

# Build a reproducible artificial data set for the examples in this vignette.
#
# Arguments:
#   nrow  Requested number of observations (a single non-negative number).
#         An odd value is increased by one so that both groups get the same
#         number of rows.
#   rfun  Random-number generator called as rfun(n, ...), for example rnorm,
#         runif, rgamma or rbeta. It is called twice: first for x, then for y.
#   ...   Further arguments forwarded unchanged to rfun.
#   seed  Value passed to set.seed() before drawing (default 1001, the value
#         that was previously hard-coded). Use NULL to skip seeding.
#
# Value: a tibble with numeric columns x and y and a character column group
# ("A" for the first half of the rows, "B" for the second half).
#
# Side effect: unless seed is NULL, set.seed() resets the global RNG state.
make_data_tbl <- function(nrow = 100, rfun = rnorm, ..., seed = 1001) {
  # Fail early with a clear message instead of an obscure downstream error.
  stopifnot(
    is.function(rfun),
    is.numeric(nrow), length(nrow) == 1L, !is.na(nrow), nrow >= 0
  )

  # Round odd sizes up so the two groups are balanced.
  if (nrow %% 2 != 0) {
    nrow <- nrow + 1
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }

  tibble::tibble(
    x = rfun(nrow, ...),
    y = rfun(nrow, ...),
    group = rep(c("A", "B"), each = nrow / 2)
  )
}

Tests with different data sets

Number of observations

By default the fraction of observations kept is 1/10.

# Six observations, default keep.fraction.
ggplot(make_data_tbl(6), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(color = "red")

# The same six observations, with keep.fraction set to one half.
ggplot(make_data_tbl(6), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(
    keep.fraction = 0.5,
    color = "red"
  )

# Default arguments with increasingly large data sets.

# 20 observations
ggplot(make_data_tbl(20), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(color = "red")

# 100 observations
ggplot(make_data_tbl(100), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(color = "red")

# 500 observations
ggplot(make_data_tbl(500), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(color = "red")

# 2000 observations
ggplot(make_data_tbl(2000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(color = "red")

# 2000 observations with non-default arguments.

# keep.fraction = 0.01
ggplot(make_data_tbl(2000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(keep.fraction = 0.01, color = "red")

# keep.sparse = FALSE
ggplot(make_data_tbl(2000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(keep.sparse = FALSE, color = "red")

# Two filter layers on one plot: keep.sparse = FALSE in red, then the
# default arguments in blue.
ggplot(make_data_tbl(2000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(keep.sparse = FALSE, color = "red") +
  stat_dens2d_filter(color = "blue")

# keep.fraction = 0.01 combined with keep.sparse = FALSE
ggplot(make_data_tbl(2000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(
    keep.sparse = FALSE,
    keep.fraction = 0.01,
    color = "red"
  )

# 10000 observations, default arguments.
ggplot(make_data_tbl(10000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(color = "red")

# 10000 observations, keep.fraction = 0.01.
ggplot(make_data_tbl(10000), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(keep.fraction = 0.01, color = "red")

Random draws from different theoretical distributions

# 1000 observations per plot, keep.fraction = 0.1 throughout; only the
# generating function passed to make_data_tbl() changes.

# runif with its default arguments
ggplot(make_data_tbl(1000, rfun = runif), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(keep.fraction = 0.1, color = "red")

# rgamma, shape = 2
ggplot(make_data_tbl(1000, rfun = rgamma, shape = 2), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(keep.fraction = 0.1, color = "red")

# rgamma, shape = 6
ggplot(make_data_tbl(1000, rfun = rgamma, shape = 6), aes(x = x, y = y)) +
  geom_point() +
  stat_dens2d_filter(keep.fraction = 0.1, color = "red")

# rbeta, shape1 = 3, shape2 = 12
ggplot(
  make_data_tbl(1000, rfun = rbeta, shape1 = 3, shape2 = 12),
  aes(x = x, y = y)
) +
  geom_point() +
  stat_dens2d_filter(keep.fraction = 0.1, color = "red")

Transformed scales

# rbeta data (shape1 = 3, shape2 = 12) plotted with a log10-transformed
# y scale.
ggplot(
  make_data_tbl(1000, rfun = rbeta, shape1 = 3, shape2 = 12),
  aes(x = x, y = y)
) +
  geom_point() +
  stat_dens2d_filter(keep.fraction = 0.1, color = "red") +
  scale_y_log10()