?? kde.r
字號:
###############################################################################
# Multivariate kernel density estimators
###############################################################################
###############################################################################
# Generate grid over a set of points
#
# Parameters
# x - data points
# H - bandwidth matrix
# tol - tolerance = extra coverage exceeding the range of x
# gridsize - number of points for each direction
#
# Returns
# gridx - list of intervals, one for each co-ord direction so that
# gridx[[1]] x gridx[[2]] x ... x gridx[[d]] is the grid
# stepsize - vector of step sizes
###############################################################################
make.grid.ks <- function(x, H, tol, gridsize, xmin, xmax, gridtype)
{
d <- ncol(x)
tol.H <- tol * diag(H)
if (missing(xmin))
xmin <- apply(x, 2, min) - tol.H
if (missing(xmax))
xmax <- apply(x, 2, max) + tol.H
stepsize <- rep(0, d)
gridx <- numeric(0)
if (length(gridsize)==1)
gridsize <- rep(gridsize, d)
if (missing(gridtype))
gridtype <- rep("linear", d)
gridtype.vec<- rep("", d)
for (i in 1:d)
{
gridtype1 <- tolower(substr(gridtype[i],1,1))
if (gridtype1=="l")
{
gridx <- c(gridx, list(seq(xmin[i], xmax[i], length=gridsize[i])))
stepsize[i] <- abs(gridx[[i]][1] - gridx[[i]][2])
gridtype.vec[i] <- "linear"
}
else if (gridtype1=="s")
{
gridx.temp <- seq(sign(xmin[i])*sqrt(abs(xmin[i])), sign(xmax[i])*sqrt(abs(xmax[i])), length=gridsize[i])
gridx <- c(gridx, list(sign(gridx.temp) * gridx.temp^2))
stepsize[i] <- NA
gridtype.vec[i] <- "sqrt"
}
}
gridx <- c(gridx, list(stepsize = stepsize, gridtype=gridtype.vec))
return(gridx)
}
###############################################################################
# Generate kernel (rectangular) support at data point
#
# Parameters
# x - data points
# H - bandwidth matrix
# tol - tolerance = extra coverage exceeding the range of x
#
# Returns
# list of min and max points of support (here we parameterise rectangles
# by their min = lower left co-ord and max = upper right coord)
###############################################################################
make.supp <- function(x, H, tol)
{
n <- nrow(x)
d <- ncol(x)
tol.H <- tol * diag(H)
xmin <- matrix(0, nr=n, nc=d)
xmax <- matrix(0, nr=n, nc=d)
for (i in 1:n)
{
xmin[i,] <- x[i,] - tol.H
xmax[i,] <- x[i,] + tol.H
}
return(list(xmin = xmin, xmax = xmax))
}
###############################################################################
# Find the grid points contained in kernel support rectangles
#
# Parameters
# gridx - grid (list of subdivided intervals)
# rectx - rectangles (list of min and max points)
#
# Returns
# list of min and max points of the grid for each rectangle
###############################################################################
find.gridpts <- function(gridx, suppx)
{
xmax <- suppx$xmax
xmin <- suppx$xmin
d <- ncol(xmax)
n <- nrow(xmax)
gridpts.min <- matrix(0, nc=d, nr=n)
gridpts.max <- gridpts.min
for (i in 1:n)
for (j in 1:d)
{
# find index of last element of gridx smaller than min support
tsum <- sum(xmin[i,j] >= gridx[[j]])
if (tsum==0)
gridpts.min[i,j] <- 1
else
gridpts.min[i,j] <- tsum
# find index of first element gridx greater than max support
gridpts.max[i,j] <- sum(xmax[i,j] >= gridx[[j]])
}
return(list(xmin=gridpts.min, xmax=gridpts.max))
}
###############################################################################
# Multivariate kernel density estimate using normal kernels
#
# Parameters
# x - points
# H - bandwidth matrix
# gridsize - number of interval points in grid
# supp - effective support of kernel
# eval.points - compute density estimate at these points (if missing
# and dim(x) = 2, 3 compute density estimate over grid)
# eval.levels - compute 3-D in 2-D slices taken at these level curves
#
# Returns
# list with first d components with the points that the density
# estimate is evaluated at, and values of the density estimate
###############################################################################
kde <- function(x, H, h, gridsize, gridtype, xmin, xmax, supp=3.7, eval.points, binned=FALSE, bgridsize, positive=FALSE, adj.positive)
{
if (is.vector(x))
{
if (missing(H)) d <- 1
else
{
if (is.vector(H)) d <- 1
else {x <- matrix(x, nrow=1); d <- ncol(x)}
}
}
else d <- ncol(x)
## compute binned estimator
if (binned)
{
if (!missing(eval.points))
stop("Both binned=TRUE and eval.points are non-empty.")
if (missing(bgridsize)) bgridsize <- default.gridsize(d)
if (positive & d==1)
{
y <- log(x)
fhat <- kde.binned(x=y, H=H, h=h, bgridsize=bgridsize, xmin=xmin, xmax=xmax)
fhat$estimate <- fhat$estimate/exp(fhat$eval.points)
fhat$eval.points <- exp(fhat$eval.points)
fhat$x <- x
}
else
fhat <- kde.binned(x=x, H=H, h=h, bgridsize=bgridsize, xmin=xmin, xmax=xmax)
}
else
{
## compute exact (non-binned) estimator
if (missing(gridsize)) gridsize <- default.gridsize(d)
## 1-dimensional
if (d==1)
{
if (!missing(H) & !missing(h))
stop("Both H and h are both specified.")
if (missing(h))
h <- sqrt(H)
if (missing(eval.points))
fhat <- kde.grid.1d(x=x, h=h, gridsize=gridsize, supp=supp, positive=positive, xmin=xmin, xmax=xmax, adj.positive=adj.positive, gridtype=gridtype)
else
fhat <- kde.points.1d(x=x, h=h, eval.points=eval.points, positive=positive, adj.positive=adj.positive)
}
## multi-dimensional
else
{
if (is.data.frame(x)) x <- as.matrix(x)
if (missing(eval.points))
{
if (d==2)
fhat <- kde.grid.2d(x=x, H=H, gridsize=gridsize, supp=supp, xmin=xmin, xmax=xmax, gridtype=gridtype)
else if (d == 3)
fhat <- kde.grid.3d(x=x, H=H, gridsize=gridsize, supp=supp, xmin=xmin, xmax=xmax, gridtype=gridtype)
else
stop("Need to specify eval.points for more than 3 dimensions")
}
else
fhat <- kde.points(x, H, eval.points)
}
}
fhat$binned <- binned
##fhat$gridtype <- gridtype
## add variable names
if (d==1)
{
x.names <- deparse(substitute(x))
}
else
{
x.names <- colnames(x)
if (is.null(x.names))
{
x.names <- strsplit(deparse(substitute(x)), "\\[")[[1]][1]
x.names <- paste(x.names, "[,", 1:d,"]",sep="")
}
}
fhat$names <- x.names
class(fhat) <- "kde"
return(fhat)
}
###############################################################################
### Multivariate binned kernel density estimate using normal kernels
###############################################################################
kde.binned <- function(x, H, h, bgridsize, xmin, xmax, bin.par)
{
## linear binning
if (missing(bin.par))
{
if (is.vector(x)) d <- 1
else d <- ncol(x)
if (d==1)
if (missing(H)) H <- as.matrix(h^2)
else {h <- sqrt(H); H <- as.matrix(H)}
if (!is.diagonal(H) & d > 1)
stop("Binned estimation defined for diagonal H only")
if (missing(bgridsize)) bgridsize <- default.gridsize(d)
bin.par <- binning(x=x, H=H, h, bgridsize, xmin, xmax, supp=3.7)
}
else
{
if (!is.list(bin.par$eval.points)) { d <- 1; bgridsize <- length(bin.par$eval.points)}
else { d <- length(bin.par$eval.points); bgridsize <- sapply(bin.par$eval.points, length)}
if (d==1)
if (missing(H)) H <- as.matrix(h^2)
else {h <- sqrt(H); H <- as.matrix(H)}
}
if (d==1)
range.x <- list(range(bin.par$eval.points))
else
range.x <- lapply(bin.par$eval.points, range)
fhat.grid <- drvkde(x=bin.par$counts, drv=rep(0,d), bandwidth=sqrt(diag(H)), binned=TRUE, range.x=range.x, se=FALSE, gridsize=bgridsize)
eval.points <- fhat.grid$x.grid
fhat.grid <- fhat.grid$est
fhat.grid[fhat.grid<0] <- 0
if (missing(x)) x <- NULL
if (d==1)
fhat <- list(x=x, eval.points=unlist(eval.points), estimate=fhat.grid, H=h^2, h=h)
else
fhat <- list(x=x, eval.points=eval.points, estimate=fhat.grid, H=H)
return(fhat)
}
#############################################################################
## Univariate kernel density estimate on a grid
#############################################################################
kde.grid.1d <- function(x, h, gridsize, supp=3.7, positive=FALSE, adj.positive, xmin, xmax, gridtype)
{
if (missing(xmin)) xmin <- min(x) - h*supp
if (missing(xmax)) xmax <- max(x) + h*supp
if (missing(gridtype)) gridtype <- "linear"
if (positive)
{
if (missing(adj.positive)) adj.positive <- abs(min(x))
y <- log(x + adj.positive) ## transform positive data x to real line
gridx <- seq(max(0, xmin), xmax, length=gridsize)
gridy <- log(gridx + adj.positive)
gridtype.vec <- "linear"
}
else
{
y <- x
gridtype1 <- tolower(substr(gridtype,1,1))
if (gridtype1=="l")
{
gridy <- seq(xmin, xmax, length=gridsize)
gridtype.vec <- "linear"
}
else if (gridtype1=="s")
{
gridy.temp <- seq(sign(xmin)*sqrt(abs(xmin)), sign(xmax)*sqrt(abs(xmax)), length=gridsize)
gridy <- sign(gridy.temp) * gridy.temp^2
gridtype.vec <- "sqrt"
}
}
n <- length(y)
est <- dnorm.mixt(x=gridy, mus=y, sigmas=rep(h, n), props=rep(1,n)/n)
fhat <- list(x=y, eval.points=gridy, estimate=est, h=h, H=h^2, gridtype=gridtype.vec)
if (positive)
{
## compute transformation KDE
fhat$estimate <- fhat$estimate / exp(gridy)
fhat$x <- x
fhat$eval.points <- gridx # exp(gridy) - adj.positive
}
class(fhat) <- "kde"
return(fhat)
}
###############################################################################
# Bivariate kernel density estimate using normal kernels, evaluated over grid
#
# Parameters
# x - data points
# H - bandwidth matrix
# gridsize - number of interval points in grid
# supp - effective support of kernel
#
# Returns
# list with fields
# x - data points
# eval.points - points that KDE is evaluated at
# estimate - KDE evaluated at eval.points
# H - bandwidth matrix
###############################################################################
kde.grid.2d <- function(x, H, gridsize, supp, gridx=NULL, grid.pts=NULL, xmin, xmax, gridtype)
{
# initialise grid
n <- nrow(x)
if (is.null(gridx))
gridx <- make.grid.ks(x, matrix.sqrt(H), tol=supp, gridsize=gridsize, xmin=xmin, xmax=xmax, gridtype=gridtype)
suppx <- make.supp(x, matrix.sqrt(H), tol=supp)
if (is.null(grid.pts))
grid.pts <- find.gridpts(gridx, suppx)
fhat.grid <- matrix(0, nrow=length(gridx[[1]]), ncol=length(gridx[[2]]))
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -