?? selector.r
字號:
## nstage - number of plug-in stages (1 or 2)## pilot - "amse" - different AMSE pilot## - "samse" - SAMSE pilot## pre - "scale" - pre-scaled data## - "sphere"- pre-sphered data #### Returns## matrix of psi functionals############################################################################psimat.2d <- function(x.star, nstage=1, pilot="samse", binned, bin.par){ if (nstage==1) psi.fun <- psifun1.2d(x.star, pilot=pilot, binned=binned, bin.par=bin.par) else if (nstage==2) psi.fun <- psifun2.2d(x.star, pilot=pilot, binned=binned, bin.par=bin.par) psi40 <- psi.fun[1] psi31 <- psi.fun[2] psi22 <- psi.fun[3] psi13 <- psi.fun[4] psi04 <- psi.fun[5] coeff <- c(1, 2, 1, 2, 4, 2, 1, 2, 1) psi.fun <- c(psi40, psi31, psi22, psi31, psi22, psi13, psi22, psi13, psi04) return(matrix(coeff * psi.fun, nc=3, nr=3))}psimat.nd <- function(x.star, d, nstage=1, pilot="samse", binned, bin.par){ if (nstage==1) psi.fun <- psifun1.nd(x.star, d=d, pilot=pilot, binned=binned, bin.par=bin.par) else if (nstage==2) psi.fun <- psifun2.nd(x.star, d=d, pilot=pilot, binned=binned, bin.par=bin.par) coeff <- Psi4.list(d)$coeff return(matrix(coeff * psi.fun, nc=d*(d+1)/2, nr=d*(d+1)/2))}### unconstrained pilot selectorspsimat.unconstr.nd <- function(x, nstage=1, Sd4, Sd6){ if (nstage==1) psi.fun <- psifun1.unconstr.nd(x=x, Sd4=Sd4, Sd6=Sd6) else if (nstage==2) psi.fun <- psifun2.unconstr.nd(x=x, Sd4=Sd4, Sd6=Sd6) return(invvec(psi.fun))} ############################################################################## Plug-in bandwidth selectors############################################################################# ############################################################################## Computes plug-in full bandwidth matrix - 2 to 6 dim#### Parameters## x - data points## Hstart - initial value for minimisation## nstage - number of plug-in stages (1 or 2)## pilot - "amse" - different AMSE pilot## - "samse" - SAMSE pilot## - "unconstr" - unconstrained pilot## pre - "scale" - pre-scaled data## - "sphere"- pre-sphered data #### Returns## Plug-in full bandwidth matrix###############################################################################hpi <- function(x, nstage=2, binned=TRUE, bgridsize){ ## 1-d selector is taken from KernSmooth's dpik if (missing(bgridsize)) bgridsize <- 401 return(dpik(x=x, level=nstage, gridsize=bgridsize))}Hpi <- function(x, nstage=2, pilot="samse", pre="sphere", Hstart, binned=FALSE, bgridsize, amise=FALSE){ n <- nrow(x) d <- ncol(x) RK <- (4*pi)^(-d/2) if(!is.matrix(x)) x <- as.matrix(x) if (substr(pre,1,2)=="sc") { x.star <- pre.scale(x) S12 <- diag(sqrt(diag(var(x)))) Sinv12 <- chol2inv(chol(S12)) } else if (substr(pre,1,2)=="sp") { x.star <- pre.sphere(x) S12 <- matrix.sqrt(var(x)) Sinv12 <- chol2inv(chol(S12)) } if (substr(pilot,1,1)=="a") pilot <- "amse" else if (substr(pilot,1,1)=="s") pilot <- "samse" else if (substr(pilot,1,1)=="u") pilot <- "unconstr" if (pilot=="amse" & d>2) stop("SAMSE pilot selectors are better for higher dimensions") if (pilot=="unconstr" & d>=6) stop("Uconstrained pilots not implemented yet for 6-dim data") if (missing(bgridsize) & binned) bgridsize <- default.gridsize(d) if (d > 4) binned <- FALSE if (binned) { ## linear binning H.max <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x.star) ##bin.par <- binning(x=x.star, bgridsize=bgridsize, H=sqrt(diag(H.max))) ## for large samples, take subset for pilot estimation nsub <- min(n, 1e4) x.star.sub <- x.star[1:nsub,] bin.par.sub <- binning(x=x.star.sub, bgridsize=bgridsize, H=sqrt(diag(H.max))) } else x.star.sub <- x.star ## psi4.mat is on pre-transformed data scale if (pilot!="unconstr") { if (d==2) psi4.mat <- psimat.2d(x.star.sub, nstage=nstage, pilot=pilot, binned=binned, bin.par=bin.par.sub) else psi4.mat <- psimat.nd(x.star.sub, d=d, nstage=nstage, pilot=pilot, binned=binned, bin.par=bin.par.sub) } else { ### symmetriser matrices for unconstrained pilot selectors ##Sd2 <- Sdr(d=d, r=2) Sd4 <- Sdr(d=d, r=4) Sd6 <- Sdr(d=d, r=6) psi4.mat <- psimat.unconstr.nd(x=x, nstage=nstage, Sd4=Sd4, Sd6=Sd6) } if (pilot=="unconstr") { ## use normal reference bandwidth as initial condition if (missing(Hstart)) Hstart <- (4/(n*(d + 2)))^(2/(d + 4)) * var(x) Hstart <- matrix.sqrt(Hstart) ## PI is estimate of AMISE pi.temp.unconstr <- function(vechH) { H <- invvech(vechH) %*% invvech(vechH) pi.temp <- 1/(det(H)^(1/2)*n)*RK + 1/4* t(vec(H)) %*% psi4.mat %*% vec(H) return(drop(pi.temp)) } ## psi4.mat always a zero eigen-value since it has repeated rows result <- optim(vech(Hstart), pi.temp.unconstr, method="BFGS") H <- invvech(result$par) %*% invvech(result$par) } else if (pilot!="unconstr") { ## use normal reference bandwidth as initial condition if (missing(Hstart)) Hstart <- (4/(n*(d + 2)))^(2/(d + 4)) * var(x.star) else Hstart <- Sinv12 %*% Hstart %*% Sinv12 Hstart <- matrix.sqrt(Hstart) ## PI is estimate of AMISE pi.temp <- function(vechH) { H <- invvech(vechH) %*% invvech(vechH) pi.temp <- 1/(det(H)^(1/2)*n)*RK + 1/4* t(vech(H)) %*% psi4.mat %*% vech(H) return(drop(pi.temp)) } ## check that Psi_4 is positive definite if (prod(eigen(psi4.mat)$val > 0) == 1) { result <- optim(vech(Hstart), pi.temp, method="BFGS") H <- invvech(result$par) %*% invvech(result$par) } else { cat("Psi matrix not positive definite\n") H <- matrix(NA, nc=d, nr=d) } ## back-transform H <- S12 %*% H %*% S12 } if (!amise) return(H) else return(list(H = H, PI=result$value))} ################################################################################ Computes plug-in diagonal bandwidth matrix for 2 to 6-dim## Parameters# x - data points# nstage - number of plug-in stages (1 or 2)# pre - "scale" - pre-scaled data# - "sphere"- pre-sphered data ## Returns# Plug-in diagonal bandwidth matrix###############################################################################Hpi.diag <- function(x, nstage=2, pilot="amse", pre="scale", Hstart, binned=FALSE, bgridsize){ if(!is.matrix(x)) x <- as.matrix(x) if (substr(pre,1,2)=="sc") x.star <- pre.scale(x) else if (substr(pre,1,2)=="sp") x.star <- pre.sphere(x) if (substr(pre,1,2)=="sp") stop("Using pre-sphering won't give diagonal bandwidth matrix\n") if (substr(pilot,1,1)=="a") pilot <- "amse" else if (substr(pilot,1,1)=="s") pilot <- "samse" n <- nrow(x) d <- ncol(x) RK <- (4*pi)^(-d/2) s1 <- sd(x[,1]) s2 <- sd(x[,2]) if (substr(pre,1,2)=="sc") S12 <- diag(sqrt(diag(var(x)))) else if (substr(pre,1,2)=="sp") S12 <- matrix.sqrt(var(x)) Sinv12 <- chol2inv(chol(S12)) if (missing(bgridsize) & binned) bgridsize <- default.gridsize(d) if (d > 4) binned <- FALSE if (binned) { H.max <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x.star) ##bin.par <- binning(x.star, bgridsize, sqrt(diag(H.max))) ## for large samples, take subset for pilot estimation nsub <- min(n, 1e4) x.star.sub <- x.star[sample(1:n, size=nsub),] bin.par.sub <- binning(x=x.star.sub, bgridsize=bgridsize, H=sqrt(diag(H.max))) } else x.star.sub <- x.star if (d==2) { if (nstage == 1) psi.fun <- psifun1.2d(x.star.sub, pilot=pilot, binned=binned, bin.par=bin.par.sub) else if (nstage == 2) psi.fun <- psifun2.2d(x.star.sub, pilot=pilot, binned=binned, bin.par=bin.par.sub) psi40 <- psi.fun[1] psi22 <- psi.fun[3] psi04 <- psi.fun[5] ## diagonal bandwidth matrix for 2-dim has exact formula h1 <- (psi04^(3/4)*RK/(psi40^(3/4)*(sqrt(psi40 * psi04)+psi22)*n))^(1/6) h2 <- (psi40/psi04)^(1/4) * h1 return(diag(c(s1^2*h1^2, s2^2*h2^2))) } else { if (pilot=="amse") stop("SAMSE pilot selectors are better for higher dimensions") ## use normal reference bandwidth as initial condition if (missing(Hstart)) Hstart <- (4/(n*(d + 2)))^(2/(d + 4)) * var(x.star) else Hstart <- Sinv12 %*% Hstart %*% Sinv12 Hstart <- matrix.sqrt(Hstart) psi4.mat <- psimat.nd(x.star.sub, d=d, nstage=nstage, pilot=pilot, binned=binned, bin.par=bin.par.sub) ## PI is estimate of AMISE pi.temp <- function(diagH) { H <- diag(diagH) %*% diag(diagH) pi.temp <- 1/(det(H)^(1/2)*n)*RK + 1/4* t(vech(H)) %*% psi4.mat %*% vech(H) return(drop(pi.temp)) } result <- optim(diag(Hstart), pi.temp, method="BFGS") H <- diag(result$par) %*% diag(result$par) ## back-transform if (pre=="scale") S12 <- diag(sqrt(diag(var(x)))) else if (pre=="sphere") S12 <- matrix.sqrt(var(x)) H <- S12 %*% H %*% S12 return(H) }}################################################################################ Cross-validation bandwidth selectors############################################################################################################################################################### Computes the least squares cross validation LSCV function for 2 to 6 dim# # Parameters# x - data values# H - bandwidth matrix## Returns# LSCV(H)###############################################################################lscv.1d.binned <- function(xbin.par, h){ n <- sum(xbin.par$counts) lscv1 <- n^2*bkfe(x=xbin.par$counts, range.x=xbin.par$range.x[[1]], drv=0, bandwidth=sqrt(2)*h, binned=TRUE) lscv2 <- n^2*(bkfe(x=xbin.par$counts, range.x=xbin.par$range.x[[1]], drv=0, bandwidth=h, binned=TRUE) - dnorm(0, mean=0, sd=h)/n) lscv <- lscv1/n^2 - 2/(n*(n-1))*lscv2 return(lscv)}lscv.mat <- function(x, H, binned=FALSE, bin.par){ n <- nrow(x) ##d <- ncol(x) lscv1 <- dmvnorm.sum(x, 2*H, inc=1, binned=binned, bin.par=bin.par) lscv2 <- dmvnorm.sum(x, H, inc=0, binned=binned, bin.par=bin.par) return(lscv1/n^2 - 2/(n*(n-1))*lscv2) } ################################################################################ Finds the bandwidth matrix that minimises LSCV for 2 to 6 dim# # Parameters# x - data values# Hstart - initial bandwidth matrix## Returns# H_LSCV###############################################################################hlscv <- function(x, binned=TRUE, bgridsize){ if (any(duplicated(x))) stop("Data contain duplicated values: LSCV is not well-behaved in this case") n <- length(x) d <- 1 hnorm <- sqrt((4/(n*(d + 2)))^(2/(d + 4)) * var(x)) if (missing(bgridsize)) bgridsize <- 401 xbin.par <- dfltCounts.ks(x, gridsize=bgridsize) lscv.1d.temp <- function(h) { return(lscv.1d.binned(x=xbin.par, h=h)) } opt <- optimise(f=lscv.1d.temp, interval=c(0.2*hnorm, 5*hnorm, tol=.Machine$double.eps))$minimum return(opt) } Hlscv <- function(x, Hstart){ if (any(duplicated(x))) stop("Data contain duplicated values: LSCV is not well-behaved in this case") n <- nrow(x) d <- ncol(x) ##RK <- (4*pi)^(-d/2) ## use normal reference selector as initial condn if (missing(Hstart)) Hstart <- matrix.sqrt((4/ (n*(d + 2)))^(2/(d + 4)) * var(x)) lscv.mat.temp <- function(vechH) { ## ensures that H is positive definite H <- invvech(vechH) %*% invvech(vechH) return(lscv.mat(x=x, H=H, binned=FALSE)) } result <- optim(vech(Hstart), lscv.mat.temp, method="Nelder-Mead") #control=list(abstol=n^(-10*d))) return(invvech(result$par) %*% invvech(result$par))}################################################################################ Finds the diagonal bandwidth matrix that minimises LSCV for 2 to 6 dim# # Parameters# x - data values# Hstart - initial bandwidth matrix## Returns# H_LSCV,diag###############################################################################Hlscv.diag <- function(x, Hstart, binned=FALSE, bgridsize){ n <- nrow(x) d <- ncol(x) RK <- (4*pi)^(-d/2) if (missing(Hstart)) Hstart <- matrix.sqrt((4/ (n*(d + 2)))^(2/(d + 4)) * var(x)) if (missing(bgridsize) & binned) bgridsize <- default.gridsize(d) if (d > 4) binned <- FALSE if (binned) { H.max <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x) ## linear binning bin.par <- binning(x=x, bgridsize=bgridsize, H=sqrt(diag(H.max))) } lscv.mat.temp <- function(diagH) { H <- diag(diagH^2) return(lscv.mat(x=x, H=H, binned=binned, bin.par=bin.par)) } result <- optim(diag(Hstart), lscv.mat.temp, method="Nelder-Mead") return(diag(result$par^2))}################################################################################ Computes the biased cross validation BCV function for 2-dim# # Parameters# x - data values# H1, H2 - bandwidth matrices## Returns# BCV(H)###############################################################################bcv.mat <- function(x, H1, H2){ n <- nrow(x) d <- 2 psi40 <- dmvnorm.deriv.2d.sum(x, Sigma=H2, r=c(4,0), inc=0) psi31 <- dmvnorm.deriv.2d.sum(x, Sigma=H2, r=c(3,1), inc=0) psi22 <- dmvnorm.deriv.2d.sum(x, Sigma=H2, r=c(2,2), inc=0) psi13 <- dmvnorm.deriv.2d.sum(x, Sigma=H2, r=c(1,3), inc=0) psi04 <- dmvnorm.deriv.2d.sum(x, Sigma=H2, r=c(0,4), inc=0) coeff <- c(1, 2, 1, 2, 4, 2, 1, 2, 1) psi.fun <- c(psi40, psi31, psi22, psi31, psi22, psi13, psi22, psi13,psi04)/(n*(n-1)) psi4.mat <- matrix(coeff * psi.fun, nc=3, nr=3) RK <- (4*pi)^(-d/2) bcv <- drop(n^(-1)*det(H1)^(-1/2)*RK + 1/4*t(vech(H1)) %*% psi4.mat %*% vech(H1)) return(list(bcv=bcv, psimat=psi4.mat))}################################################################################ Find the bandwidth matrix that minimises the BCV for 2-dim# # Parameters# x - data values
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -