?? kda.r
字號:
############################################################################### Kernel discriminant analysis############################################################################################################################################################### Find bandwidths for each class in training set, for 2- to 6-dim ## Parameters# x - data values# group - group variable# bw - type of bandwidth selector# nstage, pilot, pre - parameters for plugin bandwidths# diag - FALSE - use full b/w matrices# - TRUE - use diag b/w matrices## Returns# Matrix of bandwidths for each group in training set###############################################################################hkda <- function(x, x.group, bw="plugin", nstage=2, binned=TRUE, bgridsize){ grlab <- sort(unique(x.group)) m <- length(grlab) bw <- substr(tolower(bw),1,1) hs <- numeric(0) if (missing(bgridsize)) bgridsize <- 401 for (i in 1:m) { y <- x[x.group==grlab[i]] if (bw=="p") h <- hpi(y, nstage=nstage, binned=TRUE, bgridsize=bgridsize) hs <- c(hs, h) } return(hs)} Hkda <- function(x, x.group, Hstart, bw="plugin", nstage=2, pilot="samse", pre="sphere", binned=FALSE, bgridsize){ d <- ncol(x) grlab <- sort(unique(x.group)) m <- length(grlab) bw <- substr(tolower(bw),1,1) Hs <- numeric(0) if (missing(bgridsize) & binned) bgridsize <- default.gridsize(d) for (i in 1:m) { y <- x[x.group==grlab[i],] if (!missing(Hstart)) { Hstarty <- Hstart[((i-1)*d+1) : (i*d),] if (bw=="l") H <- Hlscv(y, Hstart=Hstarty) else if (bw=="s") H <- Hscv(y, pre=pre, Hstart=Hstarty, binned=binned, bgridsize=bgridsize) else if (bw=="p") H <- Hpi(y, nstage=nstage, pilot=pilot, pre=pre, Hstart=Hstarty, binned=binned, bgridsize=bgridsize) } else { if (bw=="l") H <- Hlscv(y) else if (bw=="s") H <- Hscv(y, pre=pre, binned=binned, bgridsize=bgridsize) else if (bw=="p") H <- Hpi(y, nstage=nstage, pilot=pilot, pre=pre, binned=binned, bgridsize=bgridsize) } Hs <- rbind(Hs, H) } return(Hs) }Hkda.diag <- function(x, x.group, bw="plugin", nstage=2, pilot="samse", pre="sphere", binned=FALSE, bgridsize){ d <- ncol(x) grlab <- sort(unique(x.group)) m <- length(grlab) bw <- substr(tolower(bw),1,1) Hs <- numeric(0) if (missing(bgridsize) & binned) bgridsize <- default.gridsize(d) for (i in 1:m) { y <- x[x.group==grlab[i],] if (bw=="l") H <- Hlscv.diag(y, binned=binned, bgridsize=bgridsize) else if (bw=="p") H <- Hpi.diag(y, nstage=nstage, pilot=pilot, pre=pre, binned=binned, bgridsize=bgridsize) else if (bw=="s") H <- Hscv.diag(y, pre=pre, binned=binned, bgridsize=bgridsize) Hs <- rbind(Hs, H) } return(Hs) }################################################################################ Classify data set according to discriminant analysis based on training data# for 1- to 6-dim## Parameter# x - training data# x.group - group variable for x# y - data values to be classified# Hs - bandwidth matrices# prior.prob - prior probabilities## Returns# Group classification of data set y###############################################################################kda <- function(x, x.group, Hs, hs, y, prior.prob=NULL){ if (is.vector(x)) { disc.gr <- kda.1d(x=x, x.group=x.group, hs=hs, y=y, prior.prob=prior.prob) } else { if (is.data.frame(x)) x <- as.matrix(x) if (is.data.frame(y)) y <- as.matrix(y) gr <- sort(unique(x.group)) ##d <- ncol(x) ## if prior.prob is NULL then use sample proportions if (is.null(prior.prob)) { prior.prob <- rep(0, length(gr)) for (j in 1:length(gr)) prior.prob[j] <- length(which(x.group==gr[j])) prior.prob <- prior.prob/nrow(x) } if (!(identical(all.equal(sum(prior.prob), 1), TRUE))) stop("Sum of prior weights not equal to 1") ## Compute KDE and weighted KDE m <- length(gr) fhat <- kda.kde(x, x.group, Hs=Hs, eval.points=y) fhat.wt <- matrix(0, ncol=m, nrow=nrow(y)) for (j in 1:m) fhat.wt[,j] <- fhat$est[[j]]* prior.prob[j] ## Assign y according largest weighted density value disc.gr.temp <- apply(fhat.wt, 1, which.max) disc.gr <- gr for (j in 1:m) { ind <- which(disc.gr.temp==j) disc.gr[ind] <- gr[j] } } return(disc.gr) }kda.1d <- function(x, x.group, hs, y, prior.prob=NULL){ gr <- sort(unique(x.group)) # if prior.prob is NULL then use sample proportions if (is.null(prior.prob)) { prior.prob <- rep(0, length(gr)) for (j in 1:length(gr)) prior.prob[j] <- length(which(x.group==gr[j])) prior.prob <- prior.prob/length(x) } if (!(identical(all.equal(sum(prior.prob), 1), TRUE))) stop("Sum of prior weights not equal to 1") ## Compute KDE and weighted KDE m <- length(gr) fhat <- kda.kde(x, x.group, hs=hs, eval.points=y) fhat.wt <- matrix(0, ncol=m, nrow=length(y)) for (j in 1:m) fhat.wt[,j] <- fhat$estimate[[j]]* prior.prob[j] ## Assign y according largest weighted density value disc.gr.temp <- apply(fhat.wt, 1, which.max) disc.gr <- gr for (j in 1:m) { ind <- which(disc.gr.temp==j) disc.gr[ind] <- gr[j] } return(disc.gr) }################################################################################ Compares true group classification with an estimated one## Parameters# group - true group variable# est.group - estimated group variable## Returns# List with components# comp - cross-classification table of groupings - true groups are the rows,# estimated groups are the columns# error - total mis-classification rate###############################################################################compare <- function(x.group, est.group, by.group=FALSE){ if (length(x.group)!=length(est.group)) stop("Group label vectors not the same length") grlab <- sort(unique(x.group)) m <- length(grlab) comp <- matrix(0, nr=m, nc=m) for (i in 1:m) for (j in 1:m) comp[i,j] <- sum((x.group==grlab[i]) & (est.group==grlab[j])) if (by.group) { er <- vector() for (i in 1:m) er[i] <- 1-comp[i,i]/rowSums(comp)[i] er <- matrix(er, nc=1) er <- rbind(er, 1 - sum(diag(comp))/sum(comp)) rownames(er) <- c(as.character(paste(grlab, "(true)")), "Total") colnames(er) <- "error" } else er <- 1 - sum(diag(comp))/sum(comp) comp <- cbind(comp, rowSums(comp)) comp <- rbind(comp, colSums(comp)) colnames(comp) <- c(as.character(paste(grlab, "(est.)")), "Total") rownames(comp) <- c(as.character(paste(grlab, "(true)")), "Total") return(list(cross=comp, error=er)) }################################################################################ Computes cross-validated misclassification rates (for use when test data is# not independent of training data) for KDA## Parameters# x - training data# x.group - group variable for x# y - data values to be classified# Hs - bandwidth matrices# prior.prob - prior probabilities## Returns# List with components# comp - cross-classification table of groupings - true groups are the rows,# estimated groups are the columns# error - total mis-classification rate###############################################################################compare.kda.cv <- function(x, x.group, bw="plugin", prior.prob=NULL, Hstart, by.group=FALSE, trace=FALSE, binned=FALSE, bgridsize, recompute=FALSE, ...){ ## 1-d if (is.vector(x)) { n <- length(x) h <- hkda(x, x.group, bw=bw, binned=binned, bgridsize=bgridsize, ...) gr <- sort(unique(x.group)) kda.cv.gr <- x.group for (i in 1:n) { h.mod <- h ## find group that x[i] belongs to ind <- which(x.group[i]==gr) indx <- x.group==gr[ind] indx[i] <- FALSE if (substr(bw,1,1)=="p") h.temp <- hpi(x[indx], binned=binned, bgridsize=bgridsize, ...) h.mod[ind] <- h.temp ## recompute KDA estimate of groups with x[i] excluded if (trace) cat(paste("Processing data item:", i, "\n")) kda.cv.gr[i] <- kda(x[-i], x.group[-i], hs=h.mod, y=x, prior.prob=prior.prob)[i] } return(compare(x.group, kda.cv.gr, by.group=by.group)) } ## multi-dimensional n <- nrow(x) d <- ncol(x) if (!missing(Hstart)) H <- Hkda(x, x.group, bw=bw, Hstart=Hstart, binned=binned, bgridsize=bgridsize, ...) else H <- Hkda(x, x.group, bw=bw, binned=binned, bgridsize=bgridsize, ...) ### classify data x using KDA rules based on x itself ##kda.group <- kda(x, x.group, Hs=H, y=x, prior.prob=prior.prob) ##comp <- compare(x.group, kda.group) gr <- sort(unique(x.group)) kda.cv.gr <- x.group for (i in 1:n) { H.mod <- H ### find group that x[i] belongs to ind <- which(x.group[i]==gr) indx <- x.group==gr[ind] indx[i] <- FALSE if (recompute) { ## compute b/w matrix for that group with x[i] excluded if (!missing(Hstart)) { Hstart.temp <- Hstart[((ind-1)*d+1):(ind*d),] if (substr(bw,1,1)=="p") H.temp <- Hpi(x[indx,], Hstart=Hstart.temp, binned=binned, bgridsize=bgridsize,...) else if (substr(bw,1,1)=="s") H.temp <- Hscv(x[indx,], Hstart=Hstart.temp, binned=binned, bgridsize=bgridsize,...) else if (substr(bw,1,1)=="l") H.temp <- Hlscv(x[indx,], Hstart=Hstart.temp, ...) } else { if (substr(bw,1,1)=="p") H.temp <- Hpi(x[indx,], binned=binned, bgridsize=bgridsize, ...) else if (substr(bw,1,1)=="s") H.temp <- Hscv(x[indx,], binned=binned, bgridsize=bgridsize, ...) else if (substr(bw,1,1)=="l") H.temp <- Hlscv(x[indx,], ...) } H.mod[((ind-1)*d+1):(ind*d),] <- H.temp } ## recompute KDA estimate of groups with x[i] excluded if (trace) cat(paste("Processing data item:", i, "\n")) kda.cv.gr[i] <- kda(x[-i,], x.group[-i], Hs=H.mod, y=x, prior.prob=prior.prob)[i] } return(compare(x.group, kda.cv.gr, by.group=by.group)) }################################################################################## Same as compare.kda.cv except uses diagonal b/w matrices###############################################################################compare.kda.diag.cv <- function(x, x.group, bw="plugin", prior.prob=NULL, by.group=FALSE, trace=FALSE, binned=FALSE, bgridsize, recompute=FALSE, ...){ n <- nrow(x) d <- ncol(x) H <- Hkda.diag(x, x.group, bw=bw, binned=binned, bgridsize=bgridsize, ...) ##kda.group <- kda(x, x.group, Hs=H, y=x, prior.prob=prior.prob) ##comp <- compare(x.group, kda.group) gr <- sort(unique(x.group)) kda.cv.gr <- x.group for (i in 1:n) { H.mod <- H if (recompute) { ind <- which(x.group[i]==gr) indx <- x.group==gr[ind] indx[i] <- FALSE if (substr(bw,1,1)=="p") H.temp <- Hpi.diag(x[indx,], binned=binned, bgridsize=bgridsize, ...) else if (substr(bw,1,1)=="l") H.temp <- Hlscv.diag(x[indx,], binned=binned, bgridsize=bgridsize, ...) H.mod[((ind-1)*d+1):(ind*d),] <- H.temp } if (trace) cat(paste("Processing data item:", i, "\n")) kda.cv.gr[i] <- kda(x[-i,], x.group[-i], Hs=H.mod, y=x, prior.prob=prior.prob)[i] } return(compare(x.group, kda.cv.gr, by.group=by.group)) }################################################################################ KDEs of individual densities for KDA - 1- to 3-dim## Parameters# x - data values# group - group variable# Hs - bandwidth matrices## Returns# List with components (class dade)# x - list of data values# eval.points - evaluation points of dnesity estimate# estimate - list of density estimate# H - list of bandwidth matrices##############################################################################kda.kde <- function(x, x.group, Hs, hs, prior.prob=NULL, gridsize, xmin, xmax, supp=3.7, eval.points=NULL, binned=FALSE, bgridsize){ if (is.vector(x)) { if (missing(gridsize)) gridsize <- 101 if (missing(bgridsize)) bgridsize <- 401 fhat.list <- kda.kde.1d(x=x, x.group=x.group, hs=hs, prior.prob=prior.prob, gridsize=gridsize, eval.points=eval.points, supp=supp, binned=binned, bgridsize=bgridsize, xmin=xmin, xmax=xmax) } else { if (is.data.frame(x)) x <- as.matrix(x) grlab <- sort(unique(x.group)) m <- length(grlab) d <- ncol(x) ## find largest bandwidth matrix to initialise grid detH <- vector() for (j in 1:m) detH[j] <- det(Hs[((j-1)*d+1) : (j*d),]) Hmax.ind <- which.max(detH) Hmax <- Hs[((Hmax.ind-1)*d+1) : (Hmax.ind*d),]
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -