r-cran-gbm / 00a1a18
New upstream version 2.1.8 (Dylan Aïssi, 3 years ago)
46 changed file(s) with 704 addition(s) and 1151 deletion(s).
00 Package: gbm
1 Version: 2.1.5
1 Version: 2.1.8
22 Title: Generalized Boosted Regression Models
33 Authors@R: c(
44 person("Brandon", "Greenwell",
1717 comment = "https://github.com/gbm-developers")
1818 )
1919 Depends: R (>= 2.9.0)
20 Imports: gridExtra, lattice, parallel, survival
21 Suggests: knitr, pdp, RUnit, splines, viridis
20 Imports: lattice, parallel, survival
21 Suggests: covr, gridExtra, knitr, pdp, RUnit, splines, tinytest, vip,
22 viridis
2223 Description: An implementation of extensions to Freund and Schapire's AdaBoost
2324 algorithm and Friedman's gradient boosting machine. Includes regression
2425 methods for least squares, absolute loss, t-distribution loss, quantile
2829 License: GPL (>= 2) | file LICENSE
2930 URL: https://github.com/gbm-developers/gbm
3031 BugReports: https://github.com/gbm-developers/gbm/issues
31 RoxygenNote: 6.1.1
32 Encoding: UTF-8
33 RoxygenNote: 7.1.1
3234 VignetteBuilder: knitr
3335 NeedsCompilation: yes
34 Packaged: 2019-01-14 14:21:52 UTC; bgreenwell
36 Packaged: 2020-07-13 15:15:55 UTC; b780620
3537 Author: Brandon Greenwell [aut, cre] (<https://orcid.org/0000-0002-8120-0084>),
3638 Bradley Boehmke [aut] (<https://orcid.org/0000-0002-3611-8516>),
3739 Jay Cunningham [aut],
3840 GBM Developers [aut] (https://github.com/gbm-developers)
3941 Maintainer: Brandon Greenwell <greenwell.brandon@gmail.com>
4042 Repository: CRAN
41 Date/Publication: 2019-01-14 15:00:03 UTC
43 Date/Publication: 2020-07-15 10:00:02 UTC
MD5 (+40, -39)
00 108bdba2eb6f2ba6ce890f47224ef68f *CHANGES
1 894d28d233ef8843240f7fca545caea0 *DESCRIPTION
1 47bb9e8da14440b3130f6b729e6278c5 *DESCRIPTION
22 67f2f9cc8297be2f12dfe86e05277383 *LICENSE
3 00dda5f78be66b96a668b74b523fcac1 *NAMESPACE
4 f49617fcc735cf616817886d616d9ee2 *NEWS.md
3 f9637d4fba61a768283f21aa530054eb *NAMESPACE
4 c4289aae976b259fc5cf46a2363ca209 *NEWS.md
55 061c315ef880f845918ff59cce721239 *R/basehaz.gbm.R
6 aef3622e1f5a19f9c74616130321851f *R/calibrate.plot.R
7 af7dcaeddbc7e6eb31b66290a98c0a1c *R/gbm-internals.R
6 080340cbe29d967fa80c3ff318d7f732 *R/calibrate.plot.R
7 50333a6845d49192b2558459455c0a91 *R/gbm-internals.R
88 2f21a77c0c4d5274533173b223f7f05e *R/gbm-package.R
9 6c851a0da731e8611f499738d7ebc3b7 *R/gbm.R
10 bd784b825af03b7576017cdd45c696fa *R/gbm.fit.R
11 2f6a79af8a23dd4be5283881a82e5f5c *R/gbm.more.R
12 cdcc395f477e8a83fde52d313d5d9760 *R/gbm.object.R
9 933caf661e494cd607202ca9d4f0fc62 *R/gbm.R
10 f7b746b290dee8f64a9e421ac8a85411 *R/gbm.fit.R
11 5619be8fc7266260bae6e1eb84f9e880 *R/gbm.more.R
12 357e0e50665db6624c0f990ef77d66df *R/gbm.object.R
1313 f2d808e5f68996a79a14d575ae20ab16 *R/gbm.perf.R
14 f17f3d39a4d6820e78130748ce8032ff *R/gbmCrossVal.R
14 747960c3f50db7d0494155ba204eada8 *R/gbmCrossVal.R
1515 40231a31962f0df1ab182edcffe51b9f *R/interact.gbm.R
16 fc877c59338b8343545050803c29ec95 *R/ir.measures.R
17 1e1e9648a40d27a07c63e9c4103ba4d0 *R/plot.gbm.R
18 e8f2da715b200da15e92a70e483207ce *R/predict.gbm.R
16 16a66cc76e829638ce932ac651c61f47 *R/ir.measures.R
17 c54fa92e38020e1e6957a85dd2e28a87 *R/plot.gbm.R
18 74a70185c1d776610d70f8061166d5e4 *R/predict.gbm.R
1919 48438bd417c4a7b3c0495c901c5d5060 *R/pretty.gbm.tree.R
2020 b068e5396186cc21060477aac914abe7 *R/print.gbm.R
2121 af4fd23ba860c912a1a237fb3b5631d1 *R/reconstructGBMdata.R
2222 7d953fa9013fdb90ae01e67e336b2747 *R/relative.influence.R
23 81f913b053b7d402f4a808aeb3670e2f *R/shrink.gbm.R
24 d001fbd3c7de86463f4d0f1dff63a70b *R/shrink.gbm.pred.R
25 21f1a9fdd69be98ad81bbca7e18ec8a7 *R/test.gbm.R
26 3fc23fb8a1c816ac430c4e836a08078a *R/utils.R
23 a8915f1fc2969662927c0116bcea5615 *R/test.gbm.R
24 bcfa03f177d8a10d2c15b56e5e135d75 *R/utils.R
2725 08ab323918a24917e4d4638ca01c841a *R/zzz.R
28 55ae3c9b2954cd0ac1c317b5698d77c3 *README.md
29 4dc9151409b8112474ac3f1da044f7f7 *build/vignette.rds
26 9147b939a47477bba1ab8f4c29723119 *README.md
27 b97c7d2a583bca53b75caccf35aef9ee *build/vignette.rds
3028 4e38ebb4d3578e523b7d94fc9ece3d65 *demo/00Index
3129 e3bd8606063f15ded6ab3261c13d22af *demo/OOB-reps.R
3230 354344b4f6e8a232508ef872ced5efa3 *demo/bernoulli.R
3735 dbff7ebcc6a18e27c1b423fd5db70ae3 *demo/printExamples.R
3836 79316127956b8f5291f5021f1e7c89ef *demo/robustReg.R
3937 c044e4fcd21ef75478830ede774cfba7 *inst/doc/gbm.Rnw
40 ecaf68f8e96581dbbd9735927f42c462 *inst/doc/gbm.pdf
41 e89d6b6a7a2f19974d5c7916c9e2ae66 *man/basehaz.gbm.Rd
42 c606780ccf3028850a848dfc2b3f4739 *man/calibrate.plot.Rd
38 86204b5cb06b4c7eaa81d41e678a6f33 *inst/doc/gbm.pdf
39 a3f52d3361a07f9e5ac4d95e100aa078 *inst/tinytest/test_bernoulli.R
40 255756dccfea79e8d573fa1737a71efd *inst/tinytest/test_coxph.R
41 a3d1dbaf5c3d9c03cbd3e5f2596a3e0c *inst/tinytest/test_least_squares.R
42 f1a525fce37f03be6fffafd05e9e7bb0 *inst/tinytest/test_relative_influence.R
43 7e335ac1ef073e3523977ae22a16bfaf *man/basehaz.gbm.Rd
44 75d55867308e00f5c9d3670fb953cd1f *man/calibrate.plot.Rd
4345 bf74b54c920807d509d5ff19e45e95d4 *man/gbm-internals.Rd
4446 5f96c05f991a485fbfe7a23b87b3d649 *man/gbm-package.Rd
45 15763b8625b44991118470ad6057b6da *man/gbm.Rd
46 94befbc345d33d0ed250a227a1268603 *man/gbm.fit.Rd
47 a65152118be58b4d8bf48ad8c93614c7 *man/gbm.more.Rd
48 728fa0d75f96519d0156aa2891362b9b *man/gbm.object.Rd
49 d007fd2b010c4b6ccbd4c0ec2aba9ea0 *man/gbm.perf.Rd
50 c43f6a77ca7bec407e85b642d6dfa2be *man/gbm.roc.area.Rd
51 2cd76f2ffbdc511bb0ac0a9dc1fb393b *man/gbmCrossVal.Rd
52 7d42ecd6cfbbb3e83f94685f0ef7add4 *man/grid.arrange.Rd
47 3fb9a60ad8fdca4442e1c121eeb5dac6 *man/gbm.Rd
48 7263dde81fbe1de556a37c6f718f22e1 *man/gbm.fit.Rd
49 2464c138f182c855dff1e70916b28bd6 *man/gbm.more.Rd
50 c6601474ddfc8d105c4c36cbc7b34bc6 *man/gbm.object.Rd
51 95459ba59c2a81325a8ad3be30610f3d *man/gbm.perf.Rd
52 16483371ce60d2c3f8246d20c78c9501 *man/gbm.roc.area.Rd
53 58476ac9a079ffd11abe6f39d3c11ee0 *man/gbmCrossVal.Rd
5354 c1789d7d5b7fc9be7665be55c1893d35 *man/interact.gbm.Rd
54 0a3f9f38c375609ef6380dceb1d4128c *man/plot.gbm.Rd
55 2a0d1ae9483de0ffb214d25623821f68 *man/predict.gbm.Rd
55 bf3388a00c4616fba0ab363c6c5006c7 *man/plot.gbm.Rd
56 a9429d902938aa6f7d8925e3761cbbff *man/predict.gbm.Rd
5657 e368dcac4b75c8273529151e0087c5d4 *man/pretty.gbm.tree.Rd
5758 21c028bad14805f40e0a7a0dc7e49e64 *man/print.gbm.Rd
58 f9563a4ec1265edfec56ecbdb8148e38 *man/quantile.rug.Rd
59 55e165eb74fc82bbd2eca636e77d6618 *man/quantile.rug.Rd
5960 27aa52e20ea8281697e8357a36d58b85 *man/reconstructGBMdata.Rd
60 f17f451739be17e89ec1b227b6602c86 *man/relative.influence.Rd
61 6f99e3dde82cbc922d9f1fc7f22bdcd9 *man/shrink.gbm.Rd
62 d75c1d9e1ff0c6a83bb37df2591ae4d9 *man/shrink.gbm.pred.Rd
63 dd2dfa92c91ff3ae020d9dbdd23657fb *man/summary.gbm.Rd
61 ee3f3b8ed722adf5cd10da2975233987 *man/relative.influence.Rd
62 82ce31d4fc3645cb3c2fbc2e164b03d1 *man/summary.gbm.Rd
6463 8201654f42537ca205d0d5b138848df8 *man/test.gbm.Rd
6564 0d32ce72a7b02fc57d602c60b9ba8305 *src/adaboost.cpp
6665 2f5d22dc3043e69628763cbe303e6b5f *src/adaboost.h
7675 91d88e455827695f63bf23df5dfb3108 *src/distribution.h
7776 6d2bd44a11975c8f023640eb7a9036c3 *src/gaussian.cpp
7877 6c2bf2616a3b4491aaaf501346246d3f *src/gaussian.h
79 889bfcdd44dc35824be51ba8ae2bd517 *src/gbm-init.c
78 27ffff3bcc49d50e130083ef8f2081e5 *src/gbm-init.c
8079 1d8d4e59887769602b1d3c8dc3d5f94f *src/gbm.cpp
8180 0f49e8549558916322ec80e29b591a73 *src/gbm.h
8281 c0c572eb464dae70700ffe8fdc3f6b9f *src/gbm_engine.cpp
8382 b3f1f49fa614ac6cfd52b28191bfdb70 *src/gbm_engine.h
84 1d924856d046e942a312d373cfce230f *src/gbmentry.cpp
83 b9df12bad88932789013d80b43f34524 *src/gbmentry.cpp
8584 1fba83f37e9f092d8b005e0c8f32a97b *src/huberized.cpp
8685 141e5b762944c14a0b6294e15046296f *src/huberized.h
8786 10dcf061e2807ca52f811ec6650f33ad *src/laplace.cpp
115114 9ab15eb81fc9a18ee7d14a76f7aefd2a *src/tdist.h
116115 276e36bf158250eb458a1cdabcf975b5 *src/tree.cpp
117116 6b2f1cd60e5d67638e110e1ac9552b27 *src/tree.h
117 7caa6e3190a366a3696a52fa982a3030 *tests/tinytest.R
118 f721a9c169d3849b725768ed011621a3 *vignettes/gbm-concordance.tex
118119 c044e4fcd21ef75478830ede774cfba7 *vignettes/gbm.Rnw
119120 b5633beb372053eac8730e76d8999ce9 *vignettes/gbm.bib
120121 7ba661d197d25537a69fc34d737b4d29 *vignettes/oobperf2.pdf
2525 export(getCVgroup)
2626 export(getStratify)
2727 export(getVarNames)
28 export(grid.arrange)
2928 export(guessDist)
3029 export(interact.gbm)
3130 export(ir.measure.auc)
4241 export(reconstructGBMdata)
4342 export(relative.influence)
4443 export(show.gbm)
45 export(shrink.gbm)
46 export(shrink.gbm.pred)
4744 export(summary.gbm)
4845 export(test.gbm)
4946 export(test.relative.influence)
6158 importFrom(graphics,rug)
6259 importFrom(graphics,segments)
6360 importFrom(graphics,title)
64 importFrom(gridExtra,grid.arrange)
6561 importFrom(stats,approx)
6662 importFrom(stats,binomial)
6763 importFrom(stats,delete.response)
0 # gbm 2.1.8
1
2 * Removed experimental functions `shrink.gbm()` and `shrink.gbm.pred()`; the latter seemed broken anyway. Happy to accept a PR if anyone wants to fix them.
3
4
5 # gbm 2.1.7
6
7 * Fix `Non-file package-anchored link(s) in documentation...` warning.
8
9
10 # gbm 2.1.6
11
12 * Corrected the number of arguments for `gbm_shrink_gradient()` in `gbm-init.c` [(#50)](https://github.com/gbm-developers/gbm/issues/50). (Thanks to CRAN for highlighting the issue.)
13
14 * Removed unnecessary dependency on [gridExtra](https://cran.r-project.org/package=gridExtra).
15
16 * Switched to using `lapply()` instead of `parallel::parLapply()` whenever `n.cores = 1`.
17
18 * Calling `gbm()` with `distribution = "bernoulli"` will now throw an error whenever the response is non-numeric (e.g., a 0/1 factor now throws an error instead of possibly crashing the session) [(#6)](https://github.com/gbm-developers/gbm/issues/6). (Thanks to @mzoll.)
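
A minimal sketch of the new behavior (hypothetical data; per this entry, recode factor responses as numeric 0/1 before fitting):

```r
library(gbm)
set.seed(101)
d <- data.frame(y = factor(rbinom(100, size = 1, prob = 0.5)), x = rnorm(100))
# gbm(y ~ x, data = d, distribution = "bernoulli")  # factor response: now an error, not a crash
d$y <- as.integer(as.character(d$y))  # recode the factor as numeric 0/1
fit <- gbm(y ~ x, data = d, distribution = "bernoulli", n.trees = 50)
```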
19
20 * Calling `gbm()` with `distribution = "multinomial"` now comes with a warning message; multinomial support has always been problematic, and since this package is only maintained for backwards compatibility, it likely will not be fixed unless someone submits a PR.
21
22 * Switched from the [RUnit](https://cran.r-project.org/package=RUnit) to the [tinytest](https://cran.r-project.org/package=tinytest) framework. The `test.gbm()`, `test.relative.influence()`, and `validate.gbm()` functions will remain for backwards compatibility. This is just the start; more tests will be added in the future [(#51)](https://github.com/gbm-developers/gbm/issues/51).
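
For reference, a hedged sketch of running the new suite; `test_package()` and `run_test_file()` are standard tinytest entry points, and the file path matches the `inst/tinytest/` scripts in the MD5 list above:

```r
install.packages("tinytest")
tinytest::test_package("gbm")  # run the installed package's test scripts
# Or run a single file from a source checkout:
tinytest::run_test_file("inst/tinytest/test_bernoulli.R")
```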
23
24
25 #### Bug fixes
26
27 * Fixed a long-standing bug that could occur when using k-fold cross-validation with a response that's been transformed in the model formula [(#30)](https://github.com/gbm-developers/gbm/issues/30).
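
A hedged sketch of the case this fixes (the response is now extracted from the model frame, so in-formula transformations survive CV; the data and variable names here are hypothetical, with `Y` assumed positive):

```r
fit <- gbm(log(Y) ~ X1 + X2, data = data, distribution = "gaussian",
           n.trees = 100, cv.folds = 3, n.cores = 1)
best.iter <- gbm.perf(fit, method = "cv", plot.it = FALSE)
```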
28
29 * Fixed a bug that would crash the session when given "bad" input for `n.trees` in the call to `predict.gbm()` [(#45)](https://github.com/gbm-developers/gbm/issues/45). (Thanks to @ngreifer.)
30
31 * Fixed a bug where calling `predict()` could throw an error in some cases when `n.trees` was not specified.
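
The new fallback (see the `predict.gbm()` hunk further down in this diff) tries the test-set estimate when `train.fraction < 1`, then the CV estimate when `cv.error` is available, and otherwise uses all fitted trees. A hedged sketch, with `gbm1` a fitted model as in the package examples:

```r
p <- predict(gbm1, newdata = head(data))  # n.trees omitted; prints "Using <k> trees..."
# Equivalent to requesting the chosen iteration explicitly:
best.iter <- gbm.perf(gbm1, method = "cv", plot.it = FALSE)  # model fit with cv.folds > 1
p <- predict(gbm1, newdata = head(data), n.trees = best.iter)
```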
32
33
034 # gbm 2.1.5
135
236 * Fixed bug that occurred whenever `distribution` was a list (e.g., "pairwise" regression) [(#27)](https://github.com/gbm-developers/gbm/issues/27).
2155
2256 * Fixed bug with axis labels in the `plot()` method for `"gbm"` objects [(#17)](https://github.com/gbm-developers/gbm/issues/17).
2357
24 * The `plot()` method for `"gbm"` objects is now more consistent and always returns a `"trellis"` object [(#19)](https://github.com/gbm-developers/gbm/issues/19). Consequently, setting graphical parameters via `par` will no longer have an effect on the output from `plot.gbm`.
58 * The `plot()` method for `"gbm"` objects is now more consistent and always returns a `"trellis"` object [(#19)](https://github.com/gbm-developers/gbm/issues/19). Consequently, setting graphical parameters via `par` will no longer have an effect on the output from `plot.gbm()`.
2559
2660 * The `plot()` method for `"gbm"` objects gained five new arguments: `level.plot`, `contour`, `number`, `overlap`, and `col.regions`; see `?plot.gbm` for details.
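
A hedged example of the new arguments (`gbm1` and `best.iter` as in the package examples; `number` and `overlap` only matter when a third, conditioning variable is plotted):

```r
plot(gbm1, i.var = 1:2, n.trees = best.iter,
     level.plot = TRUE,                    # FALSE gives a 3-D surface instead
     contour = TRUE,                       # overlay contour lines on the level plot
     col.regions = viridis::viridis(100))  # the default 'viridis' color map
```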
2761
1212 #'
1313 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}.
1414 #'
15 #' @seealso \code{\link[graphics]{plot}}, \code{\link[stats]{quantile}},
15 #' @seealso \code{\link[graphics:plot.default]{plot}}, \code{\link[stats]{quantile}},
1616 #' \code{\link[base]{jitter}}, \code{\link[graphics]{rug}}.
1717 #'
1818 #' @keywords aplot
5151 #' All others default to squared error assuming \code{gaussian}.
5252 #'
5353 #' @param replace Determines whether this plot will replace or overlay the
54 #' current plot. \code{replace=FALSE} is useful for comparing the calibration
54 #' current plot. \code{replace=FALSE} is useful for comparing the calibration
5555 #' of several methods.
5656 #'
5757 #' @param line.par Graphics parameters for the line.
7575 #' calibration curve.
7676 #'
7777 #' @param ... Additional optional arguments to be passed onto
78 #' \code{\link[graphics]{plot}}
78 #' \code{\link[graphics:plot.default]{plot}}
7979 #'
8080 #' @return No return values.
8181 #'
144144
145145 #' @rdname gbm-internals
146146 #' @export
147 gbmCluster <- function(n){
147 gbmCluster <- function(n) {
148148 # If number of cores (n) not given, try to work it out from the number
149149 # that appear to be available and the number of CV folds.
150 if (is.null(n)){
150 if (is.null(n)) {
151151 n <- parallel::detectCores()
152152 }
153153 parallel::makeCluster(n)
235235 #' Y <- X1 ^ 1.5 + 2 * (X2 ^ 0.5) + mu
236236 #' sigma <- sqrt(var(Y) / SNR)
237237 #' Y <- Y + rnorm(N, 0, sigma)
238 #' X1[sample(1:N, size = 500)] <- NA # introduce some missing values
239 #' X4[sample(1:N, size = 300)] <- NA # introduce some missing values
238 #' X1[sample(1:N,size=500)] <- NA # introduce some missing values
239 #' X4[sample(1:N,size=300)] <- NA # introduce some missing values
240240 #' data <- data.frame(Y, X1, X2, X3, X4, X5, X6)
241241 #'
242242 #' # Fit a GBM
290290 #' print(sum((data2$Y - Yhat)^2))
291291 #'
292292 #' # Construct univariate partial dependence plots
293 #' p1 <- plot(gbm1, i.var = 1, n.trees = best.iter)
294 #' p2 <- plot(gbm1, i.var = 2, n.trees = best.iter)
295 #' p3 <- plot(gbm1, i.var = "X3", n.trees = best.iter) # can use index or name
296 #' grid.arrange(p1, p2, p3, ncol = 3)
293 #' plot(gbm1, i.var = 1, n.trees = best.iter)
294 #' plot(gbm1, i.var = 2, n.trees = best.iter)
295 #' plot(gbm1, i.var = "X3", n.trees = best.iter) # can use index or name
297296 #'
298297 #' # Construct bivariate partial dependence plots
299298 #' plot(gbm1, i.var = 1:2, n.trees = best.iter)
338337 Terms <- attr(mf, "terms")
339338 w <- model.weights(mf)
340339 offset <- model.offset(mf)
340 y <- model.response(mf) # extract response values
341341
342342 # Determine and check response distribution
343343 if (missing(distribution)) {
344 y <- data[, all.vars(formula)[1L], drop = TRUE]
344 # y <- data[, all.vars(formula)[1L], drop = TRUE]
345345 distribution <- guessDist(y)
346346 }
347347 if (is.character(distribution)) {
348348 distribution <- list(name = distribution)
349349 }
350
351350 if (!is.element(distribution$name, getAvailableDistributions())) {
352351 stop("Distribution ", distribution$name, " is not supported.")
353352 }
354
355 # Extract and check response values
356 y <- model.response(mf)
353 if (distribution$name == "multinomial") {
354 warning("Setting `distribution = \"multinomial\"` is ill-advised as it is ",
355 "currently broken. It exists only for backwards compatibility. ",
356 "Use at your own risk.", call. = FALSE)
357 }
357358
358359 # Construct data frame of predictor values
359360 var.names <- attributes(Terms)$term.labels
430431 Misc <- group
431432
432433 }
433
434
434435 # Set up for k-fold cross-validation
435436 cv.error <- NULL
437 # FIXME: Is there a better way to handle this?
438 if (cv.folds == 1) {
439 cv.folds <- 0 # o/w, an uninformative error is thrown
440 }
436441 if(cv.folds > 1) {
437442 cv.results <- gbmCrossVal(cv.folds = cv.folds, nTrain = nTrain,
438443 n.cores = n.cores,
467472 gbm.obj$cv.folds <- cv.folds
468473 gbm.obj$call <- mcall
469474 gbm.obj$m <- m
470 if (cv.folds > 0) {
475 if (cv.folds > 1) { # FIXME: Was previously `cv.folds > 0`?
471476 gbm.obj$cv.fitted <- p
472477 }
473478 if (distribution$name == "pairwise") {
313313 if(!is.element(distribution$name, supported.distributions)) {
314314 stop("Distribution ",distribution$name," is not supported")
315315 }
316 if((distribution$name == "bernoulli") && !all(is.element(y,0:1))) {
316 if((distribution$name == "bernoulli") && !all(is.element(y, 0:1)) &&
317 !is.numeric(y)) {
318 # NOTE: Including `!is.numeric(y)` will catch cases where y is a 0/1 factor
317319 stop("Bernoulli requires the response to be in {0,1}")
318320 if (is.factor(y)) {
319321 y <- as.integer(y) - 1
105105 #' print(sum((data2$Y - Yhat)^2))
106106 #'
107107 #' # Construct univariate partial dependence plots
108 #' p1 <- plot(gbm1, i.var = 1, n.trees = best.iter)
109 #' p2 <- plot(gbm1, i.var = 2, n.trees = best.iter)
110 #' p3 <- plot(gbm1, i.var = "X3", n.trees = best.iter) # can use index or name
111 #' grid.arrange(p1, p2, p3, ncol = 3)
108 #' plot(gbm1, i.var = 1, n.trees = best.iter)
109 #' plot(gbm1, i.var = 2, n.trees = best.iter)
110 #' plot(gbm1, i.var = "X3", n.trees = best.iter) # can use index or name
112111 #'
113112 #' # Construct bivariate partial dependence plots
114113 #' plot(gbm1, i.var = 1:2, n.trees = best.iter)
208207 }
209208
210209 # construct group index
211 group <- factor(do.call(paste, c(data[,distribution.group, drop=FALSE], sep=":")))
210 group <- factor(do.call(paste, c(data[, distribution.group, drop = FALSE], sep = ":")))
212211
213212 # Check that weights are constant across groups
214213 if ((!missing(weights)) && (!is.null(weights)))
218217
219218 if (any(w.min != w.max))
220219 {
221 stop("For distribution 'pairwise', all instances for the same group must have the same weight")
220 stop("For distribution 'pairwise', all instances for the same ",
221 "group must have the same weight")
222222 }
223223
224224 # Normalize across groups
11 #'
22 #' These are objects representing fitted \code{gbm}s.
33 #'
4 #' @return \item{initF}{the "intercept" term, the initial predicted value to
5 #' which trees make adjustments} \item{fit}{a vector containing the fitted
4 #' @return \item{initF}{The "intercept" term, the initial predicted value to
5 #' which trees make adjustments.} \item{fit}{A vector containing the fitted
66 #' values on the scale of regression function (e.g. log-odds scale for
7 #' bernoulli, log scale for poisson)} \item{train.error}{a vector of length
7 #' bernoulli, log scale for poisson).} \item{train.error}{A vector of length
88 #' equal to the number of fitted trees containing the value of the loss
9 #' function for each boosting iteration evaluated on the training data}
10 #' \item{valid.error}{a vector of length equal to the number of fitted trees
9 #' function for each boosting iteration evaluated on the training data.}
10 #' \item{valid.error}{A vector of length equal to the number of fitted trees
1111 #' containing the value of the loss function for each boosting iteration
12 #' evaluated on the validation data} \item{cv.error}{if \code{cv.folds}<2 this
13 #' component is NULL. Otherwise, this component is a vector of length equal to
12 #' evaluated on the validation data.} \item{cv.error}{If \code{cv.folds} < 2 this
13 #' component is \code{NULL}. Otherwise, this component is a vector of length equal to
1414 #' the number of fitted trees containing a cross-validated estimate of the loss
15 #' function for each boosting iteration} \item{oobag.improve}{a vector of
15 #' function for each boosting iteration.} \item{oobag.improve}{A vector of
1616 #' length equal to the number of fitted trees containing an out-of-bag estimate
1717 #' of the marginal reduction in the expected value of the loss function. The
1818 #' out-of-bag estimate uses only the training data and is useful for estimating
19 #' the optimal number of boosting iterations. See \code{\link{gbm.perf}}}
20 #' \item{trees}{a list containing the tree structures. The components are best
21 #' viewed using \code{\link{pretty.gbm.tree}}} \item{c.splits}{a list of all
19 #' the optimal number of boosting iterations. See \code{\link{gbm.perf}}.}
20 #' \item{trees}{A list containing the tree structures. The components are best
21 #' viewed using \code{\link{pretty.gbm.tree}}.} \item{c.splits}{A list of all
2222 #' the categorical splits in the collection of trees. If the \code{trees[[i]]}
2323 #' component of a \code{gbm} object describes a categorical split then the
2424 #' splitting value will refer to a component of \code{c.splits}. That component
2525 #' of \code{c.splits} will be a vector of length equal to the number of levels
2626 #' in the categorical split variable. -1 indicates left, +1 indicates right,
27 #' and 0 indicates that the level was not present in the training data}
27 #' and 0 indicates that the level was not present in the training data.}
2828 #' \item{cv.fitted}{If cross-validation was performed, the cross-validation
2929 #' predicted values on the scale of the linear predictor. That is, the fitted
30 #' values from the ith CV-fold, for the model having been trained on the data
30 #' values from the i-th CV-fold, for the model having been trained on the data
3131 #' in all other folds.}
32 #'
3233 #' @section Structure: The following components must be included in a
3334 #' legitimate \code{gbm} object.
35 #'
3436 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
37 #'
3538 #' @seealso \code{\link{gbm}}
39 #'
3640 #' @keywords methods
41 #'
3742 #' @name gbm.object
3843 NULL
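
As a hedged illustration of accessing these components (assumes `gbm1` is a fitted model as in the examples in `?gbm`):

```r
gbm1$initF                 # the "intercept": initial predicted value
length(gbm1$train.error)   # one training-loss value per boosting iteration
which.min(gbm1$cv.error)   # best iteration by CV (cv.error is NULL unless cv.folds > 1)
pretty.gbm.tree(gbm1, i.tree = 1)  # readable view of the first tree structure
```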
8686 #' @rdname gbmCrossVal
8787 #' @export
8888 gbmCrossValErr <- function(cv.models, cv.folds, cv.group, nTrain, n.trees) {
89 in.group <- tabulate(cv.group, nbins=cv.folds)
89 in.group <- tabulate(cv.group, nbins = cv.folds)
9090 cv.error <- vapply(1:cv.folds,
9191 function(index) {
9292 model <- cv.models[[index]]
131131 model <- cv.models[[ind]]
132132
133133 # The %in% here is to handle coxph
134 my.data <- data[flag, !(data.names %in% model$response.name)]
134 # my.data <- data[flag, !(data.names %in% model$response.name)]
135 my.data <- data[flag, model$var.names]
135136 predictions <- predict(model, newdata = my.data, n.trees = best.iter.cv) # FIXME
136137 predictions <- matrix(predictions, ncol = num.cols)
137138 res[flag, ] <- predictions
138
139139 }
140140
141141 # Handle multinomial case
160160 shrinkage, bag.fraction, var.names,
161161 response.name, group) {
162162
163 # Set up cluster and add finalizer
164 cluster <- gbmCluster(n.cores)
165 on.exit(parallel::stopCluster(cluster))
166
167163 # Set random seeds
168164 seeds <- as.integer(runif(cv.folds, -(2^31 - 1), 2^31))
169
165
170166 # Perform cross-validation model builds
171 parallel::parLapply(cl = cluster, X = 1:cv.folds, fun = gbmDoFold, i.train, x,
172 y, offset, distribution, w, var.monotone, n.trees,
173 interaction.depth, n.minobsinnode, shrinkage,
174 bag.fraction, cv.group, var.names, response.name, group,
175 seeds)
176
167 if (!is.null(n.cores) && n.cores == 1) {
168 lapply(1:cv.folds, FUN = gbmDoFold, i.train, x,
169 y, offset, distribution, w, var.monotone, n.trees,
170 interaction.depth, n.minobsinnode, shrinkage,
171 bag.fraction, cv.group, var.names, response.name, group,
172 seeds)
173 } else {
174 # Set up cluster and add finalizer
175 cluster <- gbmCluster(n.cores)
176 on.exit(parallel::stopCluster(cluster))
177 parallel::parLapply(cl = cluster, X = 1:cv.folds, fun = gbmDoFold, i.train, x,
178 y, offset, distribution, w, var.monotone, n.trees,
179 interaction.depth, n.minobsinnode, shrinkage,
180 bag.fraction, cv.group, var.names, response.name, group,
181 seeds)
182 }
183
177184 }
178185
179186
184191 bag.fraction, cv.group, var.names, response.name, group,
185192 s) {
186193
187 # Do specified cross-validation fold - a self-contained function for passing
194 # Do specified cross-validation fold---a self-contained function for passing
188195 # to individual cores.
189196
190197 # Load required packages for core
191 library(gbm, quietly=TRUE)
198 library(gbm, quietly = TRUE)
192199
193200 # Print CV information
194201 cat("CV:", X, "\n")
196203 # Setup
197204 set.seed(s[[X]])
198205 i <- order(cv.group == X)
199 x <- x[i.train,,drop=TRUE][i,,drop=FALSE]
206 x <- x[i.train, , drop = TRUE][i, , drop = FALSE]
200207 y <- y[i.train][i]
201208 offset <- offset[i.train][i]
202209 nTrain <- length(which(cv.group != X))
203210 group <- group[i.train][i]
204211
205 # Fit a GBM
206 res <- gbm.fit(x = x, y = y, offset = offset, distribution = distribution,
207 w = w, var.monotone = var.monotone, n.trees = n.trees,
208 interaction.depth = interaction.depth,
209 n.minobsinnode = n.minobsinnode,
210 shrinkage = shrinkage, bag.fraction = bag.fraction,
211 nTrain = nTrain, keep.data = FALSE, verbose = FALSE,
212 response.name = response.name, group = group)
213
214 # Return the result
215 res
216
217 }
212 # Return a fitted GBM
213 gbm.fit(x = x, y = y, offset = offset, distribution = distribution,
214 w = w, var.monotone = var.monotone, n.trees = n.trees,
215 interaction.depth = interaction.depth,
216 n.minobsinnode = n.minobsinnode,
217 shrinkage = shrinkage, bag.fraction = bag.fraction,
218 nTrain = nTrain, keep.data = FALSE, verbose = FALSE,
219 response.name = response.name, group = group)
220
221 }
1919 #' @param pred Predicted value.
2020 #' @param metric What type of performance measure to compute.
2121 #' @param y,y.f,f,w,group,max.rank Used internally.
22 #' @param x ?.
22 #' @param x Numeric vector.
2323 #' @return The requested performance measure.
2424 #'
2525 #' @details
4646 #' @references C. Burges (2010). "From RankNet to LambdaRank to LambdaMART: An
4747 #' Overview", Microsoft Research Technical Report MSR-TR-2010-82.
4848 #' @keywords models
49 #'
50 #' @examples
51 #'
52 #' ##---- Should be DIRECTLY executable !! ----
53 #' ##-- ==> Define data, use random,
54 #' ##-- or do help(data=index) for the standard data sets.
55
56
57 # Area under ROC curve = ratio of correctly ranking pairs
58 #' @rdname gbm.roc.area
49 #'
5950 #' @export
6051 gbm.roc.area <- function(obs, pred) {
6152 n1 <- sum(obs)
88 #' the weighted tree traversal method described in Friedman (2001) to do the
99 #' integration. Based on the variable types included in the projection,
1010 #' \code{plot.gbm} selects an appropriate display choosing amongst line plots,
11 #' contour plots, and \code{\link[lattice]{lattice}} plots. If the default
12 #' graphics are not sufficient the user may set \code{return.grid=TRUE}, store
11 #' contour plots, and \code{\link[lattice:Lattice]{lattice}} plots. If the default
12 #' graphics are not sufficient the user may set \code{return.grid = TRUE}, store
1313 #' the result of the function, and develop another graphic display more
1414 #' appropriate to the particular example.
1515 #'
1818 #'
1919 #' @param i.var Vector of indices or the names of the variables to plot. If
2020 #' using indices, the variables are indexed in the same order that they appear
21 #' in the initial \code{gbm} formula. If \code{length(i.var)} is between 1 and
21 #' in the initial \code{gbm} formula. If \code{length(i.var)} is between 1 and
2222 #' 3 then \code{plot.gbm} produces the plots. Otherwise, \code{plot.gbm}
2323 #' returns only the grid of evaluation points and their average predictions
2424 #'
4444 #' level plot. Only used when \code{level.plot = TRUE}. Default is \code{FALSE}.
4545 #'
4646 #' @param number Integer specifying the number of conditional intervals to use
47 #' for the continuous panel variables. See \code{\link[graphics]{co.intervals}}
48 #' and \code{\link[lattice]{equal.count}} for further details.
47 #' for the continuous panel variables. See \code{\link[graphics:coplot]{co.intervals}}
48 #' and \code{\link[lattice:shingles]{equal.count}} for further details.
4949 #'
5050 #' @param overlap The fraction of overlap of the conditioning variables. See
51 #' \code{\link[graphics]{co.intervals}} and \code{\link[lattice]{equal.count}}
51 #' \code{\link[graphics:coplot]{co.intervals}} and \code{\link[lattice:shingles]{equal.count}}
5252 #' for further details.
5353 #'
5454 #' @param col.regions Color vector to be used if \code{level.plot} is
5555 #' \code{TRUE}. Defaults to the wonderful Matplotlib 'viridis' color map
56 #' provided by the \code{viridis} package. See \code{\link[viridis]{viridis}}
56 #' provided by the \code{viridis} package. See \code{\link[viridis:reexports]{viridis}}
5757 #' for details.
5858 #'
5959 #' @param ... Additional optional arguments to be passed onto
60 #' \code{\link[graphics]{plot}}.
60 #' \code{\link[graphics:plot.default]{plot}}.
6161 #'
6262 #' @return If \code{return.grid = TRUE}, a grid of evaluation points and their
6363 #' average predictions. Otherwise, a plot is returned.
4747 #'
4848 #' @export predict.gbm
4949 #' @export
50 predict.gbm <- function(object,newdata,n.trees,
51 type="link",
52 single.tree = FALSE,
53 ...)
54 {
50 predict.gbm <- function(object, newdata, n.trees, type = "link",
51 single.tree = FALSE, ...) {
5552 if ( missing( newdata ) ){
5653 newdata <- reconstructGBMdata(object)
5754 }
58 if ( missing(n.trees) ) {
59 if ( object$train.fraction < 1 ){
60 n.trees <- gbm.perf( object, method="test", plot.it = FALSE )
55 if ( missing(n.trees) || length(n.trees) < 1 ) {
56 if ( object$train.fraction < 1 ) {
57 n.trees <- gbm.perf( object, method = "test", plot.it = FALSE )
58 } else if (!is.null(object$cv.error)) {
59 n.trees <- gbm.perf( object, method = "cv", plot.it = FALSE )
60 } else {
61 n.trees <- length( object$train.error )
6162 }
62 else if (!is.null(object$cv.error)){
63 n.trees <- gbm.perf( object, method="cv", plot.it = FALSE )
64 }
65 else{ best <- length( object$train.error ) }
66 cat( paste( "Using", n.trees, "trees...\n" ) )
63 message( paste( "Using", n.trees, "trees...\n" ) )
6764 }
68
69 if(!is.element(type, c("link","response" )))
70 {
71 stop("type must be either 'link' or 'response'")
65
66 if (!is.element(type, c("link", "response"))) {
67 stop("type must be either 'link' or 'response'")
7268 }
73 if(!is.null(object$Terms))
74 {
75 x <- model.frame(terms(reformulate(object$var.names)),
76 newdata,
77 na.action=na.pass)
69 if (!is.null(object$Terms)) {
70 x <- model.frame(terms(reformulate(object$var.names)), newdata,
71 na.action = na.pass)
72 } else {
73 x <- newdata
7874 }
79 else
80 {
81 x <- newdata
82 }
83
75
8476 cRows <- nrow(x)
8577 cCols <- ncol(x)
86
87 for(i in 1:cCols)
88 {
89 if(is.factor(x[,i]))
90 {
91 if (length(levels(x[,i])) > length(object$var.levels[[i]])) {
92 new.compare <- levels(x[,i])[1:length(object$var.levels[[i]])]
93 } else {
94 new.compare <- levels(x[,i])
95 }
96 if (!identical(object$var.levels[[i]], new.compare)) {
97 x[,i] <- factor(x[,i], union(object$var.levels[[i]], levels(x[,i])))
98 }
99 x[,i] <- as.numeric(factor(x[,i], levels = object$var.levels[[i]]))-1
78
79 for(i in 1:cCols) {
80 if(is.factor(x[,i])) {
81 if (length(levels(x[,i])) > length(object$var.levels[[i]])) {
82 new.compare <- levels(x[,i])[1:length(object$var.levels[[i]])]
83 } else {
84 new.compare <- levels(x[,i])
85 }
86 if (!identical(object$var.levels[[i]], new.compare)) {
87 x[,i] <- factor(x[,i], union(object$var.levels[[i]], levels(x[,i])))
88 }
89 x[,i] <- as.numeric(factor(x[,i], levels = object$var.levels[[i]]))-1
10090 }
10191 }
102
92
10393 x <- as.vector(unlist(x, use.names=FALSE))
104 if(missing(n.trees) || any(n.trees > object$n.trees))
105 {
106 n.trees[n.trees>object$n.trees] <- object$n.trees
107 warning("Number of trees not specified or exceeded number fit so far. Using ",paste(n.trees,collapse=" "),".")
94 if (missing(n.trees) || any(n.trees > object$n.trees)) {
95 n.trees[n.trees>object$n.trees] <- object$n.trees
96 warning("Number of trees not specified or exceeded number fit so far. ",
97 "Using ", paste(n.trees, collapse = " "), ".")
10898 }
10999 i.ntree.order <- order(n.trees)
110
100
111101 # Next if block for compatibility with objects created with version 1.6.
112102 if (is.null(object$num.classes)){
113 object$num.classes <- 1
103 object$num.classes <- 1
114104 }
115
105
116106 predF <- .Call("gbm_pred",
117107 X=as.double(x),
118108 cRows=as.integer(cRows),
125115 var.type=as.integer(object$var.type),
126116 single.tree = as.integer(single.tree),
127117 PACKAGE = "gbm")
128
129 if((length(n.trees) > 1) || (object$num.classes > 1))
130 {
131 if(object$distribution$name=="multinomial")
132 {
118
119 if ((length(n.trees) > 1) || (object$num.classes > 1)) {
120 if (object$distribution$name=="multinomial") {
133121 predF <- array(predF, dim=c(cRows,object$num.classes,length(n.trees)))
134122 dimnames(predF) <- list(NULL, object$classes, n.trees)
135123 predF[,,i.ntree.order] <- predF
136 } else
137 {
124 } else {
138125 predF <- matrix(predF, ncol=length(n.trees), byrow=FALSE)
139126 colnames(predF) <- n.trees
140127 predF[,i.ntree.order] <- predF
141128 }
142129 }
143
144 if(type=="response")
145 {
146 if(is.element(object$distribution$name, c("bernoulli", "pairwise")))
147 {
148 predF <- 1/(1+exp(-predF))
149 } else
150 if(object$distribution$name=="poisson")
151 {
130
131 if (type=="response") {
132 if (is.element(object$distribution$name, c("bernoulli", "pairwise"))) {
133 predF <- 1 / (1 + exp(-predF))
134 } else if (object$distribution$name =="poisson") {
152135 predF <- exp(predF)
153 }
154 else if (object$distribution$name == "adaboost"){
136 } else if (object$distribution$name == "adaboost"){
155137 predF <- 1 / (1 + exp(-2*predF))
156138 }
157 if(object$distribution$name=="multinomial")
158 {
139 if (object$distribution$name == "multinomial") {
159140 pexp <- exp(predF)
160141 psum <- apply(pexp, c(1, 3), function(x) { x / sum(x) })
161142 # Transpose each 2d array
162143 predF <- aperm(psum, c(2, 1, 3))
163144 }
164
165 if((length(n.trees)==1) && (object$distribution$name!="multinomial"))
166 {
167 predF <- as.vector(predF)
145
146 if ((length(n.trees) == 1) &&
147 (object$distribution$name != "multinomial")) {
148 predF <- as.vector(predF)
168149 }
169150 }
170
171 if(!is.null(attr(object$Terms,"offset")))
172 {
173 warning("predict.gbm does not add the offset to the predicted values.")
151
152 if(!is.null(attr(object$Terms,"offset"))) {
153 warning("predict.gbm does not add the offset to the predicted values.")
174154 }
175
155
176156 return(predF)
177157 }
R/shrink.gbm.R (+0, -85)
0 # evaluates the objective function and gradient with respect to beta
1 # beta = log(lambda/(1-lambda))
2
3 #' L1 shrinkage of the predictor variables in a GBM
4 #'
5 #' Performs recursive shrinkage in each of the trees in a GBM fit using
6 #' different shrinkage parameters for each variable.
7 #'
8 #' This function is currently experimental. Used in conjunction with a gradient
9 #' ascent search for inclusion of variables.
10 #'
11 #' @param object A \code{\link{gbm.object}}.
12 #'
13 #' @param n.trees Integer specifying the number of trees to use.
14 #'
15 #' @param lambda Vector of length equal to the number of variables containing
16 #' the shrinkage parameter for each variable.
17 #'
18 #' @param \dots Additional optional arguments. (Currently ignored.)
19 #'
20 #' @return \item{predF}{Predicted values from the shrunken tree}
21 #' \item{objective}{The value of the loss function associated with the
22 #' predicted values} \item{gradient}{A vector with length equal to the number
23 #' of variables containing the derivative of the objective function with
24 #' respect to beta, the logit transform of the shrinkage parameter for each
25 #' variable}
26 #'
27 #' @note Warning: This function is experimental.
28 #'
29 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
30 #'
31 #' @seealso \code{\link{shrink.gbm.pred}}, \code{\link{gbm}}
32 #'
33 #' @references Hastie, T. J., and Pregibon, D.
34 #' \url{https://web.stanford.edu/~hastie/Papers/shrink_tree.pdf}. AT&T Bell
35 #' Laboratories Technical Report (March 1990).
36 #'
37 #' @keywords methods
38 #'
39 #' @export
40 shrink.gbm <- function(object,n.trees,
41 lambda=rep(10,length(object$var.names)),
42 ...)
43 {
44 if(length(lambda) != length(object$var.names))
45 {
46 stop("lambda must have the same length as the number of variables in the gbm object.")
47 }
48
49 if(is.null(object$data))
50 {
51 stop("shrink.gbm requires keep.data=TRUE when gbm model is fit.")
52 }
53
54 y <- object$data$y
55 x <- object$data$x
56
57 cCols <- length(object$var.names)
58 cRows <- length(x)/cCols
59
60
61 if(missing(n.trees) || (n.trees > object$n.trees))
62 {
63 n.trees <- object$n.trees
64 warning("n.trees not specified or some values exceeded number fit so far. Using ",n.trees,".")
65 }
66
67 result <- .Call("gbm_shrink_gradient",
68 y=as.double(y),
69 X=as.double(x),
70 cRows=as.integer(cRows),
71 cCols=as.integer(cCols),
72 n.trees=as.integer(n.trees),
73 initF=object$initF,
74 trees=object$trees,
75 c.split=object$c.split,
76 var.type=as.integer(object$var.type),
77 depth=as.integer(object$interaction.depth),
78 lambda=as.double(lambda),
79 PACKAGE = "gbm")
80
81 names(result) <- c("predF","objective","gradient")
82
83 return(result)
84 }
R/shrink.gbm.pred.R (+0, -80)
0 #' Predictions from a shrunked GBM
1 #'
2 #' Makes predictions from a shrunken GBM model.
3 #'
4 #' @param object a \code{\link{gbm.object}}
5 #' @param newdata dataset for predictions
6 #' @param n.trees the number of trees to use
7 #' @param lambda a vector with length equal to the number of variables
8 #' containing the shrinkage parameter for each variable
9 #' @param \dots other parameters (ignored)
10 #' @return A vector with length equal to the number of observations in newdata
11 #' containing the predictions
12 #' @section Warning: This function is experimental
13 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
14 #' @seealso \code{\link{shrink.gbm}}, \code{\link{gbm}}
15 #' @keywords methods
16 #' @export
17 shrink.gbm.pred <- function(object,newdata,n.trees,
18 lambda=rep(1,length(object$var.names)),
19 ...)
20 {
21 if(length(lambda) != length(object$var.names))
22 {
23 stop("lambda must have the same length as the number of variables in the gbm object.")
24 }
25
26 if(!is.null(object$Terms))
27 {
28 x <- model.frame(delete.response(object$Terms),
29 newdata,
30 na.action=na.pass)
31 }
32 else
33 {
34 x <- newdata
35 }
36
37 cRows <- nrow(x)
38 cCols <- ncol(x)
39
40 for(i in 1:cCols)
41 {
42 if(is.factor(x[,i]))
43 {
44 j <- match(levels(x[,i]), object$var.levels[[i]])
45 if(any(is.na(j)))
46 {
47 stop(paste("New levels for variable ",
48 object$var.names[i],": ",
49 levels(x[,i])[is.na(j)],sep=""))
50 }
51 x[,i] <- as.numeric(x[,i])-1
52 }
53 }
54
55 x <- as.vector(unlist(x))
56 if(missing(n.trees) || any(n.trees > object$n.trees))
57 {
58 n.trees <- n.trees[n.trees<=object$n.trees]
59 if(length(n.trees)==0) n.trees <- object$n.trees
60 warning("n.trees not specified or some values exceeded number fit so far. Using ",n.trees,".")
61 }
62 # sort n.trees so that predictions are easier to generate and store
63 n.trees <- sort(n.trees)
64
65 predF <- .Call("gbm_shrink_pred",
66 X=as.double(x),
67 cRows=as.integer(cRows),
68 cCols=as.integer(cCols),
69 n.trees=as.integer(n.trees),
70 initF=object$initF,
71 trees=object$trees,
72 c.split=object$c.split,
73 var.type=as.integer(object$var.type),
74 depth=as.integer(object$interaction.depth),
75 lambda=as.double(lambda),
76 PACKAGE = "gbm")
77
78 return(predF)
79 }
2828
2929 ############################################################################
3030 ## test Gaussian distribution gbm model
31 set.seed(1)
31 set.seed(123)
3232
3333 cat("Running least squares regression example.\n")
3434
7373 # Get best model
7474 best.iter <- gbm.perf(gbm1,method="cv", plot.it=FALSE) # returns cv estimate of best number of trees
7575
76 set.seed(2)
76 set.seed(223)
7777 # make some new data
7878 N <- 1000
7979 X1 <- runif(N)
104104
105105 cat("Running cox proportional hazards regression example.\n")
106106 # create some data
107 set.seed(1)
107 set.seed(2)
108108 N <- 3000
109109 X1 <- runif(N)
110110 X2 <- runif(N)
0 #' Arrange multiple grobs on a page
1 #'
2 #' See \code{\link[gridExtra]{grid.arrange}} for more details.
3 #'
4 #' @name grid.arrange
5 #' @rdname grid.arrange
6 #' @keywords internal
7 #' @export
8 #' @importFrom gridExtra grid.arrange
9 #' @usage grid.arrange(..., newpage = TRUE)
10 NULL
11
12
130 #' @keywords internal
141 getAvailableDistributions <- function() {
152 c("adaboost", "bernoulli", "coxph", "gaussian", "huberized", "laplace",
33 [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/gbm)](https://cran.r-project.org/package=gbm)
44 [![Build
55 Status](https://travis-ci.org/gbm-developers/gbm.svg?branch=master)](https://travis-ci.org/gbm-developers/gbm)
6 [![Codecov test
7 coverage](https://codecov.io/gh/gbm-developers/gbm/branch/master/graph/badge.svg)](https://codecov.io/gh/gbm-developers/gbm?branch=master)
68 [![Downloads](http://cranlogs.r-pkg.org/badges/gbm)](http://cranlogs.r-pkg.org/badges/gbm)
79 [![Total
810 Downloads](http://cranlogs.r-pkg.org/badges/grand-total/gbm)](http://cranlogs.r-pkg.org/badges/grand-total/gbm)
1012 Overview
1113 --------
1214
13 The gbm package (which stands for **g**eneralized **b**oosted
14 **m**odels) implements extensions to Freund and Schapire’s AdaBoost
15 algorithm and [Friedman’s gradient boosting
16 machine](http://projecteuclid.org/euclid.aos/1013203451). It includes
17 regression methods for least squares, absolute loss, t-distribution
18 loss, quantile regression, logistic, multinomial logistic, Poisson, Cox
19 proportional hazards partial likelihood, AdaBoost exponential loss,
20 Huberized hinge loss, and Learning to Rank measures (i.e.,
15 The [gbm](https://cran.r-project.org/package=gbm) package, which stands
16 for **g**eneralized **b**oosted **m**odels, provides extensions to
17 Freund and Schapire’s AdaBoost algorithm and [Friedman’s gradient
18 boosting machine](http://projecteuclid.org/euclid.aos/1013203451). It
19 includes regression methods for least squares, absolute loss,
20 *t*-distribution loss, quantile regression, logistic, multinomial
21 logistic, Poisson, Cox proportional hazards partial likelihood, AdaBoost
22 exponential loss, Huberized hinge loss, and Learning to Rank measures
23 (i.e.,
2124 [LambdaMart](https://www.microsoft.com/en-us/research/publication/from-ranknet-to-lambdarank-to-lambdamart-an-overview/)).
2225
2326 Installation
2730 # The easiest way to get gbm is to install it from CRAN:
2831 install.packages("gbm")
2932
30 # Or the the development version from GitHub:
31 # install.packages("devtools")
32 devtools::install_github("gbm-developers/gbm")
33 # Alternatively, you can install the development version from GitHub:
34 if (!requireNamespace("remotes")) {
35 install.packages("remotes")
36 }
37 remotes::install_github("gbm-developers/gbm")
3338 ```
3439
3540 Lifecycle
3742
3843 [![lifecycle](https://img.shields.io/badge/lifecycle-retired-orange.svg)](https://www.tidyverse.org/lifecycle/#retired)
3944
40 The gbm package is retired and no longer under active development. We
41 will only make the necessary changes to ensure that gbm remain on CRAN.
42 For the most part, no new features will be added, and only the most
43 critical of bugs will be fixed.
45 The [gbm](https://cran.r-project.org/package=gbm) package is retired and
46 no longer under active development. We will only make the necessary
47 changes to ensure that [gbm](https://cran.r-project.org/package=gbm)
48 remains on CRAN. For the most part, no new features will be added, and
49 only the most critical of bugs will be fixed.
4450
45 This is a maintained version of `gbm` back compatible to CRAN versions
46 of `gbm` 2.1.x. It exists mainly for the purpose of reproducible
47 research and data analyses performed with the 2.1.x versions of `gbm`.
48 For newer development, and a more consistent API, try out the
49 [gbm3](https://github.com/gbm-developers/gbm3) package!
51 This is a maintained version of
52 [gbm](https://cran.r-project.org/package=gbm) back compatible to CRAN
53 versions of [gbm](https://cran.r-project.org/package=gbm) 2.1.x. It
54 exists mainly for the purpose of reproducible research and data analyses
55 performed with the 2.1.x versions of
56 [gbm](https://cran.r-project.org/package=gbm). ~~For newer development,
57 and a more consistent API, try out the
58 [gbm3](https://github.com/gbm-developers/gbm3) package!~~
Binary diff not shown
Binary diff not shown
0 # For reproducibility
1 set.seed(1)
2
3 # Create some data
4 N <- 1000
5 X1 <- runif(N)
6 X2 <- runif(N)
7 X3 <- factor(sample(letters[1:4], N, replace = T))
8 mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
9 p <- 1 / (1 + exp(-(sin(3 * X1) - 4 * X2 + mu)))
10 Y <- rbinom(N, 1, p)
11 w <- rexp(N)
12 w <- N * w / sum(w) # random weights if you want to experiment with them
13 data <- data.frame(Y = Y, X1 = X1, X2 = X2, X3 = X3)
14
15 # Fit initial model
16 gbm1 <- gbm(
17 Y ~ X1 + X2 + X3, # formula
18 data = data, # dataset
19 weights = w,
20 var.monotone = c(0, 0, 0), # -1: monotone decrease, +1: monotone increase, 0: no monotone restrictions
21 distribution = "bernoulli",
22 n.trees = 3000, # number of trees
23 shrinkage = 0.001, # shrinkage or learning rate, 0.001 to 0.1 usually work
24 interaction.depth = 3, # 1: additive model, 2: two-way interactions, etc
25 bag.fraction = 0.5, # subsampling fraction, 0.5 is probably best
26 train.fraction = 0.5, # fraction of data for training, first train.fraction*N used for training
27 cv.folds = 5, # do 5-fold cross-validation
28 n.cores = 1,
29 n.minobsinnode = 10 # minimum total weight needed in each node
30 )
31
32 # Extract optimal number of trees based on test set performance
33 best.iter.test <- gbm.perf(gbm1, method = "test", plot.it = FALSE) # returns test set estimate of best number of trees
34 best.iter <- best.iter.test
35
36 # Make some new data
37 set.seed(2)
38 N <- 1000
39 X1 <- runif(N)
40 X2 <- runif(N)
41 X3 <- factor(sample(letters[1:4], N, replace = T))
42 mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
43 p <- 1 / (1 + exp(-(sin(3 * X1) - 4 * X2 + mu)))
44 Y <- rbinom(N, 1, p)
45 data2 <- data.frame(Y = Y, X1 = X1, X2 = X2, X3 = X3)
46
47 # Predict on the new data using "best" number of trees
48 # f.predict will be on the canonical scale (logit,log,etc.)
49 f.1.predict <- predict(gbm1, data2, n.trees = best.iter.test)
50
51 # Compute quantity prior to transformation
52 f.new <- sin(3 * X1) - 4 * X2 + mu
53
54 # Base the validation tests on observed discrepancies
55 expect_true(sd(f.new - f.1.predict) < 1.0)
0 # Load required packages
1 library(survival)
2
3 # Create some data
4 set.seed(2)
5 N <- 3000
6 X1 <- runif(N)
7 X2 <- runif(N)
8 X3 <- factor(sample(letters[1:4], N, replace = T))
9 mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
10 f <- 0.5 * sin(3 * X1 + 5 * X2 ^ 2 + mu / 10)
11 tt.surv <- rexp(N, exp(f))
12 tt.cens <- rexp(N, 0.5)
13 delta <- as.numeric(tt.surv <= tt.cens)
14 tt <- apply(cbind(tt.surv, tt.cens), 1, min)
15
16 # Throw in some missing values
17 X1[sample(1:N, size = 100)] <- NA
18 X3[sample(1:N, size = 300)] <- NA
19
20 # Random weights if you want to experiment with them
21 w <- rep(1, N)
22
23 data <- data.frame(
24 tt = tt,
25 delta = delta,
26 X1 = X1,
27 X2 = X2,
28 X3 = X3
29 )
30
31 # fit initial model
32 gbm1 <- gbm(
33 Surv(tt, delta) ~ X1 + X2 + X3, # formula
34 data = data, # dataset
35 weights = w,
36 var.monotone = c(0, 0, 0), # -1: monotone decrease, +1: monotone increase, 0: no monotone restrictions
37 distribution = "coxph",
38 n.trees = 3000, # number of trees
39 shrinkage = 0.001, # shrinkage or learning rate, 0.001 to 0.1 usually work
40 interaction.depth = 3, # 1: additive model, 2: two-way interactions, etc
41 bag.fraction = 0.5, # subsampling fraction, 0.5 is probably best
42 train.fraction = 0.5, # fraction of data for training, first train.fraction*N used for training
43 cv.folds = 5, # do 5-fold cross-validation
44 n.cores = 1,
45 n.minobsinnode = 10, # minimum total weight needed in each node
46 keep.data = TRUE
47 )
48
49 # Extract optimal number of trees based on test set performance
50 best.iter <- gbm.perf(gbm1, method = "test", plot.it = FALSE) # returns test set estimate of best number of trees
51
52 # Make some new data
53 set.seed(2)
54 N <- 1000
55 X1 <- runif(N)
56 X2 <- runif(N)
57 X3 <- factor(sample(letters[1:4], N, replace = T))
58 mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
59
60 f <- 0.5 * sin(3 * X1 + 5 * X2 ^ 2 + mu / 10) # -0.5 <= f <= 0.5 via sin fn.
61 tt.surv <- rexp(N, exp(f))
62 tt.cens <- rexp(N, 0.5)
63
64 data2 <- data.frame(
65 tt = apply(cbind(tt.surv, tt.cens), 1, min),
66 delta = as.numeric(tt.surv <= tt.cens),
67 f = f,
68 X1 = X1,
69 X2 = X2,
70 X3 = X3
71 )
72
73 # predict on the new data using "best" number of trees
74 # f.predict will be on the canonical scale (logit,log,etc.)
75 f.predict <- predict(gbm1, newdata = data2, n.trees = best.iter)
76
77 #plot(data2$f,f.predict)
78 # Use observed sd
79 expect_true(sd(data2$f - f.predict) < 0.4,
80 info = "CoxPH: checking if squared error within tolerance.")
0 # For reproducibility
1 set.seed(848)
2
3 # Create some data
4 N <- 1000
5 X1 <- runif(N)
6 X2 <- 2 * runif(N)
7 X3 <- factor(sample(letters[1:4], N, replace = TRUE))
8 X4 <- ordered(sample(letters[1:6], N, replace = TRUE))
9 X5 <- factor(sample(letters[1:3], N, replace = TRUE))
10 X6 <- 3 * runif(N)
11 mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
12 SNR <- 10 # signal-to-noise ratio
13 Y <- X1 ^ 1.5 + 2 * (X2 ^ 0.5) + mu
14 sigma <- sqrt(var(Y) / SNR)
15 Y <- Y + rnorm(N, mean = 0, sd = sigma)
16 # Create a bunch of missing values
17 X1[sample(1:N, size = 100)] <- NA
18 X3[sample(1:N, size = 300)] <- NA
19 w <- rep(1, N)
20 data <- data.frame(Y = Y, X1 = X1, X2 = X2, X3 = X3, X4 = X4, X5 = X5, X6 = X6)
21
22 # fit initial model
23 gbm1 <- gbm(
24 Y ~ X1 + X2 + X3 + X4 + X5 + X6, # formula
25 data = data, # dataset
26 var.monotone = c(0,0,0,0,0,0), # -1: monotone decrease, +1: monotone increase, 0: no monotone restrictions
27 distribution = "gaussian", # bernoulli, adaboost, gaussian, poisson, coxph, or
28 # list(name = "quantile", alpha = 0.05) for quantile regression
29 n.trees = 2000, # number of trees
30 shrinkage = 0.005, # shrinkage or learning rate, 0.001 to 0.1 usually work
31 interaction.depth = 3, # 1: additive model, 2: two-way interactions, etc
32 bag.fraction = 0.5, # subsampling fraction, 0.5 is probably best
33 train.fraction = 1, # fraction of data for training, first train.fraction*N used for training
34 n.minobsinnode = 10, # minimum number of obs needed in each node
35 keep.data = TRUE,
36 cv.folds = 10, # do 10-fold cross-validation
37 n.cores = 1
38 )
39
40 # Get best model
41 best.iter <- gbm.perf(gbm1, method = "cv", plot.it = FALSE) # returns cv estimate of best number of trees
42
43 # For reproducibility
44 set.seed(223)
45
46 # Make some new data
47 N <- 1000
48 X1 <- runif(N)
49 X2 <- 2 * runif(N)
50 X3 <- factor(sample(letters[1:4], N, replace = TRUE))
51 X4 <- ordered(sample(letters[1:6], N, replace = TRUE))
52 X5 <- factor(sample(letters[1:3], N, replace = TRUE))
53 X6 <- 3 * runif(N)
54 mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
55
56 # Actual underlying signal
57 Y <- X1 ^ 1.5 + 2 * (X2 ^ 0.5) + mu
58
59 # Want to see how close predictions are to the underlying signal; noise would just interfere with this
60 # Y <- Y + rnorm(N,0,sigma)
61 data2 <- data.frame(Y = Y, X1 = X1, X2 = X2, X3 = X3, X4 = X4, X5 = X5, X6 = X6)
62
63 # predict on the new data using "best" number of trees
64 f.predict <- predict(gbm1, data2, best.iter) # f.predict will be on the canonical scale (logit,log,etc.)
65
66 # Base the validation tests on observed discrepancies
67 expect_true(abs(mean(data2$Y - f.predict)) < 0.01,
68 info = "LS: checking if Gaussian absolute error within tolerance.")
69 expect_true(sd(data2$Y - f.predict) < sigma,
70 info = "LS: checking if Gaussian squared error within tolerance.")
0 # Test that relative.influence really does pick out the true predictors
1 set.seed(1234)
2 X1 <- matrix(nrow = 1000, ncol = 50)
3 X1 <- apply(X1, 2, function(x) rnorm(1000)) # random noise
4 X2 <- matrix(nrow = 1000, ncol = 5)
5 X2 <- apply(X2, 2, function(x) c(rnorm(500), rnorm(500, 3))) # real predictors
6 cls <- rep(c(0, 1), each = 500) # class labels
7 X <- data.frame(cbind(X1, X2, cls))
8 mod <- gbm(
9 cls ~ .,
10 data = X,
11 n.trees = 1000,
12 cv.folds = 5,
13 n.cores = 1,
14 shrinkage = .01,
15 interaction.depth = 2
16 )
17 ri <- rev(sort(relative.influence(mod)))
18 wh <- names(ri)[1:5]
19 res <- sum(wh %in% paste("V", 51:55, sep = ""))
20 expect_identical(
21 current = res,
22 target = 5L,
23 info = "Checking if relative influence identifies true predictors."
24 )
33 \alias{basehaz.gbm}
44 \title{Baseline hazard function}
55 \usage{
6 basehaz.gbm(t, delta, f.x, t.eval = NULL, smooth = FALSE,
7 cumulative = TRUE)
6 basehaz.gbm(t, delta, f.x, t.eval = NULL, smooth = FALSE, cumulative = TRUE)
87 }
98 \arguments{
109 \item{t}{The survival times.}
33 \alias{calibrate.plot}
44 \title{Calibration plot}
55 \usage{
6 calibrate.plot(y, p, distribution = "bernoulli", replace = TRUE,
7 line.par = list(col = "black"), shade.col = "lightyellow",
8 shade.density = NULL, rug.par = list(side = 1),
9 xlab = "Predicted value", ylab = "Observed average", xlim = NULL,
10 ylim = NULL, knots = NULL, df = 6, ...)
6 calibrate.plot(
7 y,
8 p,
9 distribution = "bernoulli",
10 replace = TRUE,
11 line.par = list(col = "black"),
12 shade.col = "lightyellow",
13 shade.density = NULL,
14 rug.par = list(side = 1),
15 xlab = "Predicted value",
16 ylab = "Observed average",
17 xlim = NULL,
18 ylim = NULL,
19 knots = NULL,
20 df = 6,
21 ...
22 )
1123 }
1224 \arguments{
1325 \item{y}{The outcome 0-1 variable.}
1931 All others default to squared error assuming \code{gaussian}.}
2032
2133 \item{replace}{Determines whether this plot will replace or overlay the
22 current plot. \code{replace=FALSE} is useful for comparing the calibration
34 current plot. \code{replace=FALSE} is useful for comparing the calibration
2335 of several methods.}
2436
2537 \item{line.par}{Graphics parameters for the line.}
4355 calibration curve.}
4456
4557 \item{...}{Additional optional arguments to be passed onto
46 \code{\link[graphics]{plot}}}
58 \code{\link[graphics:plot.default]{plot}}}
4759 }
4860 \value{
4961 No return values.
33 \alias{gbm}
44 \title{Generalized Boosted Regression Modeling (GBM)}
55 \usage{
6 gbm(formula = formula(data), distribution = "bernoulli",
7 data = list(), weights, var.monotone = NULL, n.trees = 100,
8 interaction.depth = 1, n.minobsinnode = 10, shrinkage = 0.1,
9 bag.fraction = 0.5, train.fraction = 1, cv.folds = 0,
10 keep.data = TRUE, verbose = FALSE, class.stratify.cv = NULL,
11 n.cores = NULL)
6 gbm(
7 formula = formula(data),
8 distribution = "bernoulli",
9 data = list(),
10 weights,
11 var.monotone = NULL,
12 n.trees = 100,
13 interaction.depth = 1,
14 n.minobsinnode = 10,
15 shrinkage = 0.1,
16 bag.fraction = 0.5,
17 train.fraction = 1,
18 cv.folds = 0,
19 keep.data = TRUE,
20 verbose = FALSE,
21 class.stratify.cv = NULL,
22 n.cores = NULL
23 )
1224 }
1325 \arguments{
1426 \item{formula}{A symbolic description of the model to be fit. The formula
208220 Y <- X1 ^ 1.5 + 2 * (X2 ^ 0.5) + mu
209221 sigma <- sqrt(var(Y) / SNR)
210222 Y <- Y + rnorm(N, 0, sigma)
211 X1[sample(1:N, size = 500)] <- NA # introduce some missing values
212 X4[sample(1:N, size = 300)] <- NA # introduce some missing values
223 X1[sample(1:N,size=500)] <- NA # introduce some missing values
224 X4[sample(1:N,size=300)] <- NA # introduce some missing values
213225 data <- data.frame(Y, X1, X2, X3, X4, X5, X6)
214226
215227 # Fit a GBM
263275 print(sum((data2$Y - Yhat)^2))
264276
265277 # Construct univariate partial dependence plots
266 p1 <- plot(gbm1, i.var = 1, n.trees = best.iter)
267 p2 <- plot(gbm1, i.var = 2, n.trees = best.iter)
268 p3 <- plot(gbm1, i.var = "X3", n.trees = best.iter) # can use index or name
269 grid.arrange(p1, p2, p3, ncol = 3)
278 plot(gbm1, i.var = 1, n.trees = best.iter)
279 plot(gbm1, i.var = 2, n.trees = best.iter)
280 plot(gbm1, i.var = "X3", n.trees = best.iter) # can use index or name
270281
271282 # Construct bivariate partial dependence plots
272283 plot(gbm1, i.var = 1:2, n.trees = best.iter)
33 \alias{gbm.fit}
44 \title{Generalized Boosted Regression Modeling (GBM)}
55 \usage{
6 gbm.fit(x, y, offset = NULL, misc = NULL, distribution = "bernoulli",
7 w = NULL, var.monotone = NULL, n.trees = 100,
8 interaction.depth = 1, n.minobsinnode = 10, shrinkage = 0.001,
9 bag.fraction = 0.5, nTrain = NULL, train.fraction = NULL,
10 keep.data = TRUE, verbose = TRUE, var.names = NULL,
11 response.name = "y", group = NULL)
6 gbm.fit(
7 x,
8 y,
9 offset = NULL,
10 misc = NULL,
11 distribution = "bernoulli",
12 w = NULL,
13 var.monotone = NULL,
14 n.trees = 100,
15 interaction.depth = 1,
16 n.minobsinnode = 10,
17 shrinkage = 0.001,
18 bag.fraction = 0.5,
19 nTrain = NULL,
20 train.fraction = NULL,
21 keep.data = TRUE,
22 verbose = TRUE,
23 var.names = NULL,
24 response.name = "y",
25 group = NULL
26 )
1227 }
1328 \arguments{
1429 \item{x}{A data frame or matrix containing the predictor variables. The
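A sketch of the matrix interface, assuming the simulated data frame from the gbm() example above (gbm.fit() takes x and y directly rather than a formula, avoiding the model.frame overhead):

# Hypothetical: reuse predictors X1-X3 and response Y from the simulated data
fit <- gbm.fit(x = data[, c("X1", "X2", "X3")], y = data$Y,
               distribution = "gaussian", n.trees = 100,
               interaction.depth = 1, shrinkage = 0.001, verbose = FALSE)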
33 \alias{gbm.more}
44 \title{Generalized Boosted Regression Modeling (GBM)}
55 \usage{
6 gbm.more(object, n.new.trees = 100, data = NULL, weights = NULL,
7 offset = NULL, verbose = NULL)
6 gbm.more(
7 object,
8 n.new.trees = 100,
9 data = NULL,
10 weights = NULL,
11 offset = NULL,
12 verbose = NULL
13 )
814 }
915 \arguments{
1016 \item{object}{A \code{\link{gbm.object}} object created from an initial call
113119 print(sum((data2$Y - Yhat)^2))
114120
115121 # Construct univariate partial dependence plots
116 p1 <- plot(gbm1, i.var = 1, n.trees = best.iter)
117 p2 <- plot(gbm1, i.var = 2, n.trees = best.iter)
118 p3 <- plot(gbm1, i.var = "X3", n.trees = best.iter) # can use index or name
119 grid.arrange(p1, p2, p3, ncol = 3)
122 plot(gbm1, i.var = 1, n.trees = best.iter)
123 plot(gbm1, i.var = 2, n.trees = best.iter)
124 plot(gbm1, i.var = "X3", n.trees = best.iter) # can use index or name
120125
121126 # Construct bivariate partial dependence plots
122127 plot(gbm1, i.var = 1:2, n.trees = best.iter)
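Rather than refitting from scratch, gbm.more() grows an existing model; a sketch, assuming gbm1 was fit with keep.data = TRUE (otherwise pass data explicitly):

# Add 100 more trees to the existing fit
gbm2 <- gbm.more(gbm1, n.new.trees = 100, verbose = FALSE)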
33 \alias{gbm.object}
44 \title{Generalized Boosted Regression Model Object}
55 \value{
6 \item{initF}{the "intercept" term, the initial predicted value to
7 which trees make adjustments} \item{fit}{a vector containing the fitted
6 \item{initF}{The "intercept" term, the initial predicted value to
7 which trees make adjustments.} \item{fit}{A vector containing the fitted
88 values on the scale of the regression function (e.g., log-odds scale for
9 bernoulli, log scale for poisson)} \item{train.error}{a vector of length
9 bernoulli, log scale for poisson).} \item{train.error}{A vector of length
1010 equal to the number of fitted trees containing the value of the loss
11 function for each boosting iteration evaluated on the training data}
12 \item{valid.error}{a vector of length equal to the number of fitted trees
11 function for each boosting iteration evaluated on the training data.}
12 \item{valid.error}{A vector of length equal to the number of fitted trees
1313 containing the value of the loss function for each boosting iteration
14 evaluated on the validation data} \item{cv.error}{if \code{cv.folds}<2 this
15 component is NULL. Otherwise, this component is a vector of length equal to
14 evaluated on the validation data.} \item{cv.error}{If \code{cv.folds} < 2 this
15 component is \code{NULL}. Otherwise, this component is a vector of length equal to
1616 the number of fitted trees containing a cross-validated estimate of the loss
17 function for each boosting iteration} \item{oobag.improve}{a vector of
17 function for each boosting iteration.} \item{oobag.improve}{A vector of
1818 length equal to the number of fitted trees containing an out-of-bag estimate
1919 of the marginal reduction in the expected value of the loss function. The
2020 out-of-bag estimate uses only the training data and is useful for estimating
21 the optimal number of boosting iterations. See \code{\link{gbm.perf}}}
22 \item{trees}{a list containing the tree structures. The components are best
23 viewed using \code{\link{pretty.gbm.tree}}} \item{c.splits}{a list of all
21 the optimal number of boosting iterations. See \code{\link{gbm.perf}}.}
22 \item{trees}{A list containing the tree structures. The components are best
23 viewed using \code{\link{pretty.gbm.tree}}.} \item{c.splits}{A list of all
2424 the categorical splits in the collection of trees. If the \code{trees[[i]]}
2525 component of a \code{gbm} object describes a categorical split then the
2626 splitting value will refer to a component of \code{c.splits}. That component
2727 of \code{c.splits} will be a vector of length equal to the number of levels
2828 in the categorical split variable. -1 indicates left, +1 indicates right,
29 and 0 indicates that the level was not present in the training data}
29 and 0 indicates that the level was not present in the training data.}
3030 \item{cv.fitted}{If cross-validation was performed, the cross-validation
3131 predicted values on the scale of the linear predictor. That is, the fitted
32 values from the ith CV-fold, for the model having been trained on the data
32 values from the i-th CV-fold, for the model having been trained on the data
3333 in all other folds.}
3434 }
3535 \description{
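The components listed above can be inspected directly on a fitted model; a sketch, assuming gbm1 from the earlier examples:

gbm1$initF                        # the "intercept" term
length(gbm1$trees)                # number of fitted trees
pretty.gbm.tree(gbm1, i.tree = 1) # readable view of the first tree structure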
33 \alias{gbm.perf}
44 \title{GBM performance}
55 \usage{
6 gbm.perf(object, plot.it = TRUE, oobag.curve = FALSE, overlay = TRUE,
7 method)
6 gbm.perf(object, plot.it = TRUE, oobag.curve = FALSE, overlay = TRUE, method)
87 }
98 \arguments{
109 \item{object}{A \code{\link{gbm.object}} created from an initial call to
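A sketch, again assuming gbm1 from the earlier examples; the method argument must match how the model was trained:

# Plot the performance curves and return the estimated best iteration;
# "cv" requires cv.folds > 1 in the original gbm() call
best.iter <- gbm.perf(gbm1, plot.it = TRUE, oobag.curve = FALSE, method = "cv")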
3131
3232 \item{pred}{Predicted value.}
3333
34 \item{x}{?.}
34 \item{x}{Numeric vector.}
3535
3636 \item{y, y.f, f, w, group, max.rank}{Used internally.}
3737
5959 \code{gbm.conc} is more general as it allows non-binary targets, but is
6060 significantly slower.
6161 }
62 \examples{
63
64 ##---- Should be DIRECTLY executable !! ----
65 ##-- ==> Define data, use random,
66 ##-- or do help(data=index) for the standard data sets.
67 }
6862 \references{
6963 C. Burges (2010). "From RankNet to LambdaRank to LambdaMART: An
7064 Overview", Microsoft Research Technical Report MSR-TR-2010-82.
77 \alias{gbmCrossValPredictions}
88 \title{Cross-validate a gbm}
99 \usage{
10 gbmCrossVal(cv.folds, nTrain, n.cores, class.stratify.cv, data, x, y,
11 offset, distribution, w, var.monotone, n.trees, interaction.depth,
12 n.minobsinnode, shrinkage, bag.fraction, var.names, response.name, group)
10 gbmCrossVal(
11 cv.folds,
12 nTrain,
13 n.cores,
14 class.stratify.cv,
15 data,
16 x,
17 y,
18 offset,
19 distribution,
20 w,
21 var.monotone,
22 n.trees,
23 interaction.depth,
24 n.minobsinnode,
25 shrinkage,
26 bag.fraction,
27 var.names,
28 response.name,
29 group
30 )
1331
1432 gbmCrossValErr(cv.models, cv.folds, cv.group, nTrain, n.trees)
1533
16 gbmCrossValPredictions(cv.models, cv.folds, cv.group, best.iter.cv,
17 distribution, data, y)
34 gbmCrossValPredictions(
35 cv.models,
36 cv.folds,
37 cv.group,
38 best.iter.cv,
39 distribution,
40 data,
41 y
42 )
1843
19 gbmCrossValModelBuild(cv.folds, cv.group, n.cores, i.train, x, y, offset,
20 distribution, w, var.monotone, n.trees, interaction.depth,
21 n.minobsinnode, shrinkage, bag.fraction, var.names, response.name, group)
44 gbmCrossValModelBuild(
45 cv.folds,
46 cv.group,
47 n.cores,
48 i.train,
49 x,
50 y,
51 offset,
52 distribution,
53 w,
54 var.monotone,
55 n.trees,
56 interaction.depth,
57 n.minobsinnode,
58 shrinkage,
59 bag.fraction,
60 var.names,
61 response.name,
62 group
63 )
2264
23 gbmDoFold(X, i.train, x, y, offset, distribution, w, var.monotone, n.trees,
24 interaction.depth, n.minobsinnode, shrinkage, bag.fraction, cv.group,
25 var.names, response.name, group, s)
65 gbmDoFold(
66 X,
67 i.train,
68 x,
69 y,
70 offset,
71 distribution,
72 w,
73 var.monotone,
74 n.trees,
75 interaction.depth,
76 n.minobsinnode,
77 shrinkage,
78 bag.fraction,
79 cv.group,
80 var.names,
81 response.name,
82 group,
83 s
84 )
2685 }
2786 \arguments{
2887 \item{cv.folds}{The number of cross-validation folds.}
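These functions are internal plumbing; users normally reach them through the cv.folds argument of gbm(). A sketch, reusing the simulated data from the gbm() example above:

# gbmCrossVal() and friends are invoked under the hood by gbm()
gbm_cv <- gbm(Y ~ ., data = data, distribution = "gaussian",
              n.trees = 100, cv.folds = 5)
best.iter <- gbm.perf(gbm_cv, method = "cv")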
+0
-12
man/grid.arrange.Rd
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/utils.R
2 \name{grid.arrange}
3 \alias{grid.arrange}
4 \title{Arrange multiple grobs on a page}
5 \usage{
6 grid.arrange(..., newpage = TRUE)
7 }
8 \description{
9 See \code{\link[gridExtra]{grid.arrange}} for more details.
10 }
11 \keyword{internal}
33 \alias{plot.gbm}
44 \title{Marginal plots of fitted gbm objects}
55 \usage{
6 \method{plot}{gbm}(x, i.var = 1, n.trees = x$n.trees,
7 continuous.resolution = 100, return.grid = FALSE, type = c("link",
8 "response"), level.plot = TRUE, contour = FALSE, number = 4,
9 overlap = 0.1, col.regions = viridis::viridis, ...)
6 \method{plot}{gbm}(
7 x,
8 i.var = 1,
9 n.trees = x$n.trees,
10 continuous.resolution = 100,
11 return.grid = FALSE,
12 type = c("link", "response"),
13 level.plot = TRUE,
14 contour = FALSE,
15 number = 4,
16 overlap = 0.1,
17 col.regions = viridis::viridis,
18 ...
19 )
1020 }
1121 \arguments{
1222 \item{x}{A \code{\link{gbm.object}} that was fit using a call to
1424
1525 \item{i.var}{Vector of indices or the names of the variables to plot. If
1626 using indices, the variables are indexed in the same order that they appear
17 in the initial \code{gbm} formula. If \code{length(i.var)} is between 1 and
27 in the initial \code{gbm} formula. If \code{length(i.var)} is between 1 and
1828 3 then \code{plot.gbm} produces the plots. Otherwise, \code{plot.gbm}
1929 returns only the grid of evaluation points and their average predictions.}
2030
4050 level plot. Only used when \code{level.plot = TRUE}. Default is \code{FALSE}.}
4151
4252 \item{number}{Integer specifying the number of conditional intervals to use
43 for the continuous panel variables. See \code{\link[graphics]{co.intervals}}
44 and \code{\link[lattice]{equal.count}} for further details.}
53 for the continuous panel variables. See \code{\link[graphics:coplot]{co.intervals}}
54 and \code{\link[lattice:shingles]{equal.count}} for further details.}
4555
4656 \item{overlap}{The fraction of overlap of the conditioning variables. See
47 \code{\link[graphics]{co.intervals}} and \code{\link[lattice]{equal.count}}
57 \code{\link[graphics:coplot]{co.intervals}} and \code{\link[lattice:shingles]{equal.count}}
4858 for further details.}
4959
5060 \item{col.regions}{Color vector to be used if \code{level.plot} is
5161 \code{TRUE}. Defaults to the wonderful Matplotlib 'viridis' color map
52 provided by the \code{viridis} package. See \code{\link[viridis]{viridis}}
62 provided by the \code{viridis} package. See \code{\link[viridis:reexports]{viridis}}
5363 for details.}
5464
5565 \item{...}{Additional optional arguments to be passed onto
56 \code{\link[graphics]{plot}}.}
66 \code{\link[graphics:plot.default]{plot}}.}
5767 }
5868 \value{
5969 If \code{return.grid = TRUE}, a grid of evaluation points and their
7080 the weighted tree traversal method described in Friedman (2001) to do the
7181 integration. Based on the variable types included in the projection,
7282 \code{plot.gbm} selects an appropriate display choosing amongst line plots,
73 contour plots, and \code{\link[lattice]{lattice}} plots. If the default
74 graphics are not sufficient the user may set \code{return.grid=TRUE}, store
83 contour plots, and \code{\link[lattice:Lattice]{lattice}} plots. If the default
84 graphics are not sufficient the user may set \code{return.grid = TRUE}, store
7585 the result of the function, and develop another graphic display more
7686 appropriate to the particular example.
7787 }
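As the details note, setting return.grid = TRUE hands back the evaluation grid for custom graphics; a sketch, assuming gbm1 and best.iter from the earlier examples:

# Retrieve the grid of evaluation points and their averaged predictions
grid1 <- plot(gbm1, i.var = 1, n.trees = best.iter, return.grid = TRUE)
head(grid1)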
33 \alias{predict.gbm}
44 \title{Predict method for GBM Model Fits}
55 \usage{
6 \method{predict}{gbm}(object, newdata, n.trees, type = "link",
7 single.tree = FALSE, ...)
6 \method{predict}{gbm}(object, newdata, n.trees, type = "link", single.tree = FALSE, ...)
87 }
98 \arguments{
109 \item{object}{Object of class inheriting from \code{\link{gbm.object}}.}
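A sketch, assuming gbm1, best.iter, and a hold-out set data2 as in the earlier examples:

# Predictions on the link scale (the default) and on the response scale
Yhat <- predict(gbm1, newdata = data2, n.trees = best.iter)
Yhat_response <- predict(gbm1, newdata = data2, n.trees = best.iter,
                         type = "response")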
2626 quantile.rug(x)
2727 }
2828 \seealso{
29 \code{\link[graphics]{plot}}, \code{\link[stats]{quantile}},
29 \code{\link[graphics:plot.default]{plot}}, \code{\link[stats]{quantile}},
3030 \code{\link[base]{jitter}}, \code{\link[graphics]{rug}}.
3131 }
3232 \author{
99
1010 permutation.test.gbm(object, n.trees)
1111
12 gbm.loss(y, f, w, offset, dist, baseline, group = NULL,
13 max.rank = NULL)
12 gbm.loss(y, f, w, offset, dist, baseline, group = NULL, max.rank = NULL)
1413 }
1514 \arguments{
1615 \item{object}{a \code{gbm} object created from an initial call to
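A sketch of the two user-facing calls, assuming gbm1 and best.iter as before (gbm.loss() is used internally by the permutation test):

# Variable importance from split improvements (the default measure)
relative.influence(gbm1, n.trees = best.iter)
# Slower, permutation-based alternative
permutation.test.gbm(gbm1, n.trees = best.iter)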
+0
-50
man/shrink.gbm.Rd
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/shrink.gbm.R
2 \name{shrink.gbm}
3 \alias{shrink.gbm}
4 \title{L1 shrinkage of the predictor variables in a GBM}
5 \usage{
6 shrink.gbm(object, n.trees, lambda = rep(10, length(object$var.names)),
7 ...)
8 }
9 \arguments{
10 \item{object}{A \code{\link{gbm.object}}.}
11
12 \item{n.trees}{Integer specifying the number of trees to use.}
13
14 \item{lambda}{Vector of length equal to the number of variables containing
15 the shrinkage parameter for each variable.}
16
17 \item{\dots}{Additional optional arguments. (Currently ignored.)}
18 }
19 \value{
20 \item{predF}{Predicted values from the shrunken tree}
21 \item{objective}{The value of the loss function associated with the
22 predicted values} \item{gradient}{A vector with length equal to the number
23 of variables containing the derivative of the objective function with
24 respect to beta, the logit transform of the shrinkage parameter for each
25 variable}
26 }
27 \description{
28 Performs recursive shrinkage in each of the trees in a GBM fit using
29 different shrinkage parameters for each variable.
30 }
31 \details{
32 This function is currently experimental. Used in conjunction with a gradient
33 ascent search for inclusion of variables.
34 }
35 \note{
36 Warning: This function is experimental.
37 }
38 \references{
39 Hastie, T. J., and Pregibon, D.
40 \url{https://web.stanford.edu/~hastie/Papers/shrink_tree.pdf}. AT&T Bell
41 Laboratories Technical Report (March 1990).
42 }
43 \seealso{
44 \code{\link{shrink.gbm.pred}}, \code{\link{gbm}}
45 }
46 \author{
47 Greg Ridgeway \email{gregridgeway@gmail.com}
48 }
49 \keyword{methods}
+0
-39
man/shrink.gbm.pred.Rd less more
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/shrink.gbm.pred.R
2 \name{shrink.gbm.pred}
3 \alias{shrink.gbm.pred}
4 \title{Predictions from a shrunken GBM}
5 \usage{
6 shrink.gbm.pred(object, newdata, n.trees, lambda = rep(1,
7 length(object$var.names)), ...)
8 }
9 \arguments{
10 \item{object}{a \code{\link{gbm.object}}}
11
12 \item{newdata}{dataset for predictions}
13
14 \item{n.trees}{the number of trees to use}
15
16 \item{lambda}{a vector with length equal to the number of variables
17 containing the shrinkage parameter for each variable}
18
19 \item{\dots}{other parameters (ignored)}
20 }
21 \value{
22 A vector with length equal to the number of observations in newdata
23 containing the predictions
24 }
25 \description{
26 Makes predictions from a shrunken GBM model.
27 }
28 \section{Warning}{
29 This function is experimental
30 }
31
32 \seealso{
33 \code{\link{shrink.gbm}}, \code{\link{gbm}}
34 }
35 \author{
36 Greg Ridgeway \email{gregridgeway@gmail.com}
37 }
38 \keyword{methods}
33 \alias{summary.gbm}
44 \title{Summary of a gbm object}
55 \usage{
6 \method{summary}{gbm}(object, cBars = length(object$var.names),
7 n.trees = object$n.trees, plotit = TRUE, order = TRUE,
8 method = relative.influence, normalize = TRUE, ...)
6 \method{summary}{gbm}(
7 object,
8 cBars = length(object$var.names),
9 n.trees = object$n.trees,
10 plotit = TRUE,
11 order = TRUE,
12 method = relative.influence,
13 normalize = TRUE,
14 ...
15 )
916 }
1017 \arguments{
1118 \item{object}{a \code{gbm} object created from an initial call to
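A sketch, assuming gbm1 and best.iter as before:

# Relative influence of each predictor, with the default bar plot
ri <- summary(gbm1, n.trees = best.iter, plotit = TRUE)
print(ri)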
1010 extern SEXP gbm_fit(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
1111 extern SEXP gbm_plot(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
1212 extern SEXP gbm_pred(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
13 extern SEXP gbm_shrink_gradient(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
14 extern SEXP gbm_shrink_pred(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
1513
1614 static const R_CallMethodDef CallEntries[] = {
1715 {"gbm_fit", (DL_FUNC) &gbm_fit, 22},
1816 {"gbm_plot", (DL_FUNC) &gbm_plot, 10},
1917 {"gbm_pred", (DL_FUNC) &gbm_pred, 10},
20 {"gbm_shrink_gradient", (DL_FUNC) &gbm_shrink_gradient, 11},
21 {"gbm_shrink_pred", (DL_FUNC) &gbm_shrink_pred, 10},
2218 {NULL, NULL, 0}
2319 };
2420
2723 R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
2824 R_useDynamicSymbols(dll, FALSE);
2925 }
26
647647 goto Cleanup;
648648 } // gbm_plot
649649
650 SEXP gbm_shrink_pred
651 (
652 SEXP radX,
653 SEXP rcRows,
654 SEXP rcCols,
655 SEXP rcNumClasses,
656 SEXP racTrees,
657 SEXP rdInitF,
658 SEXP rTrees,
659 SEXP rCSplits,
660 SEXP raiVarType,
661 SEXP rcInteractionDepth,
662 SEXP radLambda
663 )
664 {
665 unsigned long hr = 0;
666 int iTree = 0;
667 int iPredictionIter = 0;
668 int iObs = 0;
669 int iClass = 0;
670 int i = 0;
671 int cRows = INTEGER(rcRows)[0];
672 int cNumClasses = INTEGER(rcNumClasses)[0];
673 double *adLambda = REAL(radLambda);
674 double dLambda = 0.0;
675 double dPred = 0.0;
676
677 SEXP rThisTree = NULL;
678 int *aiSplitVar = NULL;
679 double *adSplitCode = NULL;
680 int *aiLeftNode = NULL;
681 int *aiRightNode = NULL;
682 int *aiMissingNode = NULL;
683 double *adNodeW = NULL;
684 int iCurrentNode = 0;
685 double dX = 0.0;
686 int iCatSplitIndicator = 0;
687
688 SEXP rResult = NULL;
689 SEXP radPredF = NULL;
690
691 // The predictions
692 double *adPredF = NULL;
693 // The shrunken predictions
694 double *adNodePred = NULL;
695 int *aiNodeStack = NULL;
696 unsigned long cNodeStack = 0;
697 int cMaxNodes = 1+3*(INTEGER(rcInteractionDepth)[0]);
698
699 adPredF = new double[cRows * cNumClasses];
700 if(adPredF == NULL)
701 {
702 hr = GBM_OUTOFMEMORY;
703 goto Error;
704 }
705 for(iObs=0; iObs<cRows*cNumClasses; iObs++)
706 {
707 adPredF[iObs] = REAL(rdInitF)[0];
708 }
709
710 adNodePred = new double[cMaxNodes];
711 if(adNodePred == NULL)
712 {
713 hr = GBM_OUTOFMEMORY;
714 goto Error;
715 }
716 aiNodeStack = new int[cMaxNodes];
717 if(aiNodeStack == NULL)
718 {
719 hr = GBM_OUTOFMEMORY;
720 goto Error;
721 }
722
723 // allocate the predictions to return
724 PROTECT(rResult = allocVector(VECSXP, length(racTrees)));
725 if(rResult == NULL)
726 {
727 hr = GBM_OUTOFMEMORY;
728 goto Error;
729 }
730
731 iPredictionIter = 0;
732 iTree = 0;
733 while(iPredictionIter < length(racTrees))
734 {
735 while(iTree < INTEGER(racTrees)[iPredictionIter] * cNumClasses)
736 {
737 for (iClass = 0; iClass < cNumClasses; iClass++)
738 {
739 rThisTree = VECTOR_ELT(rTrees,iTree);
740 aiSplitVar = INTEGER(VECTOR_ELT(rThisTree,0));
741 adSplitCode = REAL (VECTOR_ELT(rThisTree,1));
742 aiLeftNode = INTEGER(VECTOR_ELT(rThisTree,2));
743 aiRightNode = INTEGER(VECTOR_ELT(rThisTree,3));
744 aiMissingNode = INTEGER(VECTOR_ELT(rThisTree,4));
745 adNodeW = REAL (VECTOR_ELT(rThisTree,6));
746
747 // shrink the tree's predictions
748 aiNodeStack[0] = 0;
749 cNodeStack = 1;
750 for(i=0; i<cMaxNodes; i++)
751 {
752 adNodePred[i] = R_NaN;
753 }
754 while(cNodeStack>0)
755 {
756 i = aiNodeStack[cNodeStack-1];
757 if(aiSplitVar[i]==-1)
758 {
759 adNodePred[i] = adSplitCode[i];
760 cNodeStack--;
761 }
762 else if(ISNA(adNodePred[aiLeftNode[i]]))
763 {
764 aiNodeStack[cNodeStack] = aiLeftNode[i];
765 cNodeStack++;
766 aiNodeStack[cNodeStack] = aiRightNode[i];
767 cNodeStack++;
768
769 // check whether missing node is the same as parent node
770 // occurs when X_i has no missing values
771 if(adNodeW[i] != adNodeW[aiMissingNode[i]])
772 {
773 aiNodeStack[cNodeStack] = aiMissingNode[i];
774 cNodeStack++;
775 }
776 else
777 {
778 adNodePred[aiMissingNode[i]] = 0.0;
779 }
780 }
781 else
782 {
783 // compute the parent node's prediction
784 adNodePred[i] =
785 (adNodeW[aiLeftNode[i]]*adNodePred[aiLeftNode[i]] +
786 adNodeW[aiRightNode[i]]*adNodePred[aiRightNode[i]]+
787 adNodeW[aiMissingNode[i]]*adNodePred[aiMissingNode[i]])/
788 adNodeW[i];
789 cNodeStack--;
790 }
791 }
792
793 // predict for the observations
794 for(iObs=0; iObs<cRows; iObs++)
795 {
796 iCurrentNode = 0;
797 dPred = 0.0;
798 dLambda = 1.0;
799
800 while(aiSplitVar[iCurrentNode] != -1)
801 {
802 dPred += dLambda*
803 (1-adLambda[aiSplitVar[iCurrentNode]])*
804 adNodePred[iCurrentNode];
805 dLambda *= adLambda[aiSplitVar[iCurrentNode]];
806
807 dX = REAL(radX)[aiSplitVar[iCurrentNode]*cRows + iObs];
808 // missing?
809 if(ISNA(dX))
810 {
811 iCurrentNode = aiMissingNode[iCurrentNode];
812 }
813 // continuous?
814 else if(INTEGER(raiVarType)[aiSplitVar[iCurrentNode]] == 0)
815 {
816 if(dX < adSplitCode[iCurrentNode])
817 {
818 iCurrentNode = aiLeftNode[iCurrentNode];
819 }
820 else
821 {
822 iCurrentNode = aiRightNode[iCurrentNode];
823 }
824 }
825 else // categorical
826 {
827 iCatSplitIndicator = INTEGER(
828 VECTOR_ELT(rCSplits,
829 (int)adSplitCode[iCurrentNode]))[(int)dX];
830 if(iCatSplitIndicator==-1)
831 {
832 iCurrentNode = aiLeftNode[iCurrentNode];
833 }
834 else if(iCatSplitIndicator==1)
835 {
836 iCurrentNode = aiRightNode[iCurrentNode];
837 }
838 else // categorical level not present in training
839 {
840 iCurrentNode = aiMissingNode[iCurrentNode];
841 }
842 }
843 }
844 dPred += dLambda*adNodePred[iCurrentNode];
845
846 // add the shrunken prediction
847 adPredF[iObs + iClass * cRows] += dPred; // add the prediction
848 } // iObs
849 iTree++;
850 } // iClass
851 } // iTree
852
853 PROTECT(radPredF = allocVector(REALSXP, cRows));
854 if(radPredF == NULL)
855 {
856 hr = GBM_OUTOFMEMORY;
857 goto Error;
858 }
859 for(iObs=0; iObs<cRows*cNumClasses; iObs++)
860 {
861 REAL(radPredF)[iObs] = adPredF[iObs];
862 }
863 SET_VECTOR_ELT(rResult,iPredictionIter,radPredF);
864 UNPROTECT(1); // radPredF
865
866 iPredictionIter++;
867 }
868
869 Cleanup:
870 if(adPredF!=NULL)
871 {
872 delete [] adPredF;
873 adPredF = NULL;
874 }
875 if(adNodePred!=NULL)
876 {
877 delete [] adNodePred;
878 adNodePred = NULL;
879 }
880 if(aiNodeStack!=NULL)
881 {
882 delete [] aiNodeStack;
883 aiNodeStack = NULL;
884 }
885
886 UNPROTECT(1); // rResult
887 return rResult;
888 Error:
889 goto Cleanup;
890 }
891
892 SEXP gbm_shrink_gradient
893 (
894 SEXP radY,
895 SEXP radX,
896 SEXP rcRows,
897 SEXP rcCols,
898 SEXP rcNumClasses,
899 SEXP rcTrees,
900 SEXP rdInitF,
901 SEXP rTrees,
902 SEXP rCSplits,
903 SEXP raiVarType,
904 SEXP rcInteractionDepth,
905 SEXP radLambda
906 )
907 {
908 unsigned long hr = 0;
909 int iTree = 0;
910
911 int iObs = 0;
912 int iLambda = 0;
913 int iNode = 0;
914 int iClass = 0;
915 int cRows = INTEGER(rcRows)[0];
916 int cNumClasses = INTEGER(rcNumClasses)[0];
917 double *adY = REAL(radY);
918 double *adLambda = REAL(radLambda);
919 double dLambdaProduct = 0.0;
920 double dPred = 0.0;
921 double dNewPredTerm = 0.0;
922 double dDJDf = 0.0;
923
924 // NB for K-Class
925 double *adProb = NULL;
926 double dDenom = 0.0;
927
928 SEXP rThisTree = NULL;
929 int *aiSplitVar = NULL;
930 double *adSplitCode = NULL;
931 int *aiLeftNode = NULL;
932 int *aiRightNode = NULL;
933 int *aiMissingNode = NULL;
934 double *adNodeW = NULL;
935 int iCurrentNode = 0;
936 double dX = 0.0;
937 int iCatSplitIndicator = 0;
938
939 SEXP rResult = NULL;
940 SEXP radPredF = NULL;
941 SEXP rdObjective = NULL;
942 SEXP radGradient = NULL;
943
944 // The node predictions
945 double *adNodePred = NULL;
946 // tracks which variables are in the prediction path
947 int *aiInPath = NULL;
948 int cInPath = 0;
949 double *adDfDLambda = NULL;
950
951 adDfDLambda = new double[length(radLambda)];
952 if(adDfDLambda == NULL)
953 {
954 hr = GBM_OUTOFMEMORY;
955 goto Error;
956 }
957 aiInPath = new int[INTEGER(rcInteractionDepth)[0]+1];
958 if(aiInPath == NULL)
959 {
960 hr = GBM_OUTOFMEMORY;
961 goto Error;
962 }
963 // allocate the predictions to return
964 PROTECT(rResult = allocVector(VECSXP, 3));
965 if(rResult == NULL)
966 {
967 hr = GBM_OUTOFMEMORY;
968 goto Error;
969 }
970 // allocate predictions
971 PROTECT(radPredF = allocVector(REALSXP, cRows * cNumClasses));
972 if(radPredF == NULL)
973 {
974 hr = GBM_OUTOFMEMORY;
975 goto Error;
976 }
977 SET_VECTOR_ELT(rResult,0,radPredF);
978 UNPROTECT(1); // radPredF
979 //allocate objective function
980 PROTECT(rdObjective = allocVector(REALSXP, 1));
981 if(rdObjective == NULL)
982 {
983 hr = GBM_OUTOFMEMORY;
984 goto Error;
985 }
986 SET_VECTOR_ELT(rResult,1,rdObjective);
987 UNPROTECT(1); // rdObjective
988 //allocate objective function
989 PROTECT(radGradient = allocVector(REALSXP, length(radLambda)));
990 if(radGradient == NULL)
991 {
992 hr = GBM_OUTOFMEMORY;
993 goto Error;
994 }
995 SET_VECTOR_ELT(rResult,2,radGradient);
996 UNPROTECT(1); // radGradient
997
998 // Allocate K-Class array
999 if (cNumClasses > 1)
1000 {
1001 adProb = new double[cNumClasses];
1002 }
1003
1004 // initialize the predicted values
1005 for(iObs=0; iObs<cRows*cNumClasses; iObs++)
1006 {
1007 REAL(radPredF)[iObs] = REAL(rdInitF)[0];
1008 }
1009 // initialize the gradient
1010 for(iLambda=0; iLambda<length(radGradient); iLambda++)
1011 {
1012 REAL(radGradient)[iLambda] = 0.0;
1013 }
1014 REAL(rdObjective)[0] = 0.0;
1015
1016 // predict for the observations
1017 // first loop has to be over observations in order to compute the gradient
1018 for(iObs=0; iObs<cRows; iObs++)
1019 {
1020 for(iLambda=0; iLambda<length(radGradient); iLambda++)
1021 {
1022 adDfDLambda[iLambda] = 0.0;
1023 }
1024
1025 for(iTree=0; iTree<INTEGER(rcTrees)[0]; iTree++)
1026 {
1027 for (iClass = 0; iClass < cNumClasses; iClass++)
1028 {
1029
1030 rThisTree = VECTOR_ELT(rTrees,iClass + iTree * cNumClasses);
1031 aiSplitVar = INTEGER(VECTOR_ELT(rThisTree,0));
1032 adSplitCode = REAL (VECTOR_ELT(rThisTree,1));
1033 aiLeftNode = INTEGER(VECTOR_ELT(rThisTree,2));
1034 aiRightNode = INTEGER(VECTOR_ELT(rThisTree,3));
1035 aiMissingNode = INTEGER(VECTOR_ELT(rThisTree,4));
1036 adNodeW = REAL (VECTOR_ELT(rThisTree,6));
1037 adNodePred = REAL (VECTOR_ELT(rThisTree,7));
1038
1039 iCurrentNode = 0;
1040 dPred = 0.0;
1041 dLambdaProduct = 1.0;
1042
1043 // reset for the new tree
1044 cInPath = 0;
1045
1046 while(aiSplitVar[iCurrentNode] != -1)
1047 {
1048 dNewPredTerm = dLambdaProduct*
1049 (1-adLambda[aiSplitVar[iCurrentNode]])*
1050 adNodePred[iCurrentNode];
1051
1052 // update prediction
1053 dPred += dNewPredTerm;
1054
1055 // update gradient
1056 if(adLambda[aiSplitVar[iCurrentNode]]!=1.0)
1057 {
1058 adDfDLambda[aiSplitVar[iCurrentNode]] -=
1059 dNewPredTerm/(1.0-adLambda[aiSplitVar[iCurrentNode]]);
1060 }
1061 for(iNode=0; iNode<cInPath; iNode++)
1062 {
1063 if(adLambda[aiInPath[iNode]]!=0.0)
1064 {
1065 adDfDLambda[aiInPath[iNode]] +=
1066 dNewPredTerm/adLambda[aiInPath[iNode]];
1067 }
1068 }
1069 aiInPath[cInPath] = aiSplitVar[iCurrentNode];
1070 cInPath++;
1071
1072 dLambdaProduct *= adLambda[aiSplitVar[iCurrentNode]];
1073
1074 dX = REAL(radX)[aiSplitVar[iCurrentNode]*cRows + iObs];
1075 // missing?
1076 if(ISNA(dX))
1077 {
1078 iCurrentNode = aiMissingNode[iCurrentNode];
1079 }
1080 // continuous?
1081 else if(INTEGER(raiVarType)[aiSplitVar[iCurrentNode]] == 0)
1082 {
1083 if(dX < adSplitCode[iCurrentNode])
1084 {
1085 iCurrentNode = aiLeftNode[iCurrentNode];
1086 }
1087 else
1088 {
1089 iCurrentNode = aiRightNode[iCurrentNode];
1090 }
1091 }
1092 else // categorical
1093 {
1094 iCatSplitIndicator = INTEGER(
1095 VECTOR_ELT(rCSplits,
1096 (int)adSplitCode[iCurrentNode]))[(int)dX];
1097 if(iCatSplitIndicator==-1)
1098 {
1099 iCurrentNode = aiLeftNode[iCurrentNode];
1100 }
1101 else if(iCatSplitIndicator==1)
1102 {
1103 iCurrentNode = aiRightNode[iCurrentNode];
1104 }
1105 else // categorical level not present in training
1106 {
1107 iCurrentNode = aiMissingNode[iCurrentNode];
1108 }
1109 }
1110 } // aiSplitVar[iCurrentNode] != -1
1111
1112 // incorporate the terminal node
1113 dNewPredTerm = dLambdaProduct*adNodePred[iCurrentNode];
1114 dPred += dNewPredTerm;
1115 // update gradient
1116 for(iNode=0; iNode<cInPath; iNode++)
1117 {
1118 if(adLambda[aiInPath[iNode]] != 0.0)
1119 {
1120 adDfDLambda[aiInPath[iNode]] +=
1121 dNewPredTerm/adLambda[aiInPath[iNode]];
1122 }
1123 }
1124
1125 // add the prediction from tree iTree to prediction iObs
1126 REAL(radPredF)[iObs + iClass * cRows] += dPred;
1127 } // iClass
1128 } // iTree
1129
1130 // If multinomial was used (i.e. numClasses > 1) then calculate the probabilities
1131 if (cNumClasses > 1)
1132 {
1133 dDenom = 0.0;
1134 for (iClass = 0; iClass < cNumClasses; iClass++)
1135 {
1136 adProb[iClass] = exp(REAL(radPredF)[iObs + iClass * cRows]);
1137 dDenom += adProb[iClass];
1138 }
1139
1140 dDJDf = 0.0;
1141 for (iClass = 0; iClass < cNumClasses; iClass++)
1142 {
1143 adProb[iClass] /= dDenom;
1144
1145 REAL(rdObjective)[0] += (adY[iObs + iClass * cRows] - adProb[iClass]) *
1146 (adY[iObs + iClass * cRows] - adProb[iClass]);
1147 dDJDf += -2*(adY[iObs + iClass * cRows] - adProb[iClass]);
1148 }
1149
1150 REAL(rdObjective)[0] /= double(cNumClasses);
1151 dDJDf /= double(cNumClasses);
1152 }
1153 else
1154 {
1155 // DEBUG: need to make more general for other loss functions!
1156 REAL(rdObjective)[0] += (adY[iObs]-REAL(radPredF)[iObs])*
1157 (adY[iObs]-REAL(radPredF)[iObs]);
1158 dDJDf = -2*(adY[iObs]-REAL(radPredF)[iObs]);
1159 }
1160
1161 for(iLambda=0; iLambda<length(radLambda); iLambda++)
1162 {
1163 if(adDfDLambda[iLambda] != 0.0)
1164 {
1165 REAL(radGradient)[iLambda] +=
1166 dDJDf * adDfDLambda[iLambda]; // * adLambda[iLambda]*(1.0-adLambda[iLambda]);
1167 }
1168 }
1169 } // iObs
1170
1171 Cleanup:
1172 if(adDfDLambda!=NULL)
1173 {
1174 delete [] adDfDLambda;
1175 adDfDLambda = NULL;
1176 }
1177 if(aiInPath!=NULL)
1178 {
1179 delete [] aiInPath;
1180 aiInPath = NULL;
1181 }
1182 if (adProb != NULL)
1183 {
1184 delete [] adProb;
1185 adProb = NULL;
1186 }
1187
1188 UNPROTECT(1); // rResult
1189 return rResult;
1190 Error:
1191 goto Cleanup;
1192 }
1193
1194650 } // end extern "C"
1195651
0
1 if (requireNamespace("tinytest", quietly = TRUE)) {
2 home <- length(unclass(packageVersion("gbm"))[[1L]]) == 4  # four-component version = development build
3 tinytest::test_package("gbm", at_home = home)  # run extended "at home" tests only for dev builds
4 }
5
0 \Sconcordance{concordance:gbm.tex:gbm.Rnw:%
1 1 373 1 50 0}