r-cran-gbm / 74b421a
Update upstream source from tag 'upstream/2.1.4'
Update to upstream version '2.1.4' with Debian dir 43823748b4ed3cc59d3ffb19c9b957fa5acef471
Andreas Tille, 5 years ago
83 changed files with 5916 additions and 5851 deletions.
DESCRIPTION
00 Package: gbm
1 Version: 2.1.3
2 Date: 2017-03-21
1 Version: 2.1.4
32 Title: Generalized Boosted Regression Models
4 Author: Greg Ridgeway <gregridgeway@gmail.com> with contributions from
5 others
6 Maintainer: ORPHANED
7 Depends: R (>= 2.9.0), survival, lattice, splines, parallel
8 Suggests: RUnit
9 Description: An implementation of extensions to Freund and
10 Schapire's AdaBoost algorithm and Friedman's gradient boosting
11 machine. Includes regression methods for least squares,
12 absolute loss, t-distribution loss, quantile regression,
13 logistic, multinomial logistic, Poisson, Cox proportional
14 hazards partial likelihood, AdaBoost exponential loss,
15 Huberized hinge loss, and Learning to Rank measures
16 (LambdaMart).
3 Authors@R: c(
4 person("Brandon", "Greenwell",
5 email = "greenwell.brandon@gmail.com",
6 role = c("aut", "cre"),
7 comment = c(ORCID = "0000-0002-8120-0084")),
8 person("Bradley", "Boehmke",
9 email = "bradleyboehmke@gmail.com",
10 role = "aut",
11 comment = c(ORCID = "0000-0002-3611-8516")),
12 person("Jay", "Cunningham",
13 email = "james@notbadafterall.com",
14 role = "aut"),
15 person("GBM", "Developers",
16 role = "aut",
17 comment = "https://github.com/gbm-developers")
18 )
19 Depends: R (>= 2.9.0)
20 Imports: gridExtra, lattice, parallel, survival
21 Suggests: knitr, pdp, RUnit, splines, viridis
22 Description: An implementation of extensions to Freund and Schapire's AdaBoost
23 algorithm and Friedman's gradient boosting machine. Includes regression
24 methods for least squares, absolute loss, t-distribution loss, quantile
25 regression, logistic, multinomial logistic, Poisson, Cox proportional hazards
26 partial likelihood, AdaBoost exponential loss, Huberized hinge loss, and
27 Learning to Rank measures (LambdaMart). Originally developed by Greg Ridgeway.
1728 License: GPL (>= 2) | file LICENSE
18 URL: http://code.google.com/p/gradientboostedmodels/
19 Packaged: 2017-03-21 06:44:01 UTC; ripley
29 URL: https://github.com/gbm-developers/gbm
30 BugReports: https://github.com/gbm-developers/gbm/issues
31 RoxygenNote: 6.1.0
32 VignetteBuilder: knitr
2033 NeedsCompilation: yes
34 Packaged: 2018-09-16 06:19:54 UTC; ripley
35 Author: Brandon Greenwell [aut, cre] (<https://orcid.org/0000-0002-8120-0084>),
36 Bradley Boehmke [aut] (<https://orcid.org/0000-0002-3611-8516>),
37 Jay Cunningham [aut],
38 GBM Developers [aut] (https://github.com/gbm-developers)
39 Maintainer: Brandon Greenwell <greenwell.brandon@gmail.com>
2140 Repository: CRAN
22 Date/Publication: 2017-03-21 06:48:03 UTC
23 X-CRAN-Original-Maintainer: Harry Southworth
24 <harry.southworth@gmail.com>
25 X-CRAN-Comment: Orphaned on 2017-03-21 as long-standing errors were not
26 corrected. NMU by CRAN team.
41 Date/Publication: 2018-09-16 08:20:11
LICENSE
1212 General Public License for more details.
1313
1414 Copies of the relevant licenses can be found at:
15 http://www.r-project.org/Licenses/
15 https://www.r-project.org/Licenses/
MD5 +63 -62
00 108bdba2eb6f2ba6ce890f47224ef68f *CHANGES
1 cf8eedb04e0b7de4ba83cabfe278a328 *DESCRIPTION
2 c2cae135a9c0d4ae15e14e89166ba841 *LICENSE
3 dcb19d54815086722ad172c662cb7d03 *NAMESPACE
4 6a1293bc6f87d439da835b1b837f9c52 *R/basehaz.gbm.R
5 cc5e4cd5f5d5e23382bae904e9ada152 *R/calibrate.plot.R
6 5615ac799ce14603a692a2c29be9648f *R/checks.R
7 cf5a5bce0653ae59317ddac8bfe8d389 *R/gbm.R
8 428c0d3515d5fcbbdd992e10f5d22793 *R/gbm.fit.R
9 1de9823ae906c064f61a39bd1e0241d3 *R/gbm.loss.R
10 ab8e510ccde4446a7c93ff384ba3217c *R/gbm.more.R
11 5a79d41470d1f8ae3b8c278bc5e12389 *R/gbm.perf.R
12 0fdb6a883897939376827795e4ee5230 *R/gbmCluster.R
13 f4651f14ae6acdfa96319bb257f9d0e1 *R/gbmCrossVal.R
14 7201fac67c6152443cf2a2c3b5989116 *R/gbmDoFold.R
15 f5cc3af1a8eb7ddbf962038e88d27953 *R/getCVgroup.R
16 efd18f017f7a73397141bf4239c922ce *R/getStratify.R
17 696197960954e0845b8998911987cab2 *R/guessDist.R
18 be47e36ef092244404831df5227f6d65 *R/interact.gbm.R
19 f8c4c5e164b772b3bfc152b8e5659e2e *R/ir.measures.R
20 bbfe015167ca3c75ecd155f6b090f661 *R/permutation.test.gbm.R
21 51c2749906af39dc17eb1af54b4d861d *R/plot.gbm.R
22 b9c2bb5000212628b390b81dfdd895c0 *R/predict.gbm.R
23 7e3daea77a7b6ffa18e9f81cf0e0b152 *R/pretty.gbm.tree.R
24 13ac361d8e3f54893f7de0b66351eee4 *R/print.gbm.R
25 36d2345c029a4e8384703c92d46f9b2e *R/reconstructGBMdata.R
26 792e2a5c7cdfeeee3e29c4e418f8af35 *R/relative.influence.R
27 e8cf40a7c7efcd820e908a43252cfc2b *R/shrink.gbm.R
28 eefc2a06d746e77ac2ba101d240640b8 *R/shrink.gbm.pred.R
29 5b47e86c97e9b464bd64e7ea647c65ae *R/test.gbm.R
1 82faeac45c35b19ca76f8278d98e1d20 *DESCRIPTION
2 67f2f9cc8297be2f12dfe86e05277383 *LICENSE
3 00dda5f78be66b96a668b74b523fcac1 *NAMESPACE
4 ab6e6d294c6c724e76c5f069c1694fd2 *NEWS.md
5 061c315ef880f845918ff59cce721239 *R/basehaz.gbm.R
6 aef3622e1f5a19f9c74616130321851f *R/calibrate.plot.R
7 af7dcaeddbc7e6eb31b66290a98c0a1c *R/gbm-internals.R
8 2f21a77c0c4d5274533173b223f7f05e *R/gbm-package.R
9 cc641d322c124bfab6d7e2351cf4e6d2 *R/gbm.R
10 1a60700a939bb694799c92073d13b3a5 *R/gbm.fit.R
11 2f6a79af8a23dd4be5283881a82e5f5c *R/gbm.more.R
12 cdcc395f477e8a83fde52d313d5d9760 *R/gbm.object.R
13 b999e62a4727556bb73d893db39e9a83 *R/gbm.perf.R
14 f17f3d39a4d6820e78130748ce8032ff *R/gbmCrossVal.R
15 40231a31962f0df1ab182edcffe51b9f *R/interact.gbm.R
16 fc877c59338b8343545050803c29ec95 *R/ir.measures.R
17 1e1e9648a40d27a07c63e9c4103ba4d0 *R/plot.gbm.R
18 23d6e774a0facb281c6f179703b9533d *R/predict.gbm.R
19 48438bd417c4a7b3c0495c901c5d5060 *R/pretty.gbm.tree.R
20 b068e5396186cc21060477aac914abe7 *R/print.gbm.R
21 af4fd23ba860c912a1a237fb3b5631d1 *R/reconstructGBMdata.R
22 1a8dd026617a7bdc35d9e7ed8232c399 *R/relative.influence.R
23 81f913b053b7d402f4a808aeb3670e2f *R/shrink.gbm.R
24 d001fbd3c7de86463f4d0f1dff63a70b *R/shrink.gbm.pred.R
25 21f1a9fdd69be98ad81bbca7e18ec8a7 *R/test.gbm.R
26 3fc23fb8a1c816ac430c4e836a08078a *R/utils.R
27 08ab323918a24917e4d4638ca01c841a *R/zzz.R
28 55ae3c9b2954cd0ac1c317b5698d77c3 *README.md
29 4dc9151409b8112474ac3f1da044f7f7 *build/vignette.rds
3030 4e38ebb4d3578e523b7d94fc9ece3d65 *demo/00Index
3131 e3bd8606063f15ded6ab3261c13d22af *demo/OOB-reps.R
3232 354344b4f6e8a232508ef872ced5efa3 *demo/bernoulli.R
3636 af763746809ed98e48e065f77942cb05 *demo/pairwise.R
3737 dbff7ebcc6a18e27c1b423fd5db70ae3 *demo/printExamples.R
3838 79316127956b8f5291f5021f1e7c89ef *demo/robustReg.R
39 5e674508b7fde23e247a6e1a6c6b6ec6 *inst/doc/gbm.Sweave
40 e73636a53327b5e049e5764b0620d03e *inst/doc/gbm.pdf
41 b63bc1c2450ad4bca8db60e03b932a53 *inst/doc/gbm.tex
42 64dbd4ec219c6e855b87bc4ddeba111e *inst/doc/index.html
43 dc706f07b81a76bf9aab2edf4641e86f *inst/doc/oobperf2.eps
44 7ba661d197d25537a69fc34d737b4d29 *inst/doc/oobperf2.pdf
45 9d73da9632fed38c327ffbd1b072347b *inst/doc/shrinkage-v-iterations.eps
46 3fda19791155842b0e48565781441aa2 *inst/doc/shrinkage-v-iterations.pdf
39 c044e4fcd21ef75478830ede774cfba7 *inst/doc/gbm.Rnw
40 d9afae55c8fff7ec22fc678aa3224efb *inst/doc/gbm.pdf
4741 4d55dd49b93485a78ecb50caafd19b4e *inst/doc/shrinkageplot.R
48 90fd593dd07098b5600fb650e86733ff *inst/doc/srcltx.sty
49 ce7a173a73fb952a1bf40cb65e3b99f2 *man/basehaz.gbm.Rd
50 7fca3316fe15ef40546c3db911d67086 *man/calibrate.plot.Rd
51 99fab30dc167a5c90a1d1424e71a25f4 *man/gbm-internal.Rd
52 dbbaa87e0b50024671667d8d38008e64 *man/gbm-package.Rd
53 eac981fe86aac2cf2b76f2bcee97289f *man/gbm.Rd
54 089cf48c905c3429ed63f69a0cd982b5 *man/gbm.object.Rd
55 3ed5b048c81d016868ca2799e4504419 *man/gbm.perf.Rd
56 7359f0a3b1f2d27cf29e497745c6ba59 *man/gbm.roc.area.Rd
57 9e8eb660baefa82bc008cbf7e12babf8 *man/gbmCrossVal.Rd
58 8fca4f44be806cb17eb37affe8334618 *man/interact.gbm.Rd
59 a8728abc1dc77b599c2aa7d1df6f982e *man/plot.gbm.Rd
60 5896d84873dd1ed5d22005b5b37b17b6 *man/predict.gbm.Rd
61 1656ffd7646d41236545e0399a70afdd *man/pretty.gbm.tree.Rd
62 894215a9e1e715f39a6cb79a6fe81baf *man/print.gbm.Rd
63 0da8961be170c9a72df248d6f0fe985e *man/quantile.rug.Rd
64 9fbb2bddffae7a639d4f702817eeecb3 *man/reconstructGBMdata.Rd
65 e1dea92edf78383b17139d45c318294c *man/relative.influence.Rd
66 b58470798d31cfceceeec40252ce833f *man/shrink.gbm.Rd
67 ef52c476e46b4c64eee269064ea58b64 *man/shrink.gbm.pred.Rd
68 b73e9273873b1178e9a116187105c022 *man/summary.gbm.Rd
69 3e0b677bccf30388ec0fc96f77f5fb62 *man/validate.Rd
42 e89d6b6a7a2f19974d5c7916c9e2ae66 *man/basehaz.gbm.Rd
43 c606780ccf3028850a848dfc2b3f4739 *man/calibrate.plot.Rd
44 bf74b54c920807d509d5ff19e45e95d4 *man/gbm-internals.Rd
45 5f96c05f991a485fbfe7a23b87b3d649 *man/gbm-package.Rd
46 db08fe6fff6da69ebfbaad46ff0d902f *man/gbm.Rd
47 94befbc345d33d0ed250a227a1268603 *man/gbm.fit.Rd
48 a65152118be58b4d8bf48ad8c93614c7 *man/gbm.more.Rd
49 728fa0d75f96519d0156aa2891362b9b *man/gbm.object.Rd
50 d007fd2b010c4b6ccbd4c0ec2aba9ea0 *man/gbm.perf.Rd
51 c43f6a77ca7bec407e85b642d6dfa2be *man/gbm.roc.area.Rd
52 2cd76f2ffbdc511bb0ac0a9dc1fb393b *man/gbmCrossVal.Rd
53 7d42ecd6cfbbb3e83f94685f0ef7add4 *man/grid.arrange.Rd
54 c1789d7d5b7fc9be7665be55c1893d35 *man/interact.gbm.Rd
55 0a3f9f38c375609ef6380dceb1d4128c *man/plot.gbm.Rd
56 2a0d1ae9483de0ffb214d25623821f68 *man/predict.gbm.Rd
57 e368dcac4b75c8273529151e0087c5d4 *man/pretty.gbm.tree.Rd
58 21c028bad14805f40e0a7a0dc7e49e64 *man/print.gbm.Rd
59 f9563a4ec1265edfec56ecbdb8148e38 *man/quantile.rug.Rd
60 27aa52e20ea8281697e8357a36d58b85 *man/reconstructGBMdata.Rd
61 f17f451739be17e89ec1b227b6602c86 *man/relative.influence.Rd
62 6f99e3dde82cbc922d9f1fc7f22bdcd9 *man/shrink.gbm.Rd
63 d75c1d9e1ff0c6a83bb37df2591ae4d9 *man/shrink.gbm.pred.Rd
64 dd2dfa92c91ff3ae020d9dbdd23657fb *man/summary.gbm.Rd
65 8201654f42537ca205d0d5b138848df8 *man/test.gbm.Rd
7066 0d32ce72a7b02fc57d602c60b9ba8305 *src/adaboost.cpp
7167 2f5d22dc3043e69628763cbe303e6b5f *src/adaboost.h
7268 6d2bd44a11975c8f023640eb7a9036c3 *src/bac/gaussian.cpp
8177 91d88e455827695f63bf23df5dfb3108 *src/distribution.h
8278 6d2bd44a11975c8f023640eb7a9036c3 *src/gaussian.cpp
8379 6c2bf2616a3b4491aaaf501346246d3f *src/gaussian.h
80 889bfcdd44dc35824be51ba8ae2bd517 *src/gbm-init.c
8481 1d8d4e59887769602b1d3c8dc3d5f94f *src/gbm.cpp
8582 0f49e8549558916322ec80e29b591a73 *src/gbm.h
8683 c0c572eb464dae70700ffe8fdc3f6b9f *src/gbm_engine.cpp
8784 b3f1f49fa614ac6cfd52b28191bfdb70 *src/gbm_engine.h
88 f1da15864dab021cdac1617ffba4ff0f *src/gbmentry.cpp
85 1d924856d046e942a312d373cfce230f *src/gbmentry.cpp
8986 1fba83f37e9f092d8b005e0c8f32a97b *src/huberized.cpp
9087 141e5b762944c14a0b6294e15046296f *src/huberized.h
91 cd2cedbf213ddbc773ea20fe354a93ae *src/init.c
9288 10dcf061e2807ca52f811ec6650f33ad *src/laplace.cpp
9389 53b4d97c482517fbbc97162da1adf891 *src/laplace.h
94 e7958b4630de29d3848d057d2aebc6e2 *src/locationm.cpp
90 d25bcfb8da3565604f902270b25eb470 *src/locationm.cpp
9591 932f3d98f158ebf6ae11ed47e873a7f3 *src/locationm.h
9692 39094967ceaabf7c744bc93d0b86d22f *src/matrix.h
9793 7242e54abea29c46990c4aabba7a65b6 *src/multinomial.cpp
120116 9ab15eb81fc9a18ee7d14a76f7aefd2a *src/tdist.h
121117 276e36bf158250eb458a1cdabcf975b5 *src/tree.cpp
122118 6b2f1cd60e5d67638e110e1ac9552b27 *src/tree.h
119 c044e4fcd21ef75478830ede774cfba7 *vignettes/gbm.Rnw
120 b5633beb372053eac8730e76d8999ce9 *vignettes/gbm.bib
121 7ba661d197d25537a69fc34d737b4d29 *vignettes/oobperf2.pdf
122 3fda19791155842b0e48565781441aa2 *vignettes/shrinkage-v-iterations.pdf
123 90fd593dd07098b5600fb650e86733ff *vignettes/srcltx.sty
NAMESPACE
0 # Export all names that don't start with "."
1 exportPattern("^[^\\.]")
0 # Generated by roxygen2: do not edit by hand
21
3 useDynLib(gbm)
4
5 importFrom(survival, Surv)
6
7 # ns from splines is used in one of the examples
8 importFrom(splines, ns, splineDesign)
9
10 # xyplot is used, which means several functions internal
11 # to lattice will also be used. Import the lot.
2 S3method(plot,gbm)
3 S3method(predict,gbm)
4 S3method(print,gbm)
5 S3method(summary,gbm)
6 export(basehaz.gbm)
7 export(calibrate.plot)
8 export(checkID)
9 export(checkMissing)
10 export(checkOffset)
11 export(checkWeights)
12 export(gbm)
13 export(gbm.conc)
14 export(gbm.fit)
15 export(gbm.loss)
16 export(gbm.more)
17 export(gbm.perf)
18 export(gbm.roc.area)
19 export(gbmCluster)
20 export(gbmCrossVal)
21 export(gbmCrossValErr)
22 export(gbmCrossValModelBuild)
23 export(gbmCrossValPredictions)
24 export(gbmDoFold)
25 export(getCVgroup)
26 export(getStratify)
27 export(getVarNames)
28 export(grid.arrange)
29 export(guessDist)
30 export(interact.gbm)
31 export(ir.measure.auc)
32 export(ir.measure.conc)
33 export(ir.measure.map)
34 export(ir.measure.mrr)
35 export(ir.measure.ndcg)
36 export(perf.pairwise)
37 export(permutation.test.gbm)
38 export(plot.gbm)
39 export(predict.gbm)
40 export(pretty.gbm.tree)
41 export(quantile.rug)
42 export(reconstructGBMdata)
43 export(relative.influence)
44 export(show.gbm)
45 export(shrink.gbm)
46 export(shrink.gbm.pred)
47 export(summary.gbm)
48 export(test.gbm)
49 export(test.relative.influence)
50 export(validate.gbm)
1251 import(lattice)
13
14 import(parallel)
15
16 importFrom("grDevices", "rainbow")
17 importFrom("graphics", "abline", "axis", "barplot", "lines", "mtext",
18 "par", "plot", "polygon", "rug", "segments", "title")
19 importFrom("stats", "approx", "binomial", "delete.response",
20 "gaussian", "glm", "loess", "model.extract", "model.frame",
21 "model.offset", "model.response", "model.weights",
22 "na.pass", "poisson", "predict", "quantile", "rbinom",
23 "reformulate", "rexp", "rnorm", "runif", "sd", "supsmu",
24 "terms", "var", "weighted.mean")
25
26 S3method(plot, gbm)
27 S3method(predict, gbm)
28 S3method(print, gbm)
29 S3method(summary, gbm)
52 importFrom(grDevices,rainbow)
53 importFrom(graphics,abline)
54 importFrom(graphics,axis)
55 importFrom(graphics,barplot)
56 importFrom(graphics,lines)
57 importFrom(graphics,mtext)
58 importFrom(graphics,par)
59 importFrom(graphics,plot)
60 importFrom(graphics,polygon)
61 importFrom(graphics,rug)
62 importFrom(graphics,segments)
63 importFrom(graphics,title)
64 importFrom(gridExtra,grid.arrange)
65 importFrom(stats,approx)
66 importFrom(stats,binomial)
67 importFrom(stats,delete.response)
68 importFrom(stats,gaussian)
69 importFrom(stats,glm)
70 importFrom(stats,loess)
71 importFrom(stats,model.extract)
72 importFrom(stats,model.frame)
73 importFrom(stats,model.offset)
74 importFrom(stats,model.response)
75 importFrom(stats,model.weights)
76 importFrom(stats,na.pass)
77 importFrom(stats,poisson)
78 importFrom(stats,predict)
79 importFrom(stats,quantile)
80 importFrom(stats,rbinom)
81 importFrom(stats,reformulate)
82 importFrom(stats,reorder)
83 importFrom(stats,rexp)
84 importFrom(stats,rnorm)
85 importFrom(stats,runif)
86 importFrom(stats,sd)
87 importFrom(stats,supsmu)
88 importFrom(stats,terms)
89 importFrom(stats,var)
90 importFrom(stats,weighted.mean)
91 importFrom(survival,Surv)
92 useDynLib(gbm, .registration = TRUE)
NEWS.md
0 # NEWS for gbm package
1
2 ### Changes for version 2.1.4
3 * Switched from `CHANGES` to `NEWS` file.
4 * Updated links and maintainer field in `DESCRIPTION` file.
5 * Fixed bug caused by factors with unused levels [(#5)](https://github.com/gbm-developers/gbm/issues/5).
6 * Fixed bug with axis labels in the `plot` method for `"gbm"` objects [(#17)](https://github.com/gbm-developers/gbm/issues/17).
7 * The `plot` method for `"gbm"` objects is now more consistent and always returns a `"trellis"` object [(#19)](https://github.com/gbm-developers/gbm/issues/19). Consequently, setting graphical parameters via `par` will no longer have an effect on the output from `plot.gbm`.
8 * The `plot` method for `"gbm"` objects gained five new arguments: `level.plot`, `contour`, `number`, `overlap`, and `col.regions`; see `?plot.gbm` for details (a brief usage sketch follows this list).
9 * The default color palette for false color level plots in `plot.gbm` has changed to the Matplotlib 'viridis' color map.
10 * Fixed a number of references and URLs.
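A minimal usage sketch of the `plot.gbm` changes listed above (not part of the package NEWS); `gbm1` and `best.iter` are assumed to be a fitted `"gbm"` object and a chosen number of trees:

# Hypothetical objects: gbm1 (a fitted "gbm" model) and best.iter (number of trees).
p <- plot(gbm1, i.var = c("X2", "X3"), n.trees = best.iter,
          level.plot = TRUE, contour = TRUE)  # two of the new arguments
print(p)  # plot.gbm now returns a "trellis" object, so print it explicitly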
R/basehaz.gbm.R
0 # compute Breslow estimator of the baseline hazard function
1 basehaz.gbm <- function(t,delta,f.x,
2 t.eval=NULL,
3 smooth=FALSE,
4 cumulative=TRUE)
5 {
6 t.unique <- sort(unique(t[delta==1]))
7 alpha <- length(t.unique)
8 for(i in 1:length(t.unique))
9 {
10 alpha[i] <- sum(t[delta==1]==t.unique[i])/
11 sum(exp(f.x[t>=t.unique[i]]))
12 }
0 # rd2rox <- function(path = file.choose()) {
1 # info <- Rd2roxygen::parse_file(path)
2 # cat(Rd2roxygen::create_roxygen(info), sep = "\n")
3 # }
134
14 if(!smooth && !cumulative)
15 {
16 if(!is.null(t.eval))
5
6 #' Baseline hazard function
7 #'
8 #' Computes the Breslow estimator of the baseline hazard function for a
9 #' proportional hazard regression model.
10 #'
11 #' The proportional hazard model assumes h(t|x)=lambda(t)*exp(f(x)).
12 #' \code{\link{gbm}} can estimate the f(x) component via partial likelihood.
13 #' After estimating f(x), \code{basehaz.gbm} can compute a nonparametric
14 #' estimate of lambda(t).
15 #'
16 #' @param t The survival times.
17 #' @param delta The censoring indicator.
18 #' @param f.x The predicted values of the regression model on the log hazard
19 #' scale.
20 #' @param t.eval Values at which the baseline hazard will be evaluated.
21 #' @param smooth If \code{TRUE} \code{basehaz.gbm} will smooth the estimated
22 #' baseline hazard using Friedman's super smoother \code{\link{supsmu}}.
23 #' @param cumulative If \code{TRUE} the cumulative hazard function will be
24 #' computed.
25 #' @return A vector of length equal to the length of t (or of length
26 #' \code{t.eval} if \code{t.eval} is not \code{NULL}) containing the baseline
27 #' hazard evaluated at t (or at \code{t.eval} if \code{t.eval} is not
28 #' \code{NULL}). If \code{cumulative} is set to \code{TRUE} then the returned
29 #' vector evaluates the cumulative hazard function at those values.
30 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
31 #' @seealso \code{\link[survival]{survfit}}, \code{\link{gbm}}
32 #' @references
33 #' N. Breslow (1972). "Discussion of `Regression Models and
34 #' Life-Tables' by D.R. Cox," Journal of the Royal Statistical Society, Series
35 #' B, 34(2):216-217.
36 #'
37 #' N. Breslow (1974). "Covariance analysis of censored survival data,"
38 #' Biometrics 30:89-99.
39 #' @keywords methods survival
40 #' @export
41 basehaz.gbm <- function(t,delta, f.x, t.eval = NULL, smooth = FALSE,
42 cumulative = TRUE) {
43
44 t.unique <- sort(unique(t[delta==1]))
45 alpha <- length(t.unique)
46 for(i in 1:length(t.unique)) {
47 alpha[i] <- sum(t[delta==1]==t.unique[i])/
48 sum(exp(f.x[t>=t.unique[i]]))
49 }
50
51 if(!smooth && !cumulative) {
52 if(!is.null(t.eval)) {
53 stop("Cannot evaluate unsmoothed baseline hazard at t.eval.")
54 }
55 } else {
56 if(smooth && !cumulative) {
57 lambda.smooth <- supsmu(t.unique,alpha)
58 } else {
59 if(smooth && cumulative)
1760 {
18 stop("Cannot evaluate unsmoothed baseline hazard at t.eval.")
61 lambda.smooth <- supsmu(t.unique, cumsum(alpha))
62 } else { # (!smooth && cumulative) - THE DEFAULT
63 lambda.smooth <- list(x = t.unique, y = cumsum(alpha))
1964 }
20 } else
21 if(smooth && !cumulative)
22 {
23 lambda.smooth <- supsmu(t.unique,alpha)
24 } else
25 if(smooth && cumulative)
26 {
27 lambda.smooth <- supsmu(t.unique,cumsum(alpha))
28 } else # (!smooth && cumulative) - THE DEFAULT
29 {
30 lambda.smooth <- list(x=t.unique,y=cumsum(alpha))
31 }
65 }
66 }
3267
33 if(!is.null(t.eval))
34 {
35 obj <- approx(lambda.smooth$x,lambda.smooth$y,xout=t.eval)$y
36 } else
37 {
38 obj <- approx(lambda.smooth$x,lambda.smooth$y,xout=t)$y
39 }
40
41 return(obj)
68
69 obj <- if(!is.null(t.eval)) {
70 approx(lambda.smooth$x, lambda.smooth$y, xout = t.eval)$y
71 } else {
72 approx(lambda.smooth$x, lambda.smooth$y, xout = t)$y
73 }
74
75 return(obj)
76
4277 }
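A hedged usage sketch (not part of the diff) showing basehaz.gbm after a Cox-type gbm fit; the simulated data and object names below are illustrative assumptions, not package code:

library(survival)
library(gbm)
set.seed(1)
N <- 500
x <- runif(N)
time <- rexp(N, rate = exp(x))      # hazard increases with x
status <- rbinom(N, 1, 0.8)         # censoring indicator, roughly 20% censored
dat <- data.frame(time, status, x)
fit <- gbm(Surv(time, status) ~ x, data = dat, distribution = "coxph",
           n.trees = 100, shrinkage = 0.05)
f.x <- predict(fit, newdata = dat, n.trees = 100)  # f(x) on the log-hazard scale
H0 <- basehaz.gbm(t = dat$time, delta = dat$status, f.x = f.x,
                  t.eval = sort(unique(dat$time)), cumulative = TRUE)  # cumulative baseline hazard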
R/calibrate.plot.R
0 quantile.rug <- function(x,prob=(0:10)/10,...)
1 {
2 quants <- quantile(x[!is.na(x)],prob=prob)
3 if(length(unique(quants)) < length(prob))
4 {
5 quants <- jitter(quants)
6 }
7 rug(quants,...)
0 #' Quantile rug plot
1 #'
2 #' Marks the quantiles on the axes of the current plot.
3 #'
4 #' @param x A numeric vector.
5 #'
6 #' @param prob The quantiles of x to mark on the x-axis.
7 #'
8 #' @param ... Additional optional arguments to be passed onto
9 #' \code{\link[graphics]{rug}}
10 #'
11 #' @return No return values.
12 #'
13 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}.
14 #'
15 #' @seealso \code{\link[graphics]{plot}}, \code{\link[stats]{quantile}},
16 #' \code{\link[base]{jitter}}, \code{\link[graphics]{rug}}.
17 #'
18 #' @keywords aplot
19 #'
20 #' @export quantile.rug
21 #'
22 #' @examples
23 #' x <- rnorm(100)
24 #' y <- rnorm(100)
25 #' plot(x, y)
26 #' quantile.rug(x)
27 quantile.rug <- function(x, prob = 0:10/10, ...) {
28 quants <- quantile(x[!is.na(x)], prob = prob)
29 if(length(unique(quants)) < length(prob)) {
30 quants <- jitter(quants)
31 }
32 rug(quants, ...)
833 }
934
10 calibrate.plot <- function(y,p,
11 distribution="bernoulli",
12 replace=TRUE,
13 line.par=list(col="black"),
14 shade.col="lightyellow",
15 shade.density=NULL,
16 rug.par=list(side=1),
17 xlab="Predicted value",
18 ylab="Observed average",
19 xlim=NULL,ylim=NULL,
20 knots=NULL,df=6,
21 ...)
22 {
23 data <- data.frame(y=y,p=p)
2435
25 if(is.null(knots) && is.null(df))
26 stop("Either knots or df must be specified")
27 if((df != round(df)) || (df<1))
28 stop("df must be a positive integer")
29
30 if(distribution=="bernoulli")
31 {
32 family1 = binomial
33 } else if(distribution=="poisson")
34 {
35 family1 = poisson
36 } else
37 {
38 family1 = gaussian
39 }
40 gam1 <- glm(y~ns(p,df=df,knots=knots),data=data,family=family1)
41
42 x <- seq(min(p),max(p),length=200)
43 yy <- predict(gam1,newdata=data.frame(p=x),se.fit=TRUE,type="response")
44
45 x <- x[!is.na(yy$fit)]
46 yy$se.fit <- yy$se.fit[!is.na(yy$fit)]
47 yy$fit <- yy$fit[!is.na(yy$fit)]
48
49 if(!is.na(shade.col))
50 {
51 se.lower <- yy$fit-2*yy$se.fit
52 se.upper <- yy$fit+2*yy$se.fit
53 if(distribution=="bernoulli")
54 {
55 se.lower[se.lower < 0] <- 0
56 se.upper[se.upper > 1] <- 1
57 }
58 if(distribution=="poisson")
59 {
60 se.lower[se.lower < 0] <- 0
61 }
62 if(is.null(xlim)) xlim <- range(se.lower,se.upper,x)
63 if(is.null(ylim)) ylim <- range(se.lower,se.upper,x)
64 }
65 else
66 {
67 if(is.null(xlim)) xlim <- range(yy$fit,x)
68 if(is.null(ylim)) ylim <- range(yy$fit,x)
69 }
70 if(replace)
71 {
72 plot(0,0,
73 type="n",
74 xlab=xlab,ylab=ylab,
75 xlim=xlim,ylim=ylim,
76 ...)
77 }
78 if(!is.na(shade.col))
79 {
80 polygon(c(x,rev(x),x[1]),
81 c(se.lower,rev(se.upper),se.lower[1]),
82 col=shade.col,
83 border=NA,
84 density=shade.density)
85 }
86 lines(x,yy$fit,col=line.par$col)
87 quantile.rug(p,side=rug.par$side)
88 abline(0,1,col="red")
36 #' Calibration plot
37 #'
38 #' An experimental diagnostic tool that plots the fitted values versus the
39 #' actual average values. Currently only available when
40 #' \code{distribution = "bernoulli"}.
41 #'
42 #' Uses natural splines to estimate E(y|p). Well-calibrated predictions imply
43 #' that E(y|p) = p. The plot also includes a pointwise 95% confidence band.
44 #'
45 #' @param y The outcome 0-1 variable.
46 #'
47 #' @param p The predictions estimating E(y|x).
48 #'
49 #' @param distribution The loss function used in creating \code{p}.
50 #' \code{bernoulli} and \code{poisson} are currently the only special options.
51 #' All others default to squared error assuming \code{gaussian}.
52 #'
53 #' @param replace Determines whether this plot will replace or overlay the
54 #' current plot. \code{replace=FALSE} is useful for comparing the calibration
55 #' of several methods.
56 #'
57 #' @param line.par Graphics parameters for the line.
58 #'
59 #' @param shade.col Color for shading the 2 SE region. \code{shade.col=NA}
60 #' implies no 2 SE region.
61 #'
62 #' @param shade.density The \code{density} parameter for \code{\link{polygon}}.
63 #'
64 #' @param rug.par Graphics parameters passed to \code{\link{rug}}.
65 #'
66 #' @param xlab x-axis label corresponding to the predicted values.
67 #'
68 #' @param ylab y-axis label corresponding to the observed average.
69 #'
70 #' @param xlim,ylim x- and y-axis limits. If not specified the function will
71 #' select limits.
72 #'
73 #' @param knots,df These parameters are passed directly to
74 #' \code{\link[splines]{ns}} for constructing a natural spline smoother for the
75 #' calibration curve.
76 #'
77 #' @param ... Additional optional arguments to be passed onto
78 #' \code{\link[graphics]{plot}}
79 #'
80 #' @return No return values.
81 #'
82 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
83 #'
84 #' @references
85 #' J.F. Yates (1982). "External correspondence: decomposition of
86 #' the mean probability score," Organisational Behaviour and Human Performance
87 #' 30:132-156.
88 #'
89 #' D.J. Spiegelhalter (1986). "Probabilistic Prediction in Patient Management
90 #' and Clinical Trials," Statistics in Medicine 5:421-433.
91 #' @keywords hplot
92 #'
93 #' @export
94 #'
95 #' @examples
96 #' # Don't want R CMD check to think there is a dependency on rpart
97 #' # so comment out the example
98 #' #library(rpart)
99 #' #data(kyphosis)
100 #' #y <- as.numeric(kyphosis$Kyphosis)-1
101 #' #x <- kyphosis$Age
102 #' #glm1 <- glm(y~poly(x,2),family=binomial)
103 #' #p <- predict(glm1,type="response")
104 #' #calibrate.plot(y, p, xlim=c(0,0.6), ylim=c(0,0.6))
105 calibrate.plot <- function(y, p, distribution = "bernoulli", replace = TRUE,
106 line.par = list(col = "black"),
107 shade.col = "lightyellow",
108 shade.density = NULL, rug.par = list(side = 1),
109 xlab = "Predicted value", ylab = "Observed average",
110 xlim = NULL, ylim = NULL, knots = NULL, df = 6, ...)
111 {
112
113 # Sanity check
114 if (!requireNamespace("splines", quietly = TRUE)) {
115 stop("The splines package is needed for this function to work. Please ",
116 "install it.", call. = FALSE)
117 }
118
119 data <- data.frame(y = y, p = p)
120
121 # Check spline parameters
122 if(is.null(knots) && is.null(df)) {
123 stop("Either knots or df must be specified")
124 }
125 if((df != round(df)) || (df < 1)) {
126 stop("df must be a positive integer")
127 }
128
129 # Check distribution
130 if(distribution == "bernoulli") {
131 family1 <- binomial
132 } else if(distribution == "poisson") {
133 family1 <- poisson
134 } else {
135 family1 <- gaussian
136 }
137
138 # Fit a GLM using natural cubic splines
139 gam1 <- glm(y ~ splines::ns(p, df = df, knots = knots), data = data,
140 family = family1)
141
142 # Plotting data
143 x <- seq(min(p), max(p), length = 200)
144 yy <- predict(gam1, newdata = data.frame(p = x), se.fit = TRUE,
145 type = "response")
146 x <- x[!is.na(yy$fit)]
147 yy$se.fit <- yy$se.fit[!is.na(yy$fit)]
148 yy$fit <- yy$fit[!is.na(yy$fit)]
149
150 # Plotting parameters
151 if(!is.na(shade.col)) {
152 se.lower <- yy$fit - 2 * yy$se.fit
153 se.upper <- yy$fit + 2 * yy$se.fit
154 if(distribution == "bernoulli") {
155 se.lower[se.lower < 0] <- 0
156 se.upper[se.upper > 1] <- 1
157 }
158 if(distribution == "poisson") {
159 se.lower[se.lower < 0] <- 0
160 }
161 if(is.null(xlim)) {
162 xlim <- range(se.lower, se.upper, x)
163 }
164 if(is.null(ylim)) {
165 ylim <- range(se.lower, se.upper, x)
166 }
167 }
168 else {
169 if(is.null(xlim)) {
170 xlim <- range(yy$fit,x)
171 }
172 if(is.null(ylim)) {
173 ylim <- range(yy$fit,x)
174 }
175 }
176
177 # Construct plot
178 if(replace) {
179 plot(0, 0, type = "n", xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim,
180 ...)
181 }
182 if(!is.na(shade.col)) {
183 polygon(c(x, rev(x), x[1L]), c(se.lower, rev(se.upper), se.lower[1L]),
184 col = shade.col, border = NA, density = shade.density)
185 }
186 lines(x, yy$fit, col = line.par$col)
187 quantile.rug(p, side = rug.par$side)
188 abline(0, 1, col = "red")
189
89190 }
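A hedged, self-contained sketch of calibrate.plot (not from the diff) that avoids the rpart dependency mentioned in the commented-out example above; the simulated data are purely illustrative:

set.seed(123)
n <- 1000
x <- rnorm(n)
y <- rbinom(n, size = 1, prob = plogis(-1 + 2 * x))  # 0-1 outcome
glm1 <- glm(y ~ x, family = binomial)
p <- predict(glm1, type = "response")                # predictions estimating E(y|x)
calibrate.plot(y, p, distribution = "bernoulli")     # curve near the 45-degree line if well calibrated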
R/checks.R +0 -45
0 checkMissing <- function(x, y){
1 nms <- getVarNames(x)
2 #### Check for NaNs in x and NAs in response
3 j <- apply(x, 2, function(z) any(is.nan(z)))
4 if(any(j)) {
5 stop("Use NA for missing values. NaN found in predictor variables:",
6 paste(nms[j],collapse=","))
7 }
8 if(any(is.na(y))) stop("Missing values are not allowed in the response")
9 invisible(NULL)
10 }
11
12 checkID <- function(id){
13 # Check for disallowed interaction.depth
14 if(id < 1) {
15 stop("interaction.depth must be at least 1.")
16 }
17 else if(id > 49) {
18 stop("interaction.depth must be less than 50. You should also ask yourself why you want such large interaction terms. A value between 1 and 5 should be sufficient for most applications.")
19 }
20 invisible(id)
21 }
22
23 checkWeights <- function(w, n){
24 # Logical checks on weights
25 if(length(w)==0) { w <- rep(1, n) }
26 else if(any(w < 0)) stop("negative weights not allowed")
27 w
28 }
29
30 checkOffset <- function(o, y){
31 # Check offset
32 if(is.null(o) | all(o==0)) { o <- NA }
33 else if(length(o) != length(y)) {
34 stop("The length of offset does not equal the length of y.")
35 }
36 o
37 }
38
39 getVarNames <- function(x){
40 if(is.matrix(x)) { var.names <- colnames(x) }
41 else if(is.data.frame(x)) { var.names <- names(x) }
42 else { var.names <- paste("X",1:ncol(x),sep="") }
43 var.names
44 }
R/gbm-internals.R
0 #' gbm internal functions
1 #'
2 #' Helper functions for preprocessing data prior to building a \code{"gbm"}
3 #' object.
4 #'
5 #' @param y The response variable.
6 #' @param d,distribution The distribution, either specified by the user or
7 #' implied.
8 #' @param class.stratify.cv Whether or not to stratify, if provided by the user.
9 #' @param i.train Computed internally by \code{gbm}.
10 #' @param group The group, if using \code{distribution = "pairwise"}.
11 #' @param strat Whether or not to stratify.
12 #' @param cv.folds The number of cross-validation folds.
13 #' @param x The design matrix.
14 #' @param id The interaction depth.
15 #' @param w The weights.
16 #' @param n The number of cores to use in the cluster.
17 #' @param o The offset.
18 #'
19 #' @details
20 #' These are functions used internally by \code{gbm} and not intended for direct
21 #' use by the user.
22 #'
23 #' @aliases guessDist getStratify getCVgroup checkMissing checkID checkWeights
24 #' checkOffset getVarNames gbmCluster
25 #'
26 #' @rdname gbm-internals
27 #' @export
28 guessDist <- function(y){
29 # If distribution is not given, try to guess it
30 if (length(unique(y)) == 2){ d <- "bernoulli" }
31 else if (class(y) == "Surv" ){ d <- "coxph" }
32 else if (is.factor(y)){ d <- "multinomial" }
33 else{ d <- "gaussian" }
34 cat(paste("Distribution not specified, assuming", d, "...\n"))
35 list(name=d)
36 }
37
38
39 #' @rdname gbm-internals
40 #' @export
41 getCVgroup <- function(distribution, class.stratify.cv, y, i.train, cv.folds,
42 group) {
43 # Construct cross-validation groups depending on the type of model to be fit
44 if (distribution$name %in% c( "bernoulli", "multinomial" ) & class.stratify.cv ){
45 nc <- table(y[i.train]) # Number in each class
46 uc <- names(nc)
47 if (min(nc) < cv.folds){
48 stop( paste("The smallest class has only", min(nc), "objects in the training set. Can't do", cv.folds, "fold cross-validation."))
49 }
50 cv.group <- vector(length = length(i.train))
51 for (i in 1:length(uc)){
52 cv.group[y[i.train] == uc[i]] <- sample(rep(1:cv.folds , length = nc[i]))
53 }
54 } # Close if
55 else if (distribution$name == "pairwise") {
56 # Split into CV folds at group boundaries
57 s <- sample(rep(1:cv.folds, length=nlevels(group)))
58 cv.group <- s[as.integer(group[i.train])]
59 }
60 else {
61 cv.group <- sample(rep(1:cv.folds, length=length(i.train)))
62 }
63 cv.group
64 }
65
66
67 #' @rdname gbm-internals
68 #' @export
69 getStratify <- function(strat, d){
70 if (is.null(strat)){
71 if (d$name == "multinomial" ){ strat <- TRUE }
72 else { strat <- FALSE }
73 }
74 else {
75 if (!is.element(d$name, c( "bernoulli", "multinomial"))){
76 warning("You can only use class.stratify.cv when distribution is bernoulli or multinomial. Ignored.")
77 strat <- FALSE
78 }
79 } # Close else
80 strat
81 }
82
83
84 #' @rdname gbm-internals
85 #' @export
86 checkMissing <- function(x, y){
87 nms <- getVarNames(x)
88 #### Check for NaNs in x and NAs in response
89 j <- apply(x, 2, function(z) any(is.nan(z)))
90 if(any(j)) {
91 stop("Use NA for missing values. NaN found in predictor variables:",
92 paste(nms[j],collapse=","))
93 }
94 if(any(is.na(y))) stop("Missing values are not allowed in the response")
95 invisible(NULL)
96 }
97
98
99 #' @rdname gbm-internals
100 #' @export
101 checkWeights <- function(w, n){
102 # Logical checks on weights
103 if(length(w)==0) { w <- rep(1, n) }
104 else if(any(w < 0)) stop("negative weights not allowed")
105 w
106 }
107
108
109 #' @rdname gbm-internals
110 #' @export
111 checkID <- function(id){
112 # Check for disallowed interaction.depth
113 if(id < 1) {
114 stop("interaction.depth must be at least 1.")
115 }
116 else if(id > 49) {
117 stop("interaction.depth must be less than 50. You should also ask yourself why you want such large interaction terms. A value between 1 and 5 should be sufficient for most applications.")
118 }
119 invisible(id)
120 }
121
122
123 #' @rdname gbm-internals
124 #' @export
125 checkOffset <- function(o, y){
126 # Check offset
127 if(is.null(o) | all(o==0)) { o <- NA }
128 else if(length(o) != length(y)) {
129 stop("The length of offset does not equal the length of y.")
130 }
131 o
132 }
133
134
135 #' @rdname gbm-internals
136 #' @export
137 getVarNames <- function(x){
138 if(is.matrix(x)) { var.names <- colnames(x) }
139 else if(is.data.frame(x)) { var.names <- names(x) }
140 else { var.names <- paste("X",1:ncol(x),sep="") }
141 var.names
142 }
143
144
145 #' @rdname gbm-internals
146 #' @export
147 gbmCluster <- function(n){
148 # If number of cores (n) not given, try to work it out from the number
149 # that appear to be available and the number of CV folds.
150 if (is.null(n)){
151 n <- parallel::detectCores()
152 }
153 parallel::makeCluster(n)
154 }
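A brief illustration (not part of the package source) of how a few of the helpers documented above behave; the expected results in the comments follow directly from the definitions shown:

guessDist(c(0, 1, 1, 0))             # two unique values -> list(name = "bernoulli")
guessDist(factor(c("a", "b", "c")))  # factor response   -> list(name = "multinomial")
guessDist(rnorm(10))                 # anything else     -> list(name = "gaussian")
checkWeights(numeric(0), n = 5)      # empty weights become rep(1, 5)
checkID(3)                           # a valid interaction.depth, returned invisibly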
R/gbm-package.R
0 #' Generalized Boosted Regression Models (GBMs)
1 #'
2 #' This package implements extensions to Freund and Schapire's AdaBoost
3 #' algorithm and J. Friedman's gradient boosting machine. Includes regression
4 #' methods for least squares, absolute loss, logistic, Poisson, Cox
5 #' proportional hazards partial likelihood, multinomial, t-distribution,
6 #' AdaBoost exponential loss, Learning to Rank, and Huberized hinge loss.
7 #'
8 #' Further information is available in vignette:
9 #' \code{browseVignettes(package = "gbm")}
10 #'
11 #' @import lattice
12 #'
13 #' @importFrom grDevices rainbow
14 #' @importFrom graphics abline axis barplot lines mtext par plot polygon rug
15 #' @importFrom graphics segments title
16 #' @importFrom stats approx binomial delete.response gaussian glm loess
17 #' @importFrom stats model.extract model.frame model.offset model.response
18 #' @importFrom stats model.weights na.pass poisson predict quantile rbinom
19 #' @importFrom stats reformulate reorder rexp rnorm runif sd supsmu terms var
20 #' @importFrom stats weighted.mean
21 #' @importFrom survival Surv
22 #'
23 #' @useDynLib gbm, .registration = TRUE
24 #'
25 #' @name gbm-package
26 #'
27 #' @docType package
28 #'
29 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com} with contributions by
30 #' Daniel Edwards, Brian Kriegler, Stefan Schroedl and Harry Southworth.
31 #'
32 #' @references
33 #' Y. Freund and R.E. Schapire (1997) \dQuote{A decision-theoretic
34 #' generalization of on-line learning and an application to boosting,}
35 #' \emph{Journal of Computer and System Sciences,} 55(1):119-139.
36 #'
37 #' G. Ridgeway (1999). \dQuote{The state of boosting,} \emph{Computing Science
38 #' and Statistics} 31:172-181.
39 #'
40 #' J.H. Friedman, T. Hastie, R. Tibshirani (2000). \dQuote{Additive Logistic
41 #' Regression: a Statistical View of Boosting,} \emph{Annals of Statistics}
42 #' 28(2):337-374.
43 #'
44 #' J.H. Friedman (2001). \dQuote{Greedy Function Approximation: A Gradient
45 #' Boosting Machine,} \emph{Annals of Statistics} 29(5):1189-1232.
46 #'
47 #' J.H. Friedman (2002). \dQuote{Stochastic Gradient Boosting,}
48 #' \emph{Computational Statistics and Data Analysis} 38(4):367-378.
49 #'
50 #' The \url{http://statweb.stanford.edu/~jhf/R-MART} website.
51 #'
52 #' @keywords package
53 NULL
R/gbm.R +481 -169
0 .onAttach <- function(lib, pkg)
1 {
2 vers <- library(help=gbm)$info[[1]]
3 vers <- vers[grep("Version:",vers)]
4 vers <- rev(strsplit(vers," ")[[1]])[1]
5 packageStartupMessage(paste("Loaded gbm",vers))
0 #' Generalized Boosted Regression Modeling (GBM)
1 #'
2 #' Fits generalized boosted regression models. For technical details, see the
3 #' vignette: \code{utils::browseVignettes("gbm")}.
4 #'
5 #' \code{gbm.fit} provides the link between R and the C++ gbm engine.
6 #' \code{gbm} is a front-end to \code{gbm.fit} that uses the familiar R
7 #' modeling formulas. However, \code{\link[stats]{model.frame}} is very slow if
8 #' there are many predictor variables. For power-users with many variables use
9 #' \code{gbm.fit}. For general practice \code{gbm} is preferable.
10 #'
11 #' @param formula A symbolic description of the model to be fit. The formula
12 #' may include an offset term (e.g. y~offset(n)+x). If
13 #' \code{keep.data = FALSE} in the initial call to \code{gbm} then it is the
14 #' user's responsibility to resupply the offset to \code{\link{gbm.more}}.
15 #'
16 #' @param distribution Either a character string specifying the name of the
17 #' distribution to use or a list with a component \code{name} specifying the
18 #' distribution and any additional parameters needed. If not specified,
19 #' \code{gbm} will try to guess: if the response has only 2 unique values,
20 #' bernoulli is assumed; otherwise, if the response is a factor, multinomial is
21 #' assumed; otherwise, if the response has class \code{"Surv"}, coxph is
22 #' assumed; otherwise, gaussian is assumed.
23 #'
24 #' Currently available options are \code{"gaussian"} (squared error),
25 #' \code{"laplace"} (absolute loss), \code{"tdist"} (t-distribution loss),
26 #' \code{"bernoulli"} (logistic regression for 0-1 outcomes),
27 #' \code{"huberized"} (huberized hinge loss for 0-1 outcomes), classes),
28 #' \code{"adaboost"} (the AdaBoost exponential loss for 0-1 outcomes),
29 #' \code{"poisson"} (count outcomes), \code{"coxph"} (right censored
30 #' observations), \code{"quantile"}, or \code{"pairwise"} (ranking measure
31 #' using the LambdaMart algorithm).
32 #'
33 #' If quantile regression is specified, \code{distribution} must be a list of
34 #' the form \code{list(name = "quantile", alpha = 0.25)} where \code{alpha} is
35 #' the quantile to estimate. The current version's quantile regression method
36 #' does not handle non-constant weights and will stop.
37 #'
38 #' If \code{"tdist"} is specified, the default degrees of freedom is 4 and
39 #' this can be controlled by specifying
40 #' \code{distribution = list(name = "tdist", df = DF)} where \code{DF} is your
41 #' chosen degrees of freedom.
42 #'
43 #' If "pairwise" regression is specified, \code{distribution} must be a list of
44 #' the form \code{list(name="pairwise",group=...,metric=...,max.rank=...)}
45 #' (\code{metric} and \code{max.rank} are optional, see below). \code{group} is
46 #' a character vector with the column names of \code{data} that jointly
47 #' indicate the group an instance belongs to (typically a query in Information
48 #' Retrieval applications). For training, only pairs of instances from the same
49 #' group and with different target labels can be considered. \code{metric} is
50 #' the IR measure to use, one of
51 #' \describe{
52 #'   \item{\code{conc}}{Fraction of concordant pairs; for binary labels,
53 #'     this is equivalent to the Area under the ROC Curve}
54 #'   \item{\code{mrr}}{Mean reciprocal rank of the highest-ranked positive
55 #'     instance}
56 #'   \item{\code{map}}{Mean average precision, a generalization of
57 #'     \code{mrr} to multiple positive instances}
58 #'   \item{\code{ndcg}}{Normalized discounted cumulative gain. The score is
59 #'     the weighted sum (DCG) of the user-supplied target values, weighted
60 #'     by log(rank+1), and normalized to the maximum achievable value. This
61 #'     is the default if the user did not specify a metric.}
62 #' }
67 #'
68 #' \code{ndcg} and \code{conc} allow arbitrary target values, while binary
69 #' targets {0,1} are expected for \code{map} and \code{mrr}. For \code{ndcg}
70 #' and \code{mrr}, a cut-off can be chosen using a positive integer parameter
71 #' \code{max.rank}. If left unspecified, all ranks are taken into account.
72 #'
73 #' Note that splitting of instances into training and validation sets follows
74 #' group boundaries and therefore only approximates the specified
75 #' \code{train.fraction} ratio (the same applies to cross-validation folds).
76 #' Internally, queries are randomly shuffled before training, to avoid bias.
77 #'
78 #' Weights can be used in conjunction with pairwise metrics; however, it is
79 #' assumed that they are constant for instances from the same group.
80 #'
81 #' For details and background on the algorithm, see e.g. Burges (2010).
82 #'
83 #' @param data an optional data frame containing the variables in the model. By
84 #' default the variables are taken from \code{environment(formula)}, typically
85 #' the environment from which \code{gbm} is called. If \code{keep.data=TRUE} in
86 #' the initial call to \code{gbm} then \code{gbm} stores a copy with the
87 #' object. If \code{keep.data=FALSE} then subsequent calls to
88 #' \code{\link{gbm.more}} must resupply the same dataset. It becomes the user's
89 #' responsibility to resupply the same data at this point.
90 #'
91 #' @param weights an optional vector of weights to be used in the fitting
92 #' process. Must be positive but do not need to be normalized. If
93 #' \code{keep.data=FALSE} in the initial call to \code{gbm} then it is the
94 #' user's responsibility to resupply the weights to \code{\link{gbm.more}}.
95 #'
96 #' @param var.monotone an optional vector, the same length as the number of
97 #' predictors, indicating which variables have a monotone increasing (+1),
98 #' decreasing (-1), or arbitrary (0) relationship with the outcome.
99 #'
100 #' @param n.trees Integer specifying the total number of trees to fit. This is
101 #' equivalent to the number of iterations and the number of basis functions in
102 #' the additive expansion. Default is 100.
103 #'
104 #' @param interaction.depth Integer specifying the maximum depth of each tree
105 #' (i.e., the highest level of variable interactions allowed). A value of 1
106 #' implies an additive model, a value of 2 implies a model with up to 2-way
107 #' interactions, etc. Default is 1.
108 #'
109 #' @param n.minobsinnode Integer specifying the minimum number of observations
110 #' in the terminal nodes of the trees. Note that this is the actual number of
111 #' observations, not the total weight.
112 #'
113 #' @param shrinkage a shrinkage parameter applied to each tree in the
114 #' expansion. Also known as the learning rate or step-size reduction; 0.001 to
115 #' 0.1 usually work, but a smaller learning rate typically requires more trees.
116 #' Default is 0.1.
117 #'
118 #' @param bag.fraction the fraction of the training set observations randomly
119 #' selected to propose the next tree in the expansion. This introduces
120 #' randomness into the model fit. If \code{bag.fraction} < 1 then running the
121 #' same model twice will result in similar but different fits. \code{gbm} uses
122 #' the R random number generator so \code{set.seed} can ensure that the model
123 #' can be reconstructed. Preferably, the user can save the returned
124 #' \code{\link{gbm.object}} using \code{\link{save}}. Default is 0.5.
125 #'
126 #' @param train.fraction The first \code{train.fraction * nrow(data)}
127 #' observations are used to fit the \code{gbm} and the remainder are used for
128 #' computing out-of-sample estimates of the loss function.
129 #'
130 #' @param cv.folds Number of cross-validation folds to perform. If
131 #' \code{cv.folds} > 1 then \code{gbm}, in addition to the usual fit, will
132 #' perform a cross-validation and calculate an estimate of generalization
133 #' error, which is returned in \code{cv.error}.
134 #'
135 #' @param keep.data a logical variable indicating whether to keep the data and
136 #' an index of the data stored with the object. Keeping the data and index
137 #' makes subsequent calls to \code{\link{gbm.more}} faster at the cost of
138 #' storing an extra copy of the dataset.
139 #'
140 #' @param verbose Logical indicating whether or not to print out progress and
141 #' performance indicators (\code{TRUE}). If this option is left unspecified for
142 #' \code{gbm.more}, then it uses \code{verbose} from \code{object}. Default is
143 #' \code{FALSE}.
144 #'
145 #' @param class.stratify.cv Logical indicating whether or not the
146 #' cross-validation should be stratified by class. Defaults to \code{TRUE} for
147 #' \code{distribution = "multinomial"} and is only implemented for
148 #' \code{"multinomial"} and \code{"bernoulli"}. The purpose of stratifying the
149 #' cross-validation is to help avoid situations in which training sets do
150 #' not contain all classes.
151 #'
152 #' @param n.cores The number of CPU cores to use. The cross-validation loop
153 #' will attempt to send different CV folds off to different cores. If
154 #' \code{n.cores} is not specified by the user, it is guessed using the
155 #' \code{detectCores} function in the \code{parallel} package. Note that the
156 #' documentation for \code{detectCores} makes clear that it is not failsafe and
157 #' could return a spurious number of available cores.
158 #'
159 #' @return A \code{\link{gbm.object}} object.
160 #'
161 #' @details
162 #' This package implements the generalized boosted modeling framework. Boosting
163 #' is the process of iteratively adding basis functions in a greedy fashion so
164 #' that each additional basis function further reduces the selected loss
165 #' function. This implementation closely follows Friedman's Gradient Boosting
166 #' Machine (Friedman, 2001).
167 #'
168 #' In addition to many of the features documented in the Gradient Boosting
169 #' Machine, \code{gbm} offers additional features including the out-of-bag
170 #' estimator for the optimal number of iterations, the ability to store and
171 #' manipulate the resulting \code{gbm} object, and a variety of other loss
172 #' functions that had not previously had associated boosting algorithms,
173 #' including the Cox partial likelihood for censored data, the poisson
174 #' likelihood for count outcomes, and a gradient boosting implementation to
175 #' minimize the AdaBoost exponential loss function.
176 #'
177 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
178 #'
179 #' Quantile regression code developed by Brian Kriegler
180 #' \email{bk@@stat.ucla.edu}
181 #'
182 #' t-distribution, and multinomial code developed by Harry Southworth and
183 #' Daniel Edwards
184 #'
185 #' Pairwise code developed by Stefan Schroedl \email{schroedl@@a9.com}
186 #'
187 #' @seealso \code{\link{gbm.object}}, \code{\link{gbm.perf}},
188 #' \code{\link{plot.gbm}}, \code{\link{predict.gbm}}, \code{\link{summary.gbm}},
189 #' and \code{\link{pretty.gbm.tree}}.
190 #'
191 #' @references
192 #' Y. Freund and R.E. Schapire (1997) \dQuote{A decision-theoretic
193 #' generalization of on-line learning and an application to boosting,}
194 #' \emph{Journal of Computer and System Sciences,} 55(1):119-139.
195 #'
196 #' G. Ridgeway (1999). \dQuote{The state of boosting,} \emph{Computing Science
197 #' and Statistics} 31:172-181.
198 #'
199 #' J.H. Friedman, T. Hastie, R. Tibshirani (2000). \dQuote{Additive Logistic
200 #' Regression: a Statistical View of Boosting,} \emph{Annals of Statistics}
201 #' 28(2):337-374.
202 #'
203 #' J.H. Friedman (2001). \dQuote{Greedy Function Approximation: A Gradient
204 #' Boosting Machine,} \emph{Annals of Statistics} 29(5):1189-1232.
205 #'
206 #' J.H. Friedman (2002). \dQuote{Stochastic Gradient Boosting,}
207 #' \emph{Computational Statistics and Data Analysis} 38(4):367-378.
208 #'
209 #' B. Kriegler (2007). Cost-Sensitive Stochastic Gradient Boosting Within a
210 #' Quantitative Regression Framework. Ph.D. Dissertation. University of
211 #' California at Los Angeles, Los Angeles, CA, USA. Advisor(s) Richard A. Berk.
212 #' \url{https://dl.acm.org/citation.cfm?id=1354603}.
213 #'
214 #' C. Burges (2010). \dQuote{From RankNet to LambdaRank to LambdaMART: An
215 #' Overview,} Microsoft Research Technical Report MSR-TR-2010-82.
216 #'
217 #' @export
218 #'
219 #' @examples
220 #' #
221 #' # A least squares regression example
222 #' #
223 #'
224 #' # Simulate data
225 #' set.seed(101) # for reproducibility
226 #' N <- 1000
227 #' X1 <- runif(N)
228 #' X2 <- 2 * runif(N)
229 #' X3 <- ordered(sample(letters[1:4], N, replace = TRUE), levels = letters[4:1])
230 #' X4 <- factor(sample(letters[1:6], N, replace = TRUE))
231 #' X5 <- factor(sample(letters[1:3], N, replace = TRUE))
232 #' X6 <- 3 * runif(N)
233 #' mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
234 #' SNR <- 10 # signal-to-noise ratio
235 #' Y <- X1 ^ 1.5 + 2 * (X2 ^ 0.5) + mu
236 #' sigma <- sqrt(var(Y) / SNR)
237 #' Y <- Y + rnorm(N, 0, sigma)
238 #' X1[sample(1:N,size=500)] <- NA # introduce some missing values
239 #' X4[sample(1:N,size=300)] <- NA # introduce some missing values
240 #' data <- data.frame(Y, X1, X2, X3, X4, X5, X6)
241 #'
242 #' # Fit a GBM
243 #' set.seed(102) # for reproducibility
244 #' gbm1 <- gbm(Y ~ ., data = data, var.monotone = c(0, 0, 0, 0, 0, 0),
245 #' distribution = "gaussian", n.trees = 100, shrinkage = 0.1,
246 #' interaction.depth = 3, bag.fraction = 0.5, train.fraction = 0.5,
247 #' n.minobsinnode = 10, cv.folds = 5, keep.data = TRUE,
248 #' verbose = FALSE, n.cores = 1)
249 #'
250 #' # Check performance using the out-of-bag (OOB) error; the OOB error typically
251 #' # underestimates the optimal number of iterations
252 #' best.iter <- gbm.perf(gbm1, method = "OOB")
253 #' print(best.iter)
254 #'
255 #' # Check performance using the 50% heldout test set
256 #' best.iter <- gbm.perf(gbm1, method = "test")
257 #' print(best.iter)
258 #'
259 #' # Check performance using 5-fold cross-validation
260 #' best.iter <- gbm.perf(gbm1, method = "cv")
261 #' print(best.iter)
262 #'
263 #' # Plot relative influence of each variable
264 #' par(mfrow = c(1, 2))
265 #' summary(gbm1, n.trees = 1) # using first tree
266 #' summary(gbm1, n.trees = best.iter) # using estimated best number of trees
267 #'
268 #' # Compactly print the first and last trees for curiosity
269 #' print(pretty.gbm.tree(gbm1, i.tree = 1))
270 #' print(pretty.gbm.tree(gbm1, i.tree = gbm1$n.trees))
271 #'
272 #' # Simulate new data
273 #' set.seed(103) # for reproducibility
274 #' N <- 1000
275 #' X1 <- runif(N)
276 #' X2 <- 2 * runif(N)
277 #' X3 <- ordered(sample(letters[1:4], N, replace = TRUE))
278 #' X4 <- factor(sample(letters[1:6], N, replace = TRUE))
279 #' X5 <- factor(sample(letters[1:3], N, replace = TRUE))
280 #' X6 <- 3 * runif(N)
281 #' mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
282 #' Y <- X1 ^ 1.5 + 2 * (X2 ^ 0.5) + mu + rnorm(N, 0, sigma)
283 #' data2 <- data.frame(Y, X1, X2, X3, X4, X5, X6)
284 #'
285 #' # Predict on the new data using the "best" number of trees; by default,
286 #' # predictions will be on the link scale
287 #' Yhat <- predict(gbm1, newdata = data2, n.trees = best.iter, type = "link")
288 #'
289 #' # least squares error
290 #' print(sum((data2$Y - Yhat)^2))
291 #'
292 #' # Construct univariate partial dependence plots
293 #' p1 <- plot(gbm1, i.var = 1, n.trees = best.iter)
294 #' p2 <- plot(gbm1, i.var = 2, n.trees = best.iter)
295 #' p3 <- plot(gbm1, i.var = "X3", n.trees = best.iter) # can use index or name
296 #' grid.arrange(p1, p2, p3, ncol = 3)
297 #'
298 #' # Construct bivariate partial dependence plots
299 #' plot(gbm1, i.var = 1:2, n.trees = best.iter)
300 #' plot(gbm1, i.var = c("X2", "X3"), n.trees = best.iter)
301 #' plot(gbm1, i.var = 3:4, n.trees = best.iter)
302 #'
303 #' # Construct trivariate partial dependence plots
304 #' plot(gbm1, i.var = c(1, 2, 6), n.trees = best.iter,
305 #' continuous.resolution = 20)
306 #' plot(gbm1, i.var = 1:3, n.trees = best.iter)
307 #' plot(gbm1, i.var = 2:4, n.trees = best.iter)
308 #' plot(gbm1, i.var = 3:5, n.trees = best.iter)
309 #'
310 #' # Add more (i.e., 100) boosting iterations to the ensemble
311 #' gbm2 <- gbm.more(gbm1, n.new.trees = 100, verbose = FALSE)
312 gbm <- function(formula = formula(data), distribution = "bernoulli",
313 data = list(), weights, var.monotone = NULL, n.trees = 100,
314 interaction.depth = 1, n.minobsinnode = 10, shrinkage = 0.1,
315 bag.fraction = 0.5, train.fraction = 1.0, cv.folds = 0,
316 keep.data = TRUE, verbose = FALSE, class.stratify.cv = NULL,
317 n.cores = NULL) {
318
319 # Match the call to gbm
320 mcall <- match.call()
321
322 # Verbose output?
323 lVerbose <- if (!is.logical(verbose)) {
324 FALSE
325 } else {
326 verbose
327 }
328
329 # Construct model frame, terms object, weights, and offset
330 mf <- match.call(expand.dots = FALSE)
331 m <- match(c("formula", "data", "weights", "offset"), names(mf), 0)
332 mf <- mf[c(1, m)]
333 mf$drop.unused.levels <- TRUE
334 mf$na.action <- na.pass
335 mf[[1]] <- as.name("model.frame")
336 m <- mf
337 mf <- eval(mf, parent.frame())
338 Terms <- attr(mf, "terms")
339 w <- model.weights(mf)
340 offset <- model.offset(mf)
341
342 # Determine and check response distribution
343 distribution <- if (missing(distribution)) {
344 y <- data[, all.vars(formula)[1L], drop = TRUE]
345 guessDist(y)
346 } else if (is.character(distribution)) {
347 list(name = distribution)
348 }
349 if (!is.element(distribution$name, getAvailableDistributions())) {
350 stop("Distribution ", distribution$name, " is not supported.")
351 }
352
353 # Extract and check response values
354 y <- model.response(mf)
355
356 # Construct data frame of predictor values
357 var.names <- attributes(Terms)$term.labels
358 x <- model.frame(terms(reformulate(var.names)), data = data,
359 na.action = na.pass)
360
361 # Extract response name as a character string
362 response.name <- as.character(formula[[2L]])
363
364 # Stratify cross-validation by class (only for bernoulli and multinomial)
365 class.stratify.cv <- getStratify(class.stratify.cv, d = distribution)
366
367 # Groups (for pairwise distribution only)
368 group <- NULL
369 num.groups <- 0
370
371 # Determine number of training instances
372 if (distribution$name != "pairwise"){
373
374 # Number of training instances
375 nTrain <- floor(train.fraction * nrow(x))
376
377 } else {
378
379 # Sampling is by group, so we need to calculate them here
380 distribution.group <- distribution[["group"]]
381 if (is.null(distribution.group)) {
382 stop(paste("For pairwise regression, `distribution` must be a list of",
383 "the form `list(name = \"pairwise\", group = c(\"date\",",
384 "\"session\", \"category\", \"keywords\"))`."))
385 }
386
387 # Check if group names are valid
388 i <- match(distribution.group, colnames(data))
389 if (any(is.na(i))) {
390 stop("Group column does not occur in data: ",
391 distribution.group[is.na(i)], ".")
392 }
393
394 # Construct group index
395 group <- factor(
396 do.call(paste, c(data[, distribution.group, drop = FALSE], sep = ":"))
397 )
398
399 # Check that weights are constant across groups
400 if ((!missing(weights)) && (!is.null(weights))) {
401 w.min <- tapply(w, INDEX = group, FUN = min)
402 w.max <- tapply(w, INDEX = group, FUN = max)
403 if (any(w.min != w.max)) {
404 stop("For `distribution = \"pairwise\"`, all instances for the same ",
405 "group must have the same weight.")
406 }
407 w <- w * length(w.min) / sum(w.min) # normalize across groups
408 }
409
410 # Shuffle groups to remove bias when split into train/test sets and/or CV
411 # folds
412 perm.levels <- levels(group)[sample(1:nlevels(group))]
413 group <- factor(group, levels = perm.levels)
414
415 # The C function expects instances to be sorted by group and descending by
416 # target
417 ord.group <- order(group, -y)
418 group <- group[ord.group]
419 y <- y[ord.group]
420 x <- x[ord.group, , drop = FALSE]
421 w <- w[ord.group]
422
423 # Split into train and validation sets at group boundary
424 num.groups.train <- max(1, round(train.fraction * nlevels(group)))
425
426 # Include all groups up to the num.groups.train
427 nTrain <- max(which(group==levels(group)[num.groups.train]))
428 Misc <- group
429
430 }
431
432 # Set up for k-fold cross-validation
433 cv.error <- NULL
434 if(cv.folds > 1) {
435 cv.results <- gbmCrossVal(cv.folds = cv.folds, nTrain = nTrain,
436 n.cores = n.cores,
437 class.stratify.cv = class.stratify.cv,
438 data = data, x = x, y = y, offset = offset,
439 distribution = distribution, w = w,
440 var.monotone = var.monotone, n.trees = n.trees,
441 interaction.depth = interaction.depth,
442 n.minobsinnode = n.minobsinnode,
443 shrinkage = shrinkage,
444 bag.fraction = bag.fraction,
445 var.names = var.names,
446 response.name = response.name, group = group)
447 cv.error <- cv.results$error
448 p <- cv.results$predictions
449 }
450
451 # Fit a GBM
452 gbm.obj <- gbm.fit(x = x, y = y, offset = offset, distribution = distribution,
453 w = w, var.monotone = var.monotone, n.trees = n.trees,
454 interaction.depth = interaction.depth,
455 n.minobsinnode = n.minobsinnode, shrinkage = shrinkage,
456 bag.fraction = bag.fraction, nTrain = nTrain,
457 keep.data = keep.data, verbose = lVerbose,
458 var.names = var.names, response.name = response.name,
459 group = group)
460
461 # Attach further components
462 gbm.obj$train.fraction <- train.fraction
463 gbm.obj$Terms <- Terms
464 gbm.obj$cv.error <- cv.error
465 gbm.obj$cv.folds <- cv.folds
466 gbm.obj$call <- mcall
467 gbm.obj$m <- m
468 if (cv.folds > 0) {
469 gbm.obj$cv.fitted <- p
470 }
471 if (distribution$name == "pairwise") {
472 # Data has been reordered according to queries. We need to permute the
473 # fitted values so that they correspond to the original order.
474 gbm.obj$ord.group <- ord.group
475 gbm.obj$fit <- gbm.obj$fit[order(ord.group)]
476 }
477
478 # Return "gbm" object
479 gbm.obj
480
6481 }
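# A minimal, simulated sketch of calling the formula interface above with the
# pairwise distribution; the data frame `train` and its columns `relevance`
# and `query` are illustrative names only, not part of the package.
set.seed(1)
train <- data.frame(query = factor(rep(1:50, each = 10)),
                    x1 = runif(500), x2 = runif(500))
train$relevance <- as.integer(train$x1 + rnorm(500, sd = 0.1) > 0.5)
fit <- gbm(relevance ~ x1 + x2, data = train,
           distribution = list(name = "pairwise", group = "query",
                               metric = "ndcg", max.rank = 10),
           n.trees = 100, shrinkage = 0.05, interaction.depth = 2)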
7
8 gbm <- function(formula = formula(data),
9 distribution = "bernoulli",
10 data = list(),
11 weights,
12 var.monotone = NULL,
13 n.trees = 100,
14 interaction.depth = 1,
15 n.minobsinnode = 10,
16 shrinkage = 0.001,
17 bag.fraction = 0.5,
18 train.fraction = 1.0,
19 cv.folds=0,
20 keep.data = TRUE,
21 verbose = 'CV',
22 class.stratify.cv=NULL,
23 n.cores=NULL){
24 theCall <- match.call()
25
26
27 lVerbose <- if (!is.logical(verbose)) { FALSE }
28 else { verbose }
29
30 mf <- match.call(expand.dots = FALSE)
31 m <- match(c("formula", "data", "weights", "offset"), names(mf), 0)
32 mf <- mf[c(1, m)]
33 mf$drop.unused.levels <- TRUE
34 mf$na.action <- na.pass
35 mf[[1]] <- as.name("model.frame")
36 m <- mf
37 mf <- eval(mf, parent.frame())
38 Terms <- attr(mf, "terms")
39
40 y <- model.response(mf)
41
42 if (missing(distribution)){ distribution <- guessDist(y) }
43 else if (is.character(distribution)){ distribution <- list(name=distribution) }
44
45 w <- model.weights(mf)
46 offset <- model.offset(mf)
47
48 var.names <- attributes(Terms)$term.labels
49 x <- model.frame(terms(reformulate(var.names)),
50 data,
51 na.action=na.pass)
52
53 # get the character name of the response variable
54 response.name <- as.character(formula[[2]])
55
56 lVerbose <- if (!is.logical(verbose)) { FALSE }
57 else { verbose }
58
59 class.stratify.cv <- getStratify(class.stratify.cv, distribution)
60
61 # groups (for pairwise distribution only)
62 group <- NULL
63 num.groups <- 0
64
65 # determine number of training instances
66 if (distribution$name != "pairwise"){
67 nTrain <- floor(train.fraction * nrow(x))
68 }
69 else {
70 # distribution$name == "pairwise":
71 # Sampling is by group, so we need to calculate them here
72 distribution.group <- distribution[["group"]]
73 if (is.null(distribution.group))
74 {
75 stop("For pairwise regression, the distribution parameter must be a list with a parameter 'group' for the a list of the column names indicating groups, for example list(name=\"pairwise\",group=c(\"date\",\"session\",\"category\",\"keywords\")).")
76 }
77
78 # Check if group names are valid
79 i <- match(distribution.group, colnames(data))
80 if (any(is.na(i)))
81 {
82 stop("Group column does not occur in data: ", distribution.group[is.na(i)])
83 }
84
85 # Construct group index
86 group <- factor(do.call(paste, c(data[,distribution.group, drop=FALSE], sep=":")))
87
88 # Check that weights are constant across groups
89 if ((!missing(weights)) && (!is.null(weights)))
90 {
91 w.min <- tapply(w, INDEX=group, FUN=min)
92 w.max <- tapply(w, INDEX=group, FUN=max)
93
94 if (any(w.min != w.max))
95 {
96 stop("For distribution 'pairwise', all instances for the same group must have the same weight")
97 }
98
99 # Normalize across groups
100 w <- w * length(w.min) / sum(w.min)
101 }
102
103 # Shuffle groups, to remove bias when splitting into train/test set and/or CV folds
104 perm.levels <- levels(group)[sample(1:nlevels(group))]
105 group <- factor(group, levels=perm.levels)
106
107 # The C function expects instances to be sorted by group and descending by target
108 ord.group <- order(group, -y)
109 group <- group[ord.group]
110 y <- y[ord.group]
111 x <- x[ord.group,,drop=FALSE]
112 w <- w[ord.group]
113
114 # Split into train and validation set, at group boundary
115 num.groups.train <- max(1, round(train.fraction * nlevels(group)))
116
117 # include all groups up to the num.groups.train
118 nTrain <- max(which(group==levels(group)[num.groups.train]))
119 Misc <- group
120 } # close if(distribution$name=="coxph") ...
121
122 cv.error <- NULL
123 if(cv.folds>1) {
124 cv.results <- gbmCrossVal(cv.folds, nTrain, n.cores,
125 class.stratify.cv, data,
126 x, y, offset, distribution, w, var.monotone,
127 n.trees, interaction.depth, n.minobsinnode,
128 shrinkage, bag.fraction,
129 var.names, response.name, group)
130 cv.error <- cv.results$error
131 p <- cv.results$predictions
132 } # Close if(cv.folds > 1
133
134 gbm.obj <- gbm.fit(x,y,
135 offset = offset,
136 distribution = distribution,
137 w = w,
138 var.monotone = var.monotone,
139 n.trees = n.trees,
140 interaction.depth = interaction.depth,
141 n.minobsinnode = n.minobsinnode,
142 shrinkage = shrinkage,
143 bag.fraction = bag.fraction,
144 nTrain = nTrain,
145 keep.data = keep.data,
146 verbose = lVerbose,
147 var.names = var.names,
148 response.name = response.name,
149 group = group)
150
151 gbm.obj$train.fraction <- train.fraction
152 gbm.obj$Terms <- Terms
153 gbm.obj$cv.error <- cv.error
154 gbm.obj$cv.folds <- cv.folds
155 gbm.obj$call <- theCall
156 gbm.obj$m <- m
157 if (cv.folds > 0){ gbm.obj$cv.fitted <- p }
158
159 if (distribution$name == "pairwise")
160 {
161 # Data has been reordered according to queries.
162 # We need to permute the fitted values to correspond
163 # to the original order.
164 gbm.obj$ord.group <- ord.group
165 gbm.obj$fit <- gbm.obj$fit[order(ord.group)]
166 }
167
168 return(gbm.obj)
169 }
0 gbm.fit <- function(x,y,
1 offset = NULL,
2 misc = NULL,
3 distribution = "bernoulli",
4 w = NULL,
5 var.monotone = NULL,
6 n.trees = 100,
7 interaction.depth = 1,
8 n.minobsinnode = 10,
9 shrinkage = 0.001,
10 bag.fraction = 0.5,
11 nTrain = NULL,
12 train.fraction = NULL,
13 keep.data = TRUE,
14 verbose = TRUE,
15 var.names = NULL,
16 response.name = "y",
17 group = NULL)
18 {
19
20 if(is.character(distribution)) { distribution <- list(name=distribution) }
21
22 cRows <- nrow(x)
23 cCols <- ncol(x)
24
25 if(nrow(x) != ifelse(class(y)=="Surv", nrow(y), length(y))) {
26 stop("The number of rows in x does not equal the length of y.")
27 }
28
29 # the preferred way to specify the number of training instances is via parameter 'nTrain'.
30 # parameter 'train.fraction' is only maintained for backward compatibility.
31
32 if(!is.null(nTrain) && !is.null(train.fraction)) {
33 stop("Parameters 'nTrain' and 'train.fraction' cannot both be specified")
34 }
35 else if(!is.null(train.fraction)) {
36 warning("Parameter 'train.fraction' of gbm.fit is deprecated, please specify 'nTrain' instead")
37 nTrain <- floor(train.fraction*cRows)
38 }
39 else if(is.null(nTrain)) {
40 # both undefined, use all training data
41 nTrain <- cRows
42 }
43
44 if (is.null(train.fraction)){
45 train.fraction <- nTrain / cRows
46 }
47
48 if(is.null(var.names)) {
49 var.names <- getVarNames(x)
50 }
51
52 # if(is.null(response.name)) { response.name <- "y" }
53
54 # check dataset size
55 if(nTrain * bag.fraction <= 2*n.minobsinnode+1) {
56 stop("The dataset size is too small or subsampling rate is too large: nTrain*bag.fraction <= n.minobsinnode")
57 }
58
59 if (distribution$name != "pairwise") {
60 w <- w*length(w)/sum(w) # normalize to N
61 }
62
63 # Do sanity checks
64 ch <- checkMissing(x, y)
65 interaction.depth <- checkID(interaction.depth)
66 w <- checkWeights(w, length(y))
67 offset <- checkOffset(offset, y)
68
69 Misc <- NA
70
71 # setup variable types
72 var.type <- rep(0,cCols)
73 var.levels <- vector("list",cCols)
74 for(i in 1:length(var.type))
75 {
76 if(all(is.na(x[,i])))
77 {
78 stop("variable ",i,": ",var.names[i]," has only missing values.")
0 #' Generalized Boosted Regression Modeling (GBM)
1 #'
2 #' Workhorse function providing the link between R and the C++ gbm engine.
3 #' \code{gbm} is a front-end to \code{gbm.fit} that uses the familiar R
4 #' modeling formulas. However, \code{\link[stats]{model.frame}} is very slow if
5 #' there are many predictor variables. For power-users with many variables use
6 #' \code{gbm.fit}. For general practice \code{gbm} is preferable.
7 #'
8 #' @param x A data frame or matrix containing the predictor variables. The
9 #' number of rows in \code{x} must be the same as the length of \code{y}.
10 #'
11 #' @param y A vector of outcomes. The number of rows in \code{x} must be the
12 #' same as the length of \code{y}.
13 #'
14 #' @param offset A vector of offset values.
15 #'
16 #' @param misc An R object that is simply passed on to the gbm engine. It can be
17 #' used for additional data for the specific distribution. Currently it is only
18 #' used for passing the censoring indicator for the Cox proportional hazards
19 #' model.
20 #'
21 #' @param distribution Either a character string specifying the name of the
22 #' distribution to use or a list with a component \code{name} specifying the
23 #' distribution and any additional parameters needed. If not specified,
24 #' \code{gbm} will try to guess: if the response has only 2 unique values,
25 #' bernoulli is assumed; otherwise, if the response is a factor, multinomial is
26 #' assumed; otherwise, if the response has class \code{"Surv"}, coxph is
27 #' assumed; otherwise, gaussian is assumed.
28 #'
29 #' Currently available options are \code{"gaussian"} (squared error),
30 #' \code{"laplace"} (absolute loss), \code{"tdist"} (t-distribution loss),
31 #' \code{"bernoulli"} (logistic regression for 0-1 outcomes),
32 #' \code{"huberized"} (huberized hinge loss for 0-1 outcomes),
33 #' \code{"adaboost"} (the AdaBoost exponential loss for 0-1 outcomes),
34 #' \code{"poisson"} (count outcomes), \code{"coxph"} (right censored
35 #' observations), \code{"quantile"}, or \code{"pairwise"} (ranking measure
36 #' using the LambdaMart algorithm).
37 #'
38 #' If quantile regression is specified, \code{distribution} must be a list of
39 #' the form \code{list(name = "quantile", alpha = 0.25)} where \code{alpha} is
40 #' the quantile to estimate. The current version's quantile regression method
41 #' does not handle non-constant weights and will stop.
42 #'
43 #' If \code{"tdist"} is specified, the default degrees of freedom is 4 and
44 #' this can be controlled by specifying
45 #' \code{distribution = list(name = "tdist", df = DF)} where \code{DF} is your
46 #' chosen degrees of freedom.
47 #'
48 #' If "pairwise" regression is specified, \code{distribution} must be a list of
49 #' the form \code{list(name="pairwise",group=...,metric=...,max.rank=...)}
50 #' (\code{metric} and \code{max.rank} are optional, see below). \code{group} is
51 #' a character vector with the column names of \code{data} that jointly
52 #' indicate the group an instance belongs to (typically a query in Information
53 #' Retrieval applications). For training, only pairs of instances from the same
54 #' group and with different target labels can be considered. \code{metric} is
55 #' the IR measure to use, one of
56 #' \describe{
57 #'   \item{conc}{Fraction of concordant pairs; for binary labels, this
58 #'   is equivalent to the Area under the ROC Curve}
59 #'   \item{mrr}{Mean reciprocal rank of the highest-ranked positive
60 #'   instance}
61 #'   \item{map}{Mean average precision, a generalization of \code{mrr}
62 #'   to multiple positive instances}
63 #'   \item{ndcg}{Normalized discounted cumulative gain. The score is
64 #'   the weighted sum (DCG) of the user-supplied target values, weighted
65 #'   by log(rank+1), and normalized to the maximum achievable value. This
66 #'   is the default if the user did not specify a metric.}
67 #' }
72 #'
73 #' \code{ndcg} and \code{conc} allow arbitrary target values, while binary
74 #' targets {0,1} are expected for \code{map} and \code{mrr}. For \code{ndcg}
75 #' and \code{mrr}, a cut-off can be chosen using a positive integer parameter
76 #' \code{max.rank}. If left unspecified, all ranks are taken into account.
77 #'
78 #' Note that splitting of instances into training and validation sets follows
79 #' group boundaries and therefore only approximates the specified
80 #' \code{train.fraction} ratio (the same applies to cross-validation folds).
81 #' Internally, queries are randomly shuffled before training, to avoid bias.
82 #'
83 #' Weights can be used in conjunction with pairwise metrics, however it is
84 #' assumed that they are constant for instances from the same group.
85 #'
86 #' For details and background on the algorithm, see e.g. Burges (2010).
87 #'
88 #' @param w A vector of weights of the same length as the \code{y}.
89 #'
90 #' @param var.monotone an optional vector, the same length as the number of
91 #' predictors, indicating which variables have a monotone increasing (+1),
92 #' decreasing (-1), or arbitrary (0) relationship with the outcome.
93 #'
94 #' @param n.trees the total number of trees to fit. This is equivalent to the
95 #' number of iterations and the number of basis functions in the additive
96 #' expansion.
97 #'
98 #' @param interaction.depth The maximum depth of variable interactions. A value
99 #' of 1 implies an additive model, a value of 2 implies a model with up to 2-way
100 #' interactions, etc. Default is \code{1}.
101 #'
102 #' @param n.minobsinnode Integer specifying the minimum number of observations
103 #' in the trees' terminal nodes. Note that this is the actual number of
104 #' observations, not the total weight.
105 #'
106 #' @param shrinkage The shrinkage parameter applied to each tree in the
107 #' expansion. Also known as the learning rate or step-size reduction; 0.001 to
108 #' 0.1 usually work, but a smaller learning rate typically requires more trees.
109 #' Default is \code{0.001} for \code{gbm.fit}.
110 #'
111 #' @param bag.fraction The fraction of the training set observations randomly
112 #' selected to propose the next tree in the expansion. This introduces
113 #' randomness into the model fit. If \code{bag.fraction} < 1 then running the
114 #' same model twice will result in similar but different fits. \code{gbm} uses
115 #' the R random number generator so \code{set.seed} can ensure that the model
116 #' can be reconstructed. Preferably, the user can save the returned
117 #' \code{\link{gbm.object}} using \code{\link{save}}. Default is \code{0.5}.
118 #'
119 #' @param nTrain An integer representing the number of cases on which to train.
120 #' This is the preferred way of specifying the training set size for \code{gbm.fit}; the option
121 #' \code{train.fraction} in \code{gbm.fit} is deprecated and only maintained
122 #' for backward compatibility. These two parameters are mutually exclusive. If
123 #' both are unspecified, all data is used for training.
124 #'
125 #' @param train.fraction The first \code{train.fraction * nrow(data)}
126 #' observations are used to fit the \code{gbm} and the remainder are used for
127 #' computing out-of-sample estimates of the loss function.
128 #'
129 #' @param keep.data Logical indicating whether or not to keep the data and an
130 #' index of the data stored with the object. Keeping the data and index makes
131 #' subsequent calls to \code{\link{gbm.more}} faster at the cost of storing an
132 #' extra copy of the dataset.
133 #'
134 #' @param verbose Logical indicating whether or not to print out progress and
135 #' performance indicators (\code{TRUE}). If this option is left unspecified for
136 #' \code{gbm.more}, then it uses \code{verbose} from \code{object}. Default is
137 #' \code{FALSE}.
138 #'
139 #' @param var.names Vector of strings of length equal to the number of columns
140 #' of \code{x} containing the names of the predictor variables.
141 #'
142 #' @param response.name Character string label for the response variable.
143 #'
144 #' @param group The \code{group} to use when \code{distribution = "pairwise"}.
145 #'
146 #' @return A \code{\link{gbm.object}} object.
147 #'
148 #' @details
149 #' This package implements the generalized boosted modeling framework. Boosting
150 #' is the process of iteratively adding basis functions in a greedy fashion so
151 #' that each additional basis function further reduces the selected loss
152 #' function. This implementation closely follows Friedman's Gradient Boosting
153 #' Machine (Friedman, 2001).
154 #'
155 #' In addition to many of the features documented in the Gradient Boosting
156 #' Machine, \code{gbm} offers additional features including the out-of-bag
157 #' estimator for the optimal number of iterations, the ability to store and
158 #' manipulate the resulting \code{gbm} object, and a variety of other loss
159 #' functions that had not previously had associated boosting algorithms,
160 #' including the Cox partial likelihood for censored data, the Poisson
161 #' likelihood for count outcomes, and a gradient boosting implementation to
162 #' minimize the AdaBoost exponential loss function.
163 #'
164 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
165 #'
166 #' Quantile regression code developed by Brian Kriegler
167 #' \email{bk@@stat.ucla.edu}
168 #'
169 #' t-distribution, and multinomial code developed by Harry Southworth and
170 #' Daniel Edwards
171 #'
172 #' Pairwise code developed by Stefan Schroedl \email{schroedl@@a9.com}
173 #'
174 #' @seealso \code{\link{gbm.object}}, \code{\link{gbm.perf}},
175 #' \code{\link{plot.gbm}}, \code{\link{predict.gbm}}, \code{\link{summary.gbm}},
176 #' and \code{\link{pretty.gbm.tree}}.
177 #'
178 #' @references
179 #' Y. Freund and R.E. Schapire (1997) \dQuote{A decision-theoretic
180 #' generalization of on-line learning and an application to boosting,}
181 #' \emph{Journal of Computer and System Sciences,} 55(1):119-139.
182 #'
183 #' G. Ridgeway (1999). \dQuote{The state of boosting,} \emph{Computing Science
184 #' and Statistics} 31:172-181.
185 #'
186 #' J.H. Friedman, T. Hastie, R. Tibshirani (2000). \dQuote{Additive Logistic
187 #' Regression: a Statistical View of Boosting,} \emph{Annals of Statistics}
188 #' 28(2):337-374.
189 #'
190 #' J.H. Friedman (2001). \dQuote{Greedy Function Approximation: A Gradient
191 #' Boosting Machine,} \emph{Annals of Statistics} 29(5):1189-1232.
192 #'
193 #' J.H. Friedman (2002). \dQuote{Stochastic Gradient Boosting,}
194 #' \emph{Computational Statistics and Data Analysis} 38(4):367-378.
195 #'
196 #' B. Kriegler (2007). Cost-Sensitive Stochastic Gradient Boosting Within a
197 #' Quantitative Regression Framework. Ph.D. Dissertation. University of
198 #' California at Los Angeles, Los Angeles, CA, USA. Advisor(s) Richard A. Berk.
199 #' \url{https://dl.acm.org/citation.cfm?id=1354603}.
200 #'
201 #' C. Burges (2010). \dQuote{From RankNet to LambdaRank to LambdaMART: An
202 #' Overview,} Microsoft Research Technical Report MSR-TR-2010-82.
203 #'
204 #' @export
205 gbm.fit <- function(x, y, offset = NULL, misc = NULL, distribution = "bernoulli",
206 w = NULL, var.monotone = NULL, n.trees = 100,
207 interaction.depth = 1, n.minobsinnode = 10,
208 shrinkage = 0.001, bag.fraction = 0.5, nTrain = NULL,
209 train.fraction = NULL, keep.data = TRUE, verbose = TRUE,
210 var.names = NULL, response.name = "y", group = NULL) {
211
212 # Reformat distribution into a named list
213 if(is.character(distribution)) {
214 distribution <- list(name = distribution)
215 }
216
217 # Dimensions of predictor data
218 cRows <- nrow(x)
219 cCols <- ncol(x)
220
221   if(nrow(x) != ifelse(inherits(y, "Surv"), nrow(y), length(y))) {
222 stop("The number of rows in x does not equal the length of y.")
223 }
224
225 # The preferred way to specify the number of training instances is via the
226 # parameter `nTrain`. The parameter `train.fraction` is only maintained for
227   # backward compatibility.
228 if(!is.null(nTrain) && !is.null(train.fraction)) {
229 stop("Parameters `nTrain` and `train.fraction` cannot both be specified.")
230 } else if(!is.null(train.fraction)) {
231 warning("Parameter `train.fraction` is deprecated, please specify ",
232 "`nTrain` instead.")
233 nTrain <- floor(train.fraction*cRows)
234 } else if(is.null(nTrain)) {
235 nTrain <- cRows # both undefined, use all training data
236 }
237 if (is.null(train.fraction)){
238 train.fraction <- nTrain / cRows
239 }
240
241 # Extract var.names if NULL
242 if(is.null(var.names)) {
243 var.names <- getVarNames(x)
244 }
245
246 # Check size of data
247 if(nTrain * bag.fraction <= 2 * n.minobsinnode + 1) {
248 stop("The data set is too small or the subsampling rate is too large: ",
249          "`nTrain * bag.fraction <= 2 * n.minobsinnode + 1`")
250 }
251
252 if (distribution$name != "pairwise") {
253 w <- w * length(w) / sum(w) # normalize to N
254 }
255
256 # Sanity checks
257 ch <- checkMissing(x, y)
258 interaction.depth <- checkID(interaction.depth)
259 w <- checkWeights(w, length(y))
260 offset <- checkOffset(offset, y)
261
262 Misc <- NA
263
264 # setup variable types
265 var.type <- rep(0,cCols)
266 var.levels <- vector("list",cCols)
267 for(i in 1:length(var.type))
268 {
269 if(all(is.na(x[,i])))
270 {
271 stop("variable ",i,": ",var.names[i]," has only missing values.")
272 }
273 if(is.ordered(x[,i]))
274 {
275 var.levels[[i]] <- levels(factor(x[,i]))
276 x[,i] <- as.numeric(factor(x[,i]))-1
277 var.type[i] <- 0
278 }
279 else if(is.factor(x[,i]))
280 {
281 if(length(levels(x[,i]))>1024)
282 stop("gbm does not currently handle categorical variables with more than 1024 levels. Variable ",i,": ",var.names[i]," has ",length(levels(x[,i]))," levels.")
283 var.levels[[i]] <- levels(factor(x[,i]))
284 x[,i] <- as.numeric(factor(x[,i]))-1
285 var.type[i] <- max(x[,i],na.rm=TRUE)+1
286 }
287 else if(is.numeric(x[,i]))
288 {
289 var.levels[[i]] <- quantile(x[,i],prob=(0:10)/10,na.rm=TRUE)
290 }
291 else
292 {
293 stop("variable ",i,": ",var.names[i]," is not of type numeric, ordered, or factor.")
294 }
295
296 # check for some variation in each variable
297 if(length(unique(var.levels[[i]])) == 1)
298 {
299 warning("variable ",i,": ",var.names[i]," has no variation.")
300 }
301 }
302
303 nClass <- 1
304
305 if(!("name" %in% names(distribution))) {
306 stop("The distribution is missing a `name` component; for example, ",
307 "distribution = list(name = \"gaussian\").")
308 }
309 supported.distributions <- getAvailableDistributions()
310 distribution.call.name <- distribution$name
311
312 # Check for potential problems with the distribution
313 if(!is.element(distribution$name,supported.distributions)) {
314 stop("Distribution ",distribution$name," is not supported")
315 }
316   # Convert a two-level factor response to 0/1 before validating it for the
317   # 0-1 outcome distributions
318   if (distribution$name %in% c("bernoulli", "huberized", "adaboost")) {
319     if (is.factor(y)) {
320       y <- as.integer(y) - 1
321     }
322   }
323   if((distribution$name == "bernoulli") && !all(is.element(y,0:1))) {
324     stop("Bernoulli requires the response to be in {0,1}")
325   }
326   if((distribution$name == "huberized") && !all(is.element(y,0:1))) {
327     stop("Huberized square hinged loss requires the response to be in {0,1}")
328   }
329   if((distribution$name == "poisson") && any(y<0)) {
330     stop("Poisson requires the response to be positive")
331   }
332   if((distribution$name == "poisson") && any(y != trunc(y))) {
333     stop("Poisson requires the response to be a positive integer")
334   }
335   if((distribution$name == "adaboost") && !all(is.element(y,0:1))) {
336     stop("This version of AdaBoost requires the response to be in {0,1}")
337   }
340 if(distribution$name == "quantile") {
341 if(length(unique(w)) > 1) {
342 stop("This version of gbm for the quantile regression lacks a weighted quantile. For now the weights must be constant.")
343 }
344 if(is.null(distribution$alpha)) {
345 stop("For quantile regression, the distribution parameter must be a list with a parameter 'alpha' indicating the quantile, for example list(name=\"quantile\",alpha=0.95).")
346 } else {
347 if((distribution$alpha < 0) || (distribution$alpha > 1)) {
348 stop("alpha must be between 0 and 1.")
79349 }
80 if(is.ordered(x[,i]))
81 {
82 var.levels[[i]] <- levels(x[,i])
83 x[,i] <- as.numeric(x[,i])-1
84 var.type[i] <- 0
350 }
351 Misc <- c(alpha=distribution$alpha)
352 }
353 if(distribution$name == "coxph") {
354     if(!inherits(y, "Surv")) {
355 stop("Outcome must be a survival object Surv(time,failure)")
356 }
357 if(attr(y,"type")!="right") {
358 stop("gbm() currently only handles right censored observations")
359 }
360 Misc <- y[,2]
361 y <- y[,1]
362
363 # reverse sort the failure times to compute risk sets on the fly
364 i.train <- order(-y[1:nTrain])
365 n.test <- cRows - nTrain
366 if(n.test > 0) {
367 i.test <- order(-y[(nTrain+1):cRows]) + nTrain
368 }
369 else {
370 i.test <- NULL
371 }
372 i.timeorder <- c(i.train,i.test)
373
374 y <- y[i.timeorder]
375 Misc <- Misc[i.timeorder]
376 x <- x[i.timeorder,,drop=FALSE]
377 w <- w[i.timeorder]
378 if(!is.na(offset)) offset <- offset[i.timeorder]
379 }
380 if(distribution$name == "tdist") {
381 if (is.null(distribution$df) || !is.numeric(distribution$df)){
382 Misc <- 4
383 }
384 else {
385 Misc <- distribution$df[1]
386 }
387 }
388 if (distribution$name == "multinomial") {
389 ## Ensure that the training set contains all classes
390 classes <- attr(factor(y), "levels")
391 nClass <- length(classes)
392
393 if (nClass > nTrain) {
394 stop(paste("Number of classes (", nClass, ") must be less than the",
395 " size of the training set (", nTrain, ").", sep = ""))
396 }
397
398 new.idx <- as.vector(sapply(classes, function(a,x){ min((1:length(x))[x==a]) }, y))
399
400 all.idx <- 1:length(y)
401 new.idx <- c(new.idx, all.idx[!(all.idx %in% new.idx)])
402
403 y <- y[new.idx]
404 x <- x[new.idx, ]
405 w <- w[new.idx]
406 if (!is.null(offset)) {
407 offset <- offset[new.idx]
408 }
409
410 ## Get the factors
411 y <- as.numeric(as.vector(outer(y, classes, "==")))
412
413 ## Fill out the weight and offset
414 w <- rep(w, nClass)
415 if (!is.null(offset)) {
416 offset <- rep(offset, nClass)
417 }
418 } # close if (dist... == "multinomial"
419
420 if(distribution$name == "pairwise") {
421 distribution.metric <- distribution[["metric"]]
422 if (!is.null(distribution.metric)) {
423 distribution.metric <- tolower(distribution.metric)
424 supported.metrics <- c("conc", "ndcg", "map", "mrr")
425 if (!is.element(distribution.metric, supported.metrics)) {
426 stop("Metric '", distribution.metric, "' is not supported, use either 'conc', 'ndcg', 'map', or 'mrr'")
85427 }
86 else if(is.factor(x[,i]))
87 {
88 if(length(levels(x[,i]))>1024)
89 stop("gbm does not currently handle categorical variables with more than 1024 levels. Variable ",i,": ",var.names[i]," has ",length(levels(x[,i]))," levels.")
90 var.levels[[i]] <- levels(x[,i])
91 x[,i] <- as.numeric(x[,i])-1
92 var.type[i] <- max(x[,i],na.rm=TRUE)+1
93 }
94 else if(is.numeric(x[,i]))
95 {
96 var.levels[[i]] <- quantile(x[,i],prob=(0:10)/10,na.rm=TRUE)
97 }
98 else
99 {
100 stop("variable ",i,": ",var.names[i]," is not of type numeric, ordered, or factor.")
101 }
102
103 # check for some variation in each variable
104 if(length(unique(var.levels[[i]])) == 1)
105 {
106 warning("variable ",i,": ",var.names[i]," has no variation.")
107 }
108 }
109
110 nClass <- 1
111
112 if(!("name" %in% names(distribution))) {
113 stop("The distribution is missing a 'name' component, for example list(name=\"gaussian\")")
114 }
115 supported.distributions <-
116 c("bernoulli","gaussian","poisson","adaboost","laplace","coxph","quantile",
117 "tdist", "multinomial", "huberized", "pairwise")
118
119 distribution.call.name <- distribution$name
120
121 # check potential problems with the distributions
122 if(!is.element(distribution$name,supported.distributions))
123 {
124 stop("Distribution ",distribution$name," is not supported")
125 }
126 if((distribution$name == "bernoulli") && !all(is.element(y,0:1)))
127 {
128 stop("Bernoulli requires the response to be in {0,1}")
129 }
130 if((distribution$name == "huberized") && !all(is.element(y,0:1)))
131 {
132 stop("Huberized square hinged loss requires the response to be in {0,1}")
133 }
134 if((distribution$name == "poisson") && any(y<0))
135 {
136 stop("Poisson requires the response to be positive")
137 }
138 if((distribution$name == "poisson") && any(y != trunc(y)))
139 {
140 stop("Poisson requires the response to be a positive integer")
141 }
142 if((distribution$name == "adaboost") && !all(is.element(y,0:1)))
143 {
144 stop("This version of AdaBoost requires the response to be in {0,1}")
145 }
146 if(distribution$name == "quantile")
147 {
148 if(length(unique(w)) > 1)
149 {
150 stop("This version of gbm for the quantile regression lacks a weighted quantile. For now the weights must be constant.")
151 }
152 if(is.null(distribution$alpha))
153 {
154 stop("For quantile regression, the distribution parameter must be a list with a parameter 'alpha' indicating the quantile, for example list(name=\"quantile\",alpha=0.95).")
155 } else
156 if((distribution$alpha<0) || (distribution$alpha>1))
157 {
158 stop("alpha must be between 0 and 1.")
159 }
160 Misc <- c(alpha=distribution$alpha)
161 }
162 if(distribution$name == "coxph")
163 {
164 if(class(y)!="Surv")
165 {
166 stop("Outcome must be a survival object Surv(time,failure)")
167 }
168 if(attr(y,"type")!="right")
169 {
170 stop("gbm() currently only handles right censored observations")
171 }
172 Misc <- y[,2]
173 y <- y[,1]
174
175 # reverse sort the failure times to compute risk sets on the fly
176 i.train <- order(-y[1:nTrain])
177 n.test <- cRows - nTrain
178 if(n.test > 0)
179 {
180 i.test <- order(-y[(nTrain+1):cRows]) + nTrain
181 }
182 else
183 {
184 i.test <- NULL
185 }
186 i.timeorder <- c(i.train,i.test)
187
188 y <- y[i.timeorder]
189 Misc <- Misc[i.timeorder]
190 x <- x[i.timeorder,,drop=FALSE]
191 w <- w[i.timeorder]
192 if(!is.na(offset)) offset <- offset[i.timeorder]
193 }
194 if(distribution$name == "tdist")
195 {
196 if (is.null(distribution$df) || !is.numeric(distribution$df)){
197 Misc <- 4
428 metric <- distribution.metric
429 } else {
430 warning("No metric specified, using 'ndcg'")
431 metric <- "ndcg" # default
432 distribution[["metric"]] <- metric
433 }
434
435 if (any(y<0)) {
436 stop("targets for 'pairwise' should be non-negative")
437 }
438
439 if (is.element(metric, c("mrr", "map")) && (!all(is.element(y, 0:1)))) {
440 stop("Metrics 'map' and 'mrr' require the response to be in {0,1}")
441 }
442
443 # Cut-off rank for metrics
444 # Default of 0 means no cutoff
445
446 max.rank <- 0
447 if (!is.null(distribution[["max.rank"]]) && distribution[["max.rank"]] > 0) {
448 if (is.element(metric, c("ndcg", "mrr"))) {
449 max.rank <- distribution[["max.rank"]]
198450 }
199451 else {
200 Misc <- distribution$df[1]
452 stop("Parameter 'max.rank' cannot be specified for metric '", distribution.metric, "', only supported for 'ndcg' and 'mrr'")
201453 }
202 }
203 if (distribution$name == "multinomial")
204 {
205 ## Ensure that the training set contains all classes
206 classes <- attr(factor(y), "levels")
207 nClass <- length(classes)
454 }
455
456 # We pass the cut-off rank to the C function as the last element in the Misc vector
457 Misc <- c(group, max.rank)
458
459 distribution.call.name <- sprintf("pairwise_%s", metric)
460 } # close if (dist... == "pairwise"
461
462   # Create index upfront... subtract one for 0-based order
463 x.order <- apply(x[1:nTrain,,drop=FALSE],2,order,na.last=FALSE)-1
464
465 x <- as.vector(data.matrix(x))
466 predF <- rep(0,length(y))
467 train.error <- rep(0,n.trees)
468 valid.error <- rep(0,n.trees)
469 oobag.improve <- rep(0,n.trees)
470
471 if(is.null(var.monotone)) {
472 var.monotone <- rep(0,cCols)
473 } else if(length(var.monotone)!=cCols) {
474 stop("Length of var.monotone != number of predictors")
475 } else if(!all(is.element(var.monotone,-1:1))) {
476 stop("var.monotone must be -1, 0, or 1")
477 }
478 fError <- FALSE
479
480 gbm.obj <- .Call("gbm_fit",
481 Y=as.double(y),
482 Offset=as.double(offset),
483 X=as.double(x),
484 X.order=as.integer(x.order),
485 weights=as.double(w),
486 Misc=as.double(Misc),
487 cRows=as.integer(cRows),
488 cCols=as.integer(cCols),
489 var.type=as.integer(var.type),
490 var.monotone=as.integer(var.monotone),
491 distribution=as.character(distribution.call.name),
492 n.trees=as.integer(n.trees),
493 interaction.depth=as.integer(interaction.depth),
494 n.minobsinnode=as.integer(n.minobsinnode),
495 n.classes = as.integer(nClass),
496 shrinkage=as.double(shrinkage),
497 bag.fraction=as.double(bag.fraction),
498 nTrain=as.integer(nTrain),
499 fit.old=as.double(NA),
500 n.cat.splits.old=as.integer(0),
501 n.trees.old=as.integer(0),
502 verbose=as.integer(verbose),
503 PACKAGE = "gbm")
504
505 names(gbm.obj) <- c("initF","fit","train.error","valid.error",
506 "oobag.improve","trees","c.splits")
507
508 gbm.obj$bag.fraction <- bag.fraction
509 gbm.obj$distribution <- distribution
510 gbm.obj$interaction.depth <- interaction.depth
511 gbm.obj$n.minobsinnode <- n.minobsinnode
512 gbm.obj$num.classes <- nClass
513 gbm.obj$n.trees <- length(gbm.obj$trees) / nClass
514 gbm.obj$nTrain <- nTrain
515 gbm.obj$train.fraction <- train.fraction
516 gbm.obj$response.name <- response.name
517 gbm.obj$shrinkage <- shrinkage
518 gbm.obj$var.levels <- var.levels
519 gbm.obj$var.monotone <- var.monotone
520 gbm.obj$var.names <- var.names
521 gbm.obj$var.type <- var.type
522 gbm.obj$verbose <- verbose
523 gbm.obj$Terms <- NULL
524
525 if(distribution$name == "coxph") {
526 gbm.obj$fit[i.timeorder] <- gbm.obj$fit
527 }
528 ## If K-Classification is used then split the fit and tree components
529 if (distribution$name == "multinomial") {
530 gbm.obj$fit <- matrix(gbm.obj$fit, ncol = nClass)
531 dimnames(gbm.obj$fit)[[2]] <- classes
532 gbm.obj$classes <- classes
533
534 ## Also get the class estimators
535 exp.f <- exp(gbm.obj$fit)
536 denom <- matrix(rep(rowSums(exp.f), nClass), ncol = nClass)
537 gbm.obj$estimator <- exp.f/denom
538 }
539
540 if(keep.data) {
541 if(distribution$name == "coxph") {
542 # Put the observations back in order
543 gbm.obj$data <- list(
544 y = y,
545 x = x,
546 x.order = x.order,
547 offset = offset,
548 Misc = Misc,
549 w = w,
550 i.timeorder = i.timeorder
551 )
552 }
553 else if ( distribution$name == "multinomial" ) {
554 # Restore original order of the data
555 new.idx <- order(new.idx)
556 gbm.obj$data <- list(
557 y = as.vector(matrix(y, ncol = length(classes), byrow = FALSE)[new.idx, ]),
558 x = as.vector(matrix(x, ncol = length(var.names), byrow = FALSE)[new.idx, ]),
559 x.order = x.order,
560 offset = offset[new.idx],
561 Misc = Misc,
562 w = w[new.idx]
563 )
564 } else {
565 gbm.obj$data <- list(
566 y = y,
567 x = x,
568 x.order = x.order,
569 offset = offset,
570 Misc = Misc,
571 w = w
572 )
573 }
574 }
575 else {
576 gbm.obj$data <- NULL
577 }
578
579   # Return object of class "gbm"
580 class(gbm.obj) <- "gbm"
581 gbm.obj
582
583 }
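# A small, simulated sketch of the matrix/data-frame interface documented
# above; `nTrain` is used in place of the deprecated `train.fraction`.
set.seed(2)
X <- data.frame(x1 = runif(200), x2 = runif(200))
y <- X$x1 + 2 * sqrt(X$x2) + rnorm(200, sd = 0.1)
fit2 <- gbm.fit(x = X, y = y, distribution = "gaussian",
                n.trees = 200, shrinkage = 0.05, interaction.depth = 2,
                nTrain = 150, verbose = FALSE)
which.min(fit2$valid.error)  # best iteration on the 50 held-out rows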
208584
209 if (nClass > nTrain){
210 stop(paste("Number of classes (", nClass,
211 ") must be less than the size of the training set (", nTrain, ")",
212 sep = ""))
213 }
214
215 # f <- function(a,x){
216 # min((1:length(x))[x==a])
217 # }
218
219 new.idx <- as.vector(sapply(classes, function(a,x){ min((1:length(x))[x==a]) }, y))
220
221 all.idx <- 1:length(y)
222 new.idx <- c(new.idx, all.idx[!(all.idx %in% new.idx)])
223
224 y <- y[new.idx]
225 x <- x[new.idx, ]
226 w <- w[new.idx]
227 if (!is.null(offset)){
228 offset <- offset[new.idx]
229 }
230
231 ## Get the factors
232 y <- as.numeric(as.vector(outer(y, classes, "==")))
233
234 ## Fill out the weight and offset
235 w <- rep(w, nClass)
236 if (!is.null(offset)){
237 offset <- rep(offset, nClass)
238 }
239 } # close if (dist... == "multinomial"
240
241 if(distribution$name == "pairwise")
242 {
243 distribution.metric <- distribution[["metric"]]
244 if (!is.null(distribution.metric))
245 {
246 distribution.metric <- tolower(distribution.metric)
247 supported.metrics <- c("conc", "ndcg", "map", "mrr")
248 if (!is.element(distribution.metric, supported.metrics))
249 {
250 stop("Metric '", distribution.metric, "' is not supported, use either 'conc', 'ndcg', 'map', or 'mrr'")
251 }
252 metric <- distribution.metric
253 }
254 else
255 {
256 warning("No metric specified, using 'ndcg'")
257 metric <- "ndcg" # default
258 distribution[["metric"]] <- metric
259 }
260
261 if (any(y<0))
262 {
263 stop("targets for 'pairwise' should be non-negative")
264 }
265
266 if (is.element(metric, c("mrr", "map")) && (!all(is.element(y, 0:1))))
267 {
268 stop("Metrics 'map' and 'mrr' require the response to be in {0,1}")
269 }
270
271 # Cut-off rank for metrics
272 # Default of 0 means no cutoff
273
274 max.rank <- 0
275 if (!is.null(distribution[["max.rank"]]) && distribution[["max.rank"]] > 0)
276 {
277 if (is.element(metric, c("ndcg", "mrr")))
278 {
279 max.rank <- distribution[["max.rank"]]
280 }
281 else
282 {
283 stop("Parameter 'max.rank' cannot be specified for metric '", distribution.metric, "', only supported for 'ndcg' and 'mrr'")
284 }
285 }
286
287 # We pass the cut-off rank to the C function as the last element in the Misc vector
288 Misc <- c(group, max.rank)
289
290 distribution.call.name <- sprintf("pairwise_%s", metric)
291 } # close if (dist... == "pairwise"
292
293 # create index upfront... subtract one for 0 based order
294 x.order <- apply(x[1:nTrain,,drop=FALSE],2,order,na.last=FALSE)-1
295
296 x <- as.vector(data.matrix(x))
297 predF <- rep(0,length(y))
298 train.error <- rep(0,n.trees)
299 valid.error <- rep(0,n.trees)
300 oobag.improve <- rep(0,n.trees)
301
302 if(is.null(var.monotone)) var.monotone <- rep(0,cCols)
303 else if(length(var.monotone)!=cCols)
304 {
305 stop("Length of var.monotone != number of predictors")
306 }
307 else if(!all(is.element(var.monotone,-1:1)))
308 {
309 stop("var.monotone must be -1, 0, or 1")
310 }
311 fError <- FALSE
312
313 gbm.obj <- .Call("gbm",
314 Y=as.double(y),
315 Offset=as.double(offset),
316 X=as.double(x),
317 X.order=as.integer(x.order),
318 weights=as.double(w),
319 Misc=as.double(Misc),
320 cRows=as.integer(cRows),
321 cCols=as.integer(cCols),
322 var.type=as.integer(var.type),
323 var.monotone=as.integer(var.monotone),
324 distribution=as.character(distribution.call.name),
325 n.trees=as.integer(n.trees),
326 interaction.depth=as.integer(interaction.depth),
327 n.minobsinnode=as.integer(n.minobsinnode),
328 n.classes = as.integer(nClass),
329 shrinkage=as.double(shrinkage),
330 bag.fraction=as.double(bag.fraction),
331 nTrain=as.integer(nTrain),
332 fit.old=as.double(NA),
333 n.cat.splits.old=as.integer(0),
334 n.trees.old=as.integer(0),
335 verbose=as.integer(verbose),
336 PACKAGE = "gbm")
337
338 names(gbm.obj) <- c("initF","fit","train.error","valid.error",
339 "oobag.improve","trees","c.splits")
340
341 gbm.obj$bag.fraction <- bag.fraction
342 gbm.obj$distribution <- distribution
343 gbm.obj$interaction.depth <- interaction.depth
344 gbm.obj$n.minobsinnode <- n.minobsinnode
345 gbm.obj$num.classes <- nClass
346 gbm.obj$n.trees <- length(gbm.obj$trees) / nClass
347 gbm.obj$nTrain <- nTrain
348 gbm.obj$train.fraction <- train.fraction
349 gbm.obj$response.name <- response.name
350 gbm.obj$shrinkage <- shrinkage
351 gbm.obj$var.levels <- var.levels
352 gbm.obj$var.monotone <- var.monotone
353 gbm.obj$var.names <- var.names
354 gbm.obj$var.type <- var.type
355 gbm.obj$verbose <- verbose
356 gbm.obj$Terms <- NULL
357
358 if(distribution$name == "coxph")
359 {
360 gbm.obj$fit[i.timeorder] <- gbm.obj$fit
361 }
362 ## If K-Classification is used then split the fit and tree components
363 if (distribution$name == "multinomial"){
364 gbm.obj$fit <- matrix(gbm.obj$fit, ncol = nClass)
365 dimnames(gbm.obj$fit)[[2]] <- classes
366 gbm.obj$classes <- classes
367
368 ## Also get the class estimators
369 exp.f <- exp(gbm.obj$fit)
370 denom <- matrix(rep(rowSums(exp.f), nClass), ncol = nClass)
371 gbm.obj$estimator <- exp.f/denom
372 }
373
374 if(keep.data)
375 {
376 if(distribution$name == "coxph")
377 {
378 # put the observations back in order
379 gbm.obj$data <- list(y=y,x=x,x.order=x.order,offset=offset,Misc=Misc,w=w,
380 i.timeorder=i.timeorder)
381 }
382 else if ( distribution$name == "multinomial" ){
383 # Restore original order of the data
384 new.idx <- order( new.idx )
385 gbm.obj$data <- list( y=as.vector(matrix(y, ncol=length(classes),byrow=FALSE)[new.idx,]),
386 x=as.vector(matrix(x, ncol=length(var.names), byrow=FALSE)[new.idx,]),
387 x.order=x.order,
388 offset=offset[new.idx],
389 Misc=Misc, w=w[new.idx] )
390 }
391 else
392 {
393 gbm.obj$data <- list(y=y,x=x,x.order=x.order,offset=offset,Misc=Misc,w=w)
394 }
395 }
396 else
397 {
398 gbm.obj$data <- NULL
399 }
400
401 class(gbm.obj) <- "gbm"
402 return(gbm.obj)
403 }
+0
-35
R/gbm.loss.R less more
0 gbm.loss <- function(y, f, w, offset, dist, baseline, group=NULL, max.rank=NULL)
1 {
2 if (!is.na(offset))
3 {
4 f <- offset+f
5 }
6
7 if (dist$name != "pairwise")
8 {
9 switch(dist$name,
10 gaussian = weighted.mean((y - f)^2,w) - baseline,
11 bernoulli = -2*weighted.mean(y*f - log(1+exp(f)),w) - baseline,
12 laplace = weighted.mean(abs(y-f),w) - baseline,
13 adaboost = weighted.mean(exp(-(2*y-1)*f),w) - baseline,
14 poisson = -2*weighted.mean(y*f-exp(f),w) - baseline,
15 stop(paste("Distribution",dist$name,"is not yet supported for method=permutation.test.gbm")))
16 }
17 else # dist$name == "pairwise"
18 {
19 if (is.null(dist$metric))
20 {
21 stop("No metric specified for distribution 'pairwise'")
22 }
23 if (!is.element(dist$metric, c("conc", "ndcg", "map", "mrr")))
24 {
25 stop("Invalid metric '", dist$metric, "' specified for distribution 'pairwise'")
26 }
27 if (is.null(group))
28 {
29 stop("For distribution 'pairwise', parameter 'group' has to be supplied")
30 }
31 # Loss = 1 - utility
32 (1 - perf.pairwise(y, f, group, dist$metric, w, max.rank)) - baseline
33 }
34 }
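# For reference, the core of the removed gbm.loss() above amounts to weighted
# deviances of the following form (gaussian and bernoulli shown as stand-alone
# helpers for illustration); `baseline` is the value subtracted by the caller,
# e.g. permutation.test.gbm().
gaussian_loss <- function(y, f, w, baseline = 0) {
  weighted.mean((y - f)^2, w) - baseline
}
bernoulli_loss <- function(y, f, w, baseline = 0) {
  -2 * weighted.mean(y * f - log(1 + exp(f)), w) - baseline
}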
0 #' Generalized Boosted Regression Modeling (GBM)
1 #'
2 #' Adds additional trees to a \code{\link{gbm.object}} object.
3 #'
4 #' @param object A \code{\link{gbm.object}} object created from an initial call
5 #' to \code{\link{gbm}}.
6 #'
7 #' @param n.new.trees Integer specifying the number of additional trees to add
8 #' to \code{object}. Default is 100.
9 #'
10 #' @param data An optional data frame containing the variables in the model. By
11 #' default the variables are taken from \code{environment(formula)}, typically
12 #' the environment from which \code{gbm} is called. If \code{keep.data=TRUE} in
13 #' the initial call to \code{gbm} then \code{gbm} stores a copy with the
14 #' object. If \code{keep.data=FALSE} then subsequent calls to
15 #' \code{\link{gbm.more}} must resupply the same dataset; it is the user's
16 #' responsibility to do so.
17 #'
18 #' @param weights An optional vector of weights to be used in the fitting
19 #' process. Must be positive but do not need to be normalized. If
20 #' \code{keep.data=FALSE} in the initial call to \code{gbm} then it is the
21 #' user's responsibility to resupply the weights to \code{\link{gbm.more}}.
22 #'
23 #' @param offset A vector of offset values.
24 #'
25 #' @param verbose Logical indicating whether or not to print out progress and
26 #' performance indicators (\code{TRUE}). If this option is left unspecified for
27 #' \code{gbm.more}, then it uses \code{verbose} from \code{object}. Default is
28 #' \code{FALSE}.
29 #'
30 #' @return A \code{\link{gbm.object}} object.
31 #'
32 #' @export
33 #'
34 #' @examples
35 #' #
36 #' # A least squares regression example
37 #' #
38 #'
39 #' # Simulate data
40 #' set.seed(101) # for reproducibility
41 #' N <- 1000
42 #' X1 <- runif(N)
43 #' X2 <- 2 * runif(N)
44 #' X3 <- ordered(sample(letters[1:4], N, replace = TRUE), levels = letters[4:1])
45 #' X4 <- factor(sample(letters[1:6], N, replace = TRUE))
46 #' X5 <- factor(sample(letters[1:3], N, replace = TRUE))
47 #' X6 <- 3 * runif(N)
48 #' mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
49 #' SNR <- 10 # signal-to-noise ratio
50 #' Y <- X1 ^ 1.5 + 2 * (X2 ^ 0.5) + mu
51 #' sigma <- sqrt(var(Y) / SNR)
52 #' Y <- Y + rnorm(N, 0, sigma)
53 #' X1[sample(1:N,size=500)] <- NA # introduce some missing values
54 #' X4[sample(1:N,size=300)] <- NA # introduce some missing values
55 #' data <- data.frame(Y, X1, X2, X3, X4, X5, X6)
56 #'
57 #' # Fit a GBM
58 #' set.seed(102) # for reproducibility
59 #' gbm1 <- gbm(Y ~ ., data = data, var.monotone = c(0, 0, 0, 0, 0, 0),
60 #' distribution = "gaussian", n.trees = 100, shrinkage = 0.1,
61 #' interaction.depth = 3, bag.fraction = 0.5, train.fraction = 0.5,
62 #' n.minobsinnode = 10, cv.folds = 5, keep.data = TRUE,
63 #' verbose = FALSE, n.cores = 1)
64 #'
65 #' # Check performance using the out-of-bag (OOB) error; the OOB error typically
66 #' # underestimates the optimal number of iterations
67 #' best.iter <- gbm.perf(gbm1, method = "OOB")
68 #' print(best.iter)
69 #'
70 #' # Check performance using the 50% heldout test set
71 #' best.iter <- gbm.perf(gbm1, method = "test")
72 #' print(best.iter)
73 #'
74 #' # Check performance using 5-fold cross-validation
75 #' best.iter <- gbm.perf(gbm1, method = "cv")
76 #' print(best.iter)
77 #'
78 #' # Plot relative influence of each variable
79 #' par(mfrow = c(1, 2))
80 #' summary(gbm1, n.trees = 1) # using first tree
81 #' summary(gbm1, n.trees = best.iter) # using estimated best number of trees
82 #'
83 #' # Compactly print the first and last trees for curiosity
84 #' print(pretty.gbm.tree(gbm1, i.tree = 1))
85 #' print(pretty.gbm.tree(gbm1, i.tree = gbm1$n.trees))
86 #'
87 #' # Simulate new data
88 #' set.seed(103) # for reproducibility
89 #' N <- 1000
90 #' X1 <- runif(N)
91 #' X2 <- 2 * runif(N)
92 #' X3 <- ordered(sample(letters[1:4], N, replace = TRUE))
93 #' X4 <- factor(sample(letters[1:6], N, replace = TRUE))
94 #' X5 <- factor(sample(letters[1:3], N, replace = TRUE))
95 #' X6 <- 3 * runif(N)
96 #' mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
97 #' Y <- X1 ^ 1.5 + 2 * (X2 ^ 0.5) + mu + rnorm(N, 0, sigma)
98 #' data2 <- data.frame(Y, X1, X2, X3, X4, X5, X6)
99 #'
100 #' # Predict on the new data using the "best" number of trees; by default,
101 #' # predictions will be on the link scale
102 #' Yhat <- predict(gbm1, newdata = data2, n.trees = best.iter, type = "link")
103 #'
104 #' # least squares error
105 #' print(sum((data2$Y - Yhat)^2))
106 #'
107 #' # Construct univariate partial dependence plots
108 #' p1 <- plot(gbm1, i.var = 1, n.trees = best.iter)
109 #' p2 <- plot(gbm1, i.var = 2, n.trees = best.iter)
110 #' p3 <- plot(gbm1, i.var = "X3", n.trees = best.iter) # can use index or name
111 #' grid.arrange(p1, p2, p3, ncol = 3)
112 #'
113 #' # Construct bivariate partial dependence plots
114 #' plot(gbm1, i.var = 1:2, n.trees = best.iter)
115 #' plot(gbm1, i.var = c("X2", "X3"), n.trees = best.iter)
116 #' plot(gbm1, i.var = 3:4, n.trees = best.iter)
117 #'
118 #' # Construct trivariate partial dependence plots
119 #' plot(gbm1, i.var = c(1, 2, 6), n.trees = best.iter,
120 #' continuous.resolution = 20)
121 #' plot(gbm1, i.var = 1:3, n.trees = best.iter)
122 #' plot(gbm1, i.var = 2:4, n.trees = best.iter)
123 #' plot(gbm1, i.var = 3:5, n.trees = best.iter)
124 #'
125 #' # Add more (i.e., 100) boosting iterations to the ensemble
126 #' gbm2 <- gbm.more(gbm1, n.new.trees = 100, verbose = FALSE)
0127 gbm.more <- function(object,
1128 n.new.trees = 100,
2129 data = NULL,
3130 weights = NULL,
4131 offset = NULL,
5 verbose = NULL)
6 {
132 verbose = NULL) {
7133 theCall <- match.call()
8134 nTrain <- object$nTrain
9135
180306 }
181307 x <- as.vector(x)
182308
183 gbm.obj <- .Call("gbm",
309 gbm.obj <- .Call("gbm_fit",
184310 Y = as.double(y),
185311 Offset = as.double(offset),
186312 X = as.double(x),
0 #' Generalized Boosted Regression Model Object
1 #'
2 #' These are objects representing fitted \code{gbm}s.
3 #'
4 #' @return \item{initF}{the "intercept" term, the initial predicted value to
5 #' which trees make adjustments} \item{fit}{a vector containing the fitted
6 #' values on the scale of regression function (e.g. log-odds scale for
7 #' bernoulli, log scale for poisson)} \item{train.error}{a vector of length
8 #' equal to the number of fitted trees containing the value of the loss
9 #' function for each boosting iteration evaluated on the training data}
10 #' \item{valid.error}{a vector of length equal to the number of fitted trees
11 #' containing the value of the loss function for each boosting iteration
12 #' evaluated on the validation data} \item{cv.error}{if \code{cv.folds}<2 this
13 #' component is NULL. Otherwise, this component is a vector of length equal to
14 #' the number of fitted trees containing a cross-validated estimate of the loss
15 #' function for each boosting iteration} \item{oobag.improve}{a vector of
16 #' length equal to the number of fitted trees containing an out-of-bag estimate
17 #' of the marginal reduction in the expected value of the loss function. The
18 #' out-of-bag estimate uses only the training data and is useful for estimating
19 #' the optimal number of boosting iterations. See \code{\link{gbm.perf}}}
20 #' \item{trees}{a list containing the tree structures. The components are best
21 #' viewed using \code{\link{pretty.gbm.tree}}} \item{c.splits}{a list of all
22 #' the categorical splits in the collection of trees. If the \code{trees[[i]]}
23 #' component of a \code{gbm} object describes a categorical split then the
24 #' splitting value will refer to a component of \code{c.splits}. That component
25 #' of \code{c.splits} will be a vector of length equal to the number of levels
26 #' in the categorical split variable. -1 indicates left, +1 indicates right,
27 #' and 0 indicates that the level was not present in the training data}
28 #' \item{cv.fitted}{If cross-validation was performed, the cross-validation
29 #' predicted values on the scale of the linear predictor. That is, the fitted
30 #' values from the ith CV-fold, for the model having been trained on the data
31 #' in all other folds.}
32 #' @section Structure: The following components must be included in a
33 #' legitimate \code{gbm} object.
34 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
35 #' @seealso \code{\link{gbm}}
36 #' @keywords methods
37 #' @name gbm.object
38 NULL
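# A quick sketch of inspecting the components documented above on any fitted
# model returned by gbm() or gbm.fit() (here called `fit`).
fit$initF             # the "intercept" on the scale of the link function
length(fit$trees)     # one tree per iteration (times the number of classes)
head(fit$train.error)
head(fit$cv.fitted)   # NULL unless the model was fit with cv.folds > 1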
0 gbm.perf <- function(object,
1 plot.it=TRUE,
2 oobag.curve=FALSE,
3 overlay=TRUE,
4 method)
5 {
6 smoother <- NULL
7
8 if ( missing( method ) ){
9 if ( object$train.fraction < 1 ){
10 method <- "test"
11 }
12 else if ( !is.null( object$cv.error ) ){
13 method <- "cv"
14 }
15 else { method <- "OOB" }
16 cat( paste( "Using", method, "method...\n" ) )
17 }
18
19 if((method == "OOB") || oobag.curve)
20 {
21 if(object$bag.fraction==1)
22 stop("Cannot compute OOB estimate or the OOB curve when bag.fraction=1")
23 if(all(!is.finite(object$oobag.improve)))
24 stop("Cannot compute OOB estimate or the OOB curve. No finite OOB estimates of improvement")
25 x <- 1:object$n.trees
26 smoother <- loess(object$oobag.improve~x,
27 enp.target=min(max(4,length(x)/10),50))
28 smoother$y <- smoother$fitted
29 smoother$x <- x
30
31 best.iter.oob <- x[which.min(-cumsum(smoother$y))]
32 best.iter <- best.iter.oob
33 }
34
35 if(method == "OOB")
36 {
37 warning("OOB generally underestimates the optimal number of iterations although predictive performance is reasonably competitive. Using cv.folds>0 when calling gbm usually results in improved predictive performance.")
38 }
39
40 if(method == "test")
41 {
42 best.iter.test <- which.min(object$valid.error)
43 best.iter <- best.iter.test
44 }
45
46 if(method == "cv")
47 {
48 if(is.null(object$cv.error))
49 stop("In order to use method=\"cv\" gbm must be called with cv.folds>1.")
50 if(length(object$cv.error) < object$n.trees)
51 warning("cross-validation error is not computed for any additional iterations run using gbm.more().")
52 best.iter.cv <- which.min(object$cv.error)
53 best.iter <- best.iter.cv
54 }
55
56 if(!is.element(method,c("OOB","test","cv")))
57 stop("method must be cv, test, or OOB")
58
59 if(plot.it)
60 {
61 par(mar=c(5,4,4,4)+.1)
62 if (object$distribution$name !="pairwise")
63 {
64 ylab <- switch(substring(object$distribution$name,1,2),
65 ga="Squared error loss",
66 be="Bernoulli deviance",
67 po="Poisson deviance",
68 ad="AdaBoost exponential bound",
69 co="Cox partial deviance",
70 la="Absolute loss",
71 qu="Quantile loss",
72 mu="Multinomial deviance",
73 td="t-distribution deviance"
74 )
75 }
76 else # object$distribution$name =="pairwise"
77 {
78 ylab <- switch(object$distribution$metric,
79 conc ="Fraction of concordant pairs",
80 ndcg="Normalized discounted cumulative gain",
81 map ="Mean average precision",
82 mrr ="Mean reciprocal rank"
83 )
84 }
85
86 if(object$train.fraction==1)
87 { # HS Next line changed to scale axis to include other error
88 # ylim <- range(object$train.error)
89 if ( method=="cv" ){ ylim <- range(object$train.error, object$cv.error) }
90 else if ( method == "test" ){ ylim <- range( object$train.error, object$valid.error) }
91 else { ylim <- range(object$train.error) }
92 }
93 else
94 {
95 ylim <- range(object$train.error,object$valid.error)
96 }
97
98 plot(object$train.error,
99 ylim=ylim,
0 #' GBM performance
1 #'
2 #' Estimates the optimal number of boosting iterations for a \code{gbm} object
3 #' and optionally plots various performance measures
4 #'
5 #' @param object A \code{\link{gbm.object}} created from an initial call to
6 #' \code{\link{gbm}}.
7 #'
8 #' @param plot.it An indicator of whether or not to plot the performance
9 #' measures. Setting \code{plot.it = TRUE} creates two plots. The first plot
10 #' plots \code{object$train.error} (in black) and \code{object$valid.error}
11 #' (in red) versus the iteration number. The scale of the error measurement,
12 #' shown on the left vertical axis, depends on the \code{distribution}
13 #' argument used in the initial call to \code{\link{gbm}}.
14 #'
15 #' @param oobag.curve Indicates whether to plot the out-of-bag performance
16 #' measures in a second plot.
17 #'
18 #' @param overlay If TRUE and oobag.curve=TRUE then a right y-axis is added to
19 #' the training and test error plot and the estimated cumulative improvement
20 #' in the loss function is plotted versus the iteration number.
21 #'
22 #' @param method Indicate the method used to estimate the optimal number of
23 #' boosting iterations. \code{method = "OOB"} computes the out-of-bag estimate
24 #' and \code{method = "test"} uses the test (or validation) dataset to compute
25 #' an out-of-sample estimate. \code{method = "cv"} extracts the optimal number
26 #' of iterations using cross-validation if \code{gbm} was called with
27 #' \code{cv.folds} > 1.
28 #'
29 #' @return \code{gbm.perf} Returns the estimated optimal number of iterations.
30 #' The method of computation depends on the \code{method} argument.
31 #'
32 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
33 #'
34 #' @seealso \code{\link{gbm}}, \code{\link{gbm.object}}
35 #'
36 #' @keywords nonlinear survival nonparametric tree
37 #'
38 #' @export
39 gbm.perf <- function(object, plot.it = TRUE, oobag.curve = FALSE,
40 overlay = TRUE, method) {
41
42 # Determine method, if missing
43 if (missing(method)) {
44 method <- guess_error_method(object)
45 }
46
47 # Determine "optimal" number of iterations
48 best.iter <- best_iter(object, method = method)
49
50 # Determine an appropriate y-axis label
51 ylab <- get_ylab(object)
52
53 # Determine an appropriate range for the y-axis
54 ylim <- get_ylim(object, method = method)
55
56 # Plot results
57 plot(object$train.error, ylim = ylim, type = "l", xlab = "Iteration",
58 ylab = ylab)
59
60 if(object$train.fraction!=1) {
61 lines(object$valid.error,col="red")
62 }
63 if(method=="cv") {
64 lines(object$cv.error,col="green")
65 }
66 if(!is.na(best.iter)) {
67 abline(v=best.iter,col="blue",lwd=2,lty=2)
68 }
69 if(oobag.curve) {
70 if(overlay) {
71 smoother <- attr(best.iter, "smoother")
72 par(new = TRUE)
73 plot(smoother$x,
74 cumsum(smoother$y),
75 col="blue",
10076 type="l",
101 xlab="Iteration",ylab=ylab)
102
103 if(object$train.fraction!=1)
104 {
105 lines(object$valid.error,col="red")
106 }
107 if(method=="cv")
108 {
109 lines(object$cv.error,col="green")
110 }
111 if(!is.na(best.iter)) abline(v=best.iter,col="blue",lwd=2,lty=2)
112 if(oobag.curve)
113 {
114 if(overlay)
115 {
116 par(new=TRUE)
117 plot(smoother$x,
118 cumsum(smoother$y),
119 col="blue",
120 type="l",
121 xlab="",ylab="",
122 axes=FALSE)
123 axis(4,srt=0)
124 at <- mean(range(smoother$y))
125 mtext(paste("OOB improvement in",ylab),side=4,srt=270,line=2)
126 abline(h=0,col="blue",lwd=2)
127 }
128
129 plot(object$oobag.improve,type="l",
130 xlab="Iteration",
131 ylab=paste("OOB change in",ylab))
132 lines(smoother,col="red",lwd=2)
133 abline(h=0,col="blue",lwd=1)
134
135 abline(v=best.iter,col="blue",lwd=1)
136 }
137 }
138
139 return(best.iter)
77 xlab="",ylab="",
78 axes=FALSE)
79 axis(4,srt=0)
80 at <- mean(range(smoother$y))
81 mtext(paste("OOB improvement in",ylab),side=4,srt=270,line=2)
82 abline(h=0,col="blue",lwd=2)
83 }
84
85 plot(object$oobag.improve,type="l",
86 xlab="Iteration",
87 ylab=paste("OOB change in",ylab))
88 lines(smoother,col="red",lwd=2)
89 abline(h=0,col="blue",lwd=1)
90
91 abline(v=best.iter,col="blue",lwd=1)
92 }
93 return(best.iter)
14094 }
141
142
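A minimal usage sketch of gbm.perf() follows. It is illustrative only: the simulated data, the object names (dat, fit, best.cv, best.oob), and the tuning values are assumptions made for this example, not part of the diff.

library(gbm)
set.seed(101)
N <- 1000
dat <- data.frame(x1 = runif(N), x2 = runif(N))
dat$y <- dat$x1 + 2 * dat$x2 + rnorm(N, sd = 0.3)
# Fit a small Gaussian GBM with 5-fold cross-validation (bag.fraction keeps its default 0.5)
fit <- gbm(y ~ x1 + x2, data = dat, distribution = "gaussian",
           n.trees = 500, shrinkage = 0.05, cv.folds = 5, verbose = FALSE)
best.cv  <- gbm.perf(fit, method = "cv")   # cross-validation error (green curve)
best.oob <- gbm.perf(fit, method = "OOB")  # out-of-bag estimate (tends to be conservative)

Had the original gbm() call used train.fraction < 1, method = "test" would instead pick the iteration minimizing the held-out validation error (red curve).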
143 perf.pairwise <- function(y, f, group, metric="ndcg", w=NULL, max.rank=0)
144 {
145 func.name <- switch(metric,
146 conc = "ir.measure.conc",
147 mrr = "ir.measure.mrr",
148 map = "ir.measure.map",
149 ndcg = "ir.measure.ndcg",
150 stop(paste("Metric",metric,"is not supported"))
151 )
152
153 # Optimization: for binary targets,
154 # AUC is equivalent but faster than CONC
155 if (metric == "conc" && all(is.element(y, 0:1)))
156 {
157 func.name <- "ir.measure.auc"
158 }
159
160 # Max rank = 0 means no cut off
161 if (max.rank <= 0)
162 {
163 max.rank <- length(y)+1
164 }
165
166 # Random tie breaking in case of duplicate scores.
167 # (Without tie breaking, we would overestimate if instances are
168 # sorted descending on target)
169 f <- f + 1E-10 * runif(length(f), min=-0.5, max=0.5)
170
171 measure.by.group <- as.matrix(by(list(y, f), INDICES=group, FUN=get(func.name), max.rank=max.rank))
172
173 # Exclude groups with single result or only negative or positive instances
174 idx <- which((!is.null(measure.by.group)) & measure.by.group >= 0)
175
176 if (is.null(w))
177 {
178 return (mean(measure.by.group[idx]))
179 }
180 else
181 {
182 # Assumption: weights are constant per group
183 w.by.group <- tapply(w, group, mean)
184 return (weighted.mean(measure.by.group[idx], w=w.by.group[idx]))
185 }
186 }
+0
-8
R/gbmCluster.R
0 gbmCluster <- function(n){
1 # If number of cores (n) not given, try to work it out from the number
2 # that appear to be available and the number of CV folds.
3 if (is.null(n)){
4 n <- detectCores()
5 }
6 makeCluster(n)
7 }
0 ##' Perform gbm cross-validation
1 ##'
2 ##' This function has far too many arguments, but there isn't the
3 ##' abstraction in gbm to lose them.
0 #' Cross-validate a gbm
1 #'
2 #' Functions for cross-validating gbm. These functions are used internally and
3 #' are not intended for end-user direct usage.
4 #'
5 #' These functions are not intended for end-user direct usage, but are used
6 #' internally by \code{gbm}.
7 #'
8 #' @aliases gbmCrossVal gbmCrossValModelBuild gbmDoFold gbmCrossValErr
9 #' gbmCrossValPredictions
10 #' @param cv.folds The number of cross-validation folds.
11 #' @param nTrain The number of training samples.
12 #' @param n.cores The number of cores to use.
13 #' @param class.stratify.cv Whether or not stratified cross-validation samples
14 #' are used.
15 #' @param data The data.
16 #' @param x The model matrix.
17 #' @param y The response variable.
18 #' @param offset The offset.
19 #' @param distribution The type of loss function. See \code{\link{gbm}}.
20 #' @param w Observation weights.
21 #' @param var.monotone See \code{\link{gbm}}.
22 #' @param n.trees The number of trees to fit.
23 #' @param interaction.depth The degree of allowed interactions. See
24 #' \code{\link{gbm}}.
25 #' @param n.minobsinnode See \code{\link{gbm}}.
26 #' @param shrinkage See \code{\link{gbm}}.
27 #' @param bag.fraction See \code{\link{gbm}}.
28 #' @param var.names See \code{\link{gbm}}.
29 #' @param response.name See \code{\link{gbm}}.
30 #' @param group Used when \code{distribution = "pairwise"}. See
31 #' \code{\link{gbm}}.
32 #' @param i.train Items in the training set.
33 #' @param cv.models A list containing the models for each fold.
34 #' @param cv.group A vector indicating the cross-validation fold for each
35 #' member of the training set.
36 #' @param best.iter.cv The iteration with lowest cross-validation error.
37 #' @param X Index (cross-validation fold) on which to subset.
38 #' @param s Random seed.
39 #' @return A list containing the cross-validation error and predictions.
40 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
41 #' @seealso \code{\link{gbm}}
42 #' @references J.H. Friedman (2001). "Greedy Function Approximation: A Gradient
43 #' Boosting Machine," Annals of Statistics 29(5):1189-1232.
44 #'
45 #' L. Breiman (2001).
46 #' \url{https://www.stat.berkeley.edu/users/breiman/randomforest2001.pdf}.
47 #' @keywords models
48
49 # Perform gbm cross-validation
50 #
51 # This function has far too many arguments, but there isn't the
52 # abstraction in gbm to lose them.
53 #' @rdname gbmCrossVal
54 #' @export
455 gbmCrossVal <- function(cv.folds, nTrain, n.cores,
556 class.stratify.cv, data,
657 x, y, offset, distribution, w, var.monotone,
1869 n.minobsinnode, shrinkage,
1970 bag.fraction, var.names,
2071 response.name, group)
72
2173 ## get the errors
2274 cv.error <- gbmCrossValErr(cv.models, cv.folds, cv.group, nTrain, n.trees)
2375 best.iter.cv <- which.min(cv.error)
76
2477 ## get the predictions
2578 predictions <- gbmCrossValPredictions(cv.models, cv.folds, cv.group,
2679 best.iter.cv, distribution,
27 data[i.train,], y)
28 list(error=cv.error,
29 predictions=predictions)
30 }
31
32 ##' Get the gbm cross-validation error
80 data[i.train, ], y)
81 list(error = cv.error, predictions = predictions)
82 }
83
84
85 # Get the gbm cross-validation error
86 #' @rdname gbmCrossVal
87 #' @export
3388 gbmCrossValErr <- function(cv.models, cv.folds, cv.group, nTrain, n.trees) {
3489 in.group <- tabulate(cv.group, nbins=cv.folds)
3590 cv.error <- vapply(1:cv.folds,
3893 model$valid.error * in.group[[index]]
3994 }, double(n.trees))
4095 ## this is now a (n.trees, cv.folds) matrix
41
96
4297 ## and now a n.trees vector
4398 rowSums(cv.error) / nTrain
4499 }
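The vapply/rowSums combination above amounts to a fold-size-weighted average of the per-fold validation error curves. The standalone sketch below reproduces just that arithmetic with made-up numbers; no gbm objects are involved and all values are hypothetical.

# Three folds, four trees: per-iteration mean validation error for each fold
fold.err <- cbind(c(1.00, 0.80, 0.70, 0.65),
                  c(1.10, 0.90, 0.75, 0.70),
                  c(0.90, 0.85, 0.70, 0.60))
in.group <- c(34, 33, 33)        # number of held-out observations per fold
nTrain   <- sum(in.group)
# Weight each fold's curve by its fold size, then average over all held-out points
cv.error <- rowSums(sweep(fold.err, 2, in.group, `*`)) / nTrain
which.min(cv.error)              # iteration with the lowest CV error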
45100
46 ##' Get the predictions for GBM cross validation
47 ##'
48 ##' This function is not as nice as it could be (leakage of y)
101
102 #' @rdname gbmCrossVal
103 #' @export
49104 gbmCrossValPredictions <- function(cv.models, cv.folds, cv.group,
50105 best.iter.cv, distribution, data, y) {
51 ## test cv.group and data match
106
107 # Get the predictions for GBM cross validation. This function is not as nice
108 # as it could be (i.e., leakage of y)
109
110 # Test that cv.group and data match
52111 if (nrow(data) != length(cv.group)) {
53 stop("mismatch between data and cv.group")
54 }
55 ## this is a little complicated due to multinomial distribution
112 stop("Mismatch between `data` and `cv.group`.")
113 }
114
115 # This is a little complicated due to multinomial distribution
56116 num.cols <- if (distribution$name == "multinomial") {
57117 nlevels(factor(y))
58118 } else {
59119 1
60120 }
61 result <- matrix(nrow=nrow(data), ncol=num.cols)
62 ## there's no real reason to do this as other than a for loop
63 data.names <- names(data)
121
122 # Initialize results matrix
123 res <- matrix(nrow = nrow(data), ncol = num.cols)
124
125 # There's no real reason to do this as other than a for loop
126 data.names <- names(data) # column names
64127 for (ind in 1:cv.folds) {
65 ## these are the particular elements
128
129 # These are the particular elements
66130 flag <- cv.group == ind
67131 model <- cv.models[[ind]]
68 ## the %in% here is to handle coxph
132
133 # The %in% here is to handle coxph
69134 my.data <- data[flag, !(data.names %in% model$response.name)]
70 predictions <- predict(model, newdata=my.data, n.trees=best.iter.cv)
71 predictions <- matrix(predictions, ncol=num.cols)
72 result[flag,] <- predictions
73 }
74
135 predictions <- predict(model, newdata = my.data, n.trees = best.iter.cv) # FIXME
136 predictions <- matrix(predictions, ncol = num.cols)
137 res[flag, ] <- predictions
138
139 }
140
141 # Handle multinomial case
75142 if (distribution$name != "multinomial") {
76 result <- as.numeric(result)
77 }
78
79 result
80 }
81
82
83 ##' Perform gbm cross-validation
84 ##'
85 ##' This function has far too many arguments.
86 gbmCrossValModelBuild <- function(cv.folds, cv.group, n.cores, i.train,
87 x, y, offset, distribution,
88 w, var.monotone, n.trees,
89 interaction.depth, n.minobsinnode,
90 shrinkage, bag.fraction,
91 var.names, response.name,
92 group) {
93 ## set up the cluster and add a finalizer
143 res <- as.numeric(res)
144 }
145
146 # Return the result
147 res
148
149 }
150
151
152 # Perform gbm cross-validation
153 #
154 # This function has far too many arguments.
155 #' @rdname gbmCrossVal
156 #' @export
157 gbmCrossValModelBuild <- function(cv.folds, cv.group, n.cores, i.train, x, y,
158 offset, distribution, w, var.monotone,
159 n.trees, interaction.depth, n.minobsinnode,
160 shrinkage, bag.fraction, var.names,
161 response.name, group) {
162
163 # Set up cluster and add finalizer
94164 cluster <- gbmCluster(n.cores)
95 on.exit(stopCluster(cluster))
96
97 ## get ourselves some random seeds
165 on.exit(parallel::stopCluster(cluster))
166
167 # Set random seeds
98168 seeds <- as.integer(runif(cv.folds, -(2^31 - 1), 2^31))
99
100 ## now do the cross-validation model builds
101 parLapply(cl=cluster, X=1:cv.folds,
102 gbmDoFold, i.train, x, y, offset, distribution,
103 w, var.monotone, n.trees,
104 interaction.depth, n.minobsinnode, shrinkage,
105 bag.fraction,
106 cv.group, var.names, response.name, group, seeds)
107 }
169
170 # Perform cross-validation model builds
171 parallel::parLapply(cl = cluster, X = 1:cv.folds, fun = gbmDoFold, i.train, x,
172 y, offset, distribution, w, var.monotone, n.trees,
173 interaction.depth, n.minobsinnode, shrinkage,
174 bag.fraction, cv.group, var.names, response.name, group,
175 seeds)
176
177 }
178
179
180 #' @rdname gbmCrossVal
181 #' @export
182 gbmDoFold <- function(X, i.train, x, y, offset, distribution, w, var.monotone,
183 n.trees, interaction.depth, n.minobsinnode, shrinkage,
184 bag.fraction, cv.group, var.names, response.name, group,
185 s) {
186
187 # Do specified cross-validation fold - a self-contained function for passing
188 # to individual cores.
189
190 # Load required packages for core
191 library(gbm, quietly=TRUE)
192
193 # Print CV information
194 cat("CV:", X, "\n")
195
196 # Setup
197 set.seed(s[[X]])
198 i <- order(cv.group == X)
199 x <- x[i.train,,drop=TRUE][i,,drop=FALSE]
200 y <- y[i.train][i]
201 offset <- offset[i.train][i]
202 nTrain <- length(which(cv.group != X))
203 group <- group[i.train][i]
204
205 # Fit a GBM
206 res <- gbm.fit(x = x, y = y, offset = offset, distribution = distribution,
207 w = w, var.monotone = var.monotone, n.trees = n.trees,
208 interaction.depth = interaction.depth,
209 n.minobsinnode = n.minobsinnode,
210 shrinkage = shrinkage, bag.fraction = bag.fraction,
211 nTrain = nTrain, keep.data = FALSE, verbose = FALSE,
212 response.name = response.name, group = group)
213
214 # Return the result
215 res
216
217 }
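All of this machinery is driven from the user-facing cv.folds and n.cores arguments of gbm(); a hedged one-liner, reusing the simulated dat assumed in the earlier gbm.perf sketch:

fit.cv <- gbm(y ~ x1 + x2, data = dat, distribution = "gaussian",
              n.trees = 200, cv.folds = 3, n.cores = 1, verbose = FALSE)
# gbmCrossVal() and friends run behind the scenes; the per-iteration CV error surfaces here
head(fit.cv$cv.error)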
+0
-31
R/gbmDoFold.R
0 gbmDoFold <-
1 # Do specified cross-validation fold - a self-contained function for
2 # passing to individual cores.
3 function(X,
4 i.train, x, y, offset, distribution, w, var.monotone, n.trees,
5 interaction.depth, n.minobsinnode, shrinkage, bag.fraction,
6 cv.group, var.names, response.name, group, s){
7 library(gbm, quietly=TRUE)
8 cat("CV:", X, "\n")
9
10 set.seed(s[[X]])
11
12 i <- order(cv.group == X)
13 x <- x[i.train,,drop=TRUE][i,,drop=FALSE]
14 y <- y[i.train][i]
15 offset <- offset[i.train][i]
16 nTrain <- length(which(cv.group != X))
17 group <- group[i.train][i]
18
19 res <- gbm.fit(x, y,
20 offset=offset, distribution=distribution,
21 w=w, var.monotone=var.monotone, n.trees=n.trees,
22 interaction.depth=interaction.depth,
23 n.minobsinnode=n.minobsinnode,
24 shrinkage=shrinkage,
25 bag.fraction=bag.fraction,
26 nTrain=nTrain, keep.data=FALSE,
27 verbose=FALSE, response.name=response.name,
28 group=group)
29 res
30 }
+0
-25
R/getCVgroup.R
0 getCVgroup <-
1 # Construct cross-validation groups depending on the type of model to be fit
2 function(distribution, class.stratify.cv, y, i.train, cv.folds, group){
3
4 if (distribution$name %in% c( "bernoulli", "multinomial" ) & class.stratify.cv ){
5 nc <- table(y[i.train]) # Number in each class
6 uc <- names(nc)
7 if (min(nc) < cv.folds){
8 stop( paste("The smallest class has only", min(nc), "objects in the training set. Can't do", cv.folds, "fold cross-validation."))
9 }
10 cv.group <- vector(length = length(i.train))
11 for (i in 1:length(uc)){
12 cv.group[y[i.train] == uc[i]] <- sample(rep(1:cv.folds , length = nc[i]))
13 }
14 } # Close if
15 else if (distribution$name == "pairwise") {
16 # Split into CV folds at group boundaries
17 s <- sample(rep(1:cv.folds, length=nlevels(group)))
18 cv.group <- s[as.integer(group[i.train])]
19 }
20 else {
21 cv.group <- sample(rep(1:cv.folds, length=length(i.train)))
22 }
23 cv.group
24 }
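The class-stratified branch above simply deals folds out within each class. A base-R illustration with toy values (the vectors below are hypothetical, not taken from the package):

set.seed(42)
y <- factor(rep(c("a", "b"), times = c(70, 30)))   # imbalanced binary outcome
cv.folds <- 5
cv.group <- integer(length(y))
for (lev in levels(y)) {
  idx <- which(y == lev)
  cv.group[idx] <- sample(rep(1:cv.folds, length.out = length(idx)))
}
table(y, cv.group)   # every fold receives roughly the same class mix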
+0
-13
R/getStratify.R
0 getStratify <- function(strat, d){
1 if (is.null(strat)){
2 if (d$name == "multinomial" ){ strat <- TRUE }
3 else { strat <- FALSE }
4 }
5 else {
6 if (!is.element(d$name, c( "bernoulli", "multinomial"))){
7 warning("You can only use class.stratify.cv when distribution is bernoulli or multinomial. Ignored.")
8 strat <- FALSE
9 }
10 } # Close else
11 strat
12 }
+0
-9
R/guessDist.R
0 guessDist <- function(y){
1 # If distribution is not given, try to guess it
2 if (length(unique(y)) == 2){ d <- "bernoulli" }
3 else if (class(y) == "Surv" ){ d <- "coxph" }
4 else if (is.factor(y)){ d <- "multinomial" }
5 else{ d <- "gaussian" }
6 cat(paste("Distribution not specified, assuming", d, "...\n"))
7 list(name=d)
8 }
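A few hedged calls that exercise the guessing rules above directly; guessDist() is an internal helper, so these calls are for illustration only:

guessDist(c(0, 1, 1, 0))              # two unique values -> "bernoulli"
guessDist(factor(c("a", "b", "c")))   # factor response   -> "multinomial"
guessDist(rnorm(10))                  # numeric response  -> "gaussian"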
0 # Compute Friedman's H statistic for interaction effects
0 #' Estimate the strength of interaction effects
1 #'
2 #' Computes Friedman's H-statistic to assess the strength of variable
3 #' interactions.
4 #'
5 #' @param x A \code{\link{gbm.object}} fitted using a call to \code{\link{gbm}}.
6 #'
7 #' @param data The dataset used to construct \code{x}. If the original dataset
8 #' is large, a random subsample may be used to accelerate the computation in
9 #' \code{interact.gbm}.
10 #'
11 #' @param i.var A vector of indices or the names of the variables for which to
12 #' compute the interaction effect. If using indices, the variables are indexed in the
13 #' same order that they appear in the initial \code{gbm} formula.
14 #'
15 #' @param n.trees The number of trees to use in the computation. Only the first
16 #' \code{n.trees} trees will be used.
17 #'
18 #' @return Returns the value of \eqn{H}.
19 #'
20 #' @details
21 #' \code{interact.gbm} computes Friedman's H-statistic to assess the relative
22 #' strength of interaction effects in non-linear models. H is on the scale of
23 #' [0-1] with higher values indicating larger interaction effects. To connect
24 #' to a more familiar measure, if \eqn{x_1} and \eqn{x_2} are uncorrelated
25 #' covariates with mean 0 and variance 1 and the model is of the form
26 #' \deqn{y=\beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_1x_2} then
27 #' \deqn{H=\frac{\beta_3}{\sqrt{\beta_1^2+\beta_2^2+\beta_3^2}}}
28 #'
29 #' Note that if the main effects are weak, the estimated H will be unstable.
30 #' For example, if (in the case of a two-way interaction) neither main effect
31 #' is in the selected model (relative influence is zero), the result will be
32 #' 0/0. Also, with weak main effects, rounding errors can result in values of H
33 #' > 1 which are not possible.
34 #'
35 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
36 #' @seealso \code{\link{gbm}}, \code{\link{gbm.object}}
37 #' @references J.H. Friedman and B.E. Popescu (2005). \dQuote{Predictive
38 #' Learning via Rule Ensembles.} Section 8.1
39 #' @keywords methods
40 #' @export
141 interact.gbm <- function(x, data, i.var = 1, n.trees = x$n.trees){
242 ###############################################################
343 # Do sanity checks on the call
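To make the H-statistic concrete, a short hedged call; it assumes the simulated fit and dat objects from the gbm.perf sketch earlier in this diff, which are not defined here:

# H near 0: little evidence of an x1:x2 interaction; H near 1: strong interaction
interact.gbm(fit, data = dat, i.var = c("x1", "x2"), n.trees = fit$n.trees)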
99 # inaccurate for individual groups, but should have
1010 # a small effect on the overall measure.
1111
12 #' Compute Information Retrieval measures.
13 #'
14 #' Functions to compute Information Retrieval measures for pairwise loss for a
15 #' single group. The function returns the respective metric, or a negative
16 #' value if it is undefined for the given group.
17 #'
18 #' @param obs Observed value.
19 #' @param pred Predicted value.
20 #' @param metric What type of performance measure to compute.
21 #' @param y,y.f,f,w,group,max.rank Used internally.
22 #' @param x ?.
23 #' @return The requested performance measure.
24 #'
25 #' @details
26 #' For simplicity, we have no special handling for ties; instead, we break ties
27 #' randomly. This is slightly inaccurate for individual groups, but should have
28 #' only a small effect on the overall measure.
29 #'
30 #' \code{gbm.conc} computes the concordance index: Fraction of all pairs (i,j)
31 #' with i<j, x[i] != x[j], such that x[j] < x[i]
32 #'
33 #' If \code{obs} is binary, then \code{gbm.roc.area(obs, pred) =
34 #' gbm.conc(obs[order(-pred)])}.
35 #'
36 #' \code{gbm.conc} is more general as it allows non-binary targets, but is
37 #' significantly slower.
38 #'
39 #' @aliases gbm.roc.area gbm.conc ir.measure.conc ir.measure.auc ir.measure.mrr
40 #' ir.measure.map ir.measure.ndcg perf.pairwise
41 #'
42 #' @rdname gbm.roc.area
43 #'
44 #' @author Stefan Schroedl
45 #' @seealso \code{\link{gbm}}
46 #' @references C. Burges (2010). "From RankNet to LambdaRank to LambdaMART: An
47 #' Overview", Microsoft Research Technical Report MSR-TR-2010-82.
48 #' @keywords models
49 #'
50 #' @examples
51 #'
52 #' ##---- Should be DIRECTLY executable !! ----
53 #' ##-- ==> Define data, use random,
54 #' ##-- or do help(data=index) for the standard data sets.
55
1256
1357 # Area under ROC curve = ratio of correctly ranking pairs
14 gbm.roc.area <- function(obs, pred)
15 {
58 #' @rdname gbm.roc.area
59 #' @export
60 gbm.roc.area <- function(obs, pred) {
1661 n1 <- sum(obs)
1762 n <- length(obs)
1863 if (n==n1) { return(1) }
2267 return ((mean(rank(pred)[obs > 0]) - (n1 + 1)/2)/(n - n1))
2368 }
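A quick sanity check of the rank-based computation above, using made-up binary labels and scores; the brute-force pair count is included only for comparison:

obs  <- c(1, 0, 1, 1, 0, 0, 1, 0)
pred <- c(0.9, 0.2, 0.8, 0.4, 0.5, 0.1, 0.7, 0.3)
gbm.roc.area(obs, pred)   # 0.9375 for these values
# Brute force: fraction of (positive, negative) pairs ranked correctly (ties count 1/2)
pos <- pred[obs == 1]; neg <- pred[obs == 0]
mean(outer(pos, neg, `>`) + 0.5 * outer(pos, neg, `==`))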
2469
70
2571 # Concordance Index:
2672 # Fraction of all pairs (i,j) with i<j, x[i] != x[j], such that x[j] < x[i]
2773 # Invariant: if obs is binary, then
2874 # gbm.roc.area(obs, pred) = gbm.conc(obs[order(-pred)])
2975 # gbm.conc is more general as it allows non-binary targets,
3076 # but is significantly slower
77 #' @rdname gbm.roc.area
78 #' @export
3179 gbm.conc <- function(x)
3280 {
3381 lx <- length(x)
3482 return (sum(mapply(function(r) { sum(x[(r+1):lx]<x[r]) }, 1:(lx-1))))
3583 }
3684
85
86 #' @rdname gbm.roc.area
87 #' @export
3788 ir.measure.conc <- function(y.f, max.rank=0)
3889 {
3990 # Note: max.rank is meaningless for CONC
55106 }
56107 }
57108
109
110 #' @rdname gbm.roc.area
111 #' @export
58112 ir.measure.auc <- function(y.f, max.rank=0)
59113 {
60114 # Note: max.rank is meaningless for AUC
72126 }
73127 }
74128
129
130 #' @rdname gbm.roc.area
131 #' @export
75132 ir.measure.mrr <- function(y.f, max.rank)
76133 {
77134 y <- y.f[[1]]
96153 }
97154 }
98155
156
157 #' @rdname gbm.roc.area
158 #' @export
99159 ir.measure.map <- function(y.f, max.rank=0)
100160 {
101161 # Note: max.rank is meaningless for MAP
116176 return (sum((1:length(idx.pos))/idx.pos) / num.pos)
117177 }
118178
179
180 #' @rdname gbm.roc.area
181 #' @export
119182 ir.measure.ndcg <- function(y.f, max.rank)
120183 {
121184 y <- y.f[[1]]
138201 # Normalize
139202 return (dcg / dcg.max)
140203 }
204
205
206 #' @rdname gbm.roc.area
207 #' @export
208 perf.pairwise <- function(y, f, group, metric="ndcg", w=NULL, max.rank=0)
209 {
210 func.name <- switch(metric,
211 conc = "ir.measure.conc",
212 mrr = "ir.measure.mrr",
213 map = "ir.measure.map",
214 ndcg = "ir.measure.ndcg",
215 stop(paste("Metric",metric,"is not supported"))
216 )
217
218 # Optimization: for binary targets,
219 # AUC is equivalent but faster than CONC
220 if (metric == "conc" && all(is.element(y, 0:1)))
221 {
222 func.name <- "ir.measure.auc"
223 }
224
225 # Max rank = 0 means no cut off
226 if (max.rank <= 0)
227 {
228 max.rank <- length(y)+1
229 }
230
231 # Random tie breaking in case of duplicate scores.
232 # (Without tie breaking, we would overestimate if instances are
233 # sorted descending on target)
234 f <- f + 1E-10 * runif(length(f), min=-0.5, max=0.5)
235
236 measure.by.group <- as.matrix(by(list(y, f), INDICES=group, FUN=get(func.name), max.rank=max.rank))
237
238 # Exclude groups with single result or only negative or positive instances
239 idx <- which((!is.null(measure.by.group)) & measure.by.group >= 0)
240
241 if (is.null(w))
242 {
243 return (mean(measure.by.group[idx]))
244 }
245 else
246 {
247 # Assumption: weights are constant per group
248 w.by.group <- tapply(w, group, mean)
249 return (weighted.mean(measure.by.group[idx], w=w.by.group[idx]))
250 }
251 }
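A hedged toy call to perf.pairwise() with two query groups; the relevance labels and scores below are invented for the example:

y     <- c(1, 0, 0, 1, 1, 0)                  # relevance labels
f     <- c(2.0, 1.0, 0.5, 0.3, 1.5, 0.1)      # model scores
group <- factor(c(1, 1, 1, 2, 2, 2))          # two query groups
perf.pairwise(y, f, group, metric = "ndcg")   # mean NDCG over the groups
perf.pairwise(y, f, group, metric = "conc")   # binary y, so AUC is used internally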
+0
-50
R/permutation.test.gbm.R
0 permutation.test.gbm <- function(object,
1 n.trees)
2 {
3 # get variables used in the model
4 i.vars <- sort(unique(unlist(lapply(object$trees[1:n.trees],
5 function(x){unique(x[[1]])}))))
6 i.vars <- i.vars[i.vars!=-1] + 1
7 rel.inf <- rep(0,length(object$var.names))
8
9 if(!is.null(object$data))
10 {
11 y <- object$data$y
12 os <- object$data$offset
13 Misc <- object$data$Misc
14 w <- object$data$w
15 x <- matrix(object$data$x, ncol=length(object$var.names))
16 object$Terms <- NULL # this makes predict.gbm take x as it is
17
18 if (object$distribution$name == "pairwise")
19 {
20 # group and cutoff are only relevant for distribution "pairwise"
21 # in this case, the last element specifies the max rank
22 # max rank = 0 means no cut off
23 group <- Misc[1:length(y)]
24 max.rank <- Misc[length(y)+1]
25 }
26 }
27 else
28 {
29 stop("Model was fit with keep.data=FALSE. permutation.test.gbm has not been implemented for that case.")
30 }
31
32 # the index shuffler
33 j <- sample(1:nrow(x))
34 for(i in 1:length(i.vars))
35 {
36 x[ ,i.vars[i]] <- x[j,i.vars[i]]
37
38 new.pred <- predict.gbm(object,newdata=x,n.trees=n.trees)
39 rel.inf[i.vars[i]] <- gbm.loss(y,new.pred,w,os,
40 object$distribution,
41 object$train.error[n.trees],
42 group,
43 max.rank)
44
45 x[j,i.vars[i]] <- x[ ,i.vars[i]]
46 }
47
48 return(rel.inf=rel.inf)
49 }
0 plot.gbm <- function(x,
1 i.var=1,
2 n.trees=x$n.trees,
3 continuous.resolution=100,
4 return.grid=FALSE,
5 type="link",
6 ...)
7 {
8 if (!is.element(type, c("link", "response"))){
9 stop( "type must be either 'link' or 'response'")
10 }
11
12 if(all(is.character(i.var)))
13 {
14 i <- match(i.var,x$var.names)
15 if(any(is.na(i)))
16 {
17 stop("Plot variables not used in gbm model fit: ",i.var[is.na(i)])
18 } else
19 {
20 i.var <- i
21 }
22 }
23
24 if((min(i.var)<1) || (max(i.var)>length(x$var.names)))
25 {
26 warning("i.var must be between 1 and ",length(x$var.names))
27 }
28 if(n.trees > x$n.trees)
29 {
30 warning(paste("n.trees exceeds the number of trees in the model, ",x$n.trees,
31 ". Plotting using ",x$n.trees," trees.",sep=""))
32 n.trees <- x$n.trees
33 }
34
35 if(length(i.var) > 3)
36 {
37 warning("gbm.int.plot creates up to 3-way interaction plots.\nplot.gbm will only return the plotting data structure.")
38 return.grid = TRUE
39 }
40
41 # generate grid to evaluate gbm model
42 grid.levels <- vector("list",length(i.var))
43 for(i in 1:length(i.var))
44 {
45 # continuous
46 if(is.numeric(x$var.levels[[i.var[i]]]))
47 {
48 grid.levels[[i]] <- seq(min(x$var.levels[[i.var[i]]]),
49 max(x$var.levels[[i.var[i]]]),
50 length=continuous.resolution)
51 }
52 # categorical or ordered
53 else
54 {
55 grid.levels[[i]] <- as.numeric(factor(x$var.levels[[i.var[i]]],
56 levels=x$var.levels[[i.var[i]]]))-1
57 }
58 }
59
60 X <- expand.grid(grid.levels)
61 names(X) <- paste("X",1:length(i.var),sep="")
62
63 # Next if block for compatibility with objects created with 1.6
64 if (is.null(x$num.classes)){
65 x$num.classes <- 1
66 }
67
68 # evaluate at each data point
69 y <- .Call("gbm_plot",
70 X = as.double(data.matrix(X)),
71 cRows = as.integer(nrow(X)),
72 cCols = as.integer(ncol(X)),
73 n.class = as.integer(x$num.classes),
74 i.var = as.integer(i.var-1),
75 n.trees = as.integer(n.trees) ,
76 initF = as.double(x$initF),
77 trees = x$trees,
78 c.splits = x$c.splits,
79 var.type = as.integer(x$var.type),
80 PACKAGE = "gbm")
81
82 if (x$distribution$name=="multinomial")
83 {
84 ## Put result into matrix form
85 X$y <- matrix(y, ncol = x$num.classes)
86 colnames(X$y) <- x$classes
87
88 ## Use class probabilities
89 if (type=="response"){
90 X$y <- exp(X$y)
91 X$y <- X$y / matrix(rowSums(X$y), ncol=ncol(X$y), nrow=nrow(X$y))
92 }
93 }
94 else if(is.element(x$distribution$name, c("bernoulli", "pairwise")) && type=="response") {
95 X$y <- 1/(1+exp(-y))
96 }
97 else if ((x$distribution$name=="poisson") && (type=="response")){
98 X$y <- exp(y)
99 }
100 else if (type=="response"){
101 warning("type 'response' only implemented for 'bernoulli', 'poisson', 'multinomial', and 'pairwise'. Ignoring" )
102 }
103 else { X$y <- y }
104
105 # transform categorical variables back to factors
106 f.factor <- rep(FALSE,length(i.var))
107 for(i in 1:length(i.var))
108 {
109 if(!is.numeric(x$var.levels[[i.var[i]]]))
110 {
111 X[,i] <- factor(x$var.levels[[i.var[i]]][X[,i]+1],
112 levels=x$var.levels[[i.var[i]]])
113 f.factor[i] <- TRUE
114 }
115 }
116
117 if(return.grid)
118 {
119 names(X)[1:length(i.var)] <- x$var.names[i.var]
120 return(X)
121 }
122
123 # create the plots
124 if(length(i.var)==1)
125 {
126 if(!f.factor)
127 {
128 j <- order(X$X1)
129
130 if (x$distribution$name == "multinomial") {
131 if ( type == "response" ){
132 ylabel <- "Predicted class probability"
133 }
134 else { ylabel <- paste("f(",x$var.names[i.var],")",sep="") }
135 plot(range(X$X1), range(X$y), type = "n", xlab = x$var.names[i.var],
136 ylab = ylabel)
137
138 for (ii in 1:x$num.classes){
139 lines(X$X1,X$y[,ii],
140 xlab=x$var.names[i.var],
141 ylab=paste("f(",x$var.names[i.var],")",sep=""),
142 col = ii, ...)
143 }
144 }
145 else if (is.element(x$distribution$name, c("bernoulli", "pairwise"))) {
146 if ( type == "response" ){
147 ylabel <- "Predicted probability"
148 }
149 else {
150 ylabel <- paste("f(",x$var.names[i.var],")",sep="")
151 }
152 plot( X$X1, X$y , type = "l", xlab = x$var.names[i.var], ylab=ylabel )
153 }
154 else if ( x$distribution$name == "poisson" ){
155 if (type == "response" ){
156 ylabel <- "Predicted count"
157 }
158 else{
159 ylabel <- paste("f(",x$var.names[i.var],")",sep="")
160 }
161 plot( X$X1, X$y , type = "l", xlab = x$var.names[i.var], ylab=ylabel )
162 }
163 else {
164 plot(X$X1,X$y,
165 type="l",
166 xlab=x$var.names[i.var],
167 ylab=paste("f(",x$var.names[i.var],")",sep=""),...)
168 }
169 }
170 else
171 {
172 if (x$distribution$name == "multinomial") {
173 nX <- length(X$X1)
174 dim.y <- dim(X$y)
175 if (type == "response" ){
176 ylabel <- "Predicted probability"
177 }
178 else{ ylabel <- paste("f(",x$var.names[i.var],")",sep="") }
179
180 plot(c(0,nX), range(X$y), axes = FALSE, type = "n",
181 xlab = x$var.names[i.var], ylab = ylabel)
182 axis(side = 1, labels = FALSE, at = 0:nX)
183 axis(side = 2)
184
185 mtext(as.character(X$X1), side = 1, at = 1:nX - 0.5)
186
187 segments(x1 = rep(1:nX - 0.75, each = dim.y[2]), y1 = as.vector(t(X$y)),
188 x2 = rep(1:nX - 0.25, each = dim.y[2]), col = 1:dim.y[2])
189 }
190 else if (is.element(x$distribution$name, c("bernoulli", "pairwise")) && type == "response" ){
191 ylabel <- "Predicted probability"
192 plot( X$X1, X$y, type = "l", xlab=x$var.names[i.var], ylab=ylabel )
193 }
194 else if ( x$distribution$name == "poisson" & type == "response" ){
195 ylabel <- "Predicted count"
196 plot( X$X1, X$y, type = "l", xlab=x$var.names[i.var], ylab=ylabel )
197 }
198 else {
199 plot(X$X1,X$y,
200 type="l",
201 xlab=x$var.names[i.var],
202 ylab=paste("f(",x$var.names[i.var],")",sep=""),...)
203 }
204 }
205 }
206 else if(length(i.var)==2)
207 {
208 if(!f.factor[1] && !f.factor[2])
209 {
210 if (x$distribution$name == "multinomial")
211 {
212 for (ii in 1:x$num.classes){
213 X$temp <- X$y[, ii]
214 print(levelplot(temp~X1*X2,data=X,
215 xlab=x$var.names[i.var[1]],
216 ylab=x$var.names[i.var[2]],...))
217 title(paste("Class:", dimnames(X$y)[[2]][ii]))
218 }
219 X$temp <- NULL
220 }
221 else {
222 print(levelplot(y~X1*X2,data=X,
223 xlab=x$var.names[i.var[1]],
224 ylab=x$var.names[i.var[2]],...))
225 }
226 }
227 else if(f.factor[1] && !f.factor[2])
228 {
229 if (x$distribution$name == "multinomial")
230 {
231 for (ii in 1:x$num.classes){
232 X$temp <- X$y[, ii]
233 print( xyplot(temp~X2|X1,data=X,
234 xlab=x$var.names[i.var[2]],
235 ylab=paste("f(",x$var.names[i.var[1]],",",x$var.names[i.var[2]],")",sep=""),
236 type="l",
237 panel = panel.xyplot,
238 ...) )
239 title(paste("Class:", dimnames(X$y)[[2]][ii]))
240 }
241 X$temp <- NULL
242 }
243 else {
244 print(xyplot(y~X2|X1,data=X,
245 xlab=x$var.names[i.var[2]],
246 ylab=paste("f(",x$var.names[i.var[1]],",",x$var.names[i.var[2]],")",sep=""),
247 type="l",
248 panel = panel.xyplot,
249 ...))
250 }
251 }
252 else if(!f.factor[1] && f.factor[2])
253 {
254 if (x$distribution$name == "multinomial")
255 {
256 for (ii in 1:x$num.classes){
257 X$temp <- X$y[, ii]
258 print( xyplot(temp~X1|X2,data=X,
259 xlab=x$var.names[i.var[1]],
260 ylab=paste("f(",x$var.names[i.var[1]],",",x$var.names[i.var[2]],")",sep=""),
261 type="l",
262 panel = panel.xyplot,
263 ...) )
264 title(paste("Class:", dimnames(X$y)[[2]][ii]))
265 }
266 X$temp <- NULL
267 }
268 else {
269 print(xyplot(y~X1|X2,data=X,
270 xlab=x$var.names[i.var[1]],
271 ylab=paste("f(",x$var.names[i.var[1]],",",x$var.names[i.var[2]],")",sep=""),
272 type="l",
273 panel = panel.xyplot,
274 ...))
275 }
276 }
277 else
278 {
279 if (x$distribution$name == "multinomial")
280 {
281 for (ii in 1:x$num.classes){
282 X$temp <- X$y[, ii]
283 print( stripplot(X1~temp|X2,data=X,
284 xlab=x$var.names[i.var[2]],
285 ylab=paste("f(",x$var.names[i.var[1]],",",x$var.names[i.var[2]],")",sep=""),
286 ...) )
287 title(paste("Class:", dimnames(X$y)[[2]][ii]))
288 }
289 X$temp <- NULL
290 }
291 else {
292 print(stripplot(X1~y|X2,data=X,
293 xlab=x$var.names[i.var[2]],
294 ylab=paste("f(",x$var.names[i.var[1]],",",x$var.names[i.var[2]],")",sep=""),
295 ...))
296 }
297 }
298 }
299 else if(length(i.var)==3)
300 {
301 i <- order(f.factor)
302 X.new <- X[,i]
303 X.new$y <- X$y
304 names(X.new) <- names(X)
305
306 # 0 factor, 3 continuous
307 if(sum(f.factor)==0)
308 {
309 X.new$X3 <- equal.count(X.new$X3)
310 if (x$distribution$name == "multinomial")
311 {
312 for (ii in 1:x$num.classes){
313 X.new$temp <- X.new$y[, ii]
314 print( levelplot(temp~X1*X2|X3,data=X.new,
315 xlab=x$var.names[i.var[i[1]]],
316 ylab=x$var.names[i.var[i[2]]],...) )
317 title(paste("Class:", dimnames(X.new$y)[[2]][ii]))
318 }
319 X.new$temp <- NULL
320 }
321 else {
322 print(levelplot(y~X1*X2|X3,data=X.new,
323 xlab=x$var.names[i.var[i[1]]],
324 ylab=x$var.names[i.var[i[2]]],...))
325 }
326 }
327 # 1 factor, 2 continuous
328 else if(sum(f.factor)==1)
329 {
330 if (x$distribution$name == "multinomial")
331 {
332 for (ii in 1:x$num.classes){
333 X.new$temp <- X.new$y[, ii]
334 print( levelplot(temp~X1*X2|X3,data=X.new,
335 xlab=x$var.names[i.var[i[1]]],
336 ylab=x$var.names[i.var[i[2]]],...))
337 title(paste("Class:", dimnames(X.new$y)[[2]][ii]) )
338 }
339 X.new$temp <- NULL
340 }
341 else {
342 print(levelplot(y~X1*X2|X3,data=X.new,
343 xlab=x$var.names[i.var[i[1]]],
344 ylab=x$var.names[i.var[i[2]]],...))
345 }
346 }
347 # 2 factors, 1 continuous
348 else if(sum(f.factor)==2)
349 {
350 if (x$distribution$name == "multinomial")
351 {
352 for (ii in 1:x$num.classes){
353 X.new$temp <- X.new$y[, ii]
354 print( xyplot(temp~X1|X2*X3,data=X.new,
355 type="l",
356 xlab=x$var.names[i.var[i[1]]],
357 ylab=paste("f(",paste(x$var.names[i.var[1:3]],collapse=","),")",sep=""),
358 panel = panel.xyplot,
359 ...) )
360 title(paste("Class:", dimnames(X.new$y)[[2]][ii]) )
361 }
362 X.new$temp <- NULL
363 }
364 else {
365 print(xyplot(y~X1|X2*X3,data=X.new,
366 type="l",
367 xlab=x$var.names[i.var[i[1]]],
368 ylab=paste("f(",paste(x$var.names[i.var[1:3]],collapse=","),")",sep=""),
369 panel = panel.xyplot,
370 ...))
371 }
372 }
373 # 3 factors, 0 continuous
374 else if(sum(f.factor)==3)
375 {
376 if (x$distribution$name == "multinomial")
377 {
378 for (ii in 1:x$num.classes){
379 X.new$temp <- X.new$y[, ii]
380 print( stripplot(X1~temp|X2*X3,data=X.new,
381 xlab=x$var.names[i.var[i[1]]],
382 ylab=paste("f(",paste(x$var.names[i.var[1:3]],collapse=","),")",sep=""),
383 ...) )
384 title(paste("Class:", dimnames(X.new$y)[[2]][ii]) )
385 }
386 X.new$temp <- NULL
387 }
388 else {
389 print(stripplot(X1~y|X2*X3,data=X.new,
390 xlab=x$var.names[i.var[i[1]]],
391 ylab=paste("f(",paste(x$var.names[i.var[1:3]],collapse=","),")",sep=""),
392 ...))
393 }
394 }
395 }
0 #' Marginal plots of fitted gbm objects
1 #'
2 #' Plots the marginal effect of the selected variables by "integrating" out the
3 #' other variables.
4 #'
5 #' \code{plot.gbm} produces low dimensional projections of the
6 #' \code{\link{gbm.object}} by integrating out the variables not included in
7 #' the \code{i.var} argument. The function selects a grid of points and uses
8 #' the weighted tree traversal method described in Friedman (2001) to do the
9 #' integration. Based on the variable types included in the projection,
10 #' \code{plot.gbm} selects an appropriate display choosing amongst line plots,
11 #' contour plots, and \code{\link[lattice]{lattice}} plots. If the default
12 #' graphics are not sufficient the user may set \code{return.grid=TRUE}, store
13 #' the result of the function, and develop another graphic display more
14 #' appropriate to the particular example.
15 #'
16 #' @param x A \code{\link{gbm.object}} that was fit using a call to
17 #' \code{\link{gbm}}.
18 #'
19 #' @param i.var Vector of indices or the names of the variables to plot. If
20 #' using indices, the variables are indexed in the same order that they appear
21 #' in the initial \code{gbm} formula. If \code{length(i.var)} is between 1 and
22 #' 3 then \code{plot.gbm} produces the plots. Otherwise, \code{plot.gbm}
23 #' returns only the grid of evaluation points and their average predictions.
24 #'
25 #' @param n.trees Integer specifying the number of trees to use to generate the
26 #' plot. Default is to use \code{x$n.trees} (i.e., the entire ensemble).
27 #'
28 #' @param continuous.resolution Integer specifying the number of equally spaced
29 #' points at which to evaluate continuous predictors.
30 #'
31 #' @param return.grid Logical indicating whether to produce graphics
32 #' (\code{FALSE}) or to only return the grid of evaluation points and their
33 #' average predictions (\code{TRUE}). This is useful for customizing the graphics for
34 #' special variable types, or for higher dimensional graphs.
35 #'
36 #' @param type Character string specifying the type of prediction to plot on the
37 #' vertical axis. See \code{\link{predict.gbm}} for details.
38 #'
39 #' @param level.plot Logical indicating whether or not to use a false color
40 #' level plot (\code{TRUE}) or a 3-D surface (\code{FALSE}). Default is
41 #' \code{TRUE}.
42 #'
43 #' @param contour Logical indicating whether or not to add contour lines to the
44 #' level plot. Only used when \code{level.plot = TRUE}. Default is \code{FALSE}.
45 #'
46 #' @param number Integer specifying the number of conditional intervals to use
47 #' for the continuous panel variables. See \code{\link[graphics]{co.intervals}}
48 #' and \code{\link[lattice]{equal.count}} for further details.
49 #'
50 #' @param overlap The fraction of overlap of the conditioning variables. See
51 #' \code{\link[graphics]{co.intervals}} and \code{\link[lattice]{equal.count}}
52 #' for further details.
53 #'
54 #' @param col.regions Color vector to be used if \code{level.plot} is
55 #' \code{TRUE}. Defaults to the wonderful Matplotlib 'viridis' color map
56 #' provided by the \code{viridis} package. See \code{\link[viridis]{viridis}}
57 #' for details.
58 #'
59 #' @param ... Additional optional arguments to be passed onto
60 #' \code{\link[graphics]{plot}}.
61 #'
62 #' @return If \code{return.grid = TRUE}, a grid of evaluation points and their
63 #' average predictions. Otherwise, a plot is returned.
64 #'
65 #' @note More flexible plotting is available using the
66 #' \code{\link[pdp]{partial}} and \code{\link[pdp]{plotPartial}} functions.
67 #'
68 #' @seealso \code{\link[pdp]{partial}}, \code{\link[pdp]{plotPartial}},
69 #' \code{\link{gbm}}, and \code{\link{gbm.object}}.
70 #'
71 #' @references J. H. Friedman (2001). "Greedy Function Approximation: A Gradient
72 #' Boosting Machine," Annals of Statistics 29(5):1189-1232.
73 #'
74 #' @references B. M. Greenwell (2017). "pdp: An R Package for Constructing
75 #' Partial Dependence Plots," The R Journal 9(1), 421--436.
76 #' \url{https://journal.r-project.org/archive/2017/RJ-2017-016/index.html}.
77 #'
78 #' @export plot.gbm
79 #' @export
80 plot.gbm <- function(x, i.var = 1, n.trees = x$n.trees,
81 continuous.resolution = 100, return.grid = FALSE,
82 type = c("link", "response"), level.plot = TRUE,
83 contour = FALSE, number = 4, overlap = 0.1,
84 col.regions = viridis::viridis, ...) {
85
86 # Match type argument
87 type <- match.arg(type)
88
89 # Sanity checks
90 if(all(is.character(i.var))) {
91 i <- match(i.var, x$var.names)
92 if(any(is.na(i))) {
93 stop("Requested variables not found in ", deparse(substitute(x)), ": ",
94 i.var[is.na(i)])
95 } else {
96 i.var <- i
97 }
98 }
99 if((min(i.var) < 1) || (max(i.var) > length(x$var.names))) {
100 warning("i.var must be between 1 and ", length(x$var.names))
101 }
102 if(n.trees > x$n.trees) {
103 warning(paste("n.trees exceeds the number of tree(s) in the model: ",
104 x$n.trees, ". Using ", x$n.trees,
105 " tree(s) instead.", sep = ""))
106 n.trees <- x$n.trees
107 }
108
109 if(length(i.var) > 3) {
110 warning("plot.gbm() will only create up to (and including) 3-way ",
111 "interaction plots.\nBeyond that, plot.gbm() will only return ",
112 "the plotting data structure.")
113 return.grid <- TRUE
114 }
115
116 # Generate grid of predictor values on which to compute the partial
117 # dependence values
118 grid.levels <- vector("list", length(i.var))
119 for(i in 1:length(i.var)) {
120 if(is.numeric(x$var.levels[[i.var[i]]])) { # continuous
121 grid.levels[[i]] <- seq(from = min(x$var.levels[[i.var[i]]]),
122 to = max(x$var.levels[[i.var[i]]]),
123 length = continuous.resolution)
124 } else { # categorical
125 grid.levels[[i]] <-
126 as.numeric(factor(x$var.levels[[i.var[i]]],
127 levels = x$var.levels[[i.var[i]]])) - 1
128 }
129 }
130 X <- expand.grid(grid.levels)
131 names(X) <- paste("X", 1:length(i.var), sep = "")
132
133 # For compatibility with gbm version 1.6
134 if (is.null(x$num.classes)) {
135 x$num.classes <- 1
136 }
137
138 # Compute partial dependence values
139 y <- .Call("gbm_plot", X = as.double(data.matrix(X)),
140 cRows = as.integer(nrow(X)), cCols = as.integer(ncol(X)),
141 n.class = as.integer(x$num.classes),
142 i.var = as.integer(i.var - 1), n.trees = as.integer(n.trees),
143 initF = as.double(x$initF), trees = x$trees,
144 c.splits = x$c.splits, var.type = as.integer(x$var.type),
145 PACKAGE = "gbm")
146
147 if (x$distribution$name == "multinomial") { # reshape into matrix
148 X$y <- matrix(y, ncol = x$num.classes)
149 colnames(X$y) <- x$classes
150
151 # Convert to class probabilities (if requested)
152 if (type == "response") {
153 X$y <- exp(X$y)
154 X$y <- X$y / matrix(rowSums(X$y), ncol = ncol(X$y), nrow = nrow(X$y))
155 }
156 } else if(is.element(x$distribution$name, c("bernoulli", "pairwise")) &&
157 type == "response") {
158 X$y <- 1 / (1 + exp(-y))
159 } else if ((x$distribution$name == "poisson") && (type == "response")) {
160 X$y <- exp(y)
161 } else if (type == "response"){
162 warning("`type = \"response\"` only implemented for \"bernoulli\", ",
163 "\"poisson\", \"multinomial\", and \"pairwise\" distributions. ",
164 "Ignoring." )
165 } else {
166 X$y <- y
167 }
168
169 # Transform categorical variables back to factors
170 f.factor <- rep(FALSE, length(i.var))
171 for(i in 1:length(i.var)) {
172 if(!is.numeric(x$var.levels[[i.var[i]]])) {
173 X[,i] <- factor(x$var.levels[[i.var[i]]][X[, i] + 1],
174 levels = x$var.levels[[i.var[i]]])
175 f.factor[i] <- TRUE
176 }
177 }
178
179 # Return original variable names
180 names(X)[1:length(i.var)] <- x$var.names[i.var]
181
182 # Return grid only (if requested)
183 if(return.grid) {
184 return(X)
185 }
186
187 # Determine number of predictors
188 nx <- length(i.var)
189
190 # Determine which type of plot to draw based on the number of predictors
191 if (nx == 1L) {
192
193 # Single predictor
194 plotOnePredictorPDP(X, ...)
195
196 } else if (nx == 2) {
197
198 # Two predictors
199 plotTwoPredictorPDP(X, level.plot = level.plot, contour = contour,
200 col.regions = col.regions, ...)
201
202 } else {
203
204 # Three predictors (paneled version of plotTwoPredictorPDP)
205 plotThreePredictorPDP(X, nx = nx, level.plot = level.plot,
206 contour = contour, col.regions = col.regions,
207 number = number, overlap = overlap, ...)
208
209 }
210
396211 }
212
213
214 #' @keywords internal
215 plotOnePredictorPDP <- function(X, ...) {
216
217 # Use the first column to determine which type of plot to construct
218 if (is.numeric(X[[1L]])) {
219
220 # Draw a line plot
221 lattice::xyplot(stats::as.formula(paste("y ~", names(X)[1L])),
222 data = X, type = "l", ...)
223
224 } else {
225
226 # Draw a Cleveland dot plot
227 lattice::dotplot(stats::as.formula(paste("y ~", names(X)[1L])),
228 data = X, xlab = names(X)[1L], ...)
229
230 }
231 }
232
233
234 #' @keywords internal
235 plotTwoPredictorPDP <- function(X, level.plot, contour, col.regions, ...) {
236
237 # Use the first two columns to determine which type of plot to construct
238 if (is.factor(X[[1L]]) && is.factor(X[[2L]])) {
239
240 # Draw a Cleveland dot plot
241 lattice::dotplot(stats::as.formula(
242 paste("y ~", paste(names(X)[1L:2L], collapse = "|"))
243 ), data = X, xlab = names(X)[1L], ...)
244
245 } else if (is.factor(X[[1L]]) || is.factor(X[[2L]])) {
246
247 # Lattice plot formula
248 form <- if (is.factor(X[[1L]])) {
249 stats::as.formula(paste("y ~", paste(names(X)[2L:1L], collapse = "|")))
250 } else {
251 stats::as.formula(paste("y ~", paste(names(X)[1L:2L], collapse = "|")))
252 }
253
254 # Draw a paneled line plot
255 lattice::xyplot(form, data = X, type = "l", ...)
256
257 } else {
258
259 # Lattice plot formula
260 form <- stats::as.formula(
261 paste("y ~", paste(names(X)[1L:2L], collapse = "*"))
262 )
263
264 # Draw a three-dimensional surface
265 if (level.plot) {
266
267 # Draw a false color level plot
268 lattice::levelplot(form, data = X, col.regions = col.regions,
269 contour = contour, ...)
270
271 } else {
272
273 # Draw a wireframe plot
274 lattice::wireframe(form, data = X, ...)
275
276 }
277
278 }
279 }
280
281
282 #' @keywords internal
283 plotThreePredictorPDP <- function(X, nx, level.plot, contour, col.regions,
284 number, overlap, ...) {
285
286 # Factor, numeric, numeric
287 if (is.factor(X[[1L]]) && !is.factor(X[[2L]]) && !is.factor(X[[3L]])) {
288 X[, 1L:3L] <- X[, c(2L, 3L, 1L)]
289 }
290
291 # Numeric, factor, numeric
292 if (!is.factor(X[[1L]]) && is.factor(X[[2L]]) && !is.factor(X[[3L]])) {
293 X[, 1L:3L] <- X[, c(1L, 3L, 2L)]
294 }
295
296 # Factor, factor, numeric
297 if (is.factor(X[[1L]]) && is.factor(X[[2L]]) && !is.factor(X[[3L]])) {
298 X[, 1L:3L] <- X[, c(3L, 1L, 2L)]
299 }
300
301 # Factor, numeric, factor
302 if (is.factor(X[[1L]]) && !is.factor(X[[2L]]) && is.factor(X[[3L]])) {
303 X[, 1L:3L] <- X[, c(2L, 1L, 3L)]
304 }
305
306 # Convert third predictor to a factor using the equal count algorithm
307 if (is.numeric(X[[3L]])) {
308 X[[3L]] <- equal.count(X[[3L]], number = number, overlap = overlap)
309 }
310
311 if (is.factor(X[[1L]]) && is.factor(X[[2L]])) {
312
313 # Lattice plot formula
314 form <- stats::as.formula(
315 paste("y ~", names(X)[1L], "|", paste(names(X)[2L:nx], collapse = "*"))
316 )
317
318 # Produce a paneled dotplot
319 lattice::dotplot(form, data = X, xlab = names(X)[1L], ...)
320
321 } else if (is.numeric(X[[1L]]) && is.factor(X[[2L]])) {
322
323 # Lattice plot formula
324 form <- stats::as.formula(
325 paste("y ~", names(X)[1L], "|", paste(names(X)[2L:nx], collapse = "*"))
326 )
327
328 # Produce a paneled lineplot
329 lattice::xyplot(form, data = X, type = "l", ...)
330
331 } else {
332
333 # Lattice plot formula
334 form <- stats::as.formula(
335 paste("y ~", paste(names(X)[1L:2L], collapse = "*"), "|",
336 paste(names(X)[3L:nx], collapse = "*"))
337 )
338
339 # Draw a three-dimensional surface
340 if (level.plot) {
341
342 # Draw a false color level plot
343 lattice::levelplot(form, data = X, col.regions = col.regions,
344 contour = contour, ...)
345
346 } else {
347
348 # Draw a wireframe plot
349 lattice::wireframe(form, data = X, ...)
350
351 }
352
353 }
354
355 }
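A short usage sketch of the plotting interface defined above. It assumes the simulated fit, dat, and best.cv objects from the gbm.perf sketch earlier in this diff, plus the suggested viridis package for the default color map:

plot(fit, i.var = "x1", n.trees = best.cv)            # one-predictor partial dependence
plot(fit, i.var = c("x1", "x2"), n.trees = best.cv)   # two predictors: false color level plot
grid <- plot(fit, i.var = 1:2, n.trees = best.cv, return.grid = TRUE)
head(grid)                                            # evaluation grid plus averaged predictions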
0 #' Predict method for GBM Model Fits
1 #'
2 #' Predicted values based on a generalized boosted model object
3 #'
4 #' \code{predict.gbm} produces predicted values for each observation in
5 #' \code{newdata} using the first \code{n.trees} iterations of the boosting
6 #' sequence. If \code{n.trees} is a vector then the result is a matrix with
7 #' each column representing the predictions from gbm models with
8 #' \code{n.trees[1]} iterations, \code{n.trees[2]} iterations, and so on.
9 #'
10 #' The predictions from \code{gbm} do not include the offset term. The user may
11 #' add the value of the offset to the predicted value if desired.
12 #'
13 #' If \code{object} was fit using \code{\link{gbm.fit}} there will be no
14 #' \code{Terms} component. Therefore, the user has greater responsibility to
15 #' make sure that \code{newdata} is of the same format (order and number of
16 #' variables) as the one originally used to fit the model.
17 #'
18 #' @param object Object of class inheriting from (\code{\link{gbm.object}})
19 #'
20 #' @param newdata Data frame of observations for which to make predictions
21 #'
22 #' @param n.trees Number of trees used in the prediction. \code{n.trees} may be
23 #' a vector in which case predictions are returned for each iteration specified
24 #'
25 #' @param type The scale on which gbm makes the predictions
26 #'
27 #' @param single.tree If \code{single.tree=TRUE} then \code{predict.gbm}
28 #' returns only the predictions from tree(s) \code{n.trees}
29 #'
30 #' @param \dots further arguments passed to or from other methods
31 #'
32 #' @return Returns a vector of predictions. By default the predictions are on
33 #' the scale of f(x). For example, for the Bernoulli loss the returned value is
34 #' on the log odds scale, poisson loss on the log scale, and coxph is on the
35 #' log hazard scale.
36 #'
37 #' If \code{type="response"} then \code{gbm} converts back to the same scale as
38 #' the outcome. Currently the only effect this will have is returning
39 #' probabilities for bernoulli and expected counts for poisson. For the other
40 #' distributions "response" and "link" return the same.
41 #'
42 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
43 #'
44 #' @seealso \code{\link{gbm}}, \code{\link{gbm.object}}
45 #'
46 #' @keywords models regression
47 #'
48 #' @export predict.gbm
49 #' @export
050 predict.gbm <- function(object,newdata,n.trees,
151 type="link",
252 single.tree = FALSE,
4696 if (!identical(object$var.levels[[i]], new.compare)) {
4797 x[,i] <- factor(x[,i], union(object$var.levels[[i]], levels(x[,i])))
4898 }
49 x[,i] <- as.numeric(x[,i])-1
99 x[,i] <- as.numeric(factor(x[,i]))-1
50100 }
51101 }
52102
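A hedged example of the prediction interface documented above, again assuming the simulated fit, dat, and best.cv objects from the earlier sketch:

# One column of predictions per element of n.trees
p <- predict(fit, newdata = dat, n.trees = c(50, 100, best.cv))
dim(p)
# Response scale; identical to the link scale for gaussian, but gives
# probabilities for bernoulli and expected counts for poisson
p.resp <- predict(fit, newdata = dat, n.trees = best.cv, type = "response")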
0 #' Print gbm tree components
1 #'
2 #' \code{gbm} stores the collection of trees used to construct the model in a
3 #' compact matrix structure. This function extracts the information from a
4 #' single tree and displays it in a slightly more readable form. This function
5 #' is mostly for debugging purposes and to satisfy some users' curiosity.
6 #'
7 #'
8 #' @param object a \code{\link{gbm.object}} initially fit using
9 #' \code{\link{gbm}}
10 #' @param i.tree the index of the tree component to extract from \code{object}
11 #' and display
12 #' @return \code{pretty.gbm.tree} returns a data frame. Each row corresponds to
13 #' a node in the tree. Columns indicate \item{SplitVar}{index of which variable
14 #' is used to split. -1 indicates a terminal node.} \item{SplitCodePred}{if the
15 #' split variable is continuous then this component is the split point. If the
16 #' split variable is categorical then this component contains the index of
17 #' \code{object$c.split} that describes the categorical split. If the node is a
18 #' terminal node then this is the prediction.} \item{LeftNode}{the index of the
19 #' row corresponding to the left node.} \item{RightNode}{the index of the row
20 #' corresponding to the right node.} \item{ErrorReduction}{the reduction in the
21 #' loss function as a result of splitting this node.} \item{Weight}{the total
22 #' weight of observations in the node. If weights are all equal to 1 then this
23 #' is the number of observations in the node.}
24 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
25 #' @seealso \code{\link{gbm}}, \code{\link{gbm.object}}
26 #' @keywords print
27 #' @export pretty.gbm.tree
028 pretty.gbm.tree <- function(object,i.tree=1)
129 {
230 if((i.tree<1) || (i.tree>length(object$trees)))
0 # print, show and summary functions for gbm
0 #' Print model summary
1 #'
2 #' Display basic information about a \code{gbm} object.
3 #'
4 #' Prints some information about the model object. In particular, this method
5 #' prints the call to \code{gbm()}, the type of loss function that was used,
6 #' and the total number of iterations.
7 #'
8 #' If cross-validation was performed, the 'best' number of trees as estimated
9 #' by cross-validation error is displayed. If a test set was used, the 'best'
10 #' number of trees as estimated by the test set error is displayed.
11 #'
12 #' The number of available predictors, and the number of those having non-zero
13 #' influence on predictions is given (which might be interesting in data mining
14 #' applications).
15 #'
16 #' If multinomial, bernoulli or adaboost was used, the confusion matrix and
17 #' prediction accuracy are printed (objects being allocated to the class with
18 #' highest probability for multinomial and bernoulli). These classifications
19 #' are performed on the entire training data using the model with the 'best'
20 #' number of trees as described above, or the maximum number of trees if the
21 #' 'best' cannot be computed.
22 #'
23 #' If the 'distribution' was specified as gaussian, laplace, quantile or
24 #' t-distribution, a summary of the residuals is displayed. The residuals are
25 #' for the training data with the model at the 'best' number of trees, as
26 #' described above, or the maximum number of trees if the 'best' cannot be
27 #' computed.
28 #'
29 #' @aliases print.gbm show.gbm
30 #' @param x an object of class \code{gbm}.
31 #' @param \dots arguments passed to \code{print.default}.
32 #' @author Harry Southworth, Daniel Edwards
33 #' @seealso \code{\link{gbm}}
34 #' @keywords models nonlinear survival nonparametric
35 #' @examples
36 #'
37 #' data(iris)
38 #' iris.mod <- gbm(Species ~ ., distribution="multinomial", data=iris,
39 #' n.trees=2000, shrinkage=0.01, cv.folds=5,
40 #' verbose=FALSE, n.cores=1)
41 #' iris.mod
42 #' #data(lung)
43 #' #lung.mod <- gbm(Surv(time, status) ~ ., distribution="coxph", data=lung,
44 #' # n.trees=2000, shrinkage=0.01, cv.folds=5,verbose =FALSE)
45 #' #lung.mod
146
47 #' @rdname print.gbm
48 #' @export
249 print.gbm <- function(x, ... )
350 {
451 if (!is.null(x$call)){ print(x$call) }
3885 invisible()
3986 }
4087
88
89 #' @rdname print.gbm
90 #'
91 #' @export
4192 show.gbm <- print.gbm
4293
94
95 #' Summary of a gbm object
96 #'
97 #' Computes the relative influence of each variable in the gbm object.
98 #'
99 #' For \code{distribution="gaussian"} this returns exactly the reduction of
100 #' squared error attributable to each variable. For other loss functions this
101 #' returns the reduction attributable to each variable in sum of squared error
102 #' in predicting the gradient on each iteration. It describes the relative
103 #' influence of each variable in reducing the loss function. See the references
104 #' below for exact details on the computation.
105 #'
106 #' @param object a \code{gbm} object created from an initial call to
107 #' \code{\link{gbm}}.
109 #' @param cBars the number of bars to plot. If \code{order=TRUE} then only the
109 #' variables with the \code{cBars} largest relative influence will appear in
110 #' the barplot. If \code{order=FALSE} then the first \code{cBars} variables
111 #' will appear in the plot. In either case, the function will return the
112 #' relative influence of all of the variables.
113 #' @param n.trees the number of trees used to generate the plot. Only the first
114 #' \code{n.trees} trees will be used.
115 #' @param plotit an indicator as to whether the plot is generated.
116 #' @param order an indicator as to whether the plotted and/or returned relative
117 #' influences are sorted.
118 #' @param method The function used to compute the relative influence.
119 #' \code{\link{relative.influence}} is the default and is the same as that
120 #' described in Friedman (2001). The other current (and experimental) choice is
121 #' \code{\link{permutation.test.gbm}}. This method randomly permutes each
122 #' predictor variable at a time and computes the associated reduction in
123 #' predictive performance. This is similar to the variable importance measures
124 #' Breiman uses for random forests, but \code{gbm} currently computes using the
125 #' entire training dataset (not the out-of-bag observations).
126 #' @param normalize if \code{FALSE} then \code{summary.gbm} returns the
127 #' unnormalized influence.
128 #' @param ... other arguments passed to the plot function.
129 #' @return Returns a data frame where the first component is the variable name
130 #' and the second is the computed relative influence, normalized to sum to 100.
131 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
132 #' @seealso \code{\link{gbm}}
133 #' @references J.H. Friedman (2001). "Greedy Function Approximation: A Gradient
134 #' Boosting Machine," Annals of Statistics 29(5):1189-1232.
135 #'
136 #' L. Breiman
137 #' (2001).\url{https://www.stat.berkeley.edu/users/breiman/randomforest2001.pdf}.
138 #' @keywords hplot
139 #'
140 #' @export summary.gbm
141 #' @export
43142 summary.gbm <- function(object,
44143 cBars=length(object$var.names),
45144 n.trees=object$n.trees,
0 #' Reconstruct a GBM's Source Data
1 #'
2 #' Helper function to reconstitute the data for plots and summaries. This
3 #' function is not intended for the user to call directly.
4 #'
5 #'
6 #' @param x a \code{\link{gbm.object}} initially fit using \code{\link{gbm}}
7 #' @return Returns the data used to fit the gbm in a format that can subsequently
8 #' be used for plots and summaries
9 #' @author Harry Southworth
10 #' @seealso \code{\link{gbm}}, \code{\link{gbm.object}}
11 #' @keywords manip
12 #' @export
013 reconstructGBMdata <- function(x)
114 {
215 if(class(x) != "gbm")
0 #' Methods for estimating relative influence
1 #'
2 #' Helper functions for computing the relative influence of each variable in
3 #' the gbm object.
4 #'
5 #' @details
6 #' This is not intended for end-user use. These functions offer the different
7 #' methods for computing the relative influence in \code{\link{summary.gbm}}.
8 #' \code{gbm.loss} is a helper function for \code{permutation.test.gbm}.
9 #'
10 #' @aliases relative.influence permutation.test.gbm gbm.loss
11 #'
12 #' @param object a \code{gbm} object created from an initial call to
13 #' \code{\link{gbm}}.
14 #'
15 #' @param n.trees the number of trees to use for computations. If not provided,
16 #' the function will guess: if a test set was used in fitting, the number
17 #' of trees resulting in lowest test set error will be used; otherwise, if
18 #' cross-validation was performed, the number of trees resulting in lowest
19 #' cross-validation error will be used; otherwise, all trees will be used.
20 #'
21 #' @param scale. whether or not the result should be scaled. Defaults to
22 #' \code{FALSE}.
23 #'
24 #' @param sort. whether or not the results should be (reverse) sorted.
25 #' Defaults to \code{FALSE}.
26 #'
27 #' @param y,f,w,offset,dist,baseline For \code{gbm.loss}: These components are
28 #' the outcome, predicted value, observation weight, offset, distribution, and
29 #' comparison loss function, respectively.
30 #'
31 #' @param group,max.rank Used internally when \code{distribution =
32 #' "pairwise"}.
33 #'
34 #' @return By default, returns an unprocessed vector of estimated relative
35 #' influences. If the \code{scale.} and \code{sort.} arguments are used,
36 #' returns a processed version of the same.
37 #'
38 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
39 #'
40 #' @seealso \code{\link{summary.gbm}}
41 #'
42 #' @references J.H. Friedman (2001). "Greedy Function Approximation: A Gradient
43 #' Boosting Machine," Annals of Statistics 29(5):1189-1232.
44 #'
45 #' L. Breiman (2001).
46 #' \url{https://www.stat.berkeley.edu/users/breiman/randomforest2001.pdf}.
47 #'
48 #' @keywords hplot
49 #'
50 #' @rdname relative.influence
51 #'
52 #' @export
053 relative.influence <- function(object,
154 n.trees,
255 scale. = FALSE,
4699
47100 return(rel.inf=rel.inf)
48101 }
102
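# A minimal usage sketch (illustrative only; the simulated data and the
# n.trees value are arbitrary): the two importance methods applied to a small
# gaussian fit. permutation.test.gbm() needs the fit to retain its training
# data (keep.data = TRUE, the default).
set.seed(1)
dat <- data.frame(x1 = runif(300), x2 = runif(300), x3 = runif(300))
dat$y <- 2 * dat$x1 + rnorm(300, sd = 0.1)
fit <- gbm(y ~ x1 + x2 + x3, data = dat, distribution = "gaussian",
           n.trees = 200)
relative.influence(fit, n.trees = 200, scale. = TRUE, sort. = TRUE)
permutation.test.gbm(fit, n.trees = 200)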
103
104 #' @rdname relative.influence
105 #' @export
106 permutation.test.gbm <- function(object,
107 n.trees)
108 {
109 # get variables used in the model
110 i.vars <- sort(unique(unlist(lapply(object$trees[1:n.trees],
111 function(x){unique(x[[1]])}))))
112 i.vars <- i.vars[i.vars!=-1] + 1
113 rel.inf <- rep(0,length(object$var.names))
114
115 if(!is.null(object$data))
116 {
117 y <- object$data$y
118 os <- object$data$offset
119 Misc <- object$data$Misc
120 w <- object$data$w
121 x <- matrix(object$data$x, ncol=length(object$var.names))
122 object$Terms <- NULL # this makes predict.gbm take x as it is
123
124 if (object$distribution$name == "pairwise")
125 {
126 # group and cutoff are only relevant for distribution "pairwise"
127 # in this case, the last element specifies the max rank
128 # max rank = 0 means no cut off
129 group <- Misc[1:length(y)]
130 max.rank <- Misc[length(y)+1]
131 }
132 }
133 else
134 {
135 stop("Model was fit with keep.data=FALSE. permutation.test.gbm has not been implemented for that case.")
136 }
137
138 # the index shuffler
139 j <- sample(1:nrow(x))
140 for(i in 1:length(i.vars))
141 {
142 x[ ,i.vars[i]] <- x[j,i.vars[i]]
143
144 new.pred <- predict.gbm(object,newdata=x,n.trees=n.trees)
145 rel.inf[i.vars[i]] <- gbm.loss(y,new.pred,w,os,
146 object$distribution,
147 object$train.error[n.trees],
148 group,
149 max.rank)
150
151 x[j,i.vars[i]] <- x[ ,i.vars[i]]
152 }
153
154 return(rel.inf=rel.inf)
155 }
156
157
158 #' @rdname relative.influence
159 #' @export
160 gbm.loss <- function(y, f, w, offset, dist, baseline, group=NULL, max.rank=NULL)
161 {
162 if (!all(is.na(offset)))
163 {
164 f <- offset+f
165 }
166
167 if (dist$name != "pairwise")
168 {
169 switch(dist$name,
170 gaussian = weighted.mean((y - f)^2,w) - baseline,
171 bernoulli = -2*weighted.mean(y*f - log(1+exp(f)),w) - baseline,
172 laplace = weighted.mean(abs(y-f),w) - baseline,
173 adaboost = weighted.mean(exp(-(2*y-1)*f),w) - baseline,
174 poisson = -2*weighted.mean(y*f-exp(f),w) - baseline,
175 stop(paste("Distribution",dist$name,"is not yet supported for method=permutation.test.gbm")))
176 }
177 else # dist$name == "pairwise"
178 {
179 if (is.null(dist$metric))
180 {
181 stop("No metric specified for distribution 'pairwise'")
182 }
183 if (!is.element(dist$metric, c("conc", "ndcg", "map", "mrr")))
184 {
185 stop("Invalid metric '", dist$metric, "' specified for distribution 'pairwise'")
186 }
187 if (is.null(group))
188 {
189 stop("For distribution 'pairwise', parameter 'group' has to be supplied")
190 }
191 # Loss = 1 - utility
192 (1 - perf.pairwise(y, f, group, dist$metric, w, max.rank)) - baseline
193 }
194 }
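
# Quick illustration (the values are arbitrary): for dist$name == "gaussian",
# gbm.loss() above reduces to the weighted mean squared error minus `baseline`.
y <- c(1, 0, 2); f <- c(0.5, 0.2, 1.5); w <- rep(1, 3)
gbm.loss(y, f, w, offset = NA, dist = list(name = "gaussian"), baseline = 0)
# identical to weighted.mean((y - f)^2, w), i.e. 0.18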
00 # evaluates the objective function and gradient with respect to beta
11 # beta = log(lambda/(1-lambda))
2
3 #' L1 shrinkage of the predictor variables in a GBM
4 #'
5 #' Performs recursive shrinkage in each of the trees in a GBM fit using
6 #' different shrinkage parameters for each variable.
7 #'
8 #' This function is currently experimental. Used in conjunction with a gradient
9 #' ascent search for inclusion of variables.
10 #'
11 #' @param object A \code{\link{gbm.object}}.
12 #'
13 #' @param n.trees Integer specifying the number of trees to use.
14 #'
15 #' @param lambda Vector of length equal to the number of variables containing
16 #' the shrinkage parameter for each variable.
17 #'
18 #' @param \dots Additional optional arguments. (Currently ignored.)
19 #'
20 #' @return \item{predF}{Predicted values from the shrunken tree}
21 #' \item{objective}{The value of the loss function associated with the
22 #' predicted values} \item{gradient}{A vector with length equal to the number
23 #' of variables containing the derivative of the objective function with
24 #' respect to beta, the logit transform of the shrinkage parameter for each
25 #' variable}
26 #'
27 #' @note Warning: This function is experimental.
28 #'
29 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
30 #'
31 #' @seealso \code{\link{shrink.gbm.pred}}, \code{\link{gbm}}
32 #'
33 #' @references Hastie, T. J., and Pregibon, D.
34 #' \url{https://web.stanford.edu/~hastie/Papers/shrink_tree.pdf}. AT&T Bell
35 #' Laboratories Technical Report (March 1990).
36 #'
37 #' @keywords methods
38 #'
39 #' @export
240 shrink.gbm <- function(object,n.trees,
341 lambda=rep(10,length(object$var.names)),
442 ...)
0 #' Predictions from a shrunken GBM
1 #'
2 #' Makes predictions from a shrunken GBM model.
3 #'
4 #' @param object a \code{\link{gbm.object}}
5 #' @param newdata dataset for predictions
6 #' @param n.trees the number of trees to use
7 #' @param lambda a vector with length equal to the number of variables
8 #' containing the shrinkage parameter for each variable
9 #' @param \dots other parameters (ignored)
10 #' @return A vector with length equal to the number of observations in newdata
11 #' containing the predictions
12 #' @section Warning: This function is experimental
13 #' @author Greg Ridgeway \email{gregridgeway@@gmail.com}
14 #' @seealso \code{\link{shrink.gbm}}, \code{\link{gbm}}
15 #' @keywords methods
16 #' @export
017 shrink.gbm.pred <- function(object,newdata,n.trees,
118 lambda=rep(1,length(object$var.names)),
219 ...)
0 #' Test the \code{gbm} package.
1 #'
2 #' Run tests on \code{gbm} functions to perform logical checks and
3 #' reproducibility.
4 #'
5 #' The function uses functionality in the \code{RUnit} package. A fairly small
6 #' validation suite is executed that checks to see that relative influence
7 #' identifies sensible variables from simulated data, and that predictions from
8 #' GBMs with Gaussian, Cox or binomial distributions are sensible.
9 #'
10 #' @aliases validate.gbm test.gbm test.relative.influence
11 #' @return An object of class \code{RUnitTestData}. See the help for
12 #' \code{RUnit} for details.
13 #' @note The test suite is not comprehensive.
14 #' @author Harry Southworth
15 #' @seealso \code{\link{gbm}}
16 #' @keywords models
17 #' @examples
18 #'
19 #' # Uncomment the following lines to run - commented out to make CRAN happy
20 #' #library(RUnit)
21 #' #val <- validate.gbm()
22 #' #printHTMLProtocol(val, "gbmReport.html")
23 #'
24 #' @export
025 test.gbm <- function(){
126 # Based on example in R package
227 # Gaussian example
102127 data <- data.frame(tt=tt,delta=delta,X1=X1,X2=X2,X3=X3)
103128
104129 # fit initial model
105 gbm1 <- gbm(Surv(tt,delta)~X1+X2+X3, # formula
130 gbm1 <- gbm(Surv(tt,delta)~X1+X2+X3, # formula
106131 data=data, # dataset
107132 weights=w,
108133 var.monotone=c(0,0,0), # -1: monotone decrease, +1: monotone increase, 0: no monotone restrictions
137162
138163 # predict on the new data using "best" number of trees
139164 # f.predict will be on the canonical scale (logit,log,etc.)
140 f.predict <- predict(gbm1,data2,best.iter)
165 f.predict <- predict(gbm1, newdata = data2, n.trees = best.iter)
141166
142167 #plot(data2$f,f.predict)
143168 # Use observed sd
197222
198223 # predict on the new data using "best" number of trees
199224 # f.predict will be on the canonical scale (logit,log,etc.)
200 f.1.predict <- predict.gbm(gbm1,data2, n.trees=best.iter.test)
225 f.1.predict <- predict(gbm1,data2, n.trees=best.iter.test)
201226
202227 # compute quantity prior to transformation
203228 f.new = sin(3*X1) - 4*X2 + mu
212237 ########################### test.relative.influence() ##########################
213238 ########################### ##########################
214239
240
241 #' @export
215242 test.relative.influence <- function(){
216243 # Test that relative.influence really does pick out the true predictors
217244 set.seed(1234)
233260 ################################ validate.gbm() ################################
234261 ################################ ################################
235262
263
264 #' @export
236265 validate.gbm <- function () {
237 if(!requireNamespace("RUnit", quietly = TRUE))
238 stop("You need to install the RUnit package to validate gbm")
239
240 wh <- (1:length(search()))[search() == "package:gbm"]
241 tests <- objects(wh)[substring(objects(wh), 1, 5) == "test."]
242
243 # Create temporary directory to put tests into
244 if (.Platform$OS.type == "windows"){ sep <- "\\" }
245 else { sep <- "/" }
246
247 dir <- file.path(tempdir(), "gbm.tests", fsep = sep)
248
249 dir.create(dir)
250
251 for (i in 1:length(tests)) {
252 str <- paste(dir, sep, tests[i], ".R", sep = "")
253 dump(tests[i], file = str)
254 }
255 res <- RUnit::defineTestSuite("gbm", dirs = dir, testFuncRegexp = "^test.+", testFileRegexp = "*.R")
256 cat("Running gbm test suite.\nThis will take some time...\n\n")
257 res <- RUnit::runTestSuite(res)
258 res
266 wh <- (1:length(search()))[search() == "package:gbm"]
267 tests <- objects(wh)[substring(objects(wh), 1, 5) == "test."]
268
269 # Create temporary directory to put tests into
270 sep <- if (.Platform$OS.type == "windows") "\\" else "/"
271
272 dir <- file.path(tempdir(), "gbm.tests", fsep = sep)
273
274 dir.create(dir)
275
276 for (i in 1:length(tests)) {
277 str <- paste(dir, sep, tests[i], ".R", sep = "")
278 dump(tests[i], file = str)
279 }
280 res <- RUnit::defineTestSuite("gbm", dirs = dir, testFuncRegexp = "^test.+",
281 testFileRegexp = "*.R")
282 cat("Running gbm test suite.\nThis will take some time...\n\n")
283 RUnit::runTestSuite(res)
259284 }
260285
0 #' Arrange multiple grobs on a page
1 #'
2 #' See \code{\link[gridExtra]{grid.arrange}} for more details.
3 #'
4 #' @name grid.arrange
5 #' @rdname grid.arrange
6 #' @keywords internal
7 #' @export
8 #' @importFrom gridExtra grid.arrange
9 #' @usage grid.arrange(..., newpage = TRUE)
10 NULL
11
12
13 #' @keywords internal
14 getAvailableDistributions <- function() {
15 c("adaboost", "bernoulli", "coxph", "gaussian", "huberized", "laplace",
16 "multinomial", "pairwise", "poisson", "quantile", "tdist")
17 }
18
19
20 #' @keywords internal
21 guess_error_method <- function(object) {
22 if (has_train_test_split(object)) {
23 "test"
24 } else if (has_cross_validation(object)) {
25 "cv"
26 } else {
27 "OOB"
28 }
29 }
30
31
32 #' @keywords internal
33 has_train_test_split <- function(object) {
34 object$train.fraction < 1
35 }
36
37
38 #' @keywords internal
39 has_cross_validation <- function(object) {
40 !is.null(object$cv.error)
41 }
42
43
44 #' @keywords internal
45 best_iter <- function(object, method) {
46 check_if_gbm_fit(object)
47 if (method == "OOB") {
48 best_iter_out_of_bag(object)
49 } else if (method == "test") {
50 best_iter_test(object)
51 } else if (method == "cv") {
52 best_iter_cv(object)
53 } else {
54 stop("method must be one of \"cv\", \"test\", or \"OOB\"")
55 }
56 }
57
58
59 #' @keywords internal
60 best_iter_test <- function(object) {
61 check_if_gbm_fit(object)
62 best_iter_test <- which.min(object$valid.error)
63 return(best_iter_test)
64 }
65
66
67 #' @keywords internal
68 best_iter_cv <- function(object) {
69 check_if_gbm_fit(object)
70 if(!has_cross_validation(object)) {
71 stop('In order to use method="cv" gbm must be called with cv.folds>1.')
72 }
73 best_iter_cv <- which.min(object$cv.error)
74 return(best_iter_cv)
75 }
76
77
78 #' @keywords internal
79 best_iter_out_of_bag <- function(object) {
80 check_if_gbm_fit(object)
81 if(object$bag.fraction == 1) {
82 stop("Cannot compute OOB estimate or the OOB curve when bag_fraction=1.")
83 }
84 if(all(!is.finite(object$oobag.improve))) {
85 stop("Cannot compute OOB estimate or the OOB curve. No finite OOB ",
86 "estimates of improvement.")
87 }
88 message("OOB generally underestimates the optimal number of iterations ",
89 "although predictive performance is reasonably competitive. Using ",
90 "cv_folds>1 when calling gbm usually results in improved predictive ",
91 "performance.")
92 smoother <- generate_smoother_oobag(object)
93 best_iter_oob <- smoother$x[which.min(-cumsum(smoother$y))]
94 attr(best_iter_oob, "smoother") <- smoother
95 return(best_iter_oob)
96 }
97
98
99 #' @keywords internal
100 generate_smoother_oobag <- function(object) {
101 check_if_gbm_fit(object)
102 x <- seq_len(object$n.trees)
103 smoother <- loess(object$oobag.improve ~ x,
104 enp.target = min(max(4, length(x) / 10), 50))
105 smoother$y <- smoother$fitted
106 smoother$x <- x
107 return(smoother)
108 }
109
110
111 #' @keywords internal
112 check_if_gbm_fit <- function(object) {
113 if (!inherits(object, "gbm")) {
114 stop(deparse(substitute(object)), " is not a valid \"gbm\" object.")
115 }
116 }
117
118
119 #' @keywords internal
120 get_ylab <- function(object) {
121 check_if_gbm_fit(object)
122 if (object$distribution$name != "pairwise") {
123 switch(substring(object$distribution$name, 1, 2),
124 ga = "Squared error loss",
125 be = "Bernoulli deviance",
126 po = "Poisson deviance",
127 ad = "AdaBoost exponential bound",
128 co = "Cox partial deviance",
129 la = "Absolute loss",
130 qu = "Quantile loss",
131 mu = "Multinomial deviance",
132 td = "t-distribution deviance")
133 } else {
134 switch(object$distribution$metric,
135 conc = "Fraction of concordant pairs",
136 ndcg = "Normalized discounted cumulative gain",
137 map = "Mean average precision",
138 mrr = "Mean reciprocal rank")
139 }
140 }
141
142
143 #' @keywords internal
144 get_ylim <- function(object, method) {
145 check_if_gbm_fit(object)
146 if(object$train.fraction == 1) {
147 if ( method=="cv" ) {
148 range(object$train.error, object$cv.error)
149 } else if ( method == "test" ) {
150 range( object$train.error, object$valid.error)
151 } else {
152 range(object$train.error)
153 }
154 } else {
155 range(object$train.error, object$valid.error)
156 }
157 }
0 #' @keywords internal
1 .onAttach <- function(lib, pkg) {
2 vers <- utils::packageVersion("gbm")
3 packageStartupMessage(paste("Loaded gbm", vers))
4 }
0 gbm
1 ===
2
3 [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/gbm)](https://cran.r-project.org/package=gbm)
4 [![Build
5 Status](https://travis-ci.org/gbm-developers/gbm.svg?branch=master)](https://travis-ci.org/gbm-developers/gbm)
6 [![Downloads](http://cranlogs.r-pkg.org/badges/gbm)](http://cranlogs.r-pkg.org/badges/gbm)
7 [![Total
8 Downloads](http://cranlogs.r-pkg.org/badges/grand-total/gbm)](http://cranlogs.r-pkg.org/badges/grand-total/gbm)
9
10 Overview
11 --------
12
13 The gbm package (which stands for **g**eneralized **b**oosted
14 **m**odels) implements extensions to Freund and Schapire’s AdaBoost
15 algorithm and [Friedman’s gradient boosting
16 machine](http://projecteuclid.org/euclid.aos/1013203451). It includes
17 regression methods for least squares, absolute loss, t-distribution
18 loss, quantile regression, logistic, multinomial logistic, Poisson, Cox
19 proportional hazards partial likelihood, AdaBoost exponential loss,
20 Huberized hinge loss, and Learning to Rank measures (i.e.,
21 [LambdaMart](https://www.microsoft.com/en-us/research/publication/from-ranknet-to-lambdarank-to-lambdamart-an-overview/)).
22
23 Installation
24 ------------
25
26 ``` r
27 # The easiest way to get gbm is to install it from CRAN:
28 install.packages("gbm")
29
30 # Or install the development version from GitHub:
31 # install.packages("devtools")
32 devtools::install_github("gbm-developers/gbm")
33 ```
34
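For a quick sense of the interface, here is a minimal sketch (simulated
data; the argument values are illustrative, not recommendations):

``` r
library(gbm)
set.seed(101)
dat <- data.frame(x1 = runif(1000), x2 = runif(1000))
dat$y <- sin(3 * dat$x1) - 2 * dat$x2 + rnorm(1000, sd = 0.1)

fit <- gbm(y ~ x1 + x2, data = dat, distribution = "gaussian",
           n.trees = 3000, shrinkage = 0.01, interaction.depth = 2,
           cv.folds = 5)

best <- gbm.perf(fit, method = "cv")  # optimal number of trees
summary(fit, n.trees = best)          # relative influence of each predictor
```
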
35 Lifecycle
36 ---------
37
38 [![lifecycle](https://img.shields.io/badge/lifecycle-retired-orange.svg)](https://www.tidyverse.org/lifecycle/#retired)
39
40 The gbm package is retired and no longer under active development. We
41 will only make the necessary changes to ensure that gbm remains on CRAN.
42 For the most part, no new features will be added, and only the most
43 critical of bugs will be fixed.
44
45 This is a maintained version of `gbm`, backward compatible with CRAN versions
46 of `gbm` 2.1.x. It exists mainly for the purpose of reproducible
47 research and data analyses performed with the 2.1.x versions of `gbm`.
48 For newer development, and a more consistent API, try out the
49 [gbm3](https://github.com/gbm-developers/gbm3) package!
Binary diff not shown
0 \documentclass{article}
1
2 \bibliographystyle{plain}
3
4 \newcommand{\EV}{\mathrm{E}}
5 \newcommand{\Var}{\mathrm{Var}}
6 \newcommand{\aRule}{\begin{center} \rule{5in}{1mm} \end{center}}
7
8 \title{Generalized Boosted Models:\\A guide to the gbm package} \author{Greg Ridgeway}
9
10 %\VignetteEngine{knitr::knitr}
11 %\VignetteIndexEntry{Generalized Boosted Models: A guide to the gbm package}
12
13 \newcommand{\mathgbf}[1]{{\mbox{\boldmath$#1$\unboldmath}}}
14
15 \begin{document}
16
17 \maketitle
18
19 Boosting takes on various forms with different programs using different loss functions, different base models, and different optimization schemes. The gbm package takes the approach described in \cite{Friedman:2001} and \cite{Friedman:2002}. Some of the terminology differs, mostly due to an effort to cast boosting terms into more standard statistical terminology (e.g. deviance). In addition, the gbm package implements boosting for models commonly used in statistics but not commonly associated with boosting. The Cox proportional hazard model, for example, is an incredibly useful model and the boosting framework applies quite readily with only slight modification \cite{Ridgeway:1999}. Also, some algorithms implemented in the gbm package differ from the standard implementation. The AdaBoost algorithm \cite{FreundSchapire:1997} has a particular loss function and a particular optimization algorithm associated with it. The gbm implementation of AdaBoost adopts AdaBoost's exponential loss function (its bound on misclassification rate) but uses Friedman's gradient descent algorithm rather than the original one proposed. So the main purpose of this document is to spell out in detail what the gbm package implements.
20
21 \section{Gradient boosting}
22
23 This section essentially presents the derivation of boosting described in \cite{Friedman:2001}. The gbm package also adopts the stochastic gradient boosting strategy, a small but important tweak on the basic algorithm, described in \cite{Friedman:2002}.
24
25 \subsection{Friedman's gradient boosting machine} \label{sec:GradientBoostingMachine}
26
27 \begin{figure}
28 \aRule Initialize $\hat f(\mathbf{x})$ to be a constant, $\hat f(\mathbf{x}) = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\rho)$. \\
29 For $t$ in $1,\ldots,T$ do
30 \begin{enumerate}
31 \item Compute the negative gradient as the working response
32 \begin{equation}
33 z_i = -\frac{\partial}{\partial f(\mathbf{x}_i)} \Psi(y_i,f(\mathbf{x}_i)) \mbox{\Huge $|$}_{f(\mathbf{x}_i)=\hat f(\mathbf{x}_i)}
34 \end{equation}
35 \item Fit a regression model, $g(\mathbf{x})$, predicting $z_i$ from the covariates $\mathbf{x}_i$. \item Choose a gradient descent step size as
36 \begin{equation}
37 \rho = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\hat f(\mathbf{x}_i)+\rho g(\mathbf{x}_i))
38 \end{equation}
39 \item Update the estimate of $f(\mathbf{x})$ as
40 \begin{equation}
41 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \rho g(\mathbf{x})
42 \end{equation}
43 \end{enumerate} \aRule \caption{Friedman's Gradient Boost algorithm} \label{fig:GradientBoost} \end{figure}
44
45 Friedman (2001) and the companion paper Friedman (2002) extended the work of Friedman, Hastie, and Tibshirani (2000) and laid the ground work for a new generation of boosting algorithms. Using the connection between boosting and optimization, this new work proposes the Gradient Boosting Machine.
46
47 In any function estimation problem we wish to find a regression function, $\hat f(\mathbf{x})$, that minimizes the expectation of some loss function, $\Psi(y,f)$, as shown in (\ref{NonparametricRegression1}).
48
49 \begin{eqnarray}
50 \hspace{0.5in}
51 \hat f(\mathbf{x}) &=& \arg \min_{f(\mathbf{x})} \EV_{y,\mathbf{x}} \Psi(y,f(\mathbf{x})) \nonumber \\ \label{NonparametricRegression1}
52 &=& \arg \min_{f(\mathbf{x})} \EV_x \left[ \EV_{y|\mathbf{x}} \Psi(y,f(\mathbf{x})) \Big| \mathbf{x} \right]
53 \end{eqnarray}
54
55 We will focus on finding estimates of $f(\mathbf{x})$ such that \begin{equation}
56 \label{NonparametricRegression2}
57 \hspace{0.5in}
58 \hat f(\mathbf{x}) = \arg \min_{f(\mathbf{x})} \EV_{y|\mathbf{x}} \left[ \Psi(y,f(\mathbf{x}))|\mathbf{x} \right]
59 \end{equation}
60 Parametric regression models assume that $f(\mathbf{x})$ is a function with a finite number of parameters, $\beta$, and estimates them by selecting those values that minimize a loss function (e.g. squared error loss) over a training sample of $N$ observations on $(y,\mathbf{x})$ pairs as in (\ref{eq:Friedman1}).
61 \begin{equation}
62 \label{eq:Friedman1}
63 \hspace{0.5in}
64 \hat\beta = \arg \min_{\beta} \sum_{i=1}^N \Psi(y_i,f(\mathbf{x}_i;\beta))
65 \end{equation}
66 When we wish to estimate $f(\mathbf{x})$ non-parametrically the task becomes more difficult. Again we can proceed similarly to \cite{FHT:2000} and modify our current estimate of $f(\mathbf{x})$ by adding a new function $f(\mathbf{x})$ in a greedy fashion. Letting $f_i = f(\mathbf{x}_i)$, we see that we want to decrease the $N$ dimensional function
67 \begin{eqnarray}
68 \label{EQ:Friedman2}
69 \hspace{0.5in}
70 J(\mathbf{f}) &=& \sum_{i=1}^N \Psi(y_i,f(\mathbf{x}_i)) \nonumber \\
71 &=& \sum_{i=1}^N \Psi(y_i,f_i).
72 \end{eqnarray}
73 The negative gradient of $J(\mathbf{f})$ indicates the direction of the locally greatest decrease in $J(\mathbf{f})$. Gradient descent would then have us modify $\mathbf{f}$ as
74 \begin{equation}
75 \label{eq:Friedman3}
76 \hspace{0.5in}
77 \hat{\mathbf{f}} \leftarrow \hat{\mathbf{f}} - \rho \nabla J(\mathbf{f})
78 \end{equation}
79 where $\rho$ is the size of the step along the direction of greatest descent. Clearly, this step alone is far from our desired goal. First, it only fits $f$ at values of $\mathbf{x}$ for which we have observations. Second, it does not take into account that observations with similar $\mathbf{x}$ are likely to have similar values of $f(\mathbf{x})$. Both these problems would have disastrous effects on generalization error. However, Friedman suggests selecting a class of functions that use the covariate information to approximate the gradient, usually a regression tree. This line of reasoning produces his Gradient Boosting algorithm shown in Figure~\ref{fig:GradientBoost}. At each iteration the algorithm determines the direction, the gradient, in which it needs to improve the fit to the data and selects a particular model from the allowable class of functions that is in most agreement with the direction. In the case of squared-error loss, $\Psi(y_i,f(\mathbf{x}_i)) = \sum_{i=1}^N (y_i-f(\mathbf{x}_i))^2$, this algorithm corresponds exactly to residual fitting.
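
For example, with squared-error loss written as $\Psi(y,f)=\frac{1}{2}(y-f)^2$ (the factor of $\frac{1}{2}$ is a convenience that only rescales the step), the working response in step 1 is exactly the current residual,
\begin{equation}
z_i = -\frac{\partial}{\partial f(\mathbf{x}_i)} \frac{1}{2}\left(y_i-f(\mathbf{x}_i)\right)^2 \mbox{\Huge $|$}_{f(\mathbf{x}_i)=\hat f(\mathbf{x}_i)} = y_i - \hat f(\mathbf{x}_i),
\end{equation}
so each iteration of the algorithm in Figure~\ref{fig:GradientBoost} fits a regression tree to the residuals of the current model.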
80
81 There are various ways to extend and improve upon the basic framework suggested in Figure~\ref{fig:GradientBoost}. For example, Friedman (2001) substituted several choices for $\Psi$ to develop new boosting algorithms for robust regression with least absolute deviation and Huber loss functions. Friedman (2002) showed that a simple subsampling trick can greatly improve predictive performance while simultaneously reducing computation time. Section~\ref{GBMModifications} discusses some of these modifications.
82
83 \section{Improving boosting methods using control of the learning rate, sub-sampling, and a decomposition for interpretation} \label{GBMModifications}
84
85 This section explores the variations of the previous algorithms that have the potential to improve their predictive performance and interpretability. In particular, by controlling the optimization speed or learning rate, introducing low-variance regression methods, and applying ideas from robust regression we can produce non-parametric regression procedures with many desirable properties. As a by-product some of these modifications lead directly into implementations for learning from massive datasets. All these methods take advantage of the general form of boosting
86 \begin{equation}
87 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \EV(z(y,\hat f(\mathbf{x}))|\mathbf{x}).
88 \end{equation} So far we have taken advantage of this form only by substituting in our favorite regression procedure for $\EV_w(z|\mathbf{x})$. I will discuss some modifications to estimating $\EV_w(z|\mathbf{x})$ that have the potential to improve our algorithm.
89
90 \subsection{Decreasing the learning rate} As several authors have phrased slightly differently, ``...boosting, whatever flavor, seldom seems to overfit, no matter how many terms are included in the additive expansion''. This is not true as the discussion to \cite{FHT:2000} points out.
91
92 In the update step of any boosting algorithm we can introduce a learning rate to dampen the proposed move.
93 \begin{equation}
94 \label{eq:shrinkage}
95 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \lambda \EV(z(y,\hat f(\mathbf{x}))|\mathbf{x}).
96 \end{equation}
97 By multiplying the gradient step by $\lambda$ as in equation~\ref{eq:shrinkage} we have control on the rate at which the boosting algorithm descends the error surface (or ascends the likelihood surface). When $\lambda=1$ we return to performing full gradient steps. Friedman (2001) relates the learning rate to regularization through shrinkage.
98
99 The optimal number of iterations, $T$, and the learning rate, $\lambda$, depend on each other. In practice I set $\lambda$ to be as small as possible and then select $T$ by cross-validation. Performance is best when $\lambda$ is as small as possible, with decreasing marginal utility for smaller and smaller $\lambda$. Slower learning rates do not necessarily rescale the optimal number of iterations proportionally. That is, the fact that the optimal $T$ is 100 iterations when $\lambda=1.0$ does {\it not} necessarily imply that the optimal $T$ is 1000 iterations when $\lambda=0.1$.
100
101 \subsection{Variance reduction using subsampling}
102
103 Friedman (2002) proposed the stochastic gradient boosting algorithm that simply samples uniformly without replacement from the dataset before estimating the next gradient step. He found that this additional step greatly improved performance. We estimate the regression $\EV(z(y,\hat f(\mathbf{x}))|\mathbf{x})$ using a random subsample of the dataset.
104
105 \subsection{ANOVA decomposition}
106
107 Certain function approximation methods are decomposable in terms of a ``functional ANOVA decomposition''. That is, a function is decomposable as
108 \begin{equation}
109 \label{ANOVAdecomp}
110 f(\mathbf{x}) = \sum_j f_j(x_j) + \sum_{jk} f_{jk}(x_j,x_k) + \sum_{jk\ell} f_{jk\ell}(x_j,x_k,x_\ell) + \cdots.
111 \end{equation} This applies to boosted trees. Regression stumps (one split decision trees) depend on only one variable and fall into the first term of \ref{ANOVAdecomp}. Trees with two splits fall into the second term of \ref{ANOVAdecomp} and so on. By restricting the depth of the trees produced on each boosting iteration we can control the order of approximation. Often additive components are sufficient to approximate a multivariate function well; generalized additive models, the na\"{\i}ve Bayes classifier, and boosted stumps are examples. When the approximation is restricted to a first order we can also produce plots of $x_j$ versus $f_j(x_j)$ to demonstrate how changes in $x_j$ might affect changes in the response variable.
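
A quick sketch of how such plots can be produced from a fitted model (\texttt{fit} stands for any fitted \texttt{gbm} model; the variable indices are arbitrary):
\begin{verbatim}
plot(fit, i.var = 1)         # first-order term f_1(x_1)
plot(fit, i.var = c(1, 2))   # second-order term f_{12}(x_1, x_2)
\end{verbatim}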
112
113 \subsection{Relative influence} Friedman (2001) also develops an extension of a variable's ``relative influence'' for boosted estimates. For tree based methods the approximate relative influence of a variable $x_j$ is
114 \begin{equation}
115 \label{RelInfluence}
116 \hspace{0.5in}
117 \hat J_j^2 = \hspace{-0.1in}\sum_{\mathrm{splits~on~}x_j}\hspace{-0.2in}I_t^2
118 \end{equation} where $I_t^2$ is the empirical improvement by splitting on $x_j$ at that point. Friedman's extension to boosted models is to average the relative influence of variable $x_j$ across all the trees generated by the boosting algorithm.
119
120 \begin{figure}
121 \aRule
122 Select
123 \begin{itemize}
124 \item a loss function (\texttt{distribution})
125 \item the number of iterations, $T$ (\texttt{n.trees})
126 \item the depth of each tree, $K$ (\texttt{interaction.depth})
127 \item the shrinkage (or learning rate) parameter, $\lambda$ (\texttt{shrinkage})
128 \item the subsampling rate, $p$ (\texttt{bag.fraction})
129 \end{itemize}
130 Initialize $\hat f(\mathbf{x})$ to be a constant, $\hat f(\mathbf{x}) = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\rho)$ \\
131 For $t$ in $1,\ldots,T$ do
132 \begin{enumerate}
133 \item Compute the negative gradient as the working response
134 \begin{equation}
135 z_i = -\frac{\partial}{\partial f(\mathbf{x}_i)} \Psi(y_i,f(\mathbf{x}_i)) \mbox{\Huge $|$}_{f(\mathbf{x}_i)=\hat f(\mathbf{x}_i)}
136 \end{equation}
137 \item Randomly select $p\times N$ cases from the dataset
138 \item Fit a regression tree with $K$ terminal nodes, $g(\mathbf{x})=\EV(z|\mathbf{x})$. This tree is fit using only those randomly selected observations
139 \item Compute the optimal terminal node predictions, $\rho_1,\ldots,\rho_K$, as
140 \begin{equation}
141 \rho_k = \arg \min_{\rho} \sum_{\mathbf{x}_i\in S_k} \Psi(y_i,\hat f(\mathbf{x}_i)+\rho)
142 \end{equation}
143 where $S_k$ is the set of $\mathbf{x}$s that define terminal node $k$. Again this step uses only the randomly selected observations.
144 \item Update $\hat f(\mathbf{x})$ as
145 \begin{equation}
146 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \lambda\rho_{k(\mathbf{x})}
147 \end{equation}
148 where $k(\mathbf{x})$ indicates the index of the terminal node into which an observation with features $\mathbf{x}$ would fall.
149 \end{enumerate}
150 \aRule
151 \caption{Boosting as implemented in \texttt{gbm()}}
152 \label{fig:gbm}
153 \end{figure}
154
155 \section{Common user options}
156
157 This section discusses the options to gbm that most users will need to change or tune.
158
159 \subsection{Loss function}
160
161 The first and foremost choice is \texttt{distribution}. This should be easily dictated by the application. For most classification problems either \texttt{bernoulli} or \texttt{adaboost} will be appropriate, the former being recommended. For continuous outcomes the choices are \texttt{gaussian} (for minimizing squared error), \texttt{laplace} (for minimizing absolute error), and quantile regression (for estimating percentiles of the conditional distribution of the outcome). Censored survival outcomes require \texttt{coxph}. Count outcomes may use \texttt{poisson}, although one might also consider \texttt{gaussian} or \texttt{laplace} depending on the analytical goals.
162
163 \subsection{The relationship between shrinkage and number of iterations} The issues that most new users of gbm struggle with are the choice of \texttt{n.trees} and \texttt{shrinkage}. It is important to know that smaller values of \texttt{shrinkage} (almost) always give improved predictive performance. That is, setting \texttt{shrinkage=0.001} will almost certainly result in a model with better out-of-sample predictive performance than setting \texttt{shrinkage=0.01}. However, there are computational costs, both storage and CPU time, associated with setting \texttt{shrinkage} to be low. The model with \texttt{shrinkage=0.001} will likely require ten times as many iterations as the model with \texttt{shrinkage=0.01}, increasing storage and computation time by a factor of 10. Figure~\ref{fig:shrinkViters} shows the relationship between predictive performance, the number of iterations, and the shrinkage parameter. Note that the increase in the optimal number of iterations between two choices for shrinkage is roughly equal to the ratio of the shrinkage parameters. It is generally the case that for small shrinkage parameters, 0.001 for example, there is a fairly long plateau in which predictive performance is at its best. My rule of thumb is to set \texttt{shrinkage} as small as possible while still being able to fit the model in a reasonable amount of time and storage. I usually aim for 3,000 to 10,000 iterations with shrinkage rates between 0.01 and 0.001.
164
165 \begin{figure}[ht] \begin{center} \includegraphics[width=5in]{shrinkage-v-iterations} \end{center} \caption{Out-of-sample predictive performance by number of iterations and shrinkage. Smaller values of the shrinkage parameter offer improved predictive performance, but with decreasing marginal improvement.} \label{fig:shrinkViters} \end{figure}
166
167 \subsection{Estimating the optimal number of iterations} gbm offers three methods for estimating the optimal number of iterations after the gbm model has been fit, an independent test set (\texttt{test}), out-of-bag estimation (\texttt{OOB}), and $v$-fold cross validation (\texttt{cv}). The function \texttt{gbm.perf} computes the iteration estimate.
168
169 Like Friedman's MART software, the independent test set method uses a single holdout test set to select the optimal number of iterations. If \texttt{train.fraction} is set to be less than 1, then only the \textit{first} \texttt{train.fraction}$\times$\texttt{nrow(data)} will be used to fit the model. Note that if the data are sorted in a systematic way (such as cases for which $y=1$ come first), then the data should be shuffled before running gbm. Those observations not used in the model fit can be used to get an unbiased estimate of the optimal number of iterations. The downside of this method is that a considerable number of observations are used to estimate the single regularization parameter (number of iterations) leaving a reduced dataset for estimating the entire multivariate model structure. Use \texttt{gbm.perf(...,method="test")} to obtain an estimate of the optimal number of iterations using the held out test set.
170
171 If \texttt{bag.fraction} is set to be greater than 0 (0.5 is recommended), gbm computes an out-of-bag estimate of the improvement in predictive performance. It evaluates the reduction in deviance on those observations not used in selecting the next regression tree. The out-of-bag estimator underestimates the reduction in deviance. As a result, it almost always is too conservative in its selection for the optimal number of iterations. The motivation behind this method was to avoid having to set aside a large independent dataset, which reduces the information available for learning the model structure. Use \texttt{gbm.perf(...,method="OOB")} to obtain the OOB estimate.
172
173 Lastly, gbm offers $v$-fold cross validation for estimating the optimal number of iterations. If \texttt{cv.folds=5} is set when fitting the gbm model, then gbm will do 5-fold cross validation. gbm will fit five gbm models in order to compute the cross validation error estimate and then will fit a sixth and final gbm model with \texttt{n.trees} iterations using all of the data. The returned model object will have a component labeled \texttt{cv.error}. Note that \texttt{gbm.more} will do additional gbm iterations but will not add to the \texttt{cv.error} component. Use \texttt{gbm.perf(...,method="cv")} to obtain the cross validation estimate.
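
As a brief sketch (the data frame \texttt{train}, its binary 0/1 outcome \texttt{y}, and all tuning values are purely illustrative), all three estimates can be obtained from a single fit that holds out part of the data and also performs cross-validation:
\begin{verbatim}
fit <- gbm(y ~ ., data = train, distribution = "bernoulli",
           n.trees = 5000, shrinkage = 0.005, interaction.depth = 3,
           bag.fraction = 0.5, train.fraction = 0.8, cv.folds = 5)
gbm.perf(fit, method = "test")  # held-out test set estimate
gbm.perf(fit, method = "OOB")   # out-of-bag estimate (conservative)
gbm.perf(fit, method = "cv")    # 5-fold cross-validation estimate
\end{verbatim}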
174
175 \begin{figure}[ht]
176 \begin{center}
177 \includegraphics[width=5in]{oobperf2}
178 \end{center}
179 \caption{Out-of-sample predictive performance of four methods of selecting the optimal number of iterations. The vertical axis plots performance relative the best. The boxplots indicate relative performance across thirteen real datasets from the UCI repository. See \texttt{demo(OOB-reps)}.}
180 \label{fig:oobperf}
181 \end{figure}
182
183 Figure~\ref{fig:oobperf} compares the three methods for estimating the optimal number of iterations across 13 datasets. The boxplots show each method's performance relative to the best method on that dataset. For most datasets the methods perform similarly; however, 5-fold cross validation is consistently the best of them. OOB, using a 33\% test set, and using a 20\% test set all have datasets for which they perform considerably worse than the best method. My recommendation is to use 5- or 10-fold cross validation if you can afford the computing time. Otherwise you may choose among the other options, knowing that OOB is conservative.
184
185 \section{Available distributions}
186
187 This section gives some of the mathematical detail for each of the distribution options that gbm offers. The gbm engine written in C++ has access to a C++ class for each of these distributions. Each class contains methods for computing the associated deviance, initial value, the gradient, and the constants to predict in each terminal node.
188
189 In the equations shown below, for non-zero offset terms, replace $f(\mathbf{x}_i)$ with $o_i + f(\mathbf{x}_i)$.
190
191 \subsection{Gaussian}
192
193 \begin{tabular}{ll}
194 Deviance & $\displaystyle \frac{1}{\sum w_i} \sum w_i(y_i-f(\mathbf{x}_i))^2$ \\
195 Initial value & $\displaystyle f(\mathbf{x})=\frac{\sum w_i(y_i-o_i)}{\sum w_i}$ \\
196 Gradient & $z_i=y_i - f(\mathbf{x}_i)$ \\
197 Terminal node estimates & $\displaystyle \frac{\sum w_i(y_i-f(\mathbf{x}_i))}{\sum w_i}$
198 \end{tabular}
199
200 \subsection{AdaBoost}
201
202 \begin{tabular}{ll} Deviance & $\displaystyle \frac{1}{\sum w_i} \sum w_i\exp(-(2y_i-1)f(\mathbf{x}_i))$ \\ Initial value & $\displaystyle \frac{1}{2}\log\frac{\sum y_iw_ie^{-o_i}}{\sum (1-y_i)w_ie^{o_i}}$ \\ Gradient & $\displaystyle z_i= -(2y_i-1)\exp(-(2y_i-1)f(\mathbf{x}_i))$ \\ Terminal node estimates & $\displaystyle \frac{\sum (2y_i-1)w_i\exp(-(2y_i-1)f(\mathbf{x}_i))}
203 {\sum w_i\exp(-(2y_i-1)f(\mathbf{x}_i))}$
204 \end{tabular}
205
206 \subsection{Bernoulli}
207
208 \begin{tabular}{ll} Deviance & $\displaystyle -2\frac{1}{\sum w_i} \sum w_i(y_if(\mathbf{x}_i)-\log(1+\exp(f(\mathbf{x}_i))))$ \\ Initial value & $\displaystyle \log\frac{\sum w_iy_i}{\sum w_i(1-y_i)}$ \\ Gradient & $\displaystyle z_i=y_i-\frac{1}{1+\exp(-f(\mathbf{x}_i))}$ \\ Terminal node estimates & $\displaystyle \frac{\sum w_i(y_i-p_i)}{\sum w_ip_i(1-p_i)}$ \\
209 & where $\displaystyle p_i = \frac{1}{1+\exp(-f(\mathbf{x}_i))}$ \\
210 \end{tabular}
211
212 Notes: \begin{itemize} \item For non-zero offset terms, the computation of the initial value requires Newton-Raphson. Initialize $f_0=0$ and iterate $\displaystyle f_0 \leftarrow f_0 + \frac{\sum w_i(y_i-p_i)}{\sum w_ip_i(1-p_i)}$ where $\displaystyle p_i = \frac{1}{1+\exp(-(o_i+f_0))}$. \end{itemize}
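
A small sketch of this initialization (\texttt{y}, \texttt{w}, and the offset vector \texttt{o} are assumed given; the number of steps is arbitrary):
\begin{verbatim}
f0 <- 0
for (step in 1:20) {
  p  <- 1 / (1 + exp(-(o + f0)))
  f0 <- f0 + sum(w * (y - p)) / sum(w * p * (1 - p))
}
\end{verbatim}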
213
214 \subsection{Laplace}
215
216 \begin{tabular}{ll} Deviance & $\frac{1}{\sum w_i} \sum w_i|y_i-f(\mathbf{x}_i)|$ \\ Initial value & $\mbox{median}_w(y)$ \\ Gradient & $z_i=\mbox{sign}(y_i-f(\mathbf{x}_i))$ \\ Terminal node estimates & $\mbox{median}_w(z)$ \end{tabular}
217
218 Notes: \begin{itemize} \item $\mbox{median}_w(y)$ denotes the weighted median, defined as the solution to the equation $\frac{\sum w_iI(y_i\leq m)}{\sum w_i}=\frac{1}{2}$ \item \texttt{gbm()} currently does not implement the weighted median and issues a warning when the user uses weighted data with \texttt{distribution="laplace"}. \end{itemize}
219
220
221 \subsection{Quantile regression}
222
223 Contributed by Brian Kriegler (see \cite{Kriegler:2010}).
224
225 \begin{tabular}{ll} Deviance & $\frac{1}{\sum w_i}
226 \left(\alpha\sum_{y_i>f(\mathbf{x}_i)} w_i(y_i-f(\mathbf{x}_i))\right. +$ \\
227 & \hspace{0.5in}$\left.(1-\alpha)\sum_{y_i\leq f(\mathbf{x}_i)} w_i(f(\mathbf{x}_i)-y_i)\right)$ \\
228 Initial value & $\mathrm{quantile}^{(\alpha)}_w(y)$ \\ Gradient & $z_i=\alpha I(y_i>f(\mathbf{x}_i))-(1-\alpha)I(y_i\leq f(\mathbf{x}_i))$ \\ Terminal node estimates & $\mathrm{quantile}^{(\alpha)}_w(z)$ \end{tabular}
229
230 Notes: \begin{itemize} \item $\mathrm{quantile}^{(\alpha)}_w(y)$ denotes the weighted quantile, defined as the solution to the equation $\frac{\sum w_iI(y_i\leq q)}{\sum w_i}=\alpha$ \item \texttt{gbm()} currently does not implement the weighted median and issues a warning when the user uses weighted data with \texttt{distribution=list(name="quantile")}. \end{itemize}
231
232
233 \subsection{Cox Proportional Hazard}
234
235 \begin{tabular}{ll} Deviance & $-2\sum w_i(\delta_i(f(\mathbf{x}_i)-\log(R_i/w_i)))$\\ Gradient & $\displaystyle z_i=\delta_i - \sum_j \delta_j
236 \frac{w_jI(t_i\geq t_j)e^{f(\mathbf{x}_i)}}
237 {\sum_k w_kI(t_k\geq t_j)e^{f(\mathbf{x}_k)}}$ \\
238 Initial value & 0 \\ Terminal node estimates & Newton-Raphson algorithm \end{tabular}
239
240 \begin{enumerate}
241 \item Initialize the terminal node predictions to 0, $\mathgbf{\rho}=0$
242 \item Let $\displaystyle
243 p_i^{(k)}=\frac{\sum_j I(k(j)=k)I(t_j\geq t_i)e^{f(\mathbf{x}_i)+\rho_k}}
244 {\sum_j I(t_j\geq t_i)e^{f(\mathbf{x}_i)+\rho_k}}$
245 \item Let $g_k=\sum w_i\delta_i\left(I(k(i)=k)-p_i^{(k)}\right)$
246 \item Let $\mathbf{H}$ be a $k\times k$ matrix with diagonal elements
247 \begin{enumerate}
248 \item Set diagonal elements $H_{mm}=\sum w_i\delta_i p_i^{(m)}\left(1-p_i^{(m)}\right)$
249 \item Set off diagonal elements $H_{mn}=-\sum w_i\delta_i p_i^{(m)}p_i^{(n)}$
250 \end{enumerate}
251 \item Newton-Raphson update $\mathgbf{\rho} \leftarrow \mathgbf{\rho} - \mathbf{H}^{-1}\mathbf{g}$
252 \item Return to step 2 until convergence
253 \end{enumerate}
254
255 Notes:
256 \begin{itemize}
257 \item $t_i$ is the survival time and $\delta_i$ is the death indicator.
258 \item $R_i$ denotes the hazard for the risk set, $R_i=\sum_{j=1}^N w_jI(t_j\geq t_i)e^{f(\mathbf{x}_i)}$
259 \item $k(i)$ indexes the terminal node of observation $i$
260 \item For speed, \texttt{gbm()} does only one step of the Newton-Raphson algorithm rather than iterating to convergence. There is no appreciable loss of accuracy, since the next boosting iteration will simply correct for the prior iteration's inadequacy.
261 \item \texttt{gbm()} initially sorts the data by survival time. Doing this reduces the computation of the risk set from $O(n^2)$ to $O(n)$ at the cost of a single up front sort on survival time. After the model is fit, the data are then put back in their original order.
262 \end{itemize}
263
264 \subsection{Poisson}
265 \begin{tabular}{ll}
266 Deviance & $\displaystyle -2\frac{1}{\sum w_i} \sum w_i(y_if(\mathbf{x}_i)-\exp(f(\mathbf{x}_i)))$ \\
267 Initial value & $\displaystyle f(\mathbf{x})= \log\left(\frac{\sum w_iy_i}{\sum w_ie^{o_i}}\right)$ \\
268 Gradient & $z_i=y_i - \exp(f(\mathbf{x}_i))$ \\
269 Terminal node estimates & $\displaystyle \log\frac{\sum w_iy_i}{\sum w_i\exp(f(\mathbf{x}_i))}$
270 \end{tabular}
271
272 The Poisson class includes special safeguards so that the most extreme predicted values are $e^{-19}$ and $e^{+19}$. This behavior is consistent with \texttt{glm()}.
273
274 \subsection{Pairwise}
275
276 This distribution implements ranking measures following the
277 \emph{LambdaMart} algorithm \cite{Burges:2010}. Instances belong to
278 \emph{groups}; all pairs of items with different labels, belonging to
279 the same group, are used for training. In \emph{Information Retrieval}
280 applications, groups correspond to user queries,
281 and items to (feature vectors of) documents in the associated match
282 set to be ranked.
283
284 For consistency with typical usage, our goal is to \emph{maximize} one
285 of the \emph{utility} functions listed below. Consider a group with
286 instances $x_1, \dots, x_n$, ordered such that $f(x_1) \geq f(x_2)
287 \geq \dots f(x_n)$; i.e., the \emph{rank} of $x_i$ is $i$, where
288 smaller ranks are preferable. Let $P$ be the set of all ordered pairs
289 such that $y_i > y_j$.
290
291 \begin{enumerate}
292 \item[{\bf Concordance:}] Fraction of concordant (i.e., correctly ordered)
293 pairs. For the special case of binary labels, this is equivalent to
294 the Area under the ROC Curve.
295 $$\left\{ \begin{array}{l l}\frac{\|\{(i,j)\in P |
296 f(x_i)>f(x_j)\}\|}{\|P\|}
297 & P \neq \emptyset\\
298 0 & \mbox{otherwise.}
299 \end{array}\right.
300 $$
301 \item[{\bf MRR:}] Mean reciprocal rank of the highest-ranked positive
302 instance (it is assumed $y_i\in\{0,1\}$):
303 $$\left\{ \begin{array}{l l}\frac{1}{\min\{1 \leq i \leq n |y_i=1\}}
304 & \exists i: \, 1 \leq i \leq n, y_i=1\\
305 0 & \mbox{otherwise.}\end{array}\right.$$
306 \item[{\bf MAP:}] Mean average precision, a generalization of
307 MRR to multiple positive instances:
308 $$\left\{ \begin{array}{l l} \frac{\sum_{1\leq i\leq n | y_i=1} \|\{1\leq j\leq i
309 |y_j=1\}\|\,/\,i}{\|\{1\leq i\leq n | y_i=1\}\|} & \exists i: \,
310 1 \leq i \leq n, y_i=1\\
311 0 & \mbox{otherwise.}\end{array}\right.$$
312 \item[{\bf nDCG:}] Normalized discounted cumulative gain:
313 $$\frac{\sum_{1\leq i\leq n} \log_2(i+1) \, y_i}{\sum_{1\leq i\leq n}
314 \log_2(i+1) \, y'_i},$$ where $y'_1, \dots, y'_n$ is a reordering of $y_1,
315 \dots,y_n$ with $y'_1 \geq y'_2 \geq \dots \geq y'_n$.
316 \end{enumerate}
317
318 The generalization to multiple (possibly weighted) groups is
319 straightforward. Sometimes a cut-off rank $k$ is given for \emph{MRR}
320 and \emph{nDCG}, in which case we replace the outer index $n$ by
321 $\min(n,k)$.
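
As an illustration, the concordance measure for a single group can be computed directly from its definition (a sketch only; \texttt{y} and \texttt{f} denote the labels and current scores of one group, and ties in \texttt{f} count as discordant, matching the strict inequality above):
\begin{verbatim}
concordance <- function(y, f) {
  P <- which(outer(y, y, ">"), arr.ind = TRUE)  # ordered pairs with y_i > y_j
  if (nrow(P) == 0) return(0)                   # empty P: utility defined as 0
  mean(f[P[, 1]] > f[P[, 2]])                   # fraction correctly ordered
}
\end{verbatim}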
322
323 The initial value for $f(x_i)$ is always zero. We derive the gradient of
324 a cost function whose gradient locally approximates the gradient of
325 the IR measure for a fixed ranking:
326
327 \begin{eqnarray*}
328 \Phi & = & \sum_{(i,j) \in P} \Phi_{ij}\\
329 & = & \sum_{(i,j) \in P} |\Delta Z_{ij}| \log \left( 1 + e^{-(f(x_i) -
330 f(x_j))}\right),
331 \end{eqnarray*}
332 where $|\Delta Z_{ij}|$ is the absolute utility difference when
333 swapping the ranks of $i$ and $j$, while leaving all other instances
334 the same. Define
335 \begin{eqnarray*}
336 \lambda_{ij} & = & \frac{\partial\Phi_{ij}}{\partial f(x_i)}\\
337 & = & - |\Delta Z_{ij}| \frac{1}{1 + e^{f(x_i) - f(x_j)}}\\
338 & = & - |\Delta Z_{ij}| \, \rho_{ij},
339 \end{eqnarray*}
340 with
341 $$ \rho_{ij} = - \frac{\lambda_{ij }}{|\Delta Z_{ij}|} = \frac{1}{1 + e^{f(x_i) - f(x_j)}}$$
342
343 For the gradient of $\Phi$ with respect to $f(x_i)$, define
344 \begin{eqnarray*}
345 \lambda_i & = & \frac{\partial \Phi}{\partial f(x_i)}\\
346 & = & \sum_{j|(i,j) \in P} \lambda_{ij} - \sum_{j|(j,i) \in P} \lambda_{ji}\\
347 & = & - \sum_{j|(i,j) \in P} |\Delta Z_{ij}| \, \rho_{ij}\\
348 & & \mbox{} + \sum_{j|(j,i) \in P} |\Delta Z_{ji}| \, \rho_{ji}.
349 \end{eqnarray*}
350
351 The second derivative is
352 \begin{eqnarray*}
353 \gamma_i & \stackrel{def}{=} & \frac{\partial^2\Phi}{\partial f(x_i)^2}\\
354 & = & \sum_{j|(i,j) \in P} |\Delta Z_{ij}| \, \rho_{ij} \, (1-\rho_{ij})\\
355 & & \mbox{} + \sum_{j|(j,i) \in P} |\Delta Z_{ji}| \, \rho_{ji} \, (1-\rho_{ji}).
356 \end{eqnarray*}
357
358 Now consider again all groups with associated weights. For a given terminal node, let $i$
359 range over all contained instances. Then its estimate is
360 $$-\frac{\sum_i v_i\lambda_{i}}{\sum_i v_i \gamma_i},$$ where
361 $v_i=w(\mbox{\em group}(i))/\|\{(j,k)\in\mbox{\em group}(i)\}\|.$
362
363 In each iteration, instances are reranked according to the preliminary
364 scores $f(x_i)$ to determine the $|\Delta Z_{ij}|$. Note that in order
365 to avoid ranking bias, we break ties by adding a small amount of
366 random noise.
367
368
369
370 \bibliography{gbm}
371
372 \end{document}
+0
-391
inst/doc/gbm.Sweave less more
0 % setwd("c:/dev/gbm/inst/doc") % Sweave("gbm.rnw"); system("texify gbm.tex"); system("c:\\MiKTeX\\texmf\\miktex\\bin\\yap.exe gbm.dvi",wait=FALSE)
1
2 \documentclass{article}
3 \bibliographystyle{plain}
4 \usepackage[active]{srcltx}
5 \newcommand{\EV}{\mathrm{E}}
6 \newcommand{\Var}{\mathrm{Var}}
7 \newcommand{\aRule}{\begin{center} \rule{5in}{1mm} \end{center}}
8
9 \title{Generalized Boosted Models:\\A guide to the gbm package} \author{Greg Ridgeway}
10
11 %\VignetteIndexEntry{Generalized Boosted Models: A guide to the gbm package}
12
13 \newcommand{\mathgbf}[1]{{\mbox{\boldmath$#1$\unboldmath}}}
14
15 \begin{document}
16
17 \maketitle
18
19 Boosting takes on various forms with different programs using different loss functions, different base models, and different optimization schemes. The gbm package takes the approach described in \cite{Friedman:2001} and \cite{Friedman:2002}. Some of the terminology differs, mostly due to an effort to cast boosting terms into more standard statistical terminology (e.g. deviance). In addition, the gbm package implements boosting for models commonly used in statistics but not commonly associated with boosting. The Cox proportional hazard model, for example, is an incredibly useful model and the boosting framework applies quite readily with only slight modification \cite{Ridgeway:1999}. Also some algorithms implemented in the gbm package differ from the standard implementation. The AdaBoost algorithm \cite{FreundSchapire:1997} has a particular loss function and a particular optimization algorithm associated with it. The gbm implementation of AdaBoost adopts AdaBoost's exponential loss function (its bound on misclassification rate) but uses Friedman's gradient descent algorithm rather than the original one proposed. So the main purposes of this document is to spell out in detail what the gbm package implements.
20
21 \section{Gradient boosting}
22
23 This section essentially presents the derivation of boosting described in \cite{Friedman:2001}. The gbm package also adopts the stochastic gradient boosting strategy, a small but important tweak on the basic algorithm, described in \cite{Friedman:2002}.
24
25 \subsection{Friedman's gradient boosting machine} \label{sec:GradientBoostingMachine}
26
27 \begin{figure}
28 \aRule Initialize $\hat f(\mathbf{x})$ to be a constant, $\hat f(\mathbf{x}) = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\rho)$. \\
29 For $t$ in $1,\ldots,T$ do
30 \begin{enumerate}
31 \item Compute the negative gradient as the working response
32 \begin{equation}
33 z_i = -\frac{\partial}{\partial f(\mathbf{x}_i)} \Psi(y_i,f(\mathbf{x}_i)) \mbox{\Huge $|$}_{f(\mathbf{x}_i)=\hat f(\mathbf{x}_i)}
34 \end{equation}
35 \item Fit a regression model, $g(\mathbf{x})$, predicting $z_i$ from the covariates $\mathbf{x}_i$. \item Choose a gradient descent step size as
36 \begin{equation}
37 \rho = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\hat f(\mathbf{x}_i)+\rho g(\mathbf{x}_i))
38 \end{equation}
39 \item Update the estimate of $f(\mathbf{x})$ as
40 \begin{equation}
41 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \rho g(\mathbf{x})
42 \end{equation}
43 \end{enumerate} \aRule \caption{Friedman's Gradient Boost algorithm} \label{fig:GradientBoost} \end{figure}
44
45 Friedman (2001) and the companion paper Friedman (2002) extended the work of Friedman, Hastie, and Tibshirani (2000) and laid the ground work for a new generation of boosting algorithms. Using the connection between boosting and optimization, this new work proposes the Gradient Boosting Machine.
46
47 In any function estimation problem we wish to find a regression function, $\hat f(\mathbf{x})$, that minimizes the expectation of some loss function, $\Psi(y,f)$, as shown in (\ref{NonparametricRegression1}).
48
49 \begin{eqnarray}
50 \hspace{0.5in}
51 \hat f(\mathbf{x}) &=& \arg \min_{f(\mathbf{x})} \EV_{y,\mathbf{x}} \Psi(y,f(\mathbf{x})) \nonumber \\ \label{NonparametricRegression1}
52 &=& \arg \min_{f(\mathbf{x})} \EV_x \left[ \EV_{y|\mathbf{x}} \Psi(y,f(\mathbf{x})) \Big| \mathbf{x} \right]
53 \end{eqnarray}
54
55 We will focus on finding estimates of $f(\mathbf{x})$ such that \begin{equation}
56 \label{NonparametricRegression2}
57 \hspace{0.5in}
58 \hat f(\mathbf{x}) = \arg \min_{f(\mathbf{x})} \EV_{y|\mathbf{x}} \left[ \Psi(y,f(\mathbf{x}))|\mathbf{x} \right]
59 \end{equation}
60 Parametric regression models assume that $f(\mathbf{x})$ is a function with a finite number of parameters, $\beta$, and estimates them by selecting those values that minimize a loss function (e.g. squared error loss) over a training sample of $N$ observations on $(y,\mathbf{x})$ pairs as in (\ref{eq:Friedman1}).
61 \begin{equation}
62 \label{eq:Friedman1}
63 \hspace{0.5in}
64 \hat\beta = \arg \min_{\beta} \sum_{i=1}^N \Psi(y_i,f(\mathbf{x}_i;\beta))
65 \end{equation}
66 When we wish to estimate $f(\mathbf{x})$ non-parametrically the task becomes more difficult. Again we can proceed similarly to \cite{FHT:2000} and modify our current estimate of $f(\mathbf{x})$ by adding a new function $f(\mathbf{x})$ in a greedy fashion. Letting $f_i = f(\mathbf{x}_i)$, we see that we want to decrease the $N$ dimensional function
67 \begin{eqnarray}
68 \label{EQ:Friedman2}
69 \hspace{0.5in}
70 J(\mathbf{f}) &=& \sum_{i=1}^N \Psi(y_i,f(\mathbf{x}_i)) \nonumber \\
71 &=& \sum_{i=1}^N \Psi(y_i,F_i).
72 \end{eqnarray}
73 The negative gradient of $J(\mathbf{f})$ indicates the direction of the locally greatest decrease in $J(\mathbf{f})$. Gradient descent would then have us modify $\mathbf{f}$ as
74 \begin{equation}
75 \label{eq:Friedman3}
76 \hspace{0.5in}
77 \hat \mathbf{f} \leftarrow \hat \mathbf{f} - \rho \nabla J(\mathbf{f})
78 \end{equation}
79 where $\rho$ is the size of the step along the direction of greatest descent. Clearly, this step alone is far from our desired goal. First, it only fits $f$ at values of $\mathbf{x}$ for which we have observations. Second, it does not take into account that observations with similar $\mathbf{x}$ are likely to have similar values of $f(\mathbf{x})$. Both these problems would have disastrous effects on generalization error. However, Friedman suggests selecting a class of functions that use the covariate information to approximate the gradient, usually a regression tree. This line of reasoning produces his Gradient Boosting algorithm shown in Figure~\ref{fig:GradientBoost}. At each iteration the algorithm determines the direction, the gradient, in which it needs to improve the fit to the data and selects a particular model from the allowable class of functions that is in most agreement with the direction. In the case of squared-error loss, $\Psi(y_i,f(\mathbf{x}_i)) = \sum_{i=1}^N (y_i-f(\mathbf{x}_i))^2$, this algorithm corresponds exactly to residual fitting.
80
81 There are various ways to extend and improve upon the basic framework suggested in Figure~\ref{fig:GradientBoost}. For example, Friedman (2001) substituted several choices in for $\Psi$ to develop new boosting algorithms for robust regression with least absolute deviation and Huber loss functions. Friedman (2002) showed that a simple subsampling trick can greatly improve predictive performance while simultaneously reduce computation time. Section~\ref{GBMModifications} discusses some of these modifications.
82
83 \section{Improving boosting methods using control of the learning rate, sub-sampling, and a decomposition for interpretation} \label{GBMModifications}
84
85 This section explores the variations of the previous algorithms that have the potential to improve their predictive performance and interpretability. In particular, by controlling the optimization speed or learning rate, introducing low-variance regression methods, and applying ideas from robust regression we can produce non-parametric regression procedures with many desirable properties. As a by-product some of these modifications lead directly into implementations for learning from massive datasets. All these methods take advantage of the general form of boosting
86 \begin{equation}
87 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \EV(z(y,\hat f(\mathbf{x}))|\mathbf{x}).
88 \end{equation} So far we have taken advantage of this form only by substituting in our favorite regression procedure for $\EV_w(z|\mathbf{x})$. I will discuss some modifications to estimating $\EV_w(z|\mathbf{x})$ that have the potential to improve our algorithm.
89
90 \subsection{Decreasing the learning rate} As several authors have phrased it slightly differently, ``...boosting, whatever flavor, seldom seems to overfit, no matter how many terms are included in the additive expansion''. This is not true, as the discussion of \cite{FHT:2000} points out.
91
92 In the update step of any boosting algorithm we can introduce a learning rate to dampen the proposed move.
93 \begin{equation}
94 \label{eq:shrinkage}
95 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \lambda \EV(z(y,\hat f(\mathbf{x}))|\mathbf{x}).
96 \end{equation}
97 By multiplying the gradient step by $\lambda$ as in equation~\ref{eq:shrinkage} we have control over the rate at which the boosting algorithm descends the error surface (or ascends the likelihood surface). When $\lambda=1$ we return to performing full gradient steps. Friedman (2001) relates the learning rate to regularization through shrinkage.
98
99 The optimal number of iterations, $T$, and the learning rate, $\lambda$, depend on each other. In practice I set $\lambda$ to be as small as possible and then select $T$ by cross-validation. Performance is best when $\lambda$ is as small as possible, with decreasing marginal utility for smaller and smaller $\lambda$. Slower learning rates do not necessarily scale the optimal number of iterations. That is, the fact that when $\lambda=1.0$ the optimal $T$ is 100 iterations does {\it not} necessarily imply that when $\lambda=0.1$ the optimal $T$ is 1000 iterations.
100
101 \subsection{Variance reduction using subsampling}
102
103 Friedman (2002) proposed the stochastic gradient boosting algorithm that simply samples uniformly without replacement from the dataset before estimating the next gradient step. He found that this additional step greatly improved performance. We estimate the regression $\EV(z(y,\hat f(\mathbf{x}))|\mathbf{x})$ using a random subsample of the dataset.
104
105 \subsection{ANOVA decomposition}
106
107 Certain function approximation methods are decomposable in terms of a ``functional ANOVA decomposition''. That is, the function is decomposable as
108 \begin{equation}
109 \label{ANOVAdecomp}
110 f(\mathbf{x}) = \sum_j f_j(x_j) + \sum_{jk} f_{jk}(x_j,x_k) + \sum_{jk\ell} f_{jk\ell}(x_j,x_k,x_\ell) + \cdots.
111 \end{equation} This applies to boosted trees. Regression stumps (one-split decision trees) depend on only one variable and fall into the first term of (\ref{ANOVAdecomp}). Trees with two splits fall into the second term of (\ref{ANOVAdecomp}), and so on. By restricting the depth of the trees produced on each boosting iteration we can control the order of approximation. Often additive components are sufficient to approximate a multivariate function well; generalized additive models, the na\"{\i}ve Bayes classifier, and boosted stumps are examples. When the approximation is restricted to first order we can also produce plots of $x_j$ versus $f_j(x_j)$ to demonstrate how changes in $x_j$ might affect changes in the response variable.
112
113 \subsection{Relative influence} Friedman (2001) also develops an extension of a variable's ``relative influence'' for boosted estimates. For tree based methods the approximate relative influence of a variable $x_j$ is
114 \begin{equation}
115 \label{RelInfluence}
116 \hspace{0.5in}
117 \hat J_j^2 = \hspace{-0.1in}\sum_{\mathrm{splits~on~}x_j}\hspace{-0.2in}I_t^2
118 \end{equation} where $I_t^2$ is the empirical improvement by splitting on $x_j$ at that point. Friedman's extension to boosted models is to average the relative influence of variable $x_j$ across all the trees generated by the boosting algorithm.
119
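Both the relative influence measure and the first-order plots are available directly from a fitted \texttt{gbm} object. The following sketch is illustrative only; the simulated data and all tuning values are assumptions made for this example.
\begin{verbatim}
library(gbm)

set.seed(2)
n <- 1000
d <- data.frame(x1 = runif(n), x2 = runif(n), x3 = runif(n))
d$y <- sin(2 * pi * d$x1) + 2 * d$x2 + rnorm(n, sd = 0.3)   # x3 is pure noise

fit <- gbm(y ~ x1 + x2 + x3, data = d, distribution = "gaussian",
           n.trees = 1000, interaction.depth = 1,   # stumps give an additive fit
           shrinkage = 0.05)

summary(fit, n.trees = 1000)          # relative influence of x1, x2, x3
plot(fit, i.var = 1, n.trees = 1000)  # estimated first-order term f_1(x_1)
\end{verbatim}
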
120 \begin{figure}
121 \aRule
122 Select
123 \begin{itemize}
124 \item a loss function (\texttt{distribution})
125 \item the number of iterations, $T$ (\texttt{n.trees})
126 \item the depth of each tree, $K$ (\texttt{interaction.depth})
127 \item the shrinkage (or learning rate) parameter, $\lambda$ (\texttt{shrinkage})
128 \item the subsampling rate, $p$ (\texttt{bag.fraction})
129 \end{itemize}
130 Initialize $\hat f(\mathbf{x})$ to be a constant, $\hat f(\mathbf{x}) = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\rho)$ \\
131 For $t$ in $1,\ldots,T$ do
132 \begin{enumerate}
133 \item Compute the negative gradient as the working response
134 \begin{equation}
135 z_i = -\frac{\partial}{\partial f(\mathbf{x}_i)} \Psi(y_i,f(\mathbf{x}_i)) \mbox{\Huge $|$}_{f(\mathbf{x}_i)=\hat f(\mathbf{x}_i)}
136 \end{equation}
137 \item Randomly select $p\times N$ cases from the dataset
138 \item Fit a regression tree with $K$ terminal nodes, $g(\mathbf{x})=\EV(z|\mathbf{x})$. This tree is fit using only those randomly selected observations
139 \item Compute the optimal terminal node predictions, $\rho_1,\ldots,\rho_K$, as
140 \begin{equation}
141 \rho_k = \arg \min_{\rho} \sum_{\mathbf{x}_i\in S_k} \Psi(y_i,\hat f(\mathbf{x}_i)+\rho)
142 \end{equation}
143 where $S_k$ is the set of $\mathbf{x}$s that define terminal node $k$. Again this step uses only the randomly selected observations.
144 \item Update $\hat f(\mathbf{x})$ as
145 \begin{equation}
146 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \lambda\rho_{k(\mathbf{x})}
147 \end{equation}
148 where $k(\mathbf{x})$ indicates the index of the terminal node into which an observation with features $\mathbf{x}$ would fall.
149 \end{enumerate}
150 \aRule
151 \caption{Boosting as implemented in \texttt{gbm()}}
152 \label{fig:gbm}
153 \end{figure}
154
155 \section{Common user options}
156
157 This section discusses the options to gbm that most users will need to change or tune.
158
159 \subsection{Loss function}
160
161 The first and foremost choice is \texttt{distribution}. This should be easily dictated by the application. For most classification problems either \texttt{bernoulli} or \texttt{adaboost} will be appropriate, the former being recommended. For continuous outcomes the choices are \texttt{gaussian} (for minimizing squared error), \texttt{laplace} (for minimizing absolute error), and quantile regression (for estimating percentiles of the conditional distribution of the outcome). Censored survival outcomes require \texttt{coxph}. Count outcomes may use \texttt{poisson}, although one might also consider \texttt{gaussian} or \texttt{laplace} depending on the analytical goals.
162
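The sketch below shows how a few of these choices are passed through the \texttt{distribution} argument, including the list form used for quantile regression. The simulated data and all settings are illustrative assumptions.
\begin{verbatim}
library(gbm)

set.seed(3)
n <- 1000
d <- data.frame(x1 = runif(n), x2 = runif(n))
d$y.cont <- sin(2 * pi * d$x1) + d$x2 + rnorm(n, sd = 0.3)
d$y.bin  <- rbinom(n, size = 1, prob = plogis(4 * d$x1 - 2))

# Continuous outcome, squared-error loss
fit.gauss <- gbm(y.cont ~ x1 + x2, data = d, distribution = "gaussian",
                 n.trees = 500, shrinkage = 0.05)

# 0/1 outcome, Bernoulli deviance (recommended for classification)
fit.bern <- gbm(y.bin ~ x1 + x2, data = d, distribution = "bernoulli",
                n.trees = 500, shrinkage = 0.05)

# 25th percentile of the conditional distribution of y.cont
fit.q25 <- gbm(y.cont ~ x1 + x2, data = d,
               distribution = list(name = "quantile", alpha = 0.25),
               n.trees = 500, shrinkage = 0.05)
\end{verbatim}
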
163 \subsection{The relationship between shrinkage and number of iterations} The issues that most new users of gbm struggle with are the choice of \texttt{n.trees} and \texttt{shrinkage}. It is important to know that smaller values of \texttt{shrinkage} (almost) always give improved predictive performance. That is, setting \texttt{shrinkage=0.001} will almost certainly result in a model with better out-of-sample predictive performance than setting \texttt{shrinkage=0.01}. However, there are computational costs, both storage and CPU time, associated with setting \texttt{shrinkage} to be low. The model with \texttt{shrinkage=0.001} will likely require ten times as many iterations as the model with \texttt{shrinkage=0.01}, increasing storage and computation time by a factor of 10. Figure~\ref{fig:shrinkViters} shows the relationship between predictive performance, the number of iterations, and the shrinkage parameter. Note that the increase in the optimal number of iterations between two choices for shrinkage is roughly equal to the ratio of the shrinkage parameters. It is generally the case that for small shrinkage parameters, 0.001 for example, there is a fairly long plateau in which predictive performance is at its best. My rule of thumb is to set \texttt{shrinkage} as small as possible while still being able to fit the model in a reasonable amount of time and storage. I usually aim for 3,000 to 10,000 iterations with shrinkage rates between 0.01 and 0.001.
164
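To see the trade-off numerically, the following sketch fits the same simulated data with two shrinkage values and compares the cross-validated estimates of the optimal number of iterations. The data and settings are illustrative assumptions, and the exact optima will vary from run to run.
\begin{verbatim}
library(gbm)

set.seed(4)
n <- 1000
d <- data.frame(x1 = runif(n), x2 = runif(n), x3 = runif(n))
d$y <- sin(2 * pi * d$x1) + 2 * d$x2 + rnorm(n, sd = 0.5)

fit.fast <- gbm(y ~ ., data = d, distribution = "gaussian",
                n.trees = 1000, shrinkage = 0.05, cv.folds = 5)
fit.slow <- gbm(y ~ ., data = d, distribution = "gaussian",
                n.trees = 10000, shrinkage = 0.005, cv.folds = 5)

gbm.perf(fit.fast, method = "cv")  # optimal T at shrinkage = 0.05
gbm.perf(fit.slow, method = "cv")  # roughly 10 times larger at shrinkage = 0.005
\end{verbatim}
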
165 \begin{figure}[ht] \begin{center} \includegraphics[width=5in]{shrinkage-v-iterations} \end{center} \caption{Out-of-sample predictive performance by number of iterations and shrinkage. Smaller values of the shrinkage parameter offer improved predictive performance, but with decreasing marginal improvement.} \label{fig:shrinkViters} \end{figure}
166
167 \subsection{Estimating the optimal number of iterations} gbm offers three methods for estimating the optimal number of iterations after the gbm model has been fit: an independent test set (\texttt{test}), out-of-bag estimation (\texttt{OOB}), and $v$-fold cross validation (\texttt{cv}). The function \texttt{gbm.perf} computes the iteration estimate.
168
169 Like Friedman's MART software, the independent test set method uses a single holdout test set to select the optimal number of iterations. If \texttt{train.fraction} is set to be less than 1, then only the \textit{first} \texttt{train.fraction}$\times$\texttt{nrow(data)} observations will be used to fit the model. Note that if the data are sorted in a systematic way (such as cases for which $y=1$ come first), then the data should be shuffled before running gbm. Those observations not used in the model fit can be used to get an unbiased estimate of the optimal number of iterations. The downside of this method is that a considerable number of observations are used to estimate the single regularization parameter (number of iterations), leaving a reduced dataset for estimating the entire multivariate model structure. Use \texttt{gbm.perf(...,method="test")} to obtain an estimate of the optimal number of iterations using the held out test set.
170
171 If \texttt{bag.fraction} is set to be greater than 0 (0.5 is recommended), gbm computes an out-of-bag estimate of the improvement in predictive performance. It evaluates the reduction in deviance on those observations not used in selecting the next regression tree. The out-of-bag estimator underestimates the reduction in deviance. As a result, it almost always is too conservative in its selection for the optimal number of iterations. The motivation behind this method was to avoid having to set aside a large independent dataset, which reduces the information available for learning the model structure. Use \texttt{gbm.perf(...,method="OOB")} to obtain the OOB estimate.
172
173 Lastly, gbm offers $v$-fold cross validation for estimating the optimal number of iterations. If \texttt{cv.folds=5} is specified when fitting the gbm model, then gbm will do 5-fold cross validation. gbm will fit five gbm models in order to compute the cross validation error estimate and then will fit a sixth and final gbm model with \texttt{n.trees} iterations using all of the data. The returned model object will have a component labeled \texttt{cv.error}. Note that \texttt{gbm.more} will do additional gbm iterations but will not add to the \texttt{cv.error} component. Use \texttt{gbm.perf(...,method="cv")} to obtain the cross validation estimate.
174
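The sketch below fits a single model (on simulated data, with illustrative settings) in a way that makes all three estimates available and then extracts each one with \texttt{gbm.perf}. As noted above, shuffle the data first if the rows are sorted in a systematic way.
\begin{verbatim}
library(gbm)

set.seed(5)
n <- 2000
d <- data.frame(x1 = runif(n), x2 = runif(n))
d$y <- sin(2 * pi * d$x1) + d$x2 + rnorm(n, sd = 0.5)
d <- d[sample(nrow(d)), ]          # shuffle in case the rows are ordered

fit <- gbm(y ~ x1 + x2, data = d, distribution = "gaussian",
           n.trees = 3000, shrinkage = 0.01,
           bag.fraction = 0.5,     # enables method = "OOB"
           train.fraction = 0.8,   # last 20% held out for method = "test"
           cv.folds = 5)           # enables method = "cv"

best.test <- gbm.perf(fit, method = "test")
best.oob  <- gbm.perf(fit, method = "OOB")   # tends to be conservative
best.cv   <- gbm.perf(fit, method = "cv")
c(test = best.test, OOB = best.oob, cv = best.cv)
\end{verbatim}
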
175 \begin{figure}[ht]
176 \begin{center}
177 \includegraphics[width=5in]{oobperf2}
178 \end{center}
179 \caption{Out-of-sample predictive performance of four methods of selecting the optimal number of iterations. The vertical axis plots performance relative to the best. The boxplots indicate relative performance across thirteen real datasets from the UCI repository. See \texttt{demo(OOB-reps)}.}
180 \label{fig:oobperf}
181 \end{figure}
182
183 Figure~\ref{fig:oobperf} compares the methods for estimating the optimal number of iterations across 13 datasets. The boxplots show each method's performance relative to the best method on that dataset. For most datasets the methods perform similarly; however, 5-fold cross validation is consistently the best of them. OOB, using a 33\% test set, and using a 20\% test set all have datasets for which they perform considerably worse than the best method. My recommendation is to use 5- or 10-fold cross validation if you can afford the computing time. Otherwise you may choose among the other options, knowing that OOB is conservative.
184
185 \section{Available distributions}
186
187 This section gives some of the mathematical detail for each of the distribution options that gbm offers. The gbm engine written in C++ has access to a C++ class for each of these distributions. Each class contains methods for computing the associated deviance, initial value, the gradient, and the constants to predict in each terminal node.
188
189 In the equations shown below, for non-zero offset terms, replace $f(\mathbf{x}_i)$ with $o_i + f(\mathbf{x}_i)$.
190
191 \subsection{Gaussian}
192
193 \begin{tabular}{ll}
194 Deviance & $\displaystyle \frac{1}{\sum w_i} \sum w_i(y_i-f(\mathbf{x}_i))^2$ \\
195 Initial value & $\displaystyle f(\mathbf{x})=\frac{\sum w_i(y_i-o_i)}{\sum w_i}$ \\
196 Gradient & $z_i=y_i - f(\mathbf{x}_i)$ \\
197 Terminal node estimates & $\displaystyle \frac{\sum w_i(y_i-f(\mathbf{x}_i))}{\sum w_i}$
198 \end{tabular}
199
200 \subsection{AdaBoost}
201
202 \begin{tabular}{ll} Deviance & $\displaystyle \frac{1}{\sum w_i} \sum w_i\exp(-(2y_i-1)f(\mathbf{x}_i))$ \\ Initial value & $\displaystyle \frac{1}{2}\log\frac{\sum y_iw_ie^{-o_i}}{\sum (1-y_i)w_ie^{o_i}}$ \\ Gradient & $\displaystyle z_i= -(2y_i-1)\exp(-(2y_i-1)f(\mathbf{x}_i))$ \\ Terminal node estimates & $\displaystyle \frac{\sum (2y_i-1)w_i\exp(-(2y_i-1)f(\mathbf{x}_i))}
203 {\sum w_i\exp(-(2y_i-1)f(\mathbf{x}_i))}$
204 \end{tabular}
205
206 \subsection{Bernoulli}
207
208 \begin{tabular}{ll} Deviance & $\displaystyle -2\frac{1}{\sum w_i} \sum w_i(y_if(\mathbf{x}_i)-\log(1+\exp(f(\mathbf{x}_i))))$ \\ Initial value & $\displaystyle \log\frac{\sum w_iy_i}{\sum w_i(1-y_i)}$ \\ Gradient & $\displaystyle z_i=y_i-\frac{1}{1+\exp(-f(\mathbf{x}_i))}$ \\ Terminal node estimates & $\displaystyle \frac{\sum w_i(y_i-p_i)}{\sum w_ip_i(1-p_i)}$ \\
209 & where $\displaystyle p_i = \frac{1}{1+\exp(-f(\mathbf{x}_i))}$ \\
210 \end{tabular}
211
212 Notes: \begin{itemize} \item For non-zero offset terms, the computation of the initial value requires Newton-Raphson. Initialize $f_0=0$ and iterate $\displaystyle f_0 \leftarrow f_0 + \frac{\sum w_i(y_i-p_i)}{\sum w_ip_i(1-p_i)}$ where $\displaystyle p_i = \frac{1}{1+\exp(-(o_i+f_0))}$. \end{itemize}
213
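A direct transcription of this iteration into R might look like the sketch below. The vectors \texttt{y}, \texttt{w}, and \texttt{o} are simulated for illustration; this is not the package's internal code.
\begin{verbatim}
# Newton-Raphson for the Bernoulli initial value with a non-zero offset
set.seed(6)
n <- 200
y <- rbinom(n, size = 1, prob = 0.3)   # 0/1 outcomes
w <- runif(n, 0.5, 1.5)                # observation weights
o <- rnorm(n, sd = 0.2)                # offsets

f0 <- 0
for (iter in 1:25) {
  p  <- 1 / (1 + exp(-(o + f0)))
  f0 <- f0 + sum(w * (y - p)) / sum(w * p * (1 - p))
}
f0   # weighted, offset-adjusted intercept on the logit scale
\end{verbatim}
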
214 \subsection{Laplace}
215
216 \begin{tabular}{ll} Deviance & $\frac{1}{\sum w_i} \sum w_i|y_i-f(\mathbf{x}_i)|$ \\ Initial value & $\mbox{median}_w(y)$ \\ Gradient & $z_i=\mbox{sign}(y_i-f(\mathbf{x}_i))$ \\ Terminal node estimates & $\mbox{median}_w(z)$ \end{tabular}
217
218 Notes: \begin{itemize} \item $\mbox{median}_w(y)$ denotes the weighted median, defined as the solution to the equation $\frac{\sum w_iI(y_i\leq m)}{\sum w_i}=\frac{1}{2}$ \item \texttt{gbm()} currently does not implement the weighted median and issues a warning when the user uses weighted data with \texttt{distribution="laplace"}. \end{itemize}
219
220
221 \subsection{Quantile regression}
222
223 Contributed by Brian Kriegler (see \cite{Kriegler:2010}).
224
225 \begin{tabular}{ll} Deviance & $\frac{1}{\sum w_i}
226 \left(\alpha\sum_{y_i>f(\mathbf{x}_i)} w_i(y_i-f(\mathbf{x}_i))\right. +$ \\
227 & \hspace{0.5in}$\left.(1-\alpha)\sum_{y_i\leq f(\mathbf{x}_i)} w_i(f(\mathbf{x}_i)-y_i)\right)$ \\
228 Initial value & $\mathrm{quantile}^{(\alpha)}_w(y)$ \\ Gradient & $z_i=\alpha I(y_i>f(\mathbf{x}_i))-(1-\alpha)I(y_i\leq f(\mathbf{x}_i))$ \\ Terminal node estimates & $\mathrm{quantile}^{(\alpha)}_w(z)$ \end{tabular}
229
230 Notes: \begin{itemize} \item $\mathrm{quantile}^{(\alpha)}_w(y)$ denotes the weighted quantile, defined as the solution to the equation $\frac{\sum w_iI(y_i\leq q)}{\sum w_i}=\alpha$ \item \texttt{gbm()} currently does not implement the weighted quantile and issues a warning when the user uses weighted data with \texttt{distribution=list(name="quantile")}. \end{itemize}
231
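For reference, a weighted quantile satisfying this definition can be computed in a few lines of R. The sketch below is illustrative only and is not the routine used inside \texttt{gbm}; setting \texttt{alpha = 0.5} recovers the weighted median of the Laplace subsection.
\begin{verbatim}
# Smallest q such that the weighted fraction of y at or below q is >= alpha
weighted.quantile <- function(y, w = rep(1, length(y)), alpha = 0.5) {
  ord <- order(y)
  cw  <- cumsum(w[ord]) / sum(w)
  y[ord][which(cw >= alpha)[1]]
}

set.seed(7)
y <- rexp(100)
w <- runif(100)
weighted.quantile(y, w, alpha = 0.25)  # weighted 25th percentile
weighted.quantile(y, w, alpha = 0.50)  # weighted median
\end{verbatim}
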
232
233 \subsection{Cox Proportional Hazard}
234
235 \begin{tabular}{ll} Deviance & $-2\sum w_i(\delta_i(f(\mathbf{x}_i)-\log(R_i/w_i)))$\\ Gradient & $\displaystyle z_i=\delta_i - \sum_j \delta_j
236 \frac{w_jI(t_i\geq t_j)e^{f(\mathbf{x}_i)}}
237 {\sum_k w_kI(t_k\geq t_j)e^{f(\mathbf{x}_k)}}$ \\
238 Initial value & 0 \\ Terminal node estimates & Newton-Raphson algorithm \end{tabular}
239
240 \begin{enumerate}
241 \item Initialize the terminal node predictions to 0, $\mathgbf{\rho}=0$
242 \item Let $\displaystyle
243 p_i^{(k)}=\frac{\sum_j I(k(j)=k)I(t_j\geq t_i)e^{f(\mathbf{x}_i)+\rho_k}}
244 {\sum_j I(t_j\geq t_i)e^{f(\mathbf{x}_i)+\rho_k}}$
245 \item Let $g_k=\sum w_i\delta_i\left(I(k(i)=k)-p_i^{(k)}\right)$
246 \item Let $\mathbf{H}$ be a $k\times k$ matrix with elements
247 \begin{enumerate}
248 \item Set diagonal elements $H_{mm}=\sum w_i\delta_i p_i^{(m)}\left(1-p_i^{(m)}\right)$
249 \item Set off diagonal elements $H_{mn}=-\sum w_i\delta_i p_i^{(m)}p_i^{(n)}$
250 \end{enumerate}
251 \item Newton-Raphson update $\mathgbf{\rho} \leftarrow \mathgbf{\rho} - \mathbf{H}^{-1}\mathbf{g}$
252 \item Return to step 2 until convergence
253 \end{enumerate}
254
255 Notes:
256 \begin{itemize}
257 \item $t_i$ is the survival time and $\delta_i$ is the death indicator.
258 \item $R_i$ denotes the hazard for the risk set, $R_i=\sum_{j=1}^N w_jI(t_j\geq t_i)e^{f(\mathbf{x}_i)}$
259 \item $k(i)$ indexes the terminal node of observation $i$
260 \item For speed, \texttt{gbm()} does only one step of the Newton-Raphson algorithm rather than iterating to convergence. There is no appreciable loss of accuracy since the next boosting iteration will simply correct for the prior iteration's inadequacy.
261 \item \texttt{gbm()} initially sorts the data by survival time. Doing this reduces the computation of the risk set from $O(n^2)$ to $O(n)$ at the cost of a single up front sort on survival time. After the model is fit, the data are then put back in their original order.
262 \end{itemize}
263
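As a usage illustration, a Cox model is requested by supplying a \texttt{Surv} response together with \texttt{distribution="coxph"}. The simulated survival data and all settings below are assumptions made for this sketch.
\begin{verbatim}
library(gbm)
library(survival)

set.seed(8)
n <- 500
d <- data.frame(x1 = runif(n), x2 = runif(n))
event.time <- rexp(n, rate = exp(d$x1 - d$x2))
cens.time  <- rexp(n, rate = 0.2)
d$time   <- pmin(event.time, cens.time)
d$status <- as.numeric(event.time <= cens.time)   # 1 = event, 0 = censored

fit <- gbm(Surv(time, status) ~ x1 + x2, data = d, distribution = "coxph",
           n.trees = 1000, shrinkage = 0.01, cv.folds = 5)

best <- gbm.perf(fit, method = "cv")
head(predict(fit, newdata = d, n.trees = best))   # log relative hazard f(x)
\end{verbatim}
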
264 \subsection{Poisson}
265 \begin{tabular}{ll}
266 Deviance & $\displaystyle -2\frac{1}{\sum w_i} \sum w_i(y_if(\mathbf{x}_i)-\exp(f(\mathbf{x}_i)))$ \\
267 Initial value & $\displaystyle f(\mathbf{x})= \log\left(\frac{\sum w_iy_i}{\sum w_ie^{o_i}}\right)$ \\
268 Gradient & $z_i=y_i - \exp(f(\mathbf{x}_i))$ \\
269 Terminal node estimates & $\displaystyle \log\frac{\sum w_iy_i}{\sum w_i\exp(f(\mathbf{x}_i))}$
270 \end{tabular}
271
272 The Poisson class includes special safeguards so that the most extreme predicted values are $e^{-19}$ and $e^{+19}$. This behavior is consistent with \texttt{glm()}.
273
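As a brief illustration (simulated counts, illustrative settings), counts are modeled with \texttt{distribution="poisson"}, and predictions on the mean (count) scale are obtained with \texttt{type="response"}.
\begin{verbatim}
library(gbm)

set.seed(9)
n <- 1000
d <- data.frame(x1 = runif(n), x2 = runif(n))
d$y <- rpois(n, lambda = exp(0.5 + d$x1 - d$x2))

fit <- gbm(y ~ x1 + x2, data = d, distribution = "poisson",
           n.trees = 1000, shrinkage = 0.01, cv.folds = 5)

best <- gbm.perf(fit, method = "cv")
head(predict(fit, newdata = d, n.trees = best, type = "response"))  # predicted counts
\end{verbatim}
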
274 \subsection{Pairwise}
275
276 This distribution implements ranking measures following the
277 \emph{LambdaMart} algorithm \cite{Burges:2010}. Instances belong to
278 \emph{groups}; all pairs of items with different labels, belonging to
279 the same group, are used for training. In \emph{Information Retrieval}
280 applications, groups correspond to user queries,
281 and items to (feature vectors of) documents in the associated match
282 set to be ranked.
283
284 For consistency with typical usage, our goal is to \emph{maximize} one
285 of the \emph{utility} functions listed below. Consider a group with
286 instances $x_1, \dots, x_n$, ordered such that $f(x_1) \geq f(x_2)
287 \geq \dots f(x_n)$; i.e., the \emph{rank} of $x_i$ is $i$, where
288 smaller ranks are preferable. Let $P$ be the set of all ordered pairs
289 such that $y_i > y_j$.
290
291 \begin{enumerate}
292 \item[{\bf Concordance:}] Fraction of concordant (i.e., correctly ordered)
293 pairs. For the special case of binary labels, this is equivalent to
294 the Area under the ROC Curve.
295 $$\left\{ \begin{array}{l l}\frac{\|\{(i,j)\in P |
296 f(x_i)>f(x_j)\}\|}{\|P\|}
297 & P \neq \emptyset\\
298 0 & \mbox{otherwise.}
299 \end{array}\right.
300 $$
301 \item[{\bf MRR:}] Mean reciprocal rank of the highest-ranked positive
302 instance (it is assumed $y_i\in\{0,1\}$):
303 $$\left\{ \begin{array}{l l}\frac{1}{\min\{1 \leq i \leq n |y_i=1\}}
304 & \exists i: \, 1 \leq i \leq n, y_i=1\\
305 0 & \mbox{otherwise.}\end{array}\right.$$
306 \item[{\bf MAP:}] Mean average precision, a generalization of
307 MRR to multiple positive instances:
308 $$\left\{ \begin{array}{l l} \frac{\sum_{1\leq i\leq n | y_i=1} \|\{1\leq j\leq i
309 |y_j=1\}\|\,/\,i}{\|\{1\leq i\leq n | y_i=1\}\|} & \exists i: \,
310 1 \leq i \leq n, y_i=1\\
311 0 & \mbox{otherwise.}\end{array}\right.$$
312 \item[{\bf nDCG:}] Normalized discounted cumulative gain:
313 $$\frac{\sum_{1\leq i\leq n} y_i/\log_2(i+1)}{\sum_{1\leq i\leq n}
314 y'_i/\log_2(i+1)},$$ where $y'_1, \dots, y'_n$ is a reordering of $y_1,
315 \dots,y_n$ with $y'_1 \geq y'_2 \geq \dots \geq y'_n$.
316 \end{enumerate}
317
318 The generalization to multiple (possibly weighted) groups is
319 straightforward. Sometimes a cut-off rank $k$ is given for \emph{MRR}
320 and \emph{nDCG}, in which case we replace the outer index $n$ by
321 $\min(n,k)$.
322
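A hedged usage sketch follows. The grouped data are simulated, and the \texttt{metric}, \texttt{group}, and \texttt{max.rank} entries of the \texttt{distribution} list are taken from the package documentation (see \texttt{?gbm}); treat them as assumptions to be checked there.
\begin{verbatim}
library(gbm)

set.seed(10)
n.queries <- 100
d <- data.frame(query = rep(seq_len(n.queries), each = 10),
                x1 = runif(10 * n.queries),
                x2 = runif(10 * n.queries))
d$y <- rbinom(nrow(d), size = 1, prob = plogis(3 * d$x1 - 1.5))  # binary relevance

fit <- gbm(y ~ x1 + x2, data = d,
           distribution = list(name = "pairwise",  # LambdaMart-style ranking
                               metric = "ndcg",    # or "conc", "mrr", "map"
                               group = "query",    # column defining the groups
                               max.rank = 5),      # optional cut-off rank
           n.trees = 1000, shrinkage = 0.05)

best <- gbm.perf(fit, method = "OOB")
head(predict(fit, newdata = d, n.trees = best))    # ranking scores f(x)
\end{verbatim}
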
323 The initial value for $f(x_i)$ is always zero. We derive the gradient of
324 a cost function whose gradient locally approximates the gradient of
325 the IR measure for a fixed ranking:
326
327 \begin{eqnarray*}
328 \Phi & = & \sum_{(i,j) \in P} \Phi_{ij}\\
329 & = & \sum_{(i,j) \in P} |\Delta Z_{ij}| \log \left( 1 + e^{-(f(x_i) -
330 f(x_j))}\right),
331 \end{eqnarray*}
332 where $|\Delta Z_{ij}|$ is the absolute utility difference when
333 swapping the ranks of $i$ and $j$, while leaving all other instances
334 the same. Define
335 \begin{eqnarray*}
336 \lambda_{ij} & = & \frac{\partial\Phi_{ij}}{\partial f(x_i)}\\
337 & = & - |\Delta Z_{ij}| \frac{1}{1 + e^{f(x_i) - f(x_j)}}\\
338 & = & - |\Delta Z_{ij}| \, \rho_{ij},
339 \end{eqnarray*}
340 with
341 $$ \rho_{ij} = - \frac{\lambda_{ij }}{|\Delta Z_{ij}|} = \frac{1}{1 + e^{f(x_i) - f(x_j)}}$$
342
343 For the gradient of $\Phi$ with respect to $f(x_i)$, define
344 \begin{eqnarray*}
345 \lambda_i & = & \frac{\partial \Phi}{\partial f(x_i)}\\
346 & = & \sum_{j|(i,j) \in P} \lambda_{ij} - \sum_{j|(j,i) \in P} \lambda_{ji}\\
347 & = & - \sum_{j|(i,j) \in P} |\Delta Z_{ij}| \, \rho_{ij}\\
348 & & \mbox{} + \sum_{j|(j,i) \in P} |\Delta Z_{ji}| \, \rho_{ji}.
349 \end{eqnarray*}
350
351 The second derivative is
352 \begin{eqnarray*}
353 \gamma_i & \stackrel{def}{=} & \frac{\partial^2\Phi}{\partial f(x_i)^2}\\
354 & = & \sum_{j|(i,j) \in P} |\Delta Z_{ij}| \, \rho_{ij} \, (1-\rho_{ij})\\
355 & & \mbox{} + \sum_{j|(j,i) \in P} |\Delta Z_{ji}| \, \rho_{ji} \, (1-\rho_{ji}).
356 \end{eqnarray*}
357
358 Now consider again all groups with associated weights. For a given terminal node, let $i$
359 range over all contained instances. Then its estimate is
360 $$-\frac{\sum_i v_i\lambda_{i}}{\sum_i v_i \gamma_i},$$ where
361 $v_i=w(\mbox{\em group}(i))/\|\{(j,k)\in\mbox{\em group}(i)\}\|.$
362
363 In each iteration, instances are reranked according to the preliminary
364 scores $f(x_i)$ to determine the $|\Delta Z_{ij}|$. Note that in order
365 to avoid ranking bias, we break ties by adding a small amount of
366 random noise.
367
368
369
370 \begin{thebibliography}{77} % start the bibliography
371
372 \small % put the bibliography in a small font
373
374 \bibitem{FreundSchapire:1997} Y. Freund and R.E. Schapire (1997). ``A decision-theoretic generalization of on-line learning and an application to boosting,'' \textit{Journal of Computer and System Sciences}, 55(1):119-139.
375
376 \bibitem{Friedman:2001} J.H. Friedman (2001). ``Greedy Function Approximation: A Gradient Boosting Machine,'' \textit{Annals of Statistics} 29(5):1189-1232.
377
378 \bibitem{Friedman:2002} J.H. Friedman (2002). ``Stochastic Gradient Boosting,'' \textit{Computational Statistics and Data Analysis} 38(4):367-378.
379
380 \bibitem{FHT:2000} J.H. Friedman, T. Hastie, R. Tibshirani (2000). ``Additive Logistic Regression: a Statistical View of Boosting,'' \textit{Annals of Statistics} 28(2):337-374.
381
382 \bibitem{Kriegler:2010} B. Kriegler and R. Berk (2010). ``Small Area Estimation of the Homeless in Los Angeles, An Application of Cost-Sensitive Stochastic Gradient Boosting,'' \textit{Annals of Applied Statistics} 4(3):1234-1255.
383
384 \bibitem{Ridgeway:1999} G. Ridgeway (1999). ``The state of boosting,'' \textit{Computing Science and Statistics} 31:172-181.
385
386 \bibitem{Burges:2010} C. Burges (2010). ``From RankNet to LambdaRank to LambdaMART: An Overview,'' \textit{Microsoft Research Technical Report MSR-TR-2010-82}.
387
388 \end{thebibliography} % end the bibliography
389
390 \end{document}
Binary diff not shown
+0
-391
inst/doc/gbm.tex less more
0 % setwd("c:/dev/gbm/inst/doc") % Sweave("gbm.rnw"); system("texify gbm.tex"); system("c:\\MiKTeX\\texmf\\miktex\\bin\\yap.exe gbm.dvi",wait=FALSE)
1
2 \documentclass{article}
3 \bibliographystyle{plain}
4 \usepackage[active]{srcltx}
5 \newcommand{\EV}{\mathrm{E}}
6 \newcommand{\Var}{\mathrm{Var}}
7 \newcommand{\aRule}{\begin{center} \rule{5in}{1mm} \end{center}}
8
9 \title{Generalized Boosted Models:\\A guide to the gbm package} \author{Greg Ridgeway}
10
11 %\VignetteIndexEntry{Generalized Boosted Models: A guide to the gbm package}
12
13 \newcommand{\mathgbf}[1]{{\mbox{\boldmath$#1$\unboldmath}}}
14
15 \usepackage{Sweave}
16 \begin{document}
17
18 \maketitle
19
20 Boosting takes on various forms with different programs using different loss functions, different base models, and different optimization schemes. The gbm package takes the approach described in \cite{Friedman:2001} and \cite{Friedman:2002}. Some of the terminology differs, mostly due to an effort to cast boosting terms into more standard statistical terminology (e.g. deviance). In addition, the gbm package implements boosting for models commonly used in statistics but not commonly associated with boosting. The Cox proportional hazard model, for example, is an incredibly useful model and the boosting framework applies quite readily with only slight modification \cite{Ridgeway:1999}. Also some algorithms implemented in the gbm package differ from the standard implementation. The AdaBoost algorithm \cite{FreundSchapire:1997} has a particular loss function and a particular optimization algorithm associated with it. The gbm implementation of AdaBoost adopts AdaBoost's exponential loss function (its bound on misclassification rate) but uses Friedman's gradient descent algorithm rather than the original one proposed. So the main purposes of this document is to spell out in detail what the gbm package implements.
21
22 \section{Gradient boosting}
23
24 This section essentially presents the derivation of boosting described in \cite{Friedman:2001}. The gbm package also adopts the stochastic gradient boosting strategy, a small but important tweak on the basic algorithm, described in \cite{Friedman:2002}.
25
26 \subsection{Friedman's gradient boosting machine} \label{sec:GradientBoostingMachine}
27
28 \begin{figure}
29 \aRule Initialize $\hat f(\mathbf{x})$ to be a constant, $\hat f(\mathbf{x}) = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\rho)$. \\
30 For $t$ in $1,\ldots,T$ do
31 \begin{enumerate}
32 \item Compute the negative gradient as the working response
33 \begin{equation}
34 z_i = -\frac{\partial}{\partial f(\mathbf{x}_i)} \Psi(y_i,f(\mathbf{x}_i)) \mbox{\Huge $|$}_{f(\mathbf{x}_i)=\hat f(\mathbf{x}_i)}
35 \end{equation}
36 \item Fit a regression model, $g(\mathbf{x})$, predicting $z_i$ from the covariates $\mathbf{x}_i$. \item Choose a gradient descent step size as
37 \begin{equation}
38 \rho = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\hat f(\mathbf{x}_i)+\rho g(\mathbf{x}_i))
39 \end{equation}
40 \item Update the estimate of $f(\mathbf{x})$ as
41 \begin{equation}
42 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \rho g(\mathbf{x})
43 \end{equation}
44 \end{enumerate} \aRule \caption{Friedman's Gradient Boost algorithm} \label{fig:GradientBoost} \end{figure}
45
46 Friedman (2001) and the companion paper Friedman (2002) extended the work of Friedman, Hastie, and Tibshirani (2000) and laid the ground work for a new generation of boosting algorithms. Using the connection between boosting and optimization, this new work proposes the Gradient Boosting Machine.
47
48 In any function estimation problem we wish to find a regression function, $\hat f(\mathbf{x})$, that minimizes the expectation of some loss function, $\Psi(y,f)$, as shown in (\ref{NonparametricRegression1}).
49
50 \begin{eqnarray}
51 \hspace{0.5in}
52 \hat f(\mathbf{x}) &=& \arg \min_{f(\mathbf{x})} \EV_{y,\mathbf{x}} \Psi(y,f(\mathbf{x})) \nonumber \\ \label{NonparametricRegression1}
53 &=& \arg \min_{f(\mathbf{x})} \EV_x \left[ \EV_{y|\mathbf{x}} \Psi(y,f(\mathbf{x})) \Big| \mathbf{x} \right]
54 \end{eqnarray}
55
56 We will focus on finding estimates of $f(\mathbf{x})$ such that \begin{equation}
57 \label{NonparametricRegression2}
58 \hspace{0.5in}
59 \hat f(\mathbf{x}) = \arg \min_{f(\mathbf{x})} \EV_{y|\mathbf{x}} \left[ \Psi(y,f(\mathbf{x}))|\mathbf{x} \right]
60 \end{equation}
61 Parametric regression models assume that $f(\mathbf{x})$ is a function with a finite number of parameters, $\beta$, and estimates them by selecting those values that minimize a loss function (e.g. squared error loss) over a training sample of $N$ observations on $(y,\mathbf{x})$ pairs as in (\ref{eq:Friedman1}).
62 \begin{equation}
63 \label{eq:Friedman1}
64 \hspace{0.5in}
65 \hat\beta = \arg \min_{\beta} \sum_{i=1}^N \Psi(y_i,f(\mathbf{x}_i;\beta))
66 \end{equation}
67 When we wish to estimate $f(\mathbf{x})$ non-parametrically the task becomes more difficult. Again we can proceed similarly to \cite{FHT:2000} and modify our current estimate of $f(\mathbf{x})$ by adding a new function $f(\mathbf{x})$ in a greedy fashion. Letting $f_i = f(\mathbf{x}_i)$, we see that we want to decrease the $N$ dimensional function
68 \begin{eqnarray}
69 \label{EQ:Friedman2}
70 \hspace{0.5in}
71 J(\mathbf{f}) &=& \sum_{i=1}^N \Psi(y_i,f(\mathbf{x}_i)) \nonumber \\
72 &=& \sum_{i=1}^N \Psi(y_i,F_i).
73 \end{eqnarray}
74 The negative gradient of $J(\mathbf{f})$ indicates the direction of the locally greatest decrease in $J(\mathbf{f})$. Gradient descent would then have us modify $\mathbf{f}$ as
75 \begin{equation}
76 \label{eq:Friedman3}
77 \hspace{0.5in}
78 \hat \mathbf{f} \leftarrow \hat \mathbf{f} - \rho \nabla J(\mathbf{f})
79 \end{equation}
80 where $\rho$ is the size of the step along the direction of greatest descent. Clearly, this step alone is far from our desired goal. First, it only fits $f$ at values of $\mathbf{x}$ for which we have observations. Second, it does not take into account that observations with similar $\mathbf{x}$ are likely to have similar values of $f(\mathbf{x})$. Both these problems would have disastrous effects on generalization error. However, Friedman suggests selecting a class of functions that use the covariate information to approximate the gradient, usually a regression tree. This line of reasoning produces his Gradient Boosting algorithm shown in Figure~\ref{fig:GradientBoost}. At each iteration the algorithm determines the direction, the gradient, in which it needs to improve the fit to the data and selects a particular model from the allowable class of functions that is in most agreement with the direction. In the case of squared-error loss, $\Psi(y_i,f(\mathbf{x}_i)) = \sum_{i=1}^N (y_i-f(\mathbf{x}_i))^2$, this algorithm corresponds exactly to residual fitting.
81
82 There are various ways to extend and improve upon the basic framework suggested in Figure~\ref{fig:GradientBoost}. For example, Friedman (2001) substituted several choices in for $\Psi$ to develop new boosting algorithms for robust regression with least absolute deviation and Huber loss functions. Friedman (2002) showed that a simple subsampling trick can greatly improve predictive performance while simultaneously reduce computation time. Section~\ref{GBMModifications} discusses some of these modifications.
83
84 \section{Improving boosting methods using control of the learning rate, sub-sampling, and a decomposition for interpretation} \label{GBMModifications}
85
86 This section explores the variations of the previous algorithms that have the potential to improve their predictive performance and interpretability. In particular, by controlling the optimization speed or learning rate, introducing low-variance regression methods, and applying ideas from robust regression we can produce non-parametric regression procedures with many desirable properties. As a by-product some of these modifications lead directly into implementations for learning from massive datasets. All these methods take advantage of the general form of boosting
87 \begin{equation}
88 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \EV(z(y,\hat f(\mathbf{x}))|\mathbf{x}).
89 \end{equation} So far we have taken advantage of this form only by substituting in our favorite regression procedure for $\EV_w(z|\mathbf{x})$. I will discuss some modifications to estimating $\EV_w(z|\mathbf{x})$ that have the potential to improve our algorithm.
90
91 \subsection{Decreasing the learning rate} As several authors have phrased slightly differently, ``...boosting, whatever flavor, seldom seems to overfit, no matter how many terms are included in the additive expansion''. This is not true as the discussion to \cite{FHT:2000} points out.
92
93 In the update step of any boosting algorithm we can introduce a learning rate to dampen the proposed move.
94 \begin{equation}
95 \label{eq:shrinkage}
96 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \lambda \EV(z(y,\hat f(\mathbf{x}))|\mathbf{x}).
97 \end{equation}
98 By multiplying the gradient step by $\lambda$ as in equation~\ref{eq:shrinkage} we have control on the rate at which the boosting algorithm descends the error surface (or ascends the likelihood surface). When $\lambda=1$ we return to performing full gradient steps. Friedman (2001) relates the learning rate to regularization through shrinkage.
99
100 The optimal number of iterations, $T$, and the learning rate, $\lambda$, depend on each other. In practice I set $\lambda$ to be as small as possible and then select $T$ by cross-validation. Performance is best when $\lambda$ is as small as possible performance with decreasing marginal utility for smaller and smaller $\lambda$. Slower learning rates do not necessarily scale the number of optimal iterations. That is, if when $\lambda=1.0$ and the optimal $T$ is 100 iterations, does {\it not} necessarily imply that when $\lambda=0.1$ the optimal $T$ is 1000 iterations.
101
102 \subsection{Variance reduction using subsampling}
103
104 Friedman (2002) proposed the stochastic gradient boosting algorithm that simply samples uniformly without replacement from the dataset before estimating the next gradient step. He found that this additional step greatly improved performance. We estimate the regression $\EV(z(y,\hat f(\mathbf{x}))|\mathbf{x})$ using a random subsample of the dataset.
105
106 \subsection{ANOVA decomposition}
107
108 Certain function approximation methods are decomposable in terms of a ``functional ANOVA decomposition''. That is a function is decomposable as
109 \begin{equation}
110 \label{ANOVAdecomp}
111 f(\mathbf{x}) = \sum_j f_j(x_j) + \sum_{jk} f_{jk}(x_j,x_k) + \sum_{jk\ell} f_{jk\ell}(x_j,x_k,x_\ell) + \cdots.
112 \end{equation} This applies to boosted trees. Regression stumps (one split decision trees) depend on only one variable and fall into the first term of \ref{ANOVAdecomp}. Trees with two splits fall into the second term of \ref{ANOVAdecomp} and so on. By restricting the depth of the trees produced on each boosting iteration we can control the order of approximation. Often additive components are sufficient to approximate a multivariate function well, generalized additive models, the na\"{\i}ve Bayes classifier, and boosted stumps for example. When the approximation is restricted to a first order we can also produce plots of $x_j$ versus $f_j(x_j)$ to demonstrate how changes in $x_j$ might affect changes in the response variable.
113
114 \subsection{Relative influence} Friedman (2001) also develops an extension of a variable's ``relative influence'' for boosted estimates. For tree based methods the approximate relative influence of a variable $x_j$ is
115 \begin{equation}
116 \label{RelInfluence}
117 \hspace{0.5in}
118 \hat J_j^2 = \hspace{-0.1in}\sum_{\mathrm{splits~on~}x_j}\hspace{-0.2in}I_t^2
119 \end{equation} where $I_t^2$ is the empirical improvement by splitting on $x_j$ at that point. Friedman's extension to boosted models is to average the relative influence of variable $x_j$ across all the trees generated by the boosting algorithm.
120
121 \begin{figure}
122 \aRule
123 Select
124 \begin{itemize}
125 \item a loss function (\texttt{distribution})
126 \item the number of iterations, $T$ (\texttt{n.trees})
127 \item the depth of each tree, $K$ (\texttt{interaction.depth})
128 \item the shrinkage (or learning rate) parameter, $\lambda$ (\texttt{shrinkage})
129 \item the subsampling rate, $p$ (\texttt{bag.fraction})
130 \end{itemize}
131 Initialize $\hat f(\mathbf{x})$ to be a constant, $\hat f(\mathbf{x}) = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\rho)$ \\
132 For $t$ in $1,\ldots,T$ do
133 \begin{enumerate}
134 \item Compute the negative gradient as the working response
135 \begin{equation}
136 z_i = -\frac{\partial}{\partial f(\mathbf{x}_i)} \Psi(y_i,f(\mathbf{x}_i)) \mbox{\Huge $|$}_{f(\mathbf{x}_i)=\hat f(\mathbf{x}_i)}
137 \end{equation}
138 \item Randomly select $p\times N$ cases from the dataset
139 \item Fit a regression tree with $K$ terminal nodes, $g(\mathbf{x})=\EV(z|\mathbf{x})$. This tree is fit using only those randomly selected observations
140 \item Compute the optimal terminal node predictions, $\rho_1,\ldots,\rho_K$, as
141 \begin{equation}
142 \rho_k = \arg \min_{\rho} \sum_{\mathbf{x}_i\in S_k} \Psi(y_i,\hat f(\mathbf{x}_i)+\rho)
143 \end{equation}
144 where $S_k$ is the set of $\mathbf{x}$s that define terminal node $k$. Again this step uses only the randomly selected observations.
145 \item Update $\hat f(\mathbf{x})$ as
146 \begin{equation}
147 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \lambda\rho_{k(\mathbf{x})}
148 \end{equation}
149 where $k(\mathbf{x})$ indicates the index of the terminal node into which an observation with features $\mathbf{x}$ would fall.
150 \end{enumerate}
151 \aRule
152 \caption{Boosting as implemented in \texttt{gbm()}}
153 \label{fig:gbm}
154 \end{figure}
155
156 \section{Common user options}
157
158 This section discusses the options to gbm that most users will need to change or tune.
159
160 \subsection{Loss function}
161
162 The first and foremost choice is \texttt{distribution}. This should be easily dictated by the application. For most classification problems either \texttt{bernoulli} or \texttt{adaboost} will be appropriate, the former being recommended. For continuous outcomes the choices are \texttt{gaussian} (for minimizing squared error), \texttt{laplace} (for minimizing absolute error), and quantile regression (for estimating percentiles of the conditional distribution of the outcome). Censored survival outcomes should require \texttt{coxph}. Count outcomes may use \texttt{poisson} although one might also consider \texttt{gaussian} or \texttt{laplace} depending on the analytical goals.
163
164 \subsection{The relationship between shrinkage and number of iterations} The issues that most new users of gbm struggle with are the choice of \texttt{n.trees} and \texttt{shrinkage}. It is important to know that smaller values of \texttt{shrinkage} (almost) always give improved predictive performance. That is, setting \texttt{shrinkage=0.001} will almost certainly result in a model with better out-of-sample predictive performance than setting \texttt{shrinkage=0.01}. However, there are computational costs, both storage and CPU time, associated with setting \texttt{shrinkage} to be low. The model with \texttt{shrinkage=0.001} will likely require ten times as many iterations as the model with \texttt{shrinkage=0.01}, increasing storage and computation time by a factor of 10. Figure~\ref{fig:shrinkViters} shows the relationship between predictive performance, the number of iterations, and the shrinkage parameter. Note that the increase in the optimal number of iterations between two choices for shrinkage is roughly equal to the ratio of the shrinkage parameters. It is generally the case that for small shrinkage parameters, 0.001 for example, there is a fairly long plateau in which predictive performance is at its best. My rule of thumb is to set \texttt{shrinkage} as small as possible while still being able to fit the model in a reasonable amount of time and storage. I usually aim for 3,000 to 10,000 iterations with shrinkage rates between 0.01 and 0.001.
165
166 \begin{figure}[ht] \begin{center} \includegraphics[width=5in]{shrinkage-v-iterations} \end{center} \caption{Out-of-sample predictive performance by number of iterations and shrinkage. Smaller values of the shrinkage parameter offer improved predictive performance, but with decreasing marginal improvement.} \label{fig:shrinkViters} \end{figure}
167
168 \subsection{Estimating the optimal number of iterations} gbm offers three methods for estimating the optimal number of iterations after the gbm model has been fit, an independent test set (\texttt{test}), out-of-bag estimation (\texttt{OOB}), and $v$-fold cross validation (\texttt{cv}). The function \texttt{gbm.perf} computes the iteration estimate.
169
170 Like Friedman's MART software, the independent test set method uses a single holdout test set to select the optimal number of iterations. If \texttt{train.fraction} is set to be less than 1, then only the \textit{first} \texttt{train.fraction}$\times$\texttt{nrow(data)} will be used to fit the model. Note that if the data are sorted in a systematic way (such as cases for which $y=1$ come first), then the data should be shuffled before running gbm. Those observations not used in the model fit can be used to get an unbiased estimate of the optimal number of iterations. The downside of this method is that a considerable number of observations are used to estimate the single regularization parameter (number of iterations) leaving a reduced dataset for estimating the entire multivariate model structure. Use \texttt{gbm.perf(...,method="test")} to obtain an estimate of the optimal number of iterations using the held out test set.
171
172 If \texttt{bag.fraction} is set to be greater than 0 (0.5 is recommended), gbm computes an out-of-bag estimate of the improvement in predictive performance. It evaluates the reduction in deviance on those observations not used in selecting the next regression tree. The out-of-bag estimator underestimates the reduction in deviance. As a result, it almost always is too conservative in its selection for the optimal number of iterations. The motivation behind this method was to avoid having to set aside a large independent dataset, which reduces the information available for learning the model structure. Use \texttt{gbm.perf(...,method="OOB")} to obtain the OOB estimate.
173
174 Lastly, gbm offers $v$-fold cross validation for estimating the optimal number of iterations. If when fitting the gbm model, \texttt{cv.folds=5} then gbm will do 5-fold cross validation. gbm will fit five gbm models in order to compute the cross validation error estimate and then will fit a sixth and final gbm model with \texttt{n.trees}iterations using all of the data. The returned model object will have a component labeled \texttt{cv.error}. Note that \texttt{gbm.more} will do additional gbm iterations but will not add to the \texttt{cv.error} component. Use \texttt{gbm.perf(...,method="cv")} to obtain the cross validation estimate.
175
176 \begin{figure}[ht]
177 \begin{center}
178 \includegraphics[width=5in]{oobperf2}
179 \end{center}
180 \caption{Out-of-sample predictive performance of four methods of selecting the optimal number of iterations. The vertical axis plots performance relative the best. The boxplots indicate relative performance across thirteen real datasets from the UCI repository. See \texttt{demo(OOB-reps)}.}
181 \label{fig:oobperf}
182 \end{figure}
183
184 Figure~\ref{fig:oobperf} compares the three methods for estimating the optimal number of iterations across 13 datasets. The boxplots show the methods performance relative to the best method on that dataset. For most datasets the method perform similarly, however, 5-fold cross validation is consistently the best of them. OOB, using a 33\% test set, and using a 20\% test set all have datasets for which the perform considerably worse than the best method. My recommendation is to use 5- or 10-fold cross validation if you can afford the computing time. Otherwise you may choose among the other options, knowing that OOB is conservative.
185
186 \section{Available distributions}
187
188 This section gives some of the mathematical detail for each of the distribution options that gbm offers. The gbm engine written in C++ has access to a C++ class for each of these distributions. Each class contains methods for computing the associated deviance, initial value, the gradient, and the constants to predict in each terminal node.
189
190 In the equations shown below, for non-zero offset terms, replace $f(\mathbf{x}_i)$ with $o_i + f(\mathbf{x}_i)$.
191
192 \subsection{Gaussian}
193
194 \begin{tabular}{ll}
195 Deviance & $\displaystyle \frac{1}{\sum w_i} \sum w_i(y_i-f(\mathbf{x}_i))^2$ \\
196 Initial value & $\displaystyle f(\mathbf{x})=\frac{\sum w_i(y_i-o_i)}{\sum w_i}$ \\
197 Gradient & $z_i=y_i - f(\mathbf{x}_i)$ \\
198 Terminal node estimates & $\displaystyle \frac{\sum w_i(y_i-f(\mathbf{x}_i))}{\sum w_i}$
199 \end{tabular}
200
201 \subsection{AdaBoost}
202
203 \begin{tabular}{ll} Deviance & $\displaystyle \frac{1}{\sum w_i} \sum w_i\exp(-(2y_i-1)f(\mathbf{x}_i))$ \\ Initial value & $\displaystyle \frac{1}{2}\log\frac{\sum y_iw_ie^{-o_i}}{\sum (1-y_i)w_ie^{o_i}}$ \\ Gradient & $\displaystyle z_i= -(2y_i-1)\exp(-(2y_i-1)f(\mathbf{x}_i))$ \\ Terminal node estimates & $\displaystyle \frac{\sum (2y_i-1)w_i\exp(-(2y_i-1)f(\mathbf{x}_i))}
204 {\sum w_i\exp(-(2y_i-1)f(\mathbf{x}_i))}$
205 \end{tabular}
206
207 \subsection{Bernoulli}
208
209 \begin{tabular}{ll} Deviance & $\displaystyle -2\frac{1}{\sum w_i} \sum w_i(y_if(\mathbf{x}_i)-\log(1+\exp(f(\mathbf{x}_i))))$ \\ Initial value & $\displaystyle \log\frac{\sum w_iy_i}{\sum w_i(1-y_i)}$ \\ Gradient & $\displaystyle z_i=y_i-\frac{1}{1+\exp(-f(\mathbf{x}_i))}$ \\ Terminal node estimates & $\displaystyle \frac{\sum w_i(y_i-p_i)}{\sum w_ip_i(1-p_i)}$ \\
210 & where $\displaystyle p_i = \frac{1}{1+\exp(-f(\mathbf{x}_i))}$ \\
211 \end{tabular}
212
213 Notes: \begin{itemize} \item For non-zero offset terms, the computation of the initial value requires Newton-Raphson. Initialize $f_0=0$ and iterate $\displaystyle f_0 \leftarrow f_0 + \frac{\sum w_i(y_i-p_i)}{\sum w_ip_i(1-p_i)}$ where $\displaystyle p_i = \frac{1}{1+\exp(-(o_i+f_0))}$. \end{itemize}
214
215 \subsection{Laplace}
216
217 \begin{tabular}{ll} Deviance & $\frac{1}{\sum w_i} \sum w_i|y_i-f(\mathbf{x}_i)|$ \\ Initial value & $\mbox{median}_w(y)$ \\ Gradient & $z_i=\mbox{sign}(y_i-f(\mathbf{x}_i))$ \\ Terminal node estimates & $\mbox{median}_w(z)$ \end{tabular}
218
219 Notes: \begin{itemize} \item $\mbox{median}_w(y)$ denotes the weighted median, defined as the solution to the equation $\frac{\sum w_iI(y_i\leq m)}{\sum w_i}=\frac{1}{2}$ \item \texttt{gbm()} currently does not implement the weighted median and issues a warning when the user uses weighted data with \texttt{distribution="laplace"}. \end{itemize}
220
221
222 \subsection{Quantile regression}
223
224 Contributed by Brian Kriegler (see \cite{Kriegler:2010}).
225
226 \begin{tabular}{ll} Deviance & $\frac{1}{\sum w_i}
227 \left(\alpha\sum_{y_i>f(\mathbf{x}_i)} w_i(y_i-f(\mathbf{x}_i))\right. +$ \\
228 & \hspace{0.5in}$\left.(1-\alpha)\sum_{y_i\leq f(\mathbf{x}_i)} w_i(f(\mathbf{x}_i)-y_i)\right)$ \\
229 Initial value & $\mathrm{quantile}^{(\alpha)}_w(y)$ \\ Gradient & $z_i=\alpha I(y_i>f(\mathbf{x}_i))-(1-\alpha)I(y_i\leq f(\mathbf{x}_i))$ \\ Terminal node estimates & $\mathrm{quantile}^{(\alpha)}_w(z)$ \end{tabular}
230
231 Notes: \begin{itemize} \item $\mathrm{quantile}^{(\alpha)}_w(y)$ denotes the weighted quantile, defined as the solution to the equation $\frac{\sum w_iI(y_i\leq q)}{\sum w_i}=\alpha$ \item \texttt{gbm()} currently does not implement the weighted median and issues a warning when the user uses weighted data with \texttt{distribution=list(name="quantile")}. \end{itemize}
232
233
234 \subsection{Cox Proportional Hazard}
235
236 \begin{tabular}{ll} Deviance & $-2\sum w_i(\delta_i(f(\mathbf{x}_i)-\log(R_i/w_i)))$\\ Gradient & $\displaystyle z_i=\delta_i - \sum_j \delta_j
237 \frac{w_jI(t_i\geq t_j)e^{f(\mathbf{x}_i)}}
238 {\sum_k w_kI(t_k\geq t_j)e^{f(\mathbf{x}_k)}}$ \\
239 Initial value & 0 \\ Terminal node estimates & Newton-Raphson algorithm \end{tabular}
240
241 \begin{enumerate}
242 \item Initialize the terminal node predictions to 0, $\mathgbf{\rho}=0$
243 \item Let $\displaystyle
244 p_i^{(k)}=\frac{\sum_j I(k(j)=k)I(t_j\geq t_i)e^{f(\mathbf{x}_i)+\rho_k}}
245 {\sum_j I(t_j\geq t_i)e^{f(\mathbf{x}_i)+\rho_k}}$
246 \item Let $g_k=\sum w_i\delta_i\left(I(k(i)=k)-p_i^{(k)}\right)$
247 \item Let $\mathbf{H}$ be a $k\times k$ matrix with diagonal elements
248 \begin{enumerate}
249 \item Set diagonal elements $H_{mm}=\sum w_i\delta_i p_i^{(m)}\left(1-p_i^{(m)}\right)$
250 \item Set off diagonal elements $H_{mn}=-\sum w_i\delta_i p_i^{(m)}p_i^{(n)}$
251 \end{enumerate}
252 \item Newton-Raphson update $\mathgbf{\rho} \leftarrow \mathgbf{\rho} - \mathbf{H}^{-1}\mathbf{g}$
253 \item Return to step 2 until convergence
254 \end{enumerate}
255
256 Notes:
257 \begin{itemize}
258 \item $t_i$ is the survival time and $\delta_i$ is the death indicator.
259 \item $R_i$ denotes the hazard for the risk set, $R_i=\sum_{j=1}^N w_jI(t_j\geq t_i)e^{f(\mathbf{x}_i)}$
260 \item $k(i)$ indexes the terminal node of observation $i$
261 \item For speed, \texttt{gbm()} does only one step of the Newton-Raphson algorithm rather than iterating to convergence. No appreciable loss of accuracy since the next boosting iteration will simply correct for the prior iterations inadequacy.
262 \item \texttt{gbm()} initially sorts the data by survival time. Doing this reduces the computation of the risk set from $O(n^2)$ to $O(n)$ at the cost of a single up front sort on survival time. After the model is fit, the data are then put back in their original order.
263 \end{itemize}
264
265 \subsection{Poisson}
266 \begin{tabular}{ll}
267 Deviance & -2$\frac{1}{\sum w_i} \sum w_i(y_if(\mathbf{x}_i)-\exp(f(\mathbf{x}_i)))$ \\
268 Initial value & $\displaystyle f(\mathbf{x})= \log\left(\frac{\sum w_iy_i}{\sum w_ie^{o_i}}\right)$ \\
269 Gradient & $z_i=y_i - \exp(f(\mathbf{x}_i))$ \\
270 Terminal node estimates & $\displaystyle \log\frac{\sum w_iy_i}{\sum w_i\exp(f(\mathbf{x}_i))}$
271 \end{tabular}
272
273 The Poisson class includes special safeguards so that the most extreme predicted values are $e^{-19}$ and $e^{+19}$. This behavior is consistent with \texttt{glm()}.
274
275
276 \subsection{Pairwise}
277
278 This distribution implements ranking measures following the
279 \emph{LambdaMart} algorithm \cite{Burges:2010}. Instances belong to
280 \emph{groups}; all pairs of items with different labels, belonging to
281 the same group, are used for training. In \emph{Information Retrieval}
282 applications, groups correspond to user queries,
283 and items to (feature vectors of) documents in the associated match
284 set to be ranked.
285
286 For consistency with typical usage, our goal is to \emph{maximize} one
287 of the \emph{utility} functions listed below. Consider a group with
288 instances $x_1, \dots, x_n$, ordered such that $f(x_1) \geq f(x_2)
289 \geq \dots f(x_n)$; i.e., the \emph{rank} of $x_i$ is $i$, where
290 smaller ranks are preferable. Let $P$ be the set of all ordered pairs
291 such that $y_i > y_j$.
292
293 \begin{enumerate}
294 \item[{\bf Concordance:}] Fraction of concordant (i.e, correctly ordered)
295 pairs. For the special case of binary labels, this is equivalent to
296 the Area under the ROC Curve.
297 $$\left\{ \begin{array}{l l}\frac{\|\{(i,j)\in P |
298 f(x_i)>f(x_j)\}\|}{\|P\|}
299 & P \neq \emptyset\\
300 0 & \mbox{otherwise.}
301 \end{array}\right.
302 $$
303 \item[{\bf MRR:}] Mean reciprocal rank of the highest-ranked positive
304 instance (it is assumed $y_i\in\{0,1\}$):
305 $$\left\{ \begin{array}{l l}\frac{1}{\min\{1 \leq i \leq n |y_i=1\}}
306 & \exists i: \, 1 \leq i \leq n, y_i=1\\
307 0 & \mbox{otherwise.}\end{array}\right.$$
308 \item[{\bf MAP:}] Mean average precision, a generalization of
309 MRR to multiple positive instances:
310 $$\left\{ \begin{array}{l l} \frac{\sum_{1\leq i\leq n | y_i=1} \|\{1\leq j\leq i
311 |y_j=1\}\|\,/\,i}{\|\{1\leq i\leq n | y_i=1\}\|} & \exists i: \,
312 1 \leq i \leq n, y_i=1\\
313 0 & \mbox{otherwise.}\end{array}\right.$$
314 \item[{\bf nDCG:}] Normalized discounted cumulative gain:
315 $$\frac{\sum_{1\leq i\leq n} \log_2(i+1) \, y_i}{\sum_{1\leq i\leq n}
316 \log_2(i+1) \, y'_i},$$ where $y'_1, \dots, y'_n$ is a reordering of $y_1,
317 \dots,y_n$ with $y'_1 \geq y'_2 \geq \dots \geq y'_n$.
318 \end{enumerate}
319
320 The generalization to multiple (possibly weighted) groups is
321 straightforward. Sometimes a cut-off rank $k$ is given for \emph{MRR}
322 and \emph{nDCG}, in which case we replace the outer index $n$ by
323 $\min(n,k)$.
324
325 The initial value for $f(x_i)$ is always zero. We derive the gradient of
326 a cost function whose gradient locally approximates the gradient of
327 the IR measure for a fixed ranking:
328
329 \begin{eqnarray*}
330 \Phi & = & \sum_{(i,j) \in P} \Phi_{ij}\\
331 & = & \sum_{(i,j) \in P} |\Delta Z_{ij}| \log \left( 1 + e^{-(f(x_i) -
332 f(x_j))}\right),
333 \end{eqnarray*}
334 where $|\Delta Z_{ij}|$ is the absolute utility difference when
335 swapping the ranks of $i$ and $j$, while leaving all other instances
336 the same. Define
337 \begin{eqnarray*}
338 \lambda_{ij} & = & \frac{\partial\Phi_{ij}}{\partial f(x_i)}\\
339 & = & - |\Delta Z_{ij}| \frac{1}{1 + e^{f(x_i) - f(x_j)}}\\
340 & = & - |\Delta Z_{ij}| \, \rho_{ij},
341 \end{eqnarray*}
342 with
343 $$ \rho_{ij} = - \frac{\lambda_{ij }}{|\Delta Z_{ij}|} = \frac{1}{1 + e^{f(x_i) - f(x_j)}}$$
344
345 For the gradient of $\Phi$ with respect to $f(x_i)$, define
346 \begin{eqnarray*}
347 \lambda_i & = & \frac{\partial \Phi}{\partial f(x_i)}\\
348 & = & \sum_{j|(i,j) \in P} \lambda_{ij} - \sum_{j|(j,i) \in P} \lambda_{ji}\\
349 & = & - \sum_{j|(i,j) \in P} |\Delta Z_{ij}| \, \rho_{ij}\\
350 & & \mbox{} + \sum_{j|(j,i) \in P} |\Delta Z_{ji}| \, \rho_{ji}.
351 \end{eqnarray*}
352
353 The second derivative is
354 \begin{eqnarray*}
355 \gamma_i & \stackrel{def}{=} & \frac{\partial^2\Phi}{\partial f(x_i)^2}\\
356 & = & \sum_{j|(i,j) \in P} |\Delta Z_{ij}| \, \rho_{ij} \, (1-\rho_{ij})\\
357 & & \mbox{} + \sum_{j|(j,i) \in P} |\Delta Z_{ji}| \, \rho_{ji} \, (1-\rho_{ji}).
358 \end{eqnarray*}
359
360 Now consider again all groups with associated weights. For a given terminal node, let $i$
361 range over all contained instances. Then its estimate is
362 $$-\frac{\sum_i v_i\lambda_{i}}{\sum_i v_i \gamma_i},$$ where
363 $v_i=w(\mbox{\em group}(i))/\|\{(j,k)\in\mbox{\em group}(i)\}\|.$
364
365 In each iteration, instances are reranked according to the preliminary
366 scores $f(x_i)$ to determine the $|\Delta Z_{ij}|$. Note that in order
367 to avoid ranking bias, we break ties by adding a small amount of
368 random noise.
369
370 \begin{thebibliography}{77} % start the bibliography
371
372 \small % put the bibliography in a small font
373
374 \bibitem{FreundSchapire:1997} Y. Freund and R.E. Schapire (1997). ``A decision-theoretic generalization of on-line learning and an application to boosting,'' \textit{Journal of Computer and System Sciences}, 55(1):119-139.
375
376 \bibitem{Friedman:2001} J.H. Friedman (2001). ``Greedy Function Approximation: A Gradient Boosting Machine,'' \textit{Annals of Statistics} 29(5):1189-1232.
377
378 \bibitem{Friedman:2002} J.H. Friedman (2002). ``Stochastic Gradient Boosting,'' \textit{Computational Statistics and Data Analysis} 38(4):367-378.
379
380 \bibitem{FHT:2000} J.H. Friedman, T. Hastie, R. Tibshirani (2000). ``Additive Logistic Regression: a Statistical View of Boosting,'' \textit{Annals of Statistics} 28(2):337-374.
381
382 \bibitem{Kriegler:2010} B. Kriegler and R. Berk (2010). ``Small Area Estimation of the Homeless in Los Angeles, An Application of Cost-Sensitive Stochastic Gradient Boosting,'' \textit{Annals of Applied Statistics} 4(3):1234-1255.
383
384 \bibitem{Ridgeway:1999} G. Ridgeway (1999). ``The state of boosting,'' \textit{Computing Science and Statistics} 31:172-181.
385
386 \bibitem{Burges:2010} C. Burges (2010). ``From RankNet to LambdaRank to LambdaMART: An Overview,'' \textit{Microsoft Research Technical Report MSR-TR-2010-82}.
387
388 \end{thebibliography} % end the bibliography
389
390 \end{document}
+0
-21
inst/doc/index.html less more
[Deleted HTML vignette index page ("Vignettes from package 'gbm'", manually edited by Harry Southworth), listing "Generalized Boosted Models: A guide to the gbm package" by Greg Ridgeway; markup omitted.]
+0
-487
inst/doc/oobperf2.eps less more
[Deleted figure file: 487 lines of PostScript (WMF-to-EPS conversion of oobperf2.wmf, 2005). The plot compares methods for selecting the number of iterations (OOB, Test 33%, Test 20%, 5-fold CV) on the x-axis against "Performance over 13 datasets" (0.2 to 1.0) on the y-axis; drawing commands omitted.]
inst/doc/oobperf2.pdf less more
Binary diff not shown
+0
-1672
inst/doc/shrinkage-v-iterations.eps less more
[Deleted figure file: R Graphics Output EPS. The plot shows squared error (0.190 to 0.210) against boosting iterations (0 to 10000) for shrinkage values 0.1, 0.05, 0.01, and 0.005; drawing commands omitted.]
1508 0.87 -0.34 l
1509 0.87 -0.28 l
1510 0.87 -0.37 l
1511 0.88 -0.30 l
1512 0.87 -0.20 l
1513 0.87 -0.27 l
1514 0.87 -0.27 l
1515 0.87 -0.18 l
1516 0.87 -0.36 l
1517 0.87 -0.24 l
1518 0.87 -0.35 l
1519 0.91 -0.26 l
1520 0.87 -0.24 l
1521 0.87 -0.35 l
1522 0.87 -0.36 l
1523 0.87 -0.19 l
1524 0.87 -0.23 l
1525 0.87 -0.21 l
1526 0.87 -0.19 l
1527 0.87 -0.18 l
1528 0.87 -0.18 l
1529 0.88 -0.22 l
1530 0.87 -0.28 l
1531 0.87 -0.19 l
1532 0.87 -0.11 l
1533 0.87 -0.05 l
1534 0.87 -0.10 l
1535 0.87 -0.10 l
1536 0.87 -0.14 l
1537 0.87 0.01 l
1538 0.87 -0.12 l
1539 0.87 -0.16 l
1540 0.87 -0.10 l
1541 0.87 -0.05 l
1542 0.87 -0.09 l
1543 0.87 -0.10 l
1544 0.87 -0.06 l
1545 0.87 -0.08 l
1546 0.91 -0.08 l
1547 451.17 310.65 lineto
1548 0.87 -0.14 l
1549 0.87 -0.12 l
1550 0.87 -0.09 l
1551 0.87 -0.06 l
1552 0.87 -0.19 l
1553 0.87 -0.14 l
1554 0.87 -0.01 l
1555 0.87 -0.07 l
1556 0.87 -0.08 l
1557 0.87 -0.12 l
1558 0.87 -0.21 l
1559 0.87 -0.03 l
1560 0.87 0.02 l
1561 0.87 0.06 l
1562 0.87 0.01 l
1563 0.87 -0.09 l
1564 0.87 -0.03 l
1565 0.87 -0.16 l
1566 0.87 -0.09 l
1567 0.87 -0.12 l
1568 0.87 0.06 l
1569 0.87 -0.09 l
1570 0.87 -0.04 l
1571 0.87 -0.06 l
1572 0.92 -0.13 l
1573 0.87 -0.01 l
1574 0.87 0.03 l
1575 0.87 0.02 l
1576 0.87 -0.07 l
1577 0.87 -0.04 l
1578 0.87 -0.01 l
1579 0.87 0.02 l
1580 0.87 0.01 l
1581 0.87 -0.04 l
1582 0.87 -0.06 l
1583 0.87 -0.09 l
1584 0.87 0.15 l
1585 0.87 -0.06 l
1586 0.87 -0.08 l
1587 0.87 0.07 l
1588 0.87 -0.06 l
1589 0.87 0 l
1590 0.87 -0.07 l
1591 0.22 -0.07 l
1592 0.65 0.03 l
1593 0.88 0.17 l
1594 0.87 0.03 l
1595 0.87 0.06 l
1596 0.87 -0.06 l
1597 0.87 -0.05 l
1598 0.87 0.12 l
1599 0.91 -0.14 l
1600 0.87 -0.04 l
1601 0.87 -0.02 l
1602 0.87 0.06 l
1603 0.87 0.08 l
1604 0.87 0.07 l
1605 0.87 0.02 l
1606 0.87 -0.03 l
1607 0.87 0.02 l
1608 0.87 0.02 l
1609 0.87 -0.01 l
1610 0.87 0.05 l
1611 0.88 0.05 l
1612 0.87 0.07 l
1613 0.87 -0.06 l
1614 0.87 -0.01 l
1615 0.87 0 l
1616 0.87 -0.13 l
1617 0.87 0.03 l
1618 0.87 0.01 l
1619 0.87 -0.12 l
1620 0.87 0.10 l
1621 0.87 -0.02 l
1622 0.87 0.08 l
1623 0.87 0.05 l
1624 0.87 0.07 l
1625 0.91 -0.07 l
1626 0.87 -0.02 l
1627 0.87 -0.06 l
1628 0.87 -0.05 l
1629 0.88 0.07 l
1630 0.87 -0.04 l
1631 0.87 0.04 l
1632 0.87 0.14 l
1633 0.87 -0.02 l
1634 0.87 -0.05 l
1635 0.87 -0.02 l
1636 0.87 0.13 l
1637 0.87 0.11 l
1638 0.87 -0.07 l
1639 o
1640 18.00 204.94 577.28 636.94 cl
1641 0 1 1 rgb
1642 0.38 setlinewidth
1643 [] 0 setdash
1644 1 setlinecap
1645 1 setlinejoin
1646 10.00 setmiterlimit
1647 np
1648 488.85 278.38 m
1649 0 0 l
1650 o
1651 np
1652 488.85 278.38 m
1653 0 8.99 l
1654 o
1655 77.04 278.38 547.04 577.90 cl
1656 /ps 12 def /Font1 findfont 12 s
1657 0 0 0 rgb
1658 488.85 297.33 (0.001) 1 0 0 t
1659 0.75 setlinewidth
1660 [] 0 setdash
1661 1 setlinecap
1662 1 setlinejoin
1663 10.00 setmiterlimit
1664 np
1665 77.04 308.37 m
1666 470.00 0 l
1667 o
1668 ep
1669 %%Trailer
1670 %%Pages: 1
1671 %%EOF
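The recovered tick labels (0.005 and 0.001) indicate that the removed figure compared model performance across boosting iterations for two shrinkage values. A minimal sketch of such a comparison using only functions documented later in this diff (gbm() and gbm.perf()); the simulated data, object names, and all tuning values other than the two shrinkage rates are illustrative assumptions, not the vignette's actual code:

library(gbm)

# Illustrative regression data (not the data used in the vignette)
set.seed(101)
N <- 2000
x <- runif(N)
y <- sin(2 * pi * x) + rnorm(N, sd = 0.3)
d <- data.frame(y = y, x = x)

# Same model, two learning rates; hold out half the data so that
# gbm.perf(method = "test") can plot held-out error by iteration
fit_005 <- gbm(y ~ x, data = d, distribution = "gaussian", n.trees = 3000,
               shrinkage = 0.005, train.fraction = 0.5, verbose = FALSE)
fit_001 <- gbm(y ~ x, data = d, distribution = "gaussian", n.trees = 3000,
               shrinkage = 0.001, train.fraction = 0.5, verbose = FALSE)

# Best held-out iteration for each learning rate (each call also draws the
# training/validation error curves)
best_005 <- gbm.perf(fit_005, method = "test")
best_001 <- gbm.perf(fit_001, method = "test")
print(c(shrinkage_0.005 = best_005, shrinkage_0.001 = best_001))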
inst/doc/shrinkage-v-iterations.pdf less more
Binary diff not shown
+0
-172
inst/doc/srcltx.sty less more
0 %%
1 %% This is file `srcltx.sty',
2 %% generated with the docstrip utility.
3 %%
4 %% The original source files were:
5 %%
6 %% srcltx.dtx (with options: `package,latex')
7 %%
8 %% This package is in the public domain. It comes with no guarantees
9 %% and no reserved rights. You can use or modify this package at your
10 %% own risk.
11 %% Originally written by: Aleksander Simonic
12 %% Current maintainer: Stefan Ulrich <stefanulrich@users.sourceforge.net>
13 %%
14 \NeedsTeXFormat{LaTeX2e}
15 \ProvidesPackage{srcltx}[2006/11/12 v1.6 Source specials for inverse search in DVI files]
16 \newif\ifSRCOK \SRCOKtrue
17 \newif\ifsrc@debug@
18 \newif\ifsrc@dviwin@
19 \newif\ifsrc@winedt@\src@winedt@true
20 \newif\ifsrc@everypar@\src@everypar@true
21 \newif\ifsrc@everymath@\src@everymath@true
22 \RequirePackage{ifthen}
23 \DeclareOption{active}{\SRCOKtrue}
24 \DeclareOption{inactive}{\SRCOKfalse}
25 \DeclareOption{nowinedt}{\src@winedt@false}
26 \DeclareOption{debug}{\src@debug@true}
27 \DeclareOption{nopar}{\global\src@everypar@false}
28 \DeclareOption{nomath}{\global\src@everymath@false}
29 \newcommand*\src@maybe@space{}
30 \let\src@maybe@space\space
31 \DeclareOption{dviwin}{\let\src@maybe@space\relax}
32 \ExecuteOptions{active}
33 \ProcessOptions
34 \newcount\src@lastline
35 \global\src@lastline=-1
36 \newcommand*\src@debug{}
37 \def\src@debug#1{\ifsrc@debug@\typeout{DBG: |#1|}\fi}
38 \newcommand*\MainFile{}
39 \def\MainFile{\jobname.tex}
40 \newcommand*\CurrentInput{}
41 \gdef\CurrentInput{\MainFile}
42 \newcommand*\WinEdt{}
43 \def\WinEdt#1{\ifsrc@winedt@\typeout{:#1}\fi}
44 \newcommand\src@AfterFi{}
45 \def\src@AfterFi#1\fi{\fi#1}
46 \AtBeginDocument{%
47 \@ifpackageloaded{soul}{%
48 \let\src@SOUL@\SOUL@
49 \def\SOUL@#1{%
50 \ifSRCOK
51 \SRCOKfalse\src@SOUL@{#1}\SRCOKtrue
52 \else
53 \src@AfterFi\src@SOUL@{#1}%
54 \fi
55 }%
56 }{}%
57 }
58 \newcommand*\srcIncludeHook[1]{\protected@xdef\CurrentInput{#1.tex}}
59 \newcommand*\srcInputHook[1]{%
60 \src@getfilename@with@ext{#1}%
61 }
62 \newcommand*\src@spec{}
63 \def\src@spec{%
64 \ifSRCOK
65 \ifnum\inputlineno>\src@lastline
66 \global\src@lastline=\inputlineno
67 \src@debug{%
68 src:\the\inputlineno\src@maybe@space\CurrentInput}%
69 \special{src:\the\inputlineno\src@maybe@space\CurrentInput}%
70 \fi
71 \fi
72 }
73 \newcommand\src@before@file@hook{}
74 \newcommand\src@after@file@hook{}
75 \def\src@before@file@hook{%
76 \WinEdt{<+ \CurrentInput}%
77 \global\src@lastline=0
78 \ifSRCOK\special{src:1\src@maybe@space\CurrentInput}\fi
79 }
80 \def\src@after@file@hook#1{%
81 \WinEdt{<-}%
82 \global\src@lastline=\inputlineno
83 \global\advance\src@lastline by -1%
84 \gdef\CurrentInput{#1}%
85 \src@spec
86 }
87 \newcommand*\src@fname{}%
88 \newcommand*\src@tempa{}%
89 \newcommand*\src@extensions@path{}%
90 \newcommand*\src@getfilename@with@ext{}%
91 \def\src@extensions@path#1.#2\end{%
92 \ifthenelse{\equal{#2}{}}{%
93 \protected@edef\src@extensions@last{#1}%
94 \let\src@tempa\relax
95 }{%
96 \def\src@tempa{\src@extensions@path#2\end}%
97 }%
98 \src@tempa
99 }
100 \def\src@getfilename@with@ext#1{%
101 \expandafter\src@extensions@path#1.\end
102 \ifthenelse{\equal{\src@extensions@last}{tex}}{%
103 \protected@xdef\CurrentInput{#1}%
104 }{%
105 \protected@xdef\CurrentInput{#1.tex}%
106 }%
107 \PackageInfo{srcltx}{Expanded filename `#1' to `\CurrentInput'}%
108 }
109 \newcommand*\src@include{}
110 \newcommand*\src@@include{}
111 \let\src@include\include
112 \def\include#1{%
113 \src@spec
114 \clearpage
115 \expandafter\src@@include\expandafter{\CurrentInput}{#1}%
116 }%
117 \def\src@@include#1#2{%
118 \srcIncludeHook{#2}%
119 \src@before@file@hook
120 \src@include{#2}%
121 \src@after@file@hook{#1}%
122 }
123 \newcommand*\src@input{}
124 \newcommand*\src@@input{}
125 \newcommand*\src@@@input{}
126 \let\src@input\input
127 \def\input{\src@spec\@ifnextchar\bgroup\src@@input\@@input}%
128 \def\src@@input#1{%
129 \expandafter\src@@@input\expandafter{\CurrentInput}{#1}%
130 }
131 \def\src@@@input#1#2{%
132 \srcInputHook{#2}%
133 \src@before@file@hook
134 \src@input{#2}%
135 \src@after@file@hook{#1}%
136 }
137 \newcommand\Input{}
138 \let\Input\input
139 \ifsrc@everypar@
140 \newcommand*\src@old@everypar{}
141 \let\src@old@everypar\everypar
142 \newtoks\src@new@everypar
143 \let\everypar\src@new@everypar
144 \everypar\expandafter{\the\src@old@everypar}
145 \src@old@everypar{\the\src@new@everypar\src@spec}
146 \fi
147 \ifsrc@everymath@
148 \def\@tempa#1\the\everymath#2\delimiter{{#1\src@spec\the\everymath#2}}
149 \frozen@everymath=\expandafter\@tempa\the\frozen@everymath\delimiter
150 \fi
151 \newcommand*\src@bibliography{}
152 \newcommand*\src@@bibliography{}
153 \let\src@bibliography\bibliography
154 \def\bibliography#1{%
155 \expandafter\src@@bibliography\expandafter{\CurrentInput}{#1}%
156 }
157 \def\src@@bibliography#1#2{%
158 \protected@xdef\CurrentInput{\jobname.bbl}%
159 \src@before@file@hook
160 \src@bibliography{#2}%
161 \src@after@file@hook{#1}%
162 }
163 \newcommand*\src@old@output{}
164 \let\src@old@output\output
165 \newtoks\src@new@output
166 \let\output\src@new@output
167 \output\expandafter{\the\src@old@output}
168 \src@old@output{\SRCOKfalse\the\src@new@output}
169 \endinput
170 %%
171 %% End of file `srcltx.sty'.
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/basehaz.gbm.R
02 \name{basehaz.gbm}
13 \alias{basehaz.gbm}
2 \title{ Baseline hazard function }
3 \description{
4 Computes the Breslow estimator of the baseline hazard function for a proportional hazard regression model
5 }
4 \title{Baseline hazard function}
65 \usage{
7 basehaz.gbm(t, delta, f.x,
8 t.eval = NULL,
9 smooth = FALSE,
10 cumulative = TRUE)
6 basehaz.gbm(t, delta, f.x, t.eval = NULL, smooth = FALSE,
7 cumulative = TRUE)
118 }
129 \arguments{
13 \item{t}{ the survival times }
14 \item{delta}{ the censoring indicator }
15 \item{f.x}{ the predicted values of the regression model on the log hazard scale }
16 \item{t.eval}{ values at which the baseline hazard will be evaluated }
17 \item{smooth}{ if \code{TRUE} \code{basehaz.gbm} will smooth the estimated baseline hazard using Friedman's super smoother \code{\link{supsmu}}}
18 \item{cumulative}{ if \code{TRUE} the cumulative survival function will be computed }
10 \item{t}{The survival times.}
11
12 \item{delta}{The censoring indicator.}
13
14 \item{f.x}{The predicted values of the regression model on the log hazard
15 scale.}
16
17 \item{t.eval}{Values at which the baseline hazard will be evaluated.}
18
19 \item{smooth}{If \code{TRUE} \code{basehaz.gbm} will smooth the estimated
20 baseline hazard using Friedman's super smoother \code{\link{supsmu}}.}
21
22 \item{cumulative}{If \code{TRUE} the cumulative survival function will be
23 computed.}
24 }
25 \value{
26 A vector of length equal to the length of t (or of length
27 \code{t.eval} if \code{t.eval} is not \code{NULL}) containing the baseline
28 hazard evaluated at t (or at \code{t.eval} if \code{t.eval} is not
29 \code{NULL}). If \code{cumulative} is set to \code{TRUE} then the returned
30 vector evaluates the cumulative hazard function at those values.
31 }
32 \description{
33 Computes the Breslow estimator of the baseline hazard function for a
34 proportional hazard regression model.
1935 }
2036 \details{
21 The proportional hazard model assumes h(t|x)=lambda(t)*exp(f(x)). \code{\link{gbm}} can estimate the f(x) component via partial likelihood. After estimating f(x), \code{basehaz.gbm} can compute the a nonparametric estimate of lambda(t).
37 The proportional hazard model assumes h(t|x)=lambda(t)*exp(f(x)).
38 \code{\link{gbm}} can estimate the f(x) component via partial likelihood.
39 After estimating f(x), \code{basehaz.gbm} can compute a nonparametric
40 estimate of lambda(t).
2241 }
23 \value{
24 a vector of length equal to the length of t (or of length \code{t.eval} if \code{t.eval} is not \code{NULL}) containing the baseline hazard evaluated at t (or at \code{t.eval} if \code{t.eval} is not \code{NULL}). If \code{cumulative} is set to \code{TRUE} then the returned vector evaluates the cumulative hazard function at those values.
42 \references{
43 N. Breslow (1972). "Discussion of `Regression Models and
44 Life-Tables' by D.R. Cox," Journal of the Royal Statistical Society, Series
45 B, 34(2):216-217.
46
47 N. Breslow (1974). "Covariance analysis of censored survival data,"
48 Biometrics 30:89-99.
2549 }
26 \references{N. Breslow (1972). "Disussion of `Regression Models and Life-Tables' by D.R. Cox," Journal of the Royal Statistical Society, Series B, 34(2):216-217.
27
28 N. Breslow (1974). "Covariance analysis of censored survival data," Biometrics 30:89-99.
29 }
30 \author{ Greg Ridgeway \email{gregridgeway@gmail.com}}
31
32
33
34 \seealso{ \code{\link[survival]{survfit}}, \code{\link{gbm}} }
35
36 \keyword{ methods }
37 \keyword{ survival }
50 \seealso{
51 \code{\link[survival]{survfit}}, \code{\link{gbm}}
52 }
53 \author{
54 Greg Ridgeway \email{gregridgeway@gmail.com}
55 }
56 \keyword{methods}
57 \keyword{survival}
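A minimal sketch of the workflow this help page describes: fit a Cox proportional hazards GBM (distribution = "coxph"), obtain f(x) on the log-hazard scale with predict(), and pass the survival times, censoring indicator, and predictions to basehaz.gbm(). The simulated data and the column names (t.obs, delta, x1, x2) are illustrative assumptions:

library(gbm)
library(survival)

# Illustrative right-censored survival data
set.seed(1)
n <- 500
x1 <- runif(n)
x2 <- runif(n)
event.time <- rexp(n, rate = exp(0.7 * x1 - 0.5 * x2))
cens.time <- rexp(n, rate = 0.2)
t.obs <- pmin(event.time, cens.time)
delta <- as.numeric(event.time <= cens.time)  # 1 = event observed, 0 = censored
d <- data.frame(t.obs, delta, x1, x2)

# Proportional hazards model h(t|x) = lambda(t) * exp(f(x))
fit <- gbm(Surv(t.obs, delta) ~ x1 + x2, data = d, distribution = "coxph",
           n.trees = 500, shrinkage = 0.01, cv.folds = 3, n.cores = 1)
best <- gbm.perf(fit, method = "cv")

# f(x) on the log-hazard scale, then the Breslow estimate of the cumulative
# baseline hazard at the observed times
f.x <- predict(fit, newdata = d, n.trees = best)
H0 <- basehaz.gbm(t = d$t.obs, delta = d$delta, f.x = f.x,
                  t.eval = sort(unique(d$t.obs)), cumulative = TRUE)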
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/calibrate.plot.R
02 \name{calibrate.plot}
13 \alias{calibrate.plot}
24 \title{Calibration plot}
3 \description{
4 An experimental diagnostic tool that plots the fitted values versus the actual average values.
5 Currently developed for only \code{distribution="bernoulli"}.
6 }
75 \usage{
8 calibrate.plot(y,p,
9 distribution="bernoulli",
10 replace=TRUE,
11 line.par=list(col="black"),
12 shade.col="lightyellow",
13 shade.density=NULL,
14 rug.par=list(side=1),
15 xlab="Predicted value",
16 ylab="Observed average",
17 xlim=NULL,ylim=NULL,
18 knots=NULL,df=6,
19 ...)
6 calibrate.plot(y, p, distribution = "bernoulli", replace = TRUE,
7 line.par = list(col = "black"), shade.col = "lightyellow",
8 shade.density = NULL, rug.par = list(side = 1),
9 xlab = "Predicted value", ylab = "Observed average", xlim = NULL,
10 ylim = NULL, knots = NULL, df = 6, ...)
2011 }
2112 \arguments{
22 \item{y}{ the outcome 0-1 variable }
23 \item{p}{ the predictions estimating E(y|x) }
24 \item{distribution}{the loss function used in creating \code{p}.
25 \code{bernoulli} and \code{poisson} are currently the
26 only special options. All others default to squared error
27 assuming \code{gaussian}}
28 \item{replace}{ determines whether this plot will replace or overlay the current plot.
29 \code{replace=FALSE} is useful for comparing the calibration of several
30 methods}
31 \item{line.par}{ graphics parameters for the line }
32 \item{shade.col}{ color for shading the 2 SE region. \code{shade.col=NA} implies no 2 SE
33 region}
34 \item{shade.density}{ the \code{density} parameter for \code{\link{polygon}}}
35 \item{rug.par}{graphics parameters passed to \code{\link{rug}}}
36 \item{xlab}{x-axis label corresponding to the predicted values}
37 \item{ylab}{y-axis label corresponding to the observed average}
38 \item{xlim,ylim}{x and y-axis limits. If not specified te function will select
39 limits}
40 \item{knots,df}{these parameters are passed directly to
41 \code{\link[splines]{ns}} for constructing a natural spline
42 smoother for the calibration curve}
43 \item{...}{ other graphics parameters passed on to the plot function }
13 \item{y}{The outcome 0-1 variable.}
14
15 \item{p}{The predictions estimating E(y|x).}
16
17 \item{distribution}{The loss function used in creating \code{p}.
18 \code{bernoulli} and \code{poisson} are currently the only special options.
19 All others default to squared error assuming \code{gaussian}.}
20
21 \item{replace}{Determines whether this plot will replace or overlay the
22 current plot. \code{replace=FALSE} is useful for comparing the calibration
23 of several methods.}
24
25 \item{line.par}{Graphics parameters for the line.}
26
27 \item{shade.col}{Color for shading the 2 SE region. \code{shade.col=NA}
28 implies no 2 SE region.}
29
30 \item{shade.density}{The \code{density} parameter for \code{\link{polygon}}.}
31
32 \item{rug.par}{Graphics parameters passed to \code{\link{rug}}.}
33
34 \item{xlab}{x-axis label corresponding to the predicted values.}
35
36 \item{ylab}{y-axis label corresponding to the observed average.}
37
38 \item{xlim, ylim}{x- and y-axis limits. If not specified, the function will
39 select limits.}
40
41 \item{knots, df}{These parameters are passed directly to
42 \code{\link[splines]{ns}} for constructing a natural spline smoother for the
43 calibration curve.}
44
45 \item{...}{Additional optional arguments to be passed on to
46 \code{\link[graphics]{plot}}.}
47 }
48 \value{
49 No return values.
50 }
51 \description{
52 An experimental diagnostic tool that plots the fitted values versus the
53 actual average values. Currently only available when
54 \code{distribution = "bernoulli"}.
4455 }
4556 \details{
46 Uses natural splines to estimate E(y|p). Well-calibrated predictions
47 imply that E(y|p) = p. The plot also includes a pointwise 95% confidence
48 band.
57 Uses natural splines to estimate E(y|p). Well-calibrated predictions imply
58 that E(y|p) = p. The plot also includes a pointwise 95% confidence band.
4959 }
50 \value{
51 \code{calibrate.plot} returns no values.
52 }
53 \references{
54 J.F. Yates (1982). "External correspondence: decomposition of the mean
55 probability score," Organisational Behaviour and Human Performance 30:132-156.
56
57 D.J. Spiegelhalter (1986). "Probabilistic Prediction in Patient Management
58 and Clinical Trials," Statistics in Medicine 5:421-433.
59 }
60 \author{Greg Ridgeway \email{gregridgeway@gmail.com}}
6160 \examples{
6261 # Don't want R CMD check to think there is a dependency on rpart
6362 # so comment out the example
6968 #p <- predict(glm1,type="response")
7069 #calibrate.plot(y, p, xlim=c(0,0.6), ylim=c(0,0.6))
7170 }
72 \keyword{ hplot }
71 \references{
72 J.F. Yates (1982). "External correspondence: decomposition of
73 the mean probability score," Organisational Behaviour and Human Performance
74 30:132-156.
75
76 D.J. Spiegelhalter (1986). "Probabilistic Prediction in Patient Management
77 and Clinical Trials," Statistics in Medicine 5:421-433.
78 }
79 \author{
80 Greg Ridgeway \email{gregridgeway@gmail.com}
81 }
82 \keyword{hplot}
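Because the shipped example above is commented out (to avoid an rpart dependency in R CMD check), here is a self-contained sketch with simulated Bernoulli data; the data-generating model and the use of glm() for the predictions are illustrative assumptions, not part of the package:

library(gbm)

# Simulate a 0-1 outcome whose true probability depends on one predictor
set.seed(2)
n <- 2000
x <- rnorm(n)
y <- rbinom(n, size = 1, prob = plogis(-0.5 + 1.5 * x))

# Any estimate of E(y|x) can be assessed; here, a plain logistic regression
p.hat <- predict(glm(y ~ x, family = binomial), type = "response")

# Well-calibrated predictions should track the 45-degree line
calibrate.plot(y, p.hat, distribution = "bernoulli",
               xlim = c(0, 1), ylim = c(0, 1))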
+0
-48
man/gbm-internal.Rd less more
0 \name{gbm-internal}
1 \alias{guessDist}
2 \alias{getStratify}
3 \alias{getCVgroup}
4 \alias{checkMissing}
5 \alias{checkID}
6 \alias{checkWeights}
7 \alias{checkOffset}
8 \alias{getVarNames}
9 \alias{gbmCluster}
10 \title{gbm internal functions}
11 \description{Helper functions for preprocessing data prior to
12 building the model}
13 \usage{
14 guessDist(y)
15 getCVgroup(distribution, class.stratify.cv, y, i.train, cv.folds, group)
16 getStratify(strat, d)
17 checkMissing(x, y)
18 checkWeights(w, n)
19 checkID(id)
20 checkOffset(o, y)
21 getVarNames(x)
22 gbmCluster(n)
23 }
24 \arguments{
25 \item{y}{The response variable}
26 \item{d, distribution}{The distribution, either specified by the user or
27 implied}
28 \item{class.stratify.cv}{Whether or not to stratify, if provided by
29 the user}
30 \item{i.train}{Computed internally by \code{gbm}}
31 \item{group}{The group, if using \code{distibution='pairwise'}}
32 \item{strat}{Whether or not to stratify}
33 \item{cv.folds}{The number of cross-validation folds}
34 \item{x}{The design matrix}
35 \item{id}{The interaction depth}
36 \item{w}{The weights}
37 \item{n}{The number of cores to use in the cluster.}
38 \item{o}{The offset}
39 % \item{verbose}{Whether or not to print output to screen}
40 % \item{X, var.monotone, n.trees, n.minobsinnode, shrinkage, bag.fraction,
41 % var.names, response.name, cv.group}{Arguments passed
42 % through to gbm.fit}
43 } % Close \arguments
44 \details{
45 These are functions used internally by \code{gbm} and not intended for
46 direct use by the user.
47 }
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/gbm-internals.R
2 \name{guessDist}
3 \alias{guessDist}
4 \alias{getStratify}
5 \alias{getCVgroup}
6 \alias{checkMissing}
7 \alias{checkID}
8 \alias{checkWeights}
9 \alias{checkOffset}
10 \alias{getVarNames}
11 \alias{gbmCluster}
12 \title{gbm internal functions}
13 \usage{
14 guessDist(y)
15
16 getCVgroup(distribution, class.stratify.cv, y, i.train, cv.folds, group)
17
18 getStratify(strat, d)
19
20 checkMissing(x, y)
21
22 checkWeights(w, n)
23
24 checkID(id)
25
26 checkOffset(o, y)
27
28 getVarNames(x)
29
30 gbmCluster(n)
31 }
32 \arguments{
33 \item{y}{The response variable.}
34
35 \item{class.stratify.cv}{Whether or not to stratify, if provided by the user.}
36
37 \item{i.train}{Computed internally by \code{gbm}.}
38
39 \item{cv.folds}{The number of cross-validation folds.}
40
41 \item{group}{The group, if using \code{distibution = "pairwise"}.}
42
43 \item{strat}{Whether or not to stratify.}
44
45 \item{d, distribution}{The distribution, either specified by the user or
46 implied.}
47
48 \item{x}{The design matrix.}
49
50 \item{w}{The weights.}
51
52 \item{n}{The number of cores to use in the cluster.}
53
54 \item{id}{The interaction depth.}
55
56 \item{o}{The offset.}
57 }
58 \description{
59 Helper functions for preprocessing data prior to building a \code{"gbm"}
60 object.
61 }
62 \details{
63 These are functions used internally by \code{gbm} and not intended for direct
64 use by the user.
65 }
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/gbm-package.R
2 \docType{package}
03 \name{gbm-package}
14 \alias{gbm-package}
2 \docType{package}
3 \title{Generalized Boosted Regression Models}
4 \description{This package implements extensions to Freund and
5 Schapire's AdaBoost algorithm and J. Friedman's gradient
6 boosting machine. Includes regression methods for least
7 squares, absolute loss, logistic, Poisson, Cox proportional
8 hazards partial likelihood, multinomial, t-distribution,
9 AdaBoost exponential loss, Learning to Rank, and
10 Huberized hinge loss.}
5 \title{Generalized Boosted Regression Models (GBMs)}
6 \description{
7 This package implements extensions to Freund and Schapire's AdaBoost
8 algorithm and J. Friedman's gradient boosting machine. Includes regression
9 methods for least squares, absolute loss, logistic, Poisson, Cox
10 proportional hazards partial likelihood, multinomial, t-distribution,
11 AdaBoost exponential loss, Learning to Rank, and Huberized hinge loss.
12 }
1113 \details{
12 \tabular{ll}{
13 Package: \tab gbm\cr
14 Version: \tab 2.1\cr
15 Date: \tab 2013-05-10\cr
16 Depends: \tab R (>= 2.9.0), survival, lattice, mgcv\cr
17 License: \tab GPL (version 2 or newer)\cr
18 URL: \tab http://code.google.com/p/gradientboostedmodels/\cr
19 }
20 Index:
21 \preformatted{basehaz.gbm Baseline hazard function
22 calibrate.plot Calibration plot
23 gbm Generalized Boosted Regression Modeling
24 gbm.object Generalized Boosted Regression Model Object
25 gbm.perf GBM performance
26 plot.gbm Marginal plots of fitted gbm objects
27 predict.gbm Predict method for GBM Model Fits
28 pretty.gbm.tree Print gbm tree components
29 quantile.rug Quantile rug plot
30 relative.influence Methods for estimating relative influence
31 shrink.gbm L1 shrinkage of the predictor variables in a GBM
32 shrink.gbm.pred Predictions from a shrunked GBM
33 summary.gbm Summary of a gbm object
34 }
35
36 Further information is available in the following vignettes:
37 \tabular{ll}{
38 \code{gbm} \tab Generalized Boosted Models: A guide to the gbm package (source, pdf)\cr}
39 } % Close \details
40 \author{
41 Greg Ridgeway \email{gregridgeway@gmail.com} with contributions by
42 Daniel Edwards, Brian Kriegler, Stefan Schroedl and Harry Southworth.
14 Further information is available in vignette:
15 \code{browseVignettes(package = "gbm")}
4316 }
4417 \references{
45 Y. Freund and R.E. Schapire (1997) \dQuote{A decision-theoretic generalization of
46 on-line learning and an application to boosting,} \emph{Journal of Computer and
47 System Sciences,} 55(1):119-139.
18 Y. Freund and R.E. Schapire (1997) \dQuote{A decision-theoretic
19 generalization of on-line learning and an application to boosting,}
20 \emph{Journal of Computer and System Sciences,} 55(1):119-139.
4821
49 G. Ridgeway (1999). \dQuote{The state of boosting,} \emph{Computing Science and
50 Statistics} 31:172-181.
22 G. Ridgeway (1999). \dQuote{The state of boosting,} \emph{Computing Science
23 and Statistics} 31:172-181.
5124
52 J.H. Friedman, T. Hastie, R. Tibshirani (2000). \dQuote{Additive Logistic Regression:
53 a Statistical View of Boosting,} \emph{Annals of Statistics} 28(2):337-374.
25 J.H. Friedman, T. Hastie, R. Tibshirani (2000). \dQuote{Additive Logistic
26 Regression: a Statistical View of Boosting,} \emph{Annals of Statistics}
27 28(2):337-374.
5428
55 J.H. Friedman (2001). \dQuote{Greedy Function Approximation: A Gradient Boosting
56 Machine,} \emph{Annals of Statistics} 29(5):1189-1232.
29 J.H. Friedman (2001). \dQuote{Greedy Function Approximation: A Gradient
30 Boosting Machine,} \emph{Annals of Statistics} 29(5):1189-1232.
5731
58 J.H. Friedman (2002). \dQuote{Stochastic Gradient Boosting,} \emph{Computational Statistics
59 and Data Analysis} 38(4):367-378.
32 J.H. Friedman (2002). \dQuote{Stochastic Gradient Boosting,}
33 \emph{Computational Statistics and Data Analysis} 38(4):367-378.
6034
61 The \href{http://www-stat.stanford.edu/~jhf/R-MART.html}{MART} website.
62 } % Close \references
35 The \url{http://statweb.stanford.edu/~jhf/R-MART} website.
36 }
37 \author{
38 Greg Ridgeway \email{gregridgeway@gmail.com} with contributions by
39 Daniel Edwards, Brian Kriegler, Stefan Schroedl and Harry Southworth.
40 }
6341 \keyword{package}
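For completeness, the call the details section above refers to; it lists the vignette(s) installed with the package:

utils::browseVignettes(package = "gbm")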
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/gbm.R
02 \name{gbm}
13 \alias{gbm}
2 \alias{gbm.more}
3 \alias{gbm.fit}
4 \title{Generalized Boosted Regression Modeling}
5 \description{Fits generalized boosted regression models.}
4 \title{Generalized Boosted Regression Modeling (GBM)}
65 \usage{
7 gbm(formula = formula(data),
8 distribution = "bernoulli",
9 data = list(),
10 weights,
11 var.monotone = NULL,
12 n.trees = 100,
13 interaction.depth = 1,
14 n.minobsinnode = 10,
15 shrinkage = 0.001,
16 bag.fraction = 0.5,
17 train.fraction = 1.0,
18 cv.folds=0,
19 keep.data = TRUE,
20 verbose = "CV",
21 class.stratify.cv=NULL,
22 n.cores = NULL)
23
24 gbm.fit(x, y,
25 offset = NULL,
26 misc = NULL,
27 distribution = "bernoulli",
28 w = NULL,
29 var.monotone = NULL,
30 n.trees = 100,
31 interaction.depth = 1,
32 n.minobsinnode = 10,
33 shrinkage = 0.001,
34 bag.fraction = 0.5,
35 nTrain = NULL,
36 train.fraction = NULL,
37 keep.data = TRUE,
38 verbose = TRUE,
39 var.names = NULL,
40 response.name = "y",
41 group = NULL)
42
43 gbm.more(object,
44 n.new.trees = 100,
45 data = NULL,
46 weights = NULL,
47 offset = NULL,
48 verbose = NULL)
49 }
50 \arguments{\item{formula}{a symbolic description of the model to be fit. The formula may include an offset term (e.g. y~offset(n)+x). If \code{keep.data=FALSE} in the initial call to \code{gbm} then it is the user's responsibility to resupply the offset to \code{\link{gbm.more}}.}
51 \item{distribution}{either a character string specifying the name of the distribution to use or a list with a component \code{name} specifying the distribution and any additional parameters needed. If not specified, \code{gbm} will try to guess: if the response has only 2 unique values, bernoulli is assumed; otherwise, if the response is a factor, multinomial is assumed; otherwise, if the response has class "Surv", coxph is assumed; otherwise, gaussian is assumed.
52
53 Currently available options are "gaussian" (squared error), "laplace" (absolute loss), "tdist" (t-distribution loss), "bernoulli" (logistic regression for 0-1 outcomes),
54 "huberized" (huberized hinge loss for 0-1 outcomes),
55 "multinomial" (classification when there are more than 2 classes), "adaboost" (the AdaBoost exponential loss for 0-1 outcomes), "poisson" (count outcomes), "coxph" (right censored observations), "quantile", or "pairwise" (ranking measure using the LambdaMart algorithm).
56
57 If quantile regression is specified, \code{distribution} must be a list of the form \code{list(name="quantile",alpha=0.25)} where \code{alpha} is the quantile to estimate. The current version's quantile regression method does not handle non-constant weights and will stop.
58
59 If "tdist" is specified, the default degrees of freedom is 4 and this can be controlled by specifying \code{distribution=list(name="tdist", df=DF)} where \code{DF} is your chosen degrees of freedom.
60
61 If "pairwise" regression is specified, \code{distribution} must be a list
62 of the form
63 \code{list(name="pairwise",group=...,metric=...,max.rank=...)}
64 (\code{metric} and \code{max.rank} are optional, see
65 below). \code{group} is a character vector with the column names of
66 \code{data} that jointly indicate the group an instance belongs to
67 (typically a query in Information Retrieval applications).
68 For training, only pairs of
69 instances from the same group and with different target labels can be
70 considered. \code{metric} is the IR measure to use, one of
71 \describe{
72 \item{\code{conc}:}{Fraction of concordant pairs; for binary labels,
73 this is equivalent to the Area under the ROC Curve}
74 \item{\code{mrr}:}{Mean reciprocal rank of the highest-ranked positive instance}
75 \item{\code{map}:}{Mean average precision, a generalization of
76 \code{mrr} to multiple positive instances}
77 \item{\code{ndcg:}}{Normalized discounted cumulative gain. The score is
78 the weighted sum (DCG) of the user-supplied target values, weighted by
79 log(rank+1), and normalized to the maximum achievable value. This
80 is the default if the user did not specify a metric.}
6 gbm(formula = formula(data), distribution = "bernoulli",
7 data = list(), weights, var.monotone = NULL, n.trees = 100,
8 interaction.depth = 1, n.minobsinnode = 10, shrinkage = 0.1,
9 bag.fraction = 0.5, train.fraction = 1, cv.folds = 0,
10 keep.data = TRUE, verbose = FALSE, class.stratify.cv = NULL,
11 n.cores = NULL)
12 }
13 \arguments{
14 \item{formula}{A symbolic description of the model to be fit. The formula
15 may include an offset term (e.g. y~offset(n)+x). If
16 \code{keep.data = FALSE} in the initial call to \code{gbm} then it is the
17 user's responsibility to resupply the offset to \code{\link{gbm.more}}.}
18
19 \item{distribution}{Either a character string specifying the name of the
20 distribution to use or a list with a component \code{name} specifying the
21 distribution and any additional parameters needed. If not specified,
22 \code{gbm} will try to guess: if the response has only 2 unique values,
23 bernoulli is assumed; otherwise, if the response is a factor, multinomial is
24 assumed; otherwise, if the response has class \code{"Surv"}, coxph is
25 assumed; otherwise, gaussian is assumed.
26
27 Currently available options are \code{"gaussian"} (squared error),
28 \code{"laplace"} (absolute loss), \code{"tdist"} (t-distribution loss),
29 \code{"bernoulli"} (logistic regression for 0-1 outcomes),
30 \code{"huberized"} (huberized hinge loss for 0-1 outcomes), \code{"multinomial"} (classification when there are more than 2 classes),
31 \code{"adaboost"} (the AdaBoost exponential loss for 0-1 outcomes),
32 \code{"poisson"} (count outcomes), \code{"coxph"} (right censored
33 observations), \code{"quantile"}, or \code{"pairwise"} (ranking measure
34 using the LambdaMart algorithm).
35
36 If quantile regression is specified, \code{distribution} must be a list of
37 the form \code{list(name = "quantile", alpha = 0.25)} where \code{alpha} is
38 the quantile to estimate. The current version's quantile regression method
39 does not handle non-constant weights and will stop.
40
41 If \code{"tdist"} is specified, the default degrees of freedom is 4 and
42 this can be controlled by specifying
43 \code{distribution = list(name = "tdist", df = DF)} where \code{DF} is your
44 chosen degrees of freedom.
45
46 If "pairwise" regression is specified, \code{distribution} must be a list of
47 the form \code{list(name="pairwise",group=...,metric=...,max.rank=...)}
48 (\code{metric} and \code{max.rank} are optional, see below). \code{group} is
49 a character vector with the column names of \code{data} that jointly
50 indicate the group an instance belongs to (typically a query in Information
51 Retrieval applications). For training, only pairs of instances from the same
52 group and with different target labels can be considered. \code{metric} is
53 the IR measure to use, one of
54 \describe{
55 \item{\code{conc}:}{Fraction of concordant pairs; for binary labels, this
56 is equivalent to the Area under the ROC Curve}
59 \item{\code{mrr}:}{Mean reciprocal rank of the highest-ranked positive
60 instance}
62 \item{\code{map}:}{Mean average precision, a generalization of \code{mrr}
63 to multiple positive instances}
65 \item{\code{ndcg}:}{Normalized discounted cumulative gain. The score is
66 the weighted sum (DCG) of the user-supplied target values, weighted
67 by log(rank+1), and normalized to the maximum achievable value. This
68 is the default if the user did not specify a metric.}
8169 }
8270
8371 \code{ndcg} and \code{conc} allow arbitrary target values, while binary
84 targets \{0,1\} are expected for \code{map} and \code{mrr}. For
85 \code{ndcg} and \code{mrr}, a cut-off can be chosen using a positive
86 integer parameter \code{max.rank}. If left unspecified, all ranks are
87 taken into account.
88
89 Note that splitting of instances into training and validation sets
90 follows group boundaries and therefore only approximates the specified
91 \code{train.fraction} ratio (the same applies to cross-validation
92 folds). Internally, queries are randomly shuffled before training, to
93 avoid bias.
72 targets {0,1} are expected for \code{map} and \code{mrr}. For \code{ndcg}
73 and \code{mrr}, a cut-off can be chosen using a positive integer parameter
74 \code{max.rank}. If left unspecified, all ranks are taken into account.
75
76 Note that splitting of instances into training and validation sets follows
77 group boundaries and therefore only approximates the specified
78 \code{train.fraction} ratio (the same applies to cross-validation folds).
79 Internally, queries are randomly shuffled before training, to avoid bias.
9480
9581 Weights can be used in conjunction with pairwise metrics, however it is
9682 assumed that they are constant for instances from the same group.
9783
98 For details and background on the algorithm, see e.g. Burges (2010).
99 }
100
101 \item{data}{an optional data frame containing the variables in the model. By default the variables are taken from \code{environment(formula)}, typically the environment from which \code{gbm} is called. If \code{keep.data=TRUE} in the initial call to \code{gbm} then \code{gbm} stores a copy with the object. If \code{keep.data=FALSE} then subsequent calls to \code{\link{gbm.more}} must resupply the same dataset. It becomes the user's responsibility to resupply the same data at this point.}
102 \item{weights}{an optional vector of weights to be used in the fitting process. Must be positive but do not need to be normalized. If \code{keep.data=FALSE} in the initial call to \code{gbm} then it is the user's responsibility to resupply the weights to \code{\link{gbm.more}}.}
103 \item{var.monotone}{an optional vector, the same length as the number of predictors, indicating which variables have a monotone increasing (+1), decreasing (-1), or arbitrary (0) relationship with the outcome.}
104 \item{n.trees}{the total number of trees to fit. This is equivalent to the number of iterations and the number of basis functions in the additive expansion.}
105 \item{cv.folds}{Number of cross-validation folds to perform. If \code{cv.folds}>1 then \code{gbm}, in addition to the usual fit, will perform a cross-validation, calculate an estimate of generalization error returned in \code{cv.error}.}
106 \item{interaction.depth}{The maximum depth of variable interactions. 1 implies an additive model, 2 implies a model with up to 2-way interactions, etc.}
107 \item{n.minobsinnode}{minimum number of observations in the trees terminal nodes. Note that this is the actual number of observations not the total weight.}
108 \item{shrinkage}{a shrinkage parameter applied to each tree in the expansion. Also known as the learning rate or step-size reduction.}
109 \item{bag.fraction}{the fraction of the training set observations randomly selected to propose the next tree in the expansion. This introduces randomnesses into the model fit. If \code{bag.fraction}<1 then running the same model twice will result in similar but different fits. \code{gbm} uses the R random number generator so \code{set.seed} can ensure that the model can be reconstructed. Preferably, the user can save the returned \code{\link{gbm.object}} using \code{\link{save}}.}
84 For details and background on the algorithm, see e.g. Burges (2010).}
85
86 \item{data}{an optional data frame containing the variables in the model. By
87 default the variables are taken from \code{environment(formula)}, typically
88 the environment from which \code{gbm} is called. If \code{keep.data=TRUE} in
89 the initial call to \code{gbm} then \code{gbm} stores a copy with the
90 object. If \code{keep.data=FALSE} then subsequent calls to
91 \code{\link{gbm.more}} must resupply the same dataset. It becomes the user's
92 responsibility to resupply the same data at this point.}
93
94 \item{weights}{an optional vector of weights to be used in the fitting
95 process. Must be positive but do not need to be normalized. If
96 \code{keep.data=FALSE} in the initial call to \code{gbm} then it is the
97 user's responsibility to resupply the weights to \code{\link{gbm.more}}.}
98
99 \item{var.monotone}{an optional vector, the same length as the number of
100 predictors, indicating which variables have a monotone increasing (+1),
101 decreasing (-1), or arbitrary (0) relationship with the outcome.}
102
103 \item{n.trees}{Integer specifying the total number of trees to fit. This is
104 equivalent to the number of iterations and the number of basis functions in
105 the additive expansion. Default is 100.}
106
107 \item{interaction.depth}{Integer specifying the maximum depth of each tree
108 (i.e., the highest level of variable interactions allowed). A value of 1
109 implies an additive model, a value of 2 implies a model with up to 2-way
110 interactions, etc. Default is 1.}
111
112 \item{n.minobsinnode}{Integer specifying the minimum number of observations
113 in the terminal nodes of the trees. Note that this is the actual number of
114 observations, not the total weight.}
115
116 \item{shrinkage}{a shrinkage parameter applied to each tree in the
117 expansion. Also known as the learning rate or step-size reduction; 0.001 to
118 0.1 usually work, but a smaller learning rate typically requires more trees.
119 Default is 0.1.}
120
121 \item{bag.fraction}{the fraction of the training set observations randomly
122 selected to propose the next tree in the expansion. This introduces
123 randomness into the model fit. If \code{bag.fraction} < 1 then running the
124 same model twice will result in similar but different fits. \code{gbm} uses
125 the R random number generator so \code{set.seed} can ensure that the model
126 can be reconstructed. Preferably, the user can save the returned
127 \code{\link{gbm.object}} using \code{\link{save}}. Default is 0.5.}
128
110129 \item{train.fraction}{The first \code{train.fraction * nrows(data)}
111 observations are used to fit the \code{gbm} and the remainder are used
112 for computing out-of-sample estimates of the loss function.}
113 \item{nTrain}{An integer representing the number of cases on which to
114 train. This is the preferred way of specification for \code{gbm.fit};
115 The option \code{train.fraction} in \code{gbm.fit} is deprecated and
116 only maintained for backward compatibility. These two parameters are
117 mutually exclusive. If both are unspecified, all data is used for training.}
118 \item{keep.data}{a logical variable indicating whether to keep the data and an index of the data stored with the object. Keeping the data and index makes subsequent calls to \code{\link{gbm.more}} faster at the cost of storing an extra copy of the dataset.}
119 \item{object}{a \code{gbm} object created from an initial call to \code{\link{gbm}}.}
120 \item{n.new.trees}{the number of additional trees to add to \code{object}.}
121 \item{verbose}{If TRUE, gbm will print out progress and performance indicators. If this option is left unspecified for gbm.more then it uses \code{verbose} from \code{object}.}
122 \item{class.stratify.cv}{whether or not the cross-validation should be stratified by class. Defaults to \code{TRUE} for \code{distribution="multinomial"} and is only implementated for \code{multinomial} and \code{bernoulli}. The purpose of stratifying the cross-validation is to help avoiding situations in which training sets do not contain all classes.}
123 \item{x, y}{For \code{gbm.fit}: \code{x} is a data frame or data matrix containing the predictor variables and \code{y} is the vector of outcomes. The number of rows in \code{x} must be the same as the length of \code{y}.}
124 \item{offset}{a vector of values for the offset}
125 \item{misc}{For \code{gbm.fit}: \code{misc} is an R object that is simply passed on to the gbm engine. It can be used for additional data for the specific distribution. Currently it is only used for passing the censoring indicator for the Cox proportional hazards model.}
126 \item{w}{For \code{gbm.fit}: \code{w} is a vector of weights of the same length as the \code{y}.}
127 \item{var.names}{For \code{gbm.fit}: A vector of strings of length equal to the number of columns of \code{x} containing the names of the predictor variables.}
128 \item{response.name}{For \code{gbm.fit}: A character string label for the response variable.}
129 \item{group}{\code{group} used when \code{distribution = 'pairwise'.}}
130 observations are used to fit the \code{gbm} and the remainder are used for
131 computing out-of-sample estimates of the loss function.}
132
133 \item{cv.folds}{Number of cross-validation folds to perform. If
134 \code{cv.folds}>1 then \code{gbm}, in addition to the usual fit, will
135 perform a cross-validation, calculate an estimate of generalization error
136 returned in \code{cv.error}.}
137
138 \item{keep.data}{a logical variable indicating whether to keep the data and
139 an index of the data stored with the object. Keeping the data and index
140 makes subsequent calls to \code{\link{gbm.more}} faster at the cost of
141 storing an extra copy of the dataset.}
142
143 \item{verbose}{Logical indicating whether or not to print out progress and
144 performance indicators (\code{TRUE}). If this option is left unspecified for
145 \code{gbm.more}, then it uses \code{verbose} from \code{object}. Default is
146 \code{FALSE}.}
147
148 \item{class.stratify.cv}{Logical indicating whether or not the
149 cross-validation should be stratified by class. Defaults to \code{TRUE} for
150 \code{distribution = "multinomial"} and is only implemented for
151 \code{"multinomial"} and \code{"bernoulli"}. The purpose of stratifying the
152 cross-validation is to help avoiding situations in which training sets do
153 not contain all classes.}
154
130155 \item{n.cores}{The number of CPU cores to use. The cross-validation loop
131 will attempt to send different CV folds off to different cores. If
132 \code{n.cores} is not specified by the user, it is guessed using the
133 \code{detectCores} function in the \code{parallel} package. Note that
134 the documentation for \code{detectCores} makes clear that it is not
135 failsave and could return a spurious number of available cores.}
136 }
137
138 \details{See the \href{../doc/gbm.pdf}{gbm vignette} for technical details.
139
140 This package implements the generalized boosted modeling framework. Boosting is the process of iteratively adding basis functions in a greedy fashion so that each additional basis function further reduces the selected loss function. This implementation closely follows Friedman's Gradient Boosting Machine (Friedman, 2001).
141
142 In addition to many of the features documented in the Gradient Boosting Machine, \code{gbm} offers additional features including the out-of-bag estimator for the optimal number of iterations, the ability to store and manipulate the resulting \code{gbm} object, and a variety of other loss functions that had not previously had associated boosting algorithms, including the Cox partial likelihood for censored data, the poisson likelihood for count outcomes, and a gradient boosting implementation to minimize the AdaBoost exponential loss function.
143
144 \code{gbm.fit} provides the link between R and the C++ gbm engine. \code{gbm} is a front-end to \code{gbm.fit} that uses the familiar R modeling formulas. However, \code{\link[stats]{model.frame}} is very slow if there are many predictor variables. For power-users with many variables use \code{gbm.fit}. For general practice \code{gbm} is preferable.}
145
146 \value{ \code{gbm}, \code{gbm.fit}, and \code{gbm.more} return a \code{\link{gbm.object}}. }
147
148 \references{
149 Y. Freund and R.E. Schapire (1997) \dQuote{A decision-theoretic generalization of on-line learning and an application to boosting,} \emph{Journal of Computer and System Sciences,} 55(1):119-139.
150
151 G. Ridgeway (1999). \dQuote{The state of boosting,} \emph{Computing Science and Statistics} 31:172-181.
152
153 J.H. Friedman, T. Hastie, R. Tibshirani (2000). \dQuote{Additive Logistic Regression: a Statistical View of Boosting,} \emph{Annals of Statistics} 28(2):337-374.
154
155 J.H. Friedman (2001). \dQuote{Greedy Function Approximation: A Gradient Boosting Machine,} \emph{Annals of Statistics} 29(5):1189-1232.
156
157 J.H. Friedman (2002). \dQuote{Stochastic Gradient Boosting,} \emph{Computational Statistics and Data Analysis} 38(4):367-378.
158
159 B. Kriegler (2007). \href{http://statistics.ucla.edu/theses/uclastat-dissertation-2007:2}{Cost-Sensitive Stochastic Gradient Boosting Within a Quantitative Regression Framework}. PhD dissertation, UCLA Statistics.
160
161 C. Burges (2010). \dQuote{From RankNet to LambdaRank to LambdaMART: An Overview,} Microsoft Research Technical Report MSR-TR-2010-82.
162
163 \href{http://sites.google.com/site/gregridgeway}{Greg Ridgeway's site}.
164
165 The \href{http://www-stat.stanford.edu/~jhf/R-MART.html}{MART} website. }
166
167 \author{Greg Ridgeway \email{gregridgeway@gmail.com}
168
169 Quantile regression code developed by Brian Kriegler \email{bk@stat.ucla.edu}
170
171 t-distribution, and multinomial code developed by Harry Southworth and Daniel Edwards
172
173 Pairwise code developed by Stefan Schroedl \email{schroedl@a9.com}}
174
175 \seealso{ \code{\link{gbm.object}}, \code{\link{gbm.perf}}, \code{\link{plot.gbm}},
176 \code{\link{predict.gbm}}, \code{\link{summary.gbm}}, \code{\link{pretty.gbm.tree}}. }
177
178 \examples{ # A least squares regression example # create some data
179
156 will attempt to send different CV folds off to different cores. If
157 \code{n.cores} is not specified by the user, it is guessed using the
158 \code{detectCores} function in the \code{parallel} package. Note that the
159 documentation for \code{detectCores} makes clear that it is not failsafe and
160 could return a spurious number of available cores.}
161 }
162 \value{
163 A \code{\link{gbm.object}} object.
164 }
165 \description{
166 Fits generalized boosted regression models. For technical details, see the
167 vignette: \code{utils::browseVignettes("gbm")}.
168 }
169 \details{
170 \code{gbm.fit} provides the link between R and the C++ gbm engine.
171 \code{gbm} is a front-end to \code{gbm.fit} that uses the familiar R
172 modeling formulas. However, \code{\link[stats]{model.frame}} is very slow if
173 there are many predictor variables. For power-users with many variables use
174 \code{gbm.fit}. For general practice \code{gbm} is preferable.
175
176
177 This package implements the generalized boosted modeling framework. Boosting
178 is the process of iteratively adding basis functions in a greedy fashion so
179 that each additional basis function further reduces the selected loss
180 function. This implementation closely follows Friedman's Gradient Boosting
181 Machine (Friedman, 2001).
182
183 In addition to many of the features documented in the Gradient Boosting
184 Machine, \code{gbm} offers additional features including the out-of-bag
185 estimator for the optimal number of iterations, the ability to store and
186 manipulate the resulting \code{gbm} object, and a variety of other loss
187 functions that had not previously had associated boosting algorithms,
188 including the Cox partial likelihood for censored data, the poisson
189 likelihood for count outcomes, and a gradient boosting implementation to
190 minimize the AdaBoost exponential loss function.
191 }
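The details above recommend gbm.fit() over the formula interface when there are many predictors, since it avoids model.frame(). A minimal sketch under the assumption that gbm.fit() keeps the x/y interface shown in the usage block removed earlier in this diff; the simulated predictors and tuning values are illustrative:

library(gbm)

# Predictors as a data frame and the response as a vector, bypassing the
# formula interface (and therefore model.frame())
set.seed(3)
N <- 1000
X <- data.frame(x1 = runif(N), x2 = runif(N), x3 = runif(N))
y <- 2 * X$x1 - X$x2 + rnorm(N, sd = 0.1)

fit <- gbm.fit(x = X, y = y, distribution = "gaussian", n.trees = 200,
               shrinkage = 0.05, interaction.depth = 2, nTrain = 800,
               verbose = FALSE)
best <- gbm.perf(fit, method = "test")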
192 \examples{
193 #
194 # A least squares regression example
195 #
196
197 # Simulate data
198 set.seed(101) # for reproducibility
180199 N <- 1000
181200 X1 <- runif(N)
182 X2 <- 2*runif(N)
183 X3 <- ordered(sample(letters[1:4],N,replace=TRUE),levels=letters[4:1])
184 X4 <- factor(sample(letters[1:6],N,replace=TRUE))
185 X5 <- factor(sample(letters[1:3],N,replace=TRUE))
186 X6 <- 3*runif(N)
187 mu <- c(-1,0,1,2)[as.numeric(X3)]
188
189 SNR <- 10 # signal-to-noise ratio
190 Y <- X1**1.5 + 2 * (X2**.5) + mu
191 sigma <- sqrt(var(Y)/SNR)
192 Y <- Y + rnorm(N,0,sigma)
193
194 # introduce some missing values
195 X1[sample(1:N,size=500)] <- NA
196 X4[sample(1:N,size=300)] <- NA
197
198 data <- data.frame(Y=Y,X1=X1,X2=X2,X3=X3,X4=X4,X5=X5,X6=X6)
199
200 # fit initial model
201 gbm1 <-
202 gbm(Y~X1+X2+X3+X4+X5+X6, # formula
203 data=data, # dataset
204 var.monotone=c(0,0,0,0,0,0), # -1: monotone decrease,
205 # +1: monotone increase,
206 # 0: no monotone restrictions
207 distribution="gaussian", # see the help for other choices
208 n.trees=1000, # number of trees
209 shrinkage=0.05, # shrinkage or learning rate,
210 # 0.001 to 0.1 usually work
211 interaction.depth=3, # 1: additive model, 2: two-way interactions, etc.
212 bag.fraction = 0.5, # subsampling fraction, 0.5 is probably best
213 train.fraction = 0.5, # fraction of data for training,
214 # first train.fraction*N used for training
215 n.minobsinnode = 10, # minimum total weight needed in each node
216 cv.folds = 3, # do 3-fold cross-validation
217 keep.data=TRUE, # keep a copy of the dataset with the object
218 verbose=FALSE, # don't print out progress
219 n.cores=1) # use only a single core (detecting #cores is
220 # error-prone, so avoided here)
221
222 # check performance using an out-of-bag estimator
223 # OOB underestimates the optimal number of iterations
224 best.iter <- gbm.perf(gbm1,method="OOB")
201 X2 <- 2 * runif(N)
202 X3 <- ordered(sample(letters[1:4], N, replace = TRUE), levels = letters[4:1])
203 X4 <- factor(sample(letters[1:6], N, replace = TRUE))
204 X5 <- factor(sample(letters[1:3], N, replace = TRUE))
205 X6 <- 3 * runif(N)
206 mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
207 SNR <- 10 # signal-to-noise ratio
208 Y <- X1 ^ 1.5 + 2 * (X2 ^ 0.5) + mu
209 sigma <- sqrt(var(Y) / SNR)
210 Y <- Y + rnorm(N, 0, sigma)
211 X1[sample(1:N,size=500)] <- NA # introduce some missing values
212 X4[sample(1:N,size=300)] <- NA # introduce some missing values
213 data <- data.frame(Y, X1, X2, X3, X4, X5, X6)
214
215 # Fit a GBM
216 set.seed(102) # for reproducibility
217 gbm1 <- gbm(Y ~ ., data = data, var.monotone = c(0, 0, 0, 0, 0, 0),
218 distribution = "gaussian", n.trees = 100, shrinkage = 0.1,
219 interaction.depth = 3, bag.fraction = 0.5, train.fraction = 0.5,
220 n.minobsinnode = 10, cv.folds = 5, keep.data = TRUE,
221 verbose = FALSE, n.cores = 1)
222
223 # Check performance using the out-of-bag (OOB) error; the OOB error typically
224 # underestimates the optimal number of iterations
225 best.iter <- gbm.perf(gbm1, method = "OOB")
225226 print(best.iter)
226227
227 # check performance using a 50\% heldout test set
228 best.iter <- gbm.perf(gbm1,method="test")
228 # Check performance using the 50\% heldout test set
229 best.iter <- gbm.perf(gbm1, method = "test")
229230 print(best.iter)
230231
231 # check performance using 5-fold cross-validation
232 best.iter <- gbm.perf(gbm1,method="cv")
232 # Check performance using 5-fold cross-validation
233 best.iter <- gbm.perf(gbm1, method = "cv")
233234 print(best.iter)
234235
235 # plot the performance # plot variable influence
236 summary(gbm1,n.trees=1) # based on the first tree
237 summary(gbm1,n.trees=best.iter) # based on the estimated best number of trees
238
239 # compactly print the first and last trees for curiosity
240 print(pretty.gbm.tree(gbm1,1))
241 print(pretty.gbm.tree(gbm1,gbm1$n.trees))
242
243 # make some new data
236 # Plot relative influence of each variable
237 par(mfrow = c(1, 2))
238 summary(gbm1, n.trees = 1) # using first tree
239 summary(gbm1, n.trees = best.iter) # using estimated best number of trees
240
241 # Compactly print the first and last trees for curiosity
242 print(pretty.gbm.tree(gbm1, i.tree = 1))
243 print(pretty.gbm.tree(gbm1, i.tree = gbm1$n.trees))
244
245 # Simulate new data
246 set.seed(103) # for reproducibility
244247 N <- 1000
245248 X1 <- runif(N)
246 X2 <- 2*runif(N)
247 X3 <- ordered(sample(letters[1:4],N,replace=TRUE))
248 X4 <- factor(sample(letters[1:6],N,replace=TRUE))
249 X5 <- factor(sample(letters[1:3],N,replace=TRUE))
250 X6 <- 3*runif(N)
251 mu <- c(-1,0,1,2)[as.numeric(X3)]
252
253 Y <- X1**1.5 + 2 * (X2**.5) + mu + rnorm(N,0,sigma)
254
255 data2 <- data.frame(Y=Y,X1=X1,X2=X2,X3=X3,X4=X4,X5=X5,X6=X6)
256
257 # predict on the new data using "best" number of trees
258 # f.predict generally will be on the canonical scale (logit,log,etc.)
259 f.predict <- predict(gbm1,data2,best.iter)
249 X2 <- 2 * runif(N)
250 X3 <- ordered(sample(letters[1:4], N, replace = TRUE))
251 X4 <- factor(sample(letters[1:6], N, replace = TRUE))
252 X5 <- factor(sample(letters[1:3], N, replace = TRUE))
253 X6 <- 3 * runif(N)
254 mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
255 Y <- X1 ^ 1.5 + 2 * (X2 ^ 0.5) + mu + rnorm(N, 0, sigma)
256 data2 <- data.frame(Y, X1, X2, X3, X4, X5, X6)
257
258 # Predict on the new data using the "best" number of trees; by default,
259 # predictions will be on the link scale
260 Yhat <- predict(gbm1, newdata = data2, n.trees = best.iter, type = "link")
260261
261262 # least squares error
262 print(sum((data2$Y-f.predict)^2))
263
264 # create marginal plots
265 # plot variable X1,X2,X3 after "best" iterations
266 par(mfrow=c(1,3))
267 plot(gbm1,1,best.iter)
268 plot(gbm1,2,best.iter)
269 plot(gbm1,3,best.iter)
270 par(mfrow=c(1,1))
271 # contour plot of variables 1 and 2 after "best" iterations
272 plot(gbm1,1:2,best.iter)
273 # lattice plot of variables 2 and 3
274 plot(gbm1,2:3,best.iter)
275 # lattice plot of variables 3 and 4
276 plot(gbm1,3:4,best.iter)
277
278 # 3-way plots
279 plot(gbm1,c(1,2,6),best.iter,cont=20)
280 plot(gbm1,1:3,best.iter)
281 plot(gbm1,2:4,best.iter)
282 plot(gbm1,3:5,best.iter)
283
284 # do another 100 iterations
285 gbm2 <- gbm.more(gbm1,100,
286 verbose=FALSE) # stop printing detailed progress
287 }
288 \keyword{models}
289 \keyword{nonlinear}
290 \keyword{survival}
291 \keyword{nonparametric}
292 \keyword{tree}
263 print(sum((data2$Y - Yhat)^2))
264
265 # Construct univariate partial dependence plots
266 p1 <- plot(gbm1, i.var = 1, n.trees = best.iter)
267 p2 <- plot(gbm1, i.var = 2, n.trees = best.iter)
268 p3 <- plot(gbm1, i.var = "X3", n.trees = best.iter) # can use index or name
269 grid.arrange(p1, p2, p3, ncol = 3)
270
271 # Construct bivariate partial dependence plots
272 plot(gbm1, i.var = 1:2, n.trees = best.iter)
273 plot(gbm1, i.var = c("X2", "X3"), n.trees = best.iter)
274 plot(gbm1, i.var = 3:4, n.trees = best.iter)
275
276 # Construct trivariate partial dependence plots
277 plot(gbm1, i.var = c(1, 2, 6), n.trees = best.iter,
278 continuous.resolution = 20)
279 plot(gbm1, i.var = 1:3, n.trees = best.iter)
280 plot(gbm1, i.var = 2:4, n.trees = best.iter)
281 plot(gbm1, i.var = 3:5, n.trees = best.iter)
282
283 # Add more (i.e., 100) boosting iterations to the ensemble
284 gbm2 <- gbm.more(gbm1, n.new.trees = 100, verbose = FALSE)
285 }
286 \references{
287 Y. Freund and R.E. Schapire (1997) \dQuote{A decision-theoretic
288 generalization of on-line learning and an application to boosting,}
289 \emph{Journal of Computer and System Sciences,} 55(1):119-139.
290
291 G. Ridgeway (1999). \dQuote{The state of boosting,} \emph{Computing Science
292 and Statistics} 31:172-181.
293
294 J.H. Friedman, T. Hastie, R. Tibshirani (2000). \dQuote{Additive Logistic
295 Regression: a Statistical View of Boosting,} \emph{Annals of Statistics}
296 28(2):337-374.
297
298 J.H. Friedman (2001). \dQuote{Greedy Function Approximation: A Gradient
299 Boosting Machine,} \emph{Annals of Statistics} 29(5):1189-1232.
300
301 J.H. Friedman (2002). \dQuote{Stochastic Gradient Boosting,}
302 \emph{Computational Statistics and Data Analysis} 38(4):367-378.
303
304 B. Kriegler (2007). Cost-Sensitive Stochastic Gradient Boosting Within a
305 Quantitative Regression Framework. Ph.D. Dissertation. University of
306 California at Los Angeles, Los Angeles, CA, USA. Advisor(s) Richard A. Berk.
307 \url{https://dl.acm.org/citation.cfm?id=1354603}.
308
309 C. Burges (2010). \dQuote{From RankNet to LambdaRank to LambdaMART: An
310 Overview,} Microsoft Research Technical Report MSR-TR-2010-82.
311 }
312 \seealso{
313 \code{\link{gbm.object}}, \code{\link{gbm.perf}},
314 \code{\link{plot.gbm}}, \code{\link{predict.gbm}}, \code{\link{summary.gbm}},
315 and \code{\link{pretty.gbm.tree}}.
316 }
317 \author{
318 Greg Ridgeway \email{gregridgeway@gmail.com}
319
320 Quantile regression code developed by Brian Kriegler
321 \email{bk@stat.ucla.edu}
322
323 t-distribution and multinomial code developed by Harry Southworth and
324 Daniel Edwards
325
326 Pairwise code developed by Stefan Schroedl \email{schroedl@a9.com}
327 }
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/gbm.fit.R
2 \name{gbm.fit}
3 \alias{gbm.fit}
4 \title{Generalized Boosted Regression Modeling (GBM)}
5 \usage{
6 gbm.fit(x, y, offset = NULL, misc = NULL, distribution = "bernoulli",
7 w = NULL, var.monotone = NULL, n.trees = 100,
8 interaction.depth = 1, n.minobsinnode = 10, shrinkage = 0.001,
9 bag.fraction = 0.5, nTrain = NULL, train.fraction = NULL,
10 keep.data = TRUE, verbose = TRUE, var.names = NULL,
11 response.name = "y", group = NULL)
12 }
13 \arguments{
14 \item{x}{A data frame or matrix containing the predictor variables. The
15 number of rows in \code{x} must be the same as the length of \code{y}.}
16
17 \item{y}{A vector of outcomes. The number of rows in \code{x} must be the
18 same as the length of \code{y}.}
19
20 \item{offset}{A vector of offset values.}
21
22 \item{misc}{An R object that is simply passed on to the gbm engine. It can be
23 used for additional data for the specific distribution. Currently it is only
24 used for passing the censoring indicator for the Cox proportional hazards
25 model.}
26
27 \item{distribution}{Either a character string specifying the name of the
28 distribution to use or a list with a component \code{name} specifying the
29 distribution and any additional parameters needed. If not specified,
30 \code{gbm} will try to guess: if the response has only 2 unique values,
31 bernoulli is assumed; otherwise, if the response is a factor, multinomial is
32 assumed; otherwise, if the response has class \code{"Surv"}, coxph is
33 assumed; otherwise, gaussian is assumed.
34
35 Currently available options are \code{"gaussian"} (squared error),
36 \code{"laplace"} (absolute loss), \code{"tdist"} (t-distribution loss),
37 \code{"bernoulli"} (logistic regression for 0-1 outcomes),
38 \code{"huberized"} (huberized hinge loss for 0-1 outcomes), classes),
39 \code{"adaboost"} (the AdaBoost exponential loss for 0-1 outcomes),
40 \code{"poisson"} (count outcomes), \code{"coxph"} (right censored
41 observations), \code{"quantile"}, or \code{"pairwise"} (ranking measure
42 using the LambdaMart algorithm).
43
44 If quantile regression is specified, \code{distribution} must be a list of
45 the form \code{list(name = "quantile", alpha = 0.25)} where \code{alpha} is
46 the quantile to estimate. The current version's quantile regression method
47 does not handle non-constant weights and will stop.
48
49 If \code{"tdist"} is specified, the default degrees of freedom is 4 and
50 this can be controlled by specifying
51 \code{distribution = list(name = "tdist", df = DF)} where \code{DF} is your
52 chosen degrees of freedom.
53
54 If "pairwise" regression is specified, \code{distribution} must be a list of
55 the form \code{list(name="pairwise",group=...,metric=...,max.rank=...)}
56 (\code{metric} and \code{max.rank} are optional, see below). \code{group} is
57 a character vector with the column names of \code{data} that jointly
58 indicate the group an instance belongs to (typically a query in Information
59 Retrieval applications). For training, only pairs of instances from the same
60 group and with different target labels can be considered. \code{metric} is
61 the IR measure to use, one of
62 \describe{
63 \item{list("conc")}{Fraction of concordant pairs; for binary labels, this
64 is equivalent to the Area under the ROC Curve}
65 \item{:}{Fraction of concordant pairs; for binary labels, this is
66 equivalent to the Area under the ROC Curve}
67 \item{list("mrr")}{Mean reciprocal rank of the highest-ranked positive
68 instance}
69 \item{:}{Mean reciprocal rank of the highest-ranked positive instance}
70 \item{list("map")}{Mean average precision, a generalization of \code{mrr}
71 to multiple positive instances}\item{:}{Mean average precision, a
72 generalization of \code{mrr} to multiple positive instances}
73 \item{list("ndcg:")}{Normalized discounted cumulative gain. The score is
74 the weighted sum (DCG) of the user-supplied target values, weighted
75 by log(rank+1), and normalized to the maximum achievable value. This
76 is the default if the user did not specify a metric.}
77 }
78
79 \code{ndcg} and \code{conc} allow arbitrary target values, while binary
80 targets {0,1} are expected for \code{map} and \code{mrr}. For \code{ndcg}
81 and \code{mrr}, a cut-off can be chosen using a positive integer parameter
82 \code{max.rank}. If left unspecified, all ranks are taken into account.
83
84 Note that splitting of instances into training and validation sets follows
85 group boundaries and therefore only approximates the specified
86 \code{train.fraction} ratio (the same applies to cross-validation folds).
87 Internally, queries are randomly shuffled before training, to avoid bias.
88
89 Weights can be used in conjunction with pairwise metrics; however, it is
90 assumed that they are constant for instances from the same group.
91
92 For details and background on the algorithm, see e.g. Burges (2010); a brief specification sketch follows this argument list.}
93
94 \item{w}{A vector of weights of the same length as the \code{y}.}
95
96 \item{var.monotone}{an optional vector, the same length as the number of
97 predictors, indicating which variables have a monotone increasing (+1),
98 decreasing (-1), or arbitrary (0) relationship with the outcome.}
99
100 \item{n.trees}{the total number of trees to fit. This is equivalent to the
101 number of iterations and the number of basis functions in the additive
102 expansion.}
103
104 \item{interaction.depth}{The maximum depth of variable interactions. A value
105 of 1 implies an additive model, a value of 2 implies a model with up to 2-way
106 interactions, etc. Default is \code{1}.}
107
108 \item{n.minobsinnode}{Integer specifying the minimum number of observations
109 in the terminal nodes of the trees. Note that this is the actual number of
110 observations, not the total weight.}
111
112 \item{shrinkage}{The shrinkage parameter applied to each tree in the
113 expansion. Also known as the learning rate or step-size reduction; 0.001 to
114 0.1 usually work, but a smaller learning rate typically requires more trees.
115 Default is \code{0.001} for \code{gbm.fit}.}
116
117 \item{bag.fraction}{The fraction of the training set observations randomly
118 selected to propose the next tree in the expansion. This introduces
119 randomness into the model fit. If \code{bag.fraction} < 1 then running the
120 same model twice will result in similar but different fits. \code{gbm} uses
121 the R random number generator so \code{set.seed} can ensure that the model
122 can be reconstructed. Preferably, the user can save the returned
123 \code{\link{gbm.object}} using \code{\link{save}}. Default is \code{0.5}.}
124
125 \item{nTrain}{An integer representing the number of cases on which to train.
126 This is the preferred way of specification for \code{gbm.fit}; the option
127 \code{train.fraction} in \code{gbm.fit} is deprecated and only maintained
128 for backward compatibility. These two parameters are mutually exclusive. If
129 both are unspecified, all data is used for training.}
130
131 \item{train.fraction}{The first \code{train.fraction * nrow(data)}
132 observations are used to fit the \code{gbm} and the remainder are used for
133 computing out-of-sample estimates of the loss function.}
134
135 \item{keep.data}{Logical indicating whether or not to keep the data and an
136 index of the data stored with the object. Keeping the data and index makes
137 subsequent calls to \code{\link{gbm.more}} faster at the cost of storing an
138 extra copy of the dataset.}
139
140 \item{verbose}{Logical indicating whether or not to print out progress and
141 performance indicators (\code{TRUE}). If this option is left unspecified for
142 \code{gbm.more}, then it uses \code{verbose} from \code{object}. Default is
143 \code{TRUE} for \code{gbm.fit}.}
144
145 \item{var.names}{Vector of strings of length equal to the number of columns
146 of \code{x} containing the names of the predictor variables.}
147
148 \item{response.name}{Character string label for the response variable.}
149
150 \item{group}{The \code{group} to use when \code{distribution = "pairwise"}.}
151 }
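The list forms of the distribution argument described above can be illustrated with a short, hedged sketch. The data frame data, the ranking set rank_data, and its query grouping column are hypothetical stand-ins, not objects defined by the package.

# Quantile regression at the 25th percentile (alpha is the quantile).
fit_q <- gbm(Y ~ ., data = data, n.trees = 100,
             distribution = list(name = "quantile", alpha = 0.25))

# t-distribution loss with 4 degrees of freedom (the documented default).
fit_t <- gbm(Y ~ ., data = data, n.trees = 100,
             distribution = list(name = "tdist", df = 4))

# Pairwise (LambdaMart) ranking; "query" is an assumed grouping column in a
# hypothetical data set of graded relevance labels.
fit_p <- gbm(Y ~ X1 + X2 + X3, data = rank_data, n.trees = 100,
             distribution = list(name = "pairwise", group = "query",
                                 metric = "ndcg", max.rank = 5))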
152 \value{
153 A \code{\link{gbm.object}} object.
154 }
155 \description{
156 Workhorse function providing the link between R and the C++ gbm engine.
157 \code{gbm} is a front-end to \code{gbm.fit} that uses the familiar R
158 modeling formulas. However, \code{\link[stats]{model.frame}} is very slow if
159 there are many predictor variables. For power-users with many variables use
160 \code{gbm.fit}. For general practice \code{gbm} is preferable.
161 }
162 \details{
163 This package implements the generalized boosted modeling framework. Boosting
164 is the process of iteratively adding basis functions in a greedy fashion so
165 that each additional basis function further reduces the selected loss
166 function. This implementation closely follows Friedman's Gradient Boosting
167 Machine (Friedman, 2001).
168
169 In addition to many of the features documented in the Gradient Boosting
170 Machine, \code{gbm} offers additional features including the out-of-bag
171 estimator for the optimal number of iterations, the ability to store and
172 manipulate the resulting \code{gbm} object, and a variety of other loss
173 functions that had not previously had associated boosting algorithms,
174 including the Cox partial likelihood for censored data, the poisson
175 likelihood for count outcomes, and a gradient boosting implementation to
176 minimize the AdaBoost exponential loss function.
177 }
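A minimal sketch of the matrix interface described above. X and y are assumed to come from the simulated data frame used in the gbm examples; adjust to your own data.

# Bypass the formula/model.frame machinery by passing the predictors and
# response directly; nTrain plays the role of train.fraction here.
X <- data[, paste0("X", 1:6)]   # assumed data frame of predictors
y <- data$Y                     # assumed response vector
fit <- gbm.fit(x = X, y = y, distribution = "gaussian", n.trees = 100,
               shrinkage = 0.1, interaction.depth = 3, n.minobsinnode = 10,
               nTrain = 500, verbose = FALSE)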
178 \references{
179 Y. Freund and R.E. Schapire (1997) \dQuote{A decision-theoretic
180 generalization of on-line learning and an application to boosting,}
181 \emph{Journal of Computer and System Sciences,} 55(1):119-139.
182
183 G. Ridgeway (1999). \dQuote{The state of boosting,} \emph{Computing Science
184 and Statistics} 31:172-181.
185
186 J.H. Friedman, T. Hastie, R. Tibshirani (2000). \dQuote{Additive Logistic
187 Regression: a Statistical View of Boosting,} \emph{Annals of Statistics}
188 28(2):337-374.
189
190 J.H. Friedman (2001). \dQuote{Greedy Function Approximation: A Gradient
191 Boosting Machine,} \emph{Annals of Statistics} 29(5):1189-1232.
192
193 J.H. Friedman (2002). \dQuote{Stochastic Gradient Boosting,}
194 \emph{Computational Statistics and Data Analysis} 38(4):367-378.
195
196 B. Kriegler (2007). Cost-Sensitive Stochastic Gradient Boosting Within a
197 Quantitative Regression Framework. Ph.D. Dissertation. University of
198 California at Los Angeles, Los Angeles, CA, USA. Advisor(s) Richard A. Berk.
199 \url{https://dl.acm.org/citation.cfm?id=1354603}.
200
201 C. Burges (2010). \dQuote{From RankNet to LambdaRank to LambdaMART: An
202 Overview,} Microsoft Research Technical Report MSR-TR-2010-82.
203 }
204 \seealso{
205 \code{\link{gbm.object}}, \code{\link{gbm.perf}},
206 \code{\link{plot.gbm}}, \code{\link{predict.gbm}}, \code{\link{summary.gbm}},
207 and \code{\link{pretty.gbm.tree}}.
208 }
209 \author{
210 Greg Ridgeway \email{gregridgeway@gmail.com}
211
212 Quantile regression code developed by Brian Kriegler
213 \email{bk@stat.ucla.edu}
214
215 t-distribution and multinomial code developed by Harry Southworth and
216 Daniel Edwards
217
218 Pairwise code developed by Stefan Schroedl \email{schroedl@a9.com}
219 }
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/gbm.more.R
2 \name{gbm.more}
3 \alias{gbm.more}
4 \title{Generalized Boosted Regression Modeling (GBM)}
5 \usage{
6 gbm.more(object, n.new.trees = 100, data = NULL, weights = NULL,
7 offset = NULL, verbose = NULL)
8 }
9 \arguments{
10 \item{object}{A \code{\link{gbm.object}} object created from an initial call
11 to \code{\link{gbm}}.}
12
13 \item{n.new.trees}{Integer specifying the number of additional trees to add
14 to \code{object}. Default is 100.}
15
16 \item{data}{An optional data frame containing the variables in the model. By
17 default the variables are taken from \code{environment(formula)}, typically
18 the environment from which \code{gbm} is called. If \code{keep.data=TRUE} in
19 the initial call to \code{gbm} then \code{gbm} stores a copy with the
20 object. If \code{keep.data=FALSE} then subsequent calls to
21 \code{\link{gbm.more}} must resupply the same dataset. It becomes the user's
22 responsibility to resupply the same data at this point.}
23
24 \item{weights}{An optional vector of weights to be used in the fitting
25 process. Must be positive but do not need to be normalized. If
26 \code{keep.data=FALSE} in the initial call to \code{gbm} then it is the
27 user's responsibility to resupply the weights to \code{\link{gbm.more}}.}
28
29 \item{offset}{A vector of offset values.}
30
31 \item{verbose}{Logical indicating whether or not to print out progress and
32 performance indicators (\code{TRUE}). If this option is left unspecified for
33 \code{gbm.more}, then it uses \code{verbose} from \code{object}. Default is
34 \code{FALSE}.}
35 }
36 \value{
37 A \code{\link{gbm.object}} object.
38 }
39 \description{
40 Adds additional trees to a \code{\link{gbm.object}} object.
41 }
42 \examples{
43 #
44 # A least squares regression example
45 #
46
47 # Simulate data
48 set.seed(101) # for reproducibility
49 N <- 1000
50 X1 <- runif(N)
51 X2 <- 2 * runif(N)
52 X3 <- ordered(sample(letters[1:4], N, replace = TRUE), levels = letters[4:1])
53 X4 <- factor(sample(letters[1:6], N, replace = TRUE))
54 X5 <- factor(sample(letters[1:3], N, replace = TRUE))
55 X6 <- 3 * runif(N)
56 mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
57 SNR <- 10 # signal-to-noise ratio
58 Y <- X1 ^ 1.5 + 2 * (X2 ^ 0.5) + mu
59 sigma <- sqrt(var(Y) / SNR)
60 Y <- Y + rnorm(N, 0, sigma)
61 X1[sample(1:N,size=500)] <- NA # introduce some missing values
62 X4[sample(1:N,size=300)] <- NA # introduce some missing values
63 data <- data.frame(Y, X1, X2, X3, X4, X5, X6)
64
65 # Fit a GBM
66 set.seed(102) # for reproducibility
67 gbm1 <- gbm(Y ~ ., data = data, var.monotone = c(0, 0, 0, 0, 0, 0),
68 distribution = "gaussian", n.trees = 100, shrinkage = 0.1,
69 interaction.depth = 3, bag.fraction = 0.5, train.fraction = 0.5,
70 n.minobsinnode = 10, cv.folds = 5, keep.data = TRUE,
71 verbose = FALSE, n.cores = 1)
72
73 # Check performance using the out-of-bag (OOB) error; the OOB error typically
74 # underestimates the optimal number of iterations
75 best.iter <- gbm.perf(gbm1, method = "OOB")
76 print(best.iter)
77
78 # Check performance using the 50\% heldout test set
79 best.iter <- gbm.perf(gbm1, method = "test")
80 print(best.iter)
81
82 # Check performance using 5-fold cross-validation
83 best.iter <- gbm.perf(gbm1, method = "cv")
84 print(best.iter)
85
86 # Plot relative influence of each variable
87 par(mfrow = c(1, 2))
88 summary(gbm1, n.trees = 1) # using first tree
89 summary(gbm1, n.trees = best.iter) # using estimated best number of trees
90
91 # Compactly print the first and last trees for curiosity
92 print(pretty.gbm.tree(gbm1, i.tree = 1))
93 print(pretty.gbm.tree(gbm1, i.tree = gbm1$n.trees))
94
95 # Simulate new data
96 set.seed(103) # for reproducibility
97 N <- 1000
98 X1 <- runif(N)
99 X2 <- 2 * runif(N)
100 X3 <- ordered(sample(letters[1:4], N, replace = TRUE))
101 X4 <- factor(sample(letters[1:6], N, replace = TRUE))
102 X5 <- factor(sample(letters[1:3], N, replace = TRUE))
103 X6 <- 3 * runif(N)
104 mu <- c(-1, 0, 1, 2)[as.numeric(X3)]
105 Y <- X1 ^ 1.5 + 2 * (X2 ^ 0.5) + mu + rnorm(N, 0, sigma)
106 data2 <- data.frame(Y, X1, X2, X3, X4, X5, X6)
107
108 # Predict on the new data using the "best" number of trees; by default,
109 # predictions will be on the link scale
110 Yhat <- predict(gbm1, newdata = data2, n.trees = best.iter, type = "link")
111
112 # least squares error
113 print(sum((data2$Y - Yhat)^2))
114
115 # Construct univariate partial dependence plots
116 p1 <- plot(gbm1, i.var = 1, n.trees = best.iter)
117 p2 <- plot(gbm1, i.var = 2, n.trees = best.iter)
118 p3 <- plot(gbm1, i.var = "X3", n.trees = best.iter) # can use index or name
119 grid.arrange(p1, p2, p3, ncol = 3)
120
121 # Construct bivariate partial dependence plots
122 plot(gbm1, i.var = 1:2, n.trees = best.iter)
123 plot(gbm1, i.var = c("X2", "X3"), n.trees = best.iter)
124 plot(gbm1, i.var = 3:4, n.trees = best.iter)
125
126 # Construct trivariate partial dependence plots
127 plot(gbm1, i.var = c(1, 2, 6), n.trees = best.iter,
128 continuous.resolution = 20)
129 plot(gbm1, i.var = 1:3, n.trees = best.iter)
130 plot(gbm1, i.var = 2:4, n.trees = best.iter)
131 plot(gbm1, i.var = 3:5, n.trees = best.iter)
132
133 # Add more (i.e., 100) boosting iterations to the ensemble
134 gbm2 <- gbm.more(gbm1, n.new.trees = 100, verbose = FALSE)
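
# Optional follow-up (a sketch, not part of the original example): re-estimate
# the optimal number of iterations for the extended ensemble and predict with it.
best.iter2 <- gbm.perf(gbm2, method = "OOB")
Yhat2 <- predict(gbm2, newdata = data2, n.trees = best.iter2)
print(sum((data2$Y - Yhat2)^2))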
135 }
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/gbm.object.R
02 \name{gbm.object}
13 \alias{gbm.object}
24 \title{Generalized Boosted Regression Model Object}
3 \description{These are objects representing fitted \code{gbm}s.}
4 \section{Structure}{The following components must be included in a legitimate \code{gbm} object.}
55 \value{
6 \item{initF}{the "intercept" term, the initial predicted value to which trees
7 make adjustments}
8 \item{fit}{a vector containing the fitted values on the scale of regression
9 function (e.g. log-odds scale for bernoulli, log scale for poisson)}
10 \item{train.error}{a vector of length equal to the number of fitted trees
11 containing the value of the loss function for each boosting iteration
12 evaluated on the training data}
6 \item{initF}{the "intercept" term, the initial predicted value to
7 which trees make adjustments} \item{fit}{a vector containing the fitted
8 values on the scale of regression function (e.g. log-odds scale for
9 bernoulli, log scale for poisson)} \item{train.error}{a vector of length
10 equal to the number of fitted trees containing the value of the loss
11 function for each boosting iteration evaluated on the training data}
1312 \item{valid.error}{a vector of length equal to the number of fitted trees
1413 containing the value of the loss function for each boosting iteration
15 evaluated on the validation data}
16 \item{cv.error}{if \code{cv.folds}<2 this component is NULL. Otherwise, this
17 component is a vector of length equal to the number of fitted trees
18 containing a cross-validated estimate of the loss function for each boosting
19 iteration}
20 \item{oobag.improve}{a vector of length equal to the number of fitted trees
21 containing an out-of-bag estimate of the marginal reduction in the expected
22 value of the loss function. The out-of-bag estimate uses only the training
23 data and is useful for estimating the optimal number of boosting iterations.
24 See \code{\link{gbm.perf}}}
14 evaluated on the validation data} \item{cv.error}{if \code{cv.folds}<2 this
15 component is NULL. Otherwise, this component is a vector of length equal to
16 the number of fitted trees containing a cross-validated estimate of the loss
17 function for each boosting iteration} \item{oobag.improve}{a vector of
18 length equal to the number of fitted trees containing an out-of-bag estimate
19 of the marginal reduction in the expected value of the loss function. The
20 out-of-bag estimate uses only the training data and is useful for estimating
21 the optimal number of boosting iterations. See \code{\link{gbm.perf}}}
2522 \item{trees}{a list containing the tree structures. The components are best
26 viewed using \code{\link{pretty.gbm.tree}}}
27 \item{c.splits}{a list of all the categorical splits in the collection of
28 trees. If the \code{trees[[i]]} component of a \code{gbm} object describes a
29 categorical split then the splitting value will refer to a component of
30 \code{c.splits}. That component of \code{c.splits} will be a vector of length
31 equal to the number of levels in the categorical split variable. -1 indicates
32 left, +1 indicates right, and 0 indicates that the level was not present in the
33 training data}
23 viewed using \code{\link{pretty.gbm.tree}}} \item{c.splits}{a list of all
24 the categorical splits in the collection of trees. If the \code{trees[[i]]}
25 component of a \code{gbm} object describes a categorical split then the
26 splitting value will refer to a component of \code{c.splits}. That component
27 of \code{c.splits} will be a vector of length equal to the number of levels
28 in the categorical split variable. -1 indicates left, +1 indicates right,
29 and 0 indicates that the level was not present in the training data}
3430 \item{cv.fitted}{If cross-validation was performed, the cross-validation
35 predicted values on the scale of the linear predictor. That is, the
36 fitted values from the ith CV-fold, for the model having been trained on
37 the data in all other folds.}
31 predicted values on the scale of the linear predictor. That is, the fitted
32 values from the ith CV-fold, for the model having been trained on the data
33 in all other folds.}
3834 }
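A brief, hedged sketch of inspecting the components listed above; it assumes a fitted model named gbm1 such as the one produced in the package's gbm examples.

gbm1$initF                          # the "intercept" term
head(gbm1$train.error)              # training loss by iteration
head(gbm1$valid.error)              # validation loss by iteration
length(gbm1$trees)                  # one entry per fitted tree
pretty.gbm.tree(gbm1, i.tree = 1)   # readable view of the first tree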
39 \author{Greg Ridgeway \email{gregridgeway@gmail.com}}
35 \description{
36 These are objects representing fitted \code{gbm}s.
37 }
38 \section{Structure}{
39 The following components must be included in a
40 legitimate \code{gbm} object.
41 }
42
4043 \seealso{
4144 \code{\link{gbm}}
4245 }
43
46 \author{
47 Greg Ridgeway \email{gregridgeway@gmail.com}
48 }
4449 \keyword{methods}
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/gbm.perf.R
02 \name{gbm.perf}
13 \alias{gbm.perf}
24 \title{GBM performance}
3 \description{
4 Estimates the optimal number of boosting iterations for a \code{gbm} object and
5 optionally plots various performance measures
6 }
75 \usage{
8 gbm.perf(object,
9 plot.it = TRUE,
10 oobag.curve = FALSE,
11 overlay = TRUE,
12 method)
6 gbm.perf(object, plot.it = TRUE, oobag.curve = FALSE, overlay = TRUE,
7 method)
138 }
149 \arguments{
15 \item{object}{a \code{\link{gbm.object}} created from an initial call to
10 \item{object}{A \code{\link{gbm.object}} created from an initial call to
1611 \code{\link{gbm}}.}
17 \item{plot.it}{an indicator of whether or not to plot the performance measures.
18 Setting \code{plot.it=TRUE} creates two plots. The first plot plots
19 \code{object$train.error} (in black) and \code{object$valid.error} (in red)
20 versus the iteration number. The scale of the error measurement, shown on the
21 left vertical axis, depends on the \code{distribution} argument used in the
22 initial call to \code{\link{gbm}}.}
23 \item{oobag.curve}{indicates whether to plot the out-of-bag performance measures
24 in a second plot.}
25 \item{overlay}{if TRUE and oobag.curve=TRUE then a right y-axis is added to the
26 training and test error plot and the estimated cumulative improvement in the loss
27 function is plotted versus the iteration number.}
28 \item{method}{indicate the method used to estimate the optimal number
29 of boosting iterations. \code{method="OOB"} computes the out-of-bag
30 estimate and \code{method="test"} uses the test (or validation) dataset
31 to compute an out-of-sample estimate. \code{method="cv"} extracts the
32 optimal number of iterations using cross-validation if \code{gbm} was called
33 with \code{cv.folds}>1}
12
13 \item{plot.it}{An indicator of whether or not to plot the performance
14 measures. Setting \code{plot.it = TRUE} creates two plots. The first plot
15 plots \code{object$train.error} (in black) and \code{object$valid.error}
16 (in red) versus the iteration number. The scale of the error measurement,
17 shown on the left vertical axis, depends on the \code{distribution}
18 argument used in the initial call to \code{\link{gbm}}.}
19
20 \item{oobag.curve}{Indicates whether to plot the out-of-bag performance
21 measures in a second plot.}
22
23 \item{overlay}{If TRUE and oobag.curve=TRUE then a right y-axis is added to
24 the training and test error plot and the estimated cumulative improvement
25 in the loss function is plotted versus the iteration number.}
26
27 \item{method}{Indicate the method used to estimate the optimal number of
28 boosting iterations. \code{method = "OOB"} computes the out-of-bag estimate
29 and \code{method = "test"} uses the test (or validation) dataset to compute
30 an out-of-sample estimate. \code{method = "cv"} extracts the optimal number
31 of iterations using cross-validation if \code{gbm} was called with
32 \code{cv.folds} > 1.}
3433 }
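A short sketch of the plotting arguments described above; it assumes a model gbm1 fit with bag.fraction < 1 and cv.folds > 1, as in the package examples.

# Overlay the out-of-bag improvement curve on the train/validation error plot
# and extract the cross-validated estimate of the optimal iteration count.
best.iter <- gbm.perf(gbm1, method = "cv", plot.it = TRUE,
                      oobag.curve = TRUE, overlay = TRUE)
print(best.iter)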
3534 \value{
36 \code{gbm.perf} returns the estimated optimal number of iterations. The method
37 of computation depends on the \code{method} argument.}
38
39 \author{Greg Ridgeway \email{gregridgeway@gmail.com}}
40 \seealso{\code{\link{gbm}}, \code{\link{gbm.object}}}
41
35 \code{gbm.perf} returns the estimated optimal number of iterations.
36 The method of computation depends on the \code{method} argument.
37 }
38 \description{
39 Estimates the optimal number of boosting iterations for a \code{gbm} object
40 and optionally plots various performance measures
41 }
42 \seealso{
43 \code{\link{gbm}}, \code{\link{gbm.object}}
44 }
45 \author{
46 Greg Ridgeway \email{gregridgeway@gmail.com}
47 }
4248 \keyword{nonlinear}
49 \keyword{nonparametric}
4350 \keyword{survival}
44 \keyword{nonparametric}
4551 \keyword{tree}
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/ir.measures.R
02 \name{gbm.roc.area}
13 \alias{gbm.roc.area}
24 \alias{gbm.conc}
68 \alias{ir.measure.map}
79 \alias{ir.measure.ndcg}
810 \alias{perf.pairwise}
9 \title{
10 Compute Information Retrieval measures.
11 \title{Compute Information Retrieval measures.}
12 \usage{
13 gbm.roc.area(obs, pred)
14
15 gbm.conc(x)
16
17 ir.measure.conc(y.f, max.rank = 0)
18
19 ir.measure.auc(y.f, max.rank = 0)
20
21 ir.measure.mrr(y.f, max.rank)
22
23 ir.measure.map(y.f, max.rank = 0)
24
25 ir.measure.ndcg(y.f, max.rank)
26
27 perf.pairwise(y, f, group, metric = "ndcg", w = NULL, max.rank = 0)
28 }
29 \arguments{
30 \item{obs}{Observed value.}
31
32 \item{pred}{Predicted value.}
33
34 \item{x}{Numeric vector of target values (e.g., \code{obs} sorted by decreasing prediction); see Details.}
35
36 \item{y, y.f, f, w, group, max.rank}{Used internally.}
37
38 \item{metric}{What type of performance measure to compute.}
39 }
40 \value{
41 The requested performance measure.
1142 }
1243 \description{
13 Functions to compute Information Retrieval measures for pairwise loss for
14 a single group. The function returns the respective metric, or a negative value if
15 it is undefined for the given group.
16 }
17 \usage{
18 gbm.roc.area(obs, pred)
19 ir.measure.conc(y.f, max.rank)
20 ir.measure.auc(y.f, max.rank)
21 ir.measure.mrr(y.f, max.rank)
22 ir.measure.map(y.f, max.rank)
23 ir.measure.ndcg(y.f, max.rank)
24 perf.pairwise(y, f, group, metric="ndcg", w=NULL, max.rank=0)
25 }
26 %- maybe also 'usage' for other objects documented here.
27 \arguments{
28 \item{obs}{Observed value}
29 \item{pred}{Predicted value}
30 \item{metric}{What type of performance measure to compute.}
31 \item{y, y.f, f, w, group, max.rank}{Used internally.}
44 Functions to compute Information Retrieval measures for pairwise loss for a
45 single group. The function returns the respective metric, or a negative
46 value if it is undefined for the given group.
3247 }
3348 \details{
34 For simplicity, we have no special handling for ties;
35 instead, we break ties randomly. This is slightly
36 inaccurate for individual groups, but should have
49 For simplicity, we have no special handling for ties; instead, we break ties
50 randomly. This is slightly inaccurate for individual groups, but should have
3751 only a small effect on the overall measure.
3852
39 \code{gbm.conc} computes the concordance index:
40 Fraction of all pairs (i,j) with i<j, x[i] != x[j], such that x[j] < x[i]
53 \code{gbm.conc} computes the concordance index: Fraction of all pairs (i,j)
54 with i<j, x[i] != x[j], such that x[j] < x[i]
4155
42 If \code{obs} is binary, then
43 \code{gbm.roc.area(obs, pred) = gbm.conc(obs[order(-pred)])}.
56 If \code{obs} is binary, then \code{gbm.roc.area(obs, pred) =
57 gbm.conc(obs[order(-pred)])}.
4458
45 \code{gbm.conc} is more general as it allows non-binary targets,
46 but is significantly slower.
59 \code{gbm.conc} is more general as it allows non-binary targets, but is
60 significantly slower.
4761 }
48 \value{The requested performance measure.}
62 \examples{
63
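# A minimal sketch with simulated values: AUC via gbm.roc.area() and the
# gbm.conc() identity noted in the Details section.
set.seed(1)
obs  <- rbinom(100, size = 1, prob = 0.4)   # binary labels
pred <- runif(100)                          # hypothetical scores
gbm.roc.area(obs, pred)
gbm.conc(obs[order(-pred)])  # same value for binary obs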
67 }
4968 \references{
50 C. Burges (2010). "From RankNet to LambdaRank to LambdaMART: An Overview",
51 Microsoft Research Technical Report MSR-TR-2010-82.
69 C. Burges (2010). "From RankNet to LambdaRank to LambdaMART: An
70 Overview", Microsoft Research Technical Report MSR-TR-2010-82.
71 }
72 \seealso{
73 \code{\link{gbm}}
5274 }
5375 \author{
5476 Stefan Schroedl
5577 }
56
57 \seealso{
58 \code{\link{gbm}}
59 }
60 \examples{
61 ##---- Should be DIRECTLY executable !! ----
62 ##-- ==> Define data, use random,
63 ##-- or do help(data=index) for the standard data sets.
64
65 }
66 \keyword{ models }
67
78 \keyword{models}
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/gbmCrossVal.R
02 \name{gbmCrossVal}
13 \alias{gbmCrossVal}
24 \alias{gbmCrossValModelBuild}
46 \alias{gbmCrossValErr}
57 \alias{gbmCrossValPredictions}
68 \title{Cross-validate a gbm}
7 \description{Functions for cross-validating gbm. These functions are
8 used internally and are not intended for end-user direct usage.}
99 \usage{
10 gbmCrossVal(cv.folds, nTrain, n.cores,
11 class.stratify.cv, data,
12 x, y, offset, distribution, w, var.monotone,
13 n.trees, interaction.depth, n.minobsinnode,
14 shrinkage, bag.fraction,
15 var.names, response.name, group)
16
17 gbmCrossValModelBuild(cv.folds, cv.group, n.cores,
18 i.train, x, y, offset,
19 distribution, w, var.monotone,
20 n.trees, interaction.depth,
21 n.minobsinnode, shrinkage,
22 bag.fraction, var.names,
23 response.name, group)
24
25 gbmDoFold(X, i.train, x, y, offset, distribution, w, var.monotone, n.trees,
26 interaction.depth, n.minobsinnode, shrinkage, bag.fraction,
27 cv.group, var.names, response.name, group, s)
10 gbmCrossVal(cv.folds, nTrain, n.cores, class.stratify.cv, data, x, y,
11 offset, distribution, w, var.monotone, n.trees, interaction.depth,
12 n.minobsinnode, shrinkage, bag.fraction, var.names, response.name, group)
2813
2914 gbmCrossValErr(cv.models, cv.folds, cv.group, nTrain, n.trees)
3015
31 gbmCrossValPredictions(cv.models, cv.folds, cv.group,
32 best.iter.cv, distribution, data, y)
16 gbmCrossValPredictions(cv.models, cv.folds, cv.group, best.iter.cv,
17 distribution, data, y)
3318
19 gbmCrossValModelBuild(cv.folds, cv.group, n.cores, i.train, x, y, offset,
20 distribution, w, var.monotone, n.trees, interaction.depth,
21 n.minobsinnode, shrinkage, bag.fraction, var.names, response.name, group)
3422
23 gbmDoFold(X, i.train, x, y, offset, distribution, w, var.monotone, n.trees,
24 interaction.depth, n.minobsinnode, shrinkage, bag.fraction, cv.group,
25 var.names, response.name, group, s)
3526 }
3627 \arguments{
3728 \item{cv.folds}{The number of cross-validation folds.}
29
3830 \item{nTrain}{The number of training samples.}
31
3932 \item{n.cores}{The number of cores to use.}
40 \item{class.stratify.cv}{Whether or not stratified cross-validation
41 samples are used.}
33
34 \item{class.stratify.cv}{Whether or not stratified cross-validation samples
35 are used.}
36
4237 \item{data}{The data.}
38
4339 \item{x}{The model matrix.}
40
4441 \item{y}{The response variable.}
42
4543 \item{offset}{The offset.}
44
4645 \item{distribution}{The type of loss function. See \code{\link{gbm}}.}
46
4747 \item{w}{Observation weights.}
48
4849 \item{var.monotone}{See \code{\link{gbm}}.}
50
4951 \item{n.trees}{The number of trees to fit.}
50 \item{interaction.depth}{The degree of allowed interactions. See \code{\link{gbm}}.}
52
53 \item{interaction.depth}{The degree of allowed interactions. See
54 \code{\link{gbm}}.}
55
5156 \item{n.minobsinnode}{See \code{\link{gbm}}.}
57
5258 \item{shrinkage}{See \code{\link{gbm}}.}
59
5360 \item{bag.fraction}{See \code{\link{gbm}}.}
61
5462 \item{var.names}{See \code{\link{gbm}}.}
63
5564 \item{response.name}{See \code{\link{gbm}}.}
65
5666 \item{group}{Used when \code{distribution = "pairwise"}. See
57 \code{\link{gbm}}.}
67 \code{\link{gbm}}.}
68
69 \item{cv.models}{A list containing the models for each fold.}
70
71 \item{cv.group}{A vector indicating the cross-validation fold for each
72 member of the training set.}
73
74 \item{best.iter.cv}{The iteration with lowest cross-validation error.}
75
5876 \item{i.train}{Items in the training set.}
59 \item{cv.models}{A list containing the models for each fold.}
60 \item{cv.group}{A vector indicating the cross-validation fold for each
61 member of the training set.}
62 \item{best.iter.cv}{The iteration with lowest cross-validation error.}
77
6378 \item{X}{Index (cross-validation fold) on which to subset.}
79
6480 \item{s}{Random seed.}
65
66 } % Close arguments
81 }
82 \value{
83 A list containing the cross-validation error and predictions.
84 }
85 \description{
86 Functions for cross-validating gbm. These functions are used internally and
87 are not intended for end-user direct usage.
88 }
6789 \details{
6890 These functions are not intended for end-user direct usage, but are used
69 internally by \code{gbm}.}
70 \value{A list containing the cross-validation error and predictions.}
91 internally by \code{gbm}.
92 }
7193 \references{
72 J.H. Friedman (2001). "Greedy Function Approximation: A Gradient Boosting
73 Machine," Annals of Statistics 29(5):1189-1232.
94 J.H. Friedman (2001). "Greedy Function Approximation: A Gradient
95 Boosting Machine," Annals of Statistics 29(5):1189-1232.
7496
75 L. Breiman (2001). \href{https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf}{Random Forests}.
76
97 L. Breiman (2001).
98 \url{https://www.stat.berkeley.edu/users/breiman/randomforest2001.pdf}.
7799 }
78 \author{Greg Ridgeway \email{gregridgeway@gmail.com}}
79
80 \seealso{ \code{\link{gbm}} }
81
82 \keyword{ models }
100 \seealso{
101 \code{\link{gbm}}
102 }
103 \author{
104 Greg Ridgeway \email{gregridgeway@gmail.com}
105 }
106 \keyword{models}
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/utils.R
2 \name{grid.arrange}
3 \alias{grid.arrange}
4 \title{Arrange multiple grobs on a page}
5 \usage{
6 grid.arrange(..., newpage = TRUE)
7 }
8 \description{
9 See \code{\link[gridExtra]{grid.arrange}} for more details.
10 }
11 \keyword{internal}
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/interact.gbm.R
02 \name{interact.gbm}
13 \alias{interact.gbm}
2 \title{ Estimate the strength of interaction effects }
3 \description{ Computes Friedman's H-statistic to assess the strength of variable interactions. }
4 \title{Estimate the strength of interaction effects}
45 \usage{
5 interact.gbm(x,
6 data,
7 i.var = 1,
8 n.trees = x$n.trees)
6 interact.gbm(x, data, i.var = 1, n.trees = x$n.trees)
97 }
108 \arguments{
11 \item{x}{ a \code{\link{gbm.object}} fitted using a call to \code{\link{gbm}}}
12 \item{data}{ the dataset used to construct \code{x}. If the original dataset is
13 large, a random subsample may be used to accelerate the computation in
14 \code{interact.gbm}}
15 \item{i.var}{a vector of indices or the names of the variables for compute
16 the interaction effect. If using indices, the variables are indexed in the
17 same order that they appear in the initial \code{gbm} formula.}
18 \item{n.trees}{ the number of trees used to generate the plot. Only the first
19 \code{n.trees} trees will be used}
9 \item{x}{A \code{\link{gbm.object}} fitted using a call to \code{\link{gbm}}.}
10
11 \item{data}{The dataset used to construct \code{x}. If the original dataset
12 is large, a random subsample may be used to accelerate the computation in
13 \code{interact.gbm}.}
14
15 \item{i.var}{A vector of indices or the names of the variables for which to
16 compute the interaction effect. If using indices, the variables are indexed in the
17 same order that they appear in the initial \code{gbm} formula.}
18
19 \item{n.trees}{The number of trees used to generate the plot. Only the first
20 \code{n.trees} trees will be used.}
21 }
22 \value{
23 Returns the value of \eqn{H}.
24 }
25 \description{
26 Computes Friedman's H-statistic to assess the strength of variable
27 interactions.
2028 }
2129 \details{
2230 \code{interact.gbm} computes Friedman's H-statistic to assess the relative
2331 strength of interaction effects in non-linear models. H is on the scale of
24 [0-1] with higher values indicating larger interaction effects. To connect to
25 a more familiar measure, if \eqn{x_1} and \eqn{x_2} are uncorrelated covariates
26 with mean 0 and variance 1 and the model is of the form
27 \deqn{y=\beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3}
28 then
32 [0-1] with higher values indicating larger interaction effects. To connect
33 to a more familiar measure, if \eqn{x_1} and \eqn{x_2} are uncorrelated
34 covariates with mean 0 and variance 1 and the model is of the form
35 \deqn{y=\beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3} then
2936 \deqn{H=\frac{\beta_3}{\sqrt{\beta_1^2+\beta_2^2+\beta_3^2}}}
3037
31 Note that if the main effects are weak, the estimated H will be unstable. For
32 example, if (in the case of a two-way interaction) neither main effect is in
33 the selected model (relative influence is zero), the result will be 0/0. Also,
34 with weak main effects, rounding errors can result in values of H > 1 which are
35 not possible.
36 }
37 \value{
38 Returns the value of \eqn{H}.
38 Note that if the main effects are weak, the estimated H will be unstable.
39 For example, if (in the case of a two-way interaction) neither main effect
40 is in the selected model (relative influence is zero), the result will be
41 0/0. Also, with weak main effects, rounding errors can result in values of H
42 > 1 which are not possible.
3943 }
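A one-line sketch, assuming the fitted model gbm1, data frame data, and iteration count best.iter from the package's gbm examples.

# H-statistic for the interaction between X2 and X3 (values near 0 suggest
# little interaction).
interact.gbm(gbm1, data = data, i.var = c("X2", "X3"), n.trees = best.iter)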
4044 \references{
41 J.H. Friedman and B.E. Popescu (2005). \dQuote{Predictive Learning via Rule
42 Ensembles.} Section 8.1
45 J.H. Friedman and B.E. Popescu (2005). \dQuote{Predictive
46 Learning via Rule Ensembles.} Section 8.1
4347 }
44 \author{Greg Ridgeway \email{gregridgeway@gmail.com}}
45
46 \seealso{ \code{\link{gbm}}, \code{\link{gbm.object}} }
47
48 \keyword{ methods }
48 \seealso{
49 \code{\link{gbm}}, \code{\link{gbm.object}}
50 }
51 \author{
52 Greg Ridgeway \email{gregridgeway@gmail.com}
53 }
54 \keyword{methods}
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/plot.gbm.R
02 \name{plot.gbm}
13 \alias{plot.gbm}
2 \title{ Marginal plots of fitted gbm objects }
3 \description{
4 Plots the marginal effect of the selected variables by "integrating" out the other variables.
5 }
4 \title{Marginal plots of fitted gbm objects}
65 \usage{
7 \method{plot}{gbm}(x,
8 i.var = 1,
9 n.trees = x$n.trees,
10 continuous.resolution = 100,
11 return.grid = FALSE,
12 type = "link",
13 ...)
6 \method{plot}{gbm}(x, i.var = 1, n.trees = x$n.trees,
7 continuous.resolution = 100, return.grid = FALSE, type = c("link",
8 "response"), level.plot = TRUE, contour = FALSE, number = 4,
9 overlap = 0.1, col.regions = viridis::viridis, ...)
1410 }
1511 \arguments{
16 \item{x}{ a \code{\link{gbm.object}} fitted using a call to \code{\link{gbm}}}
17 \item{i.var}{a vector of indices or the names of the variables to plot. If
18 using indices, the variables are indexed in the same order that they appear
19 in the initial \code{gbm} formula.
20 If \code{length(i.var)} is between 1 and 3 then \code{plot.gbm} produces the plots. Otherwise,
21 \code{plot.gbm} returns only the grid of evaluation points and their average predictions}
22 \item{n.trees}{ the number of trees used to generate the plot. Only the first
23 \code{n.trees} trees will be used}
24 \item{continuous.resolution}{ The number of equally space points at which to
25 evaluate continuous predictors }
26 \item{return.grid}{ if \code{TRUE} then \code{plot.gbm} produces no graphics and only returns
27 the grid of evaluation points and their average predictions. This is useful for
28 customizing the graphics for special variable types or for dimensions greater
29 than 3 }
30 \item{type}{ the type of prediction to plot on the vertical axis. See
31 \code{predict.gbm}}
32 \item{\dots}{ other arguments passed to the plot function }
12 \item{x}{A \code{\link{gbm.object}} that was fit using a call to
13 \code{\link{gbm}}.}
14
15 \item{i.var}{Vector of indices or the names of the variables to plot. If
16 using indices, the variables are indexed in the same order that they appear
17 in the initial \code{gbm} formula. If \code{length(i.var)} is between 1 and
18 3 then \code{plot.gbm} produces the plots. Otherwise, \code{plot.gbm}
19 returns only the grid of evaluation points and their average predictions}
20
21 \item{n.trees}{Integer specifying the number of trees to use to generate the
22 plot. Default is to use \code{x$n.trees} (i.e., the entire ensemble).}
23
24 \item{continuous.resolution}{Integer specifying the number of equally spaced
25 points at which to evaluate continuous predictors.}
26
27 \item{return.grid}{Logical indicating whether or not to produce graphics
28 (\code{FALSE}) or only return the grid of evaluation points and their average
29 predictions (\code{TRUE}). This is useful for customizing the graphics for
30 special variable types, or for higher dimensional graphs.}
31
32 \item{type}{Character string specifying the type of prediction to plot on the
33 vertical axis. See \code{\link{predict.gbm}} for details.}
34
35 \item{level.plot}{Logical indicating whether or not to use a false color
36 level plot (\code{TRUE}) or a 3-D surface (\code{FALSE}). Default is
37 \code{TRUE}.}
38
39 \item{contour}{Logical indicating whether or not to add contour lines to the
40 level plot. Only used when \code{level.plot = TRUE}. Default is \code{FALSE}.}
41
42 \item{number}{Integer specifying the number of conditional intervals to use
43 for the continuous panel variables. See \code{\link[graphics]{co.intervals}}
44 and \code{\link[lattice]{equal.count}} for further details.}
45
46 \item{overlap}{The fraction of overlap of the conditioning variables. See
47 \code{\link[graphics]{co.intervals}} and \code{\link[lattice]{equal.count}}
48 for further details.}
49
50 \item{col.regions}{Color vector to be used if \code{level.plot} is
51 \code{TRUE}. Defaults to the wonderful Matplotlib 'viridis' color map
52 provided by the \code{viridis} package. See \code{\link[viridis]{viridis}}
53 for details.}
54
55 \item{...}{Additional optional arguments to be passed onto
56 \code{\link[graphics]{plot}}.}
57 }
58 \value{
59 If \code{return.grid = TRUE}, a grid of evaluation points and their
60 average predictions. Otherwise, a plot is returned.
61 }
62 \description{
63 Plots the marginal effect of the selected variables by "integrating" out the
64 other variables.
3365 }
3466 \details{
3567 \code{plot.gbm} produces low dimensional projections of the
36 \code{\link{gbm.object}} by integrating out the variables not included in the
37 \code{i.var} argument. The function selects a grid of points and uses the
38 weighted tree traversal method described in Friedman (2001) to do the
68 \code{\link{gbm.object}} by integrating out the variables not included in
69 the \code{i.var} argument. The function selects a grid of points and uses
70 the weighted tree traversal method described in Friedman (2001) to do the
3971 integration. Based on the variable types included in the projection,
4072 \code{plot.gbm} selects an appropriate display choosing amongst line plots,
41 contour plots, and \code{\link[lattice]{lattice}} plots. If the default graphics
42 are not sufficient the user may set \code{return.grid=TRUE}, store the result
43 of the function, and develop another graphic display more appropriate to the
44 particular example.
73 contour plots, and \code{\link[lattice]{lattice}} plots. If the default
74 graphics are not sufficient the user may set \code{return.grid=TRUE}, store
75 the result of the function, and develop another graphic display more
76 appropriate to the particular example.
4577 }
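A hedged sketch of the display options above, again assuming gbm1 and best.iter from the package's gbm examples.

# Bivariate partial dependence as a false-color level plot with contour lines,
# and as a 3-D surface.
plot(gbm1, i.var = c("X1", "X2"), n.trees = best.iter, contour = TRUE)
plot(gbm1, i.var = c("X1", "X2"), n.trees = best.iter, level.plot = FALSE)

# Return the evaluation grid instead of plotting, e.g. for custom graphics.
grid <- plot(gbm1, i.var = "X1", n.trees = best.iter, return.grid = TRUE)
head(grid)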
46 \value{
47 Nothing unless \code{return.grid} is true then \code{plot.gbm} produces no
48 graphics and only returns the grid of evaluation points and their average
49 predictions.
78 \note{
79 More flexible plotting is available using the
80 \code{\link[pdp]{partial}} and \code{\link[pdp]{plotPartial}} functions.
5081 }
5182 \references{
52 J.H. Friedman (2001). "Greedy Function Approximation: A Gradient Boosting
53 Machine," Annals of Statistics 29(4).
83 J. H. Friedman (2001). "Greedy Function Approximation: A Gradient
84 Boosting Machine," Annals of Statistics 29(4).
85
86 B. M. Greenwell (2017). "pdp: An R Package for Constructing
87 Partial Dependence Plots," The R Journal 9(1), 421--436.
88 \url{https://journal.r-project.org/archive/2017/RJ-2017-016/index.html}.
5489 }
55 \author{Greg Ridgeway \email{gregridgeway@gmail.com}}
56
57 \seealso{ \code{\link{gbm}}, \code{\link{gbm.object}}, \code{\link[graphics]{plot}} }
58
59 \keyword{ hplot }
90 \seealso{
91 \code{\link[pdp]{partial}}, \code{\link[pdp]{plotPartial}},
92 \code{\link{gbm}}, and \code{\link{gbm.object}}.
93 }
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/predict.gbm.R
02 \name{predict.gbm}
13 \alias{predict.gbm}
2 \title{ Predict method for GBM Model Fits }
3 \description{
4 Predicted values based on a generalized boosted model object
5 }
4 \title{Predict method for GBM Model Fits}
65 \usage{
7 \method{predict}{gbm}(object,
8 newdata,
9 n.trees,
10 type="link",
11 single.tree=FALSE,
12 ...)
6 \method{predict}{gbm}(object, newdata, n.trees, type = "link",
7 single.tree = FALSE, ...)
138 }
149 \arguments{
15 \item{object}{ Object of class inheriting from (\code{\link{gbm.object}}) }
16 \item{newdata}{ Data frame of observations for which to make predictions }
17 \item{n.trees}{ Number of trees used in the prediction. \code{n.trees} may
18 be a vector in which case predictions are returned for each
19 iteration specified}
20 \item{type}{ The scale on which gbm makes the predictions }
21 \item{single.tree}{If \code{single.tree=TRUE} then \code{predict.gbm} returns
22 only the predictions from tree(s) \code{n.trees}}
23 \item{\dots}{ further arguments passed to or from other methods }
10 \item{object}{Object of class inheriting from (\code{\link{gbm.object}})}
11
12 \item{newdata}{Data frame of observations for which to make predictions}
13
14 \item{n.trees}{Number of trees used in the prediction. \code{n.trees} may be
15 a vector in which case predictions are returned for each iteration specified}
16
17 \item{type}{The scale on which gbm makes the predictions}
18
19 \item{single.tree}{If \code{single.tree=TRUE} then \code{predict.gbm}
20 returns only the predictions from tree(s) \code{n.trees}}
21
22 \item{\dots}{further arguments passed to or from other methods}
23 }
24 \value{
25 Returns a vector of predictions. By default the predictions are on
26 the scale of f(x). For example, for the Bernoulli loss the returned value is
27 on the log odds scale, poisson loss on the log scale, and coxph is on the
28 log hazard scale.
29
30 If \code{type="response"} then \code{gbm} converts back to the same scale as
31 the outcome. Currently the only effect this will have is returning
32 probabilities for bernoulli and expected counts for poisson. For the other
33 distributions "response" and "link" return the same.
34 }
35 \description{
36 Predicted values based on a generalized boosted model object
2437 }
2538 \details{
26 \code{predict.gbm} produces predicted values for each observation in \code{newdata} using the the first \code{n.trees} iterations of the boosting sequence. If \code{n.trees} is a vector than the result is a matrix with each column representing the predictions from gbm models with \code{n.trees[1]} iterations, \code{n.trees[2]} iterations, and so on.
39 \code{predict.gbm} produces predicted values for each observation in
40 \code{newdata} using the first \code{n.trees} iterations of the boosting
41 sequence. If \code{n.trees} is a vector then the result is a matrix with
42 each column representing the predictions from gbm models with
43 \code{n.trees[1]} iterations, \code{n.trees[2]} iterations, and so on.
2744
28 The predictions from \code{gbm} do not include the offset term. The user may add the value of the offset to the predicted value if desired.
45 The predictions from \code{gbm} do not include the offset term. The user may
46 add the value of the offset to the predicted value if desired.
2947
3048 If \code{object} was fit using \code{\link{gbm.fit}} there will be no
31 \code{Terms} component. Therefore, the user has greater responsibility to make
32 sure that \code{newdata} is of the same format (order and number of variables)
33 as the one originally used to fit the model.
49 \code{Terms} component. Therefore, the user has greater responsibility to
50 make sure that \code{newdata} is of the same format (order and number of
51 variables) as the one originally used to fit the model.
3452 }
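A short sketch of the behaviour described above, assuming gbm1, data2, and best.iter from the package's gbm examples.

# A vector of n.trees values returns one column of predictions per value.
preds <- predict(gbm1, newdata = data2, n.trees = c(25, 50, best.iter))
dim(preds)

# For non-Gaussian fits, type = "response" converts to the outcome scale
# (e.g., probabilities for bernoulli); for gaussian it matches "link".
head(predict(gbm1, newdata = data2, n.trees = best.iter, type = "response"))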
35 \value{
36 Returns a vector of predictions. By default the predictions are on the scale of f(x). For example, for the Bernoulli loss the returned value is on the log odds scale, poisson loss on the log scale, and coxph is on the log hazard scale.
37
38 If \code{type="response"} then \code{gbm} converts back to the same scale as the outcome. Currently the only effect this will have is returning probabilities for bernoulli and expected counts for poisson. For the other distributions "response" and "link" return the same.
39 }
40 \author{Greg Ridgeway \email{gregridgeway@gmail.com}}
4153 \seealso{
4254 \code{\link{gbm}}, \code{\link{gbm.object}}
4355 }
44
45 \keyword{ models }
46 \keyword{ regression }
56 \author{
57 Greg Ridgeway \email{gregridgeway@gmail.com}
58 }
59 \keyword{models}
60 \keyword{regression}
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/pretty.gbm.tree.R
02 \name{pretty.gbm.tree}
13 \alias{pretty.gbm.tree}
2 \title{ Print gbm tree components }
3 \description{
4 \code{gbm} stores the collection of trees used to construct the model in a
5 compact matrix structure. This function extracts the information from a single
6 tree and displays it in a slightly more readable form. This function is mostly
7 for debugging purposes and to satisfy some users' curiosity.
8 }
4 \title{Print gbm tree components}
95 \usage{
10 pretty.gbm.tree(object, i.tree = 1)
6 \method{pretty}{gbm.tree}(object, i.tree = 1)
117 }
128 \arguments{
13 \item{object}{ a \code{\link{gbm.object}} initially fit using \code{\link{gbm}}}
14 \item{i.tree}{ the index of the tree component to extract from \code{object}
15 and display }
9 \item{object}{a \code{\link{gbm.object}} initially fit using
10 \code{\link{gbm}}}
11
12 \item{i.tree}{the index of the tree component to extract from \code{object}
13 and display}
1614 }
1715 \value{
18 \code{pretty.gbm.tree} returns a data frame. Each row corresponds to a node in
19 the tree. Columns indicate
20 \item{SplitVar}{index of which variable is used to split. -1 indicates a
21 terminal node.}
22 \item{SplitCodePred}{if the split variable is continuous then this component
23 is the split point. If the split variable is categorical then this component
24 contains the index of \code{object$c.split} that describes the categorical
25 split. If the node is a terminal node then this is the prediction.}
26 \item{LeftNode}{the index of the row corresponding to the left node.}
27 \item{RightNode}{the index of the row corresponding to the right node.}
28 \item{ErrorReduction}{the reduction in the loss function as a result of
29 splitting this node.}
30 \item{Weight}{the total weight of observations in the node. If weights are all
31 equal to 1 then this is the number of observations in the node.}
16 \code{pretty.gbm.tree} returns a data frame. Each row corresponds to
17 a node in the tree. Columns indicate \item{SplitVar}{index of which variable
18 is used to split. -1 indicates a terminal node.} \item{SplitCodePred}{if the
19 split variable is continuous then this component is the split point. If the
20 split variable is categorical then this component contains the index of
21 \code{object$c.split} that describes the categorical split. If the node is a
22 terminal node then this is the prediction.} \item{LeftNode}{the index of the
23 row corresponding to the left node.} \item{RightNode}{the index of the row
24 corresponding to the right node.} \item{ErrorReduction}{the reduction in the
25 loss function as a result of splitting this node.} \item{Weight}{the total
26 weight of observations in the node. If weights are all equal to 1 then this
27 is the number of observations in the node.}
3228 }
33 \author{Greg Ridgeway \email{gregridgeway@gmail.com}}
29 \description{
30 \code{gbm} stores the collection of trees used to construct the model in a
31 compact matrix structure. This function extracts the information from a
32 single tree and displays it in a slightly more readable form. This function
33 is mostly for debugging purposes and to satisfy some users' curiosity.
34 }
3435 \seealso{
3536 \code{\link{gbm}}, \code{\link{gbm.object}}
3637 }
37 \keyword{ print }
38 \author{
39 Greg Ridgeway \email{gregridgeway@gmail.com}
40 }
41 \keyword{print}
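A brief sketch of typical use (assuming fit is a gbm.object from an earlier call to gbm(); the object name is illustrative):

head(pretty.gbm.tree(fit, i.tree = 1))
# Rows with SplitVar == -1 are terminal nodes; for those rows SplitCodePred
# holds the node prediction rather than a split point.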
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/print.gbm.R
02 \name{print.gbm}
13 \alias{print.gbm}
24 \alias{show.gbm}
3
4 \title{ Print model summary }
5 \description{
6 Display basic information about a \code{gbm} object.
7 }
5 \title{Print model summary}
86 \usage{
97 \method{print}{gbm}(x, ...)
10 \method{show}{gbm}(x, ...)
8
9 show.gbm(x, ...)
1110 }
1211 \arguments{
13 \item{x}{ an object of class \code{gbm}. }
14 \item{\dots}{ arguments passed to \code{print.default}. }
12 \item{x}{an object of class \code{gbm}.}
13
14 \item{\dots}{arguments passed to \code{print.default}.}
15 }
16 \description{
17 Display basic information about a \code{gbm} object.
1518 }
1619 \details{
17 Prints some information about the model object. In particular, this method
18 prints the call to \code{gbm()}, the type of loss function
19 that was used, and the total number of iterations.
20 Prints some information about the model object. In particular, this method
21 prints the call to \code{gbm()}, the type of loss function that was used,
22 and the total number of iterations.
2023
21 If cross-validation was performed, the 'best' number of trees as
22 estimated by cross-validation error is displayed. If a test set
23 was used, the 'best' number
24 of trees as estimated by the test set error is displayed.
24 If cross-validation was performed, the 'best' number of trees as estimated
25 by cross-validation error is displayed. If a test set was used, the 'best'
26 number of trees as estimated by the test set error is displayed.
2527
26 The number of available predictors, and the number of those having
27 non-zero influence on predictions is given (which might be interesting
28 in data mining applications).
28 The number of available predictors, and the number of those having non-zero
29 influence on predictions is given (which might be interesting in data mining
30 applications).
2931
30 If multinomial, bernoulli or adaboost was used,
31 the confusion matrix and prediction accuracy are printed (objects
32 being allocated to the class with highest probability for multinomial
33 and bernoulli). These classifications are performed on the entire
34 training
35 data using the model with the 'best' number of trees as described
36 above, or the maximum number of trees if the 'best' cannot be
37 computed.
32 If multinomial, bernoulli or adaboost was used, the confusion matrix and
33 prediction accuracy are printed (objects being allocated to the class with
34 highest probability for multinomial and bernoulli). These classifications
35 are performed on the entire training data using the model with the 'best'
36 number of trees as described above, or the maximum number of trees if the
37 'best' cannot be computed.
3838
39 If the 'distribution' was specified as gaussian, laplace, quantile
40 or t-distribution, a summary of the residuals is displayed.
41 The residuals are for the training data with the model at the 'best'
42 number of trees, as
43 described above, or the maximum number of trees if the 'best' cannot
44 be computed.
39 If the 'distribution' was specified as gaussian, laplace, quantile or
40 t-distribution, a summary of the residuals is displayed. The residuals are
41 for the training data with the model at the 'best' number of trees, as
42 described above, or the maximum number of trees if the 'best' cannot be
43 computed.
4544 }
45 \examples{
4646
47
48 \author{ Harry Southworth, Daniel Edwards }
49
50 \seealso{ \code{\link{gbm}} }
51 \examples{
5247 data(iris)
5348 iris.mod <- gbm(Species ~ ., distribution="multinomial", data=iris,
5449 n.trees=2000, shrinkage=0.01, cv.folds=5,
5954 # n.trees=2000, shrinkage=0.01, cv.folds=5,verbose =FALSE)
6055 #lung.mod
6156 }
57 \seealso{
58 \code{\link{gbm}}
59 }
60 \author{
61 Harry Southworth, Daniel Edwards
62 }
6263 \keyword{models}
6364 \keyword{nonlinear}
65 \keyword{nonparametric}
6466 \keyword{survival}
65 \keyword{nonparametric}
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/calibrate.plot.R
02 \name{quantile.rug}
13 \alias{quantile.rug}
24 \title{Quantile rug plot}
3 \description{Marks the quantiles on the axes of the current plot.}
45 \usage{
5 quantile.rug(x,prob=(0:10)/10,...)
6 \method{quantile}{rug}(x, prob = 0:10/10, ...)
67 }
78 \arguments{
8 \item{x}{a numeric vector.}
9 \item{prob}{the quantiles of x to mark on the x-axis.}
10 \item{...}{additional graphics parameters currently ignored.}
9 \item{x}{A numeric vector.}
10
11 \item{prob}{The quantiles of x to mark on the x-axis.}
12
13 \item{...}{Additional optional arguments to be passed onto
14 \code{\link[graphics]{rug}}}
1115 }
12 \value{No return values}
13 \author{Greg Ridgeway \email{gregridgeway@gmail.com}}
14 \seealso{
15 \code{\link[graphics]{plot}},
16 \code{\link[stats]{quantile}},
17 \code{\link[base]{jitter}},
18 \code{\link[graphics]{rug}}.
16 \value{
17 No return values.
18 }
19 \description{
20 Marks the quantiles on the axes of the current plot.
1921 }
2022 \examples{
2123 x <- rnorm(100)
2224 y <- rnorm(100)
23 plot(x,y)
25 plot(x, y)
2426 quantile.rug(x)
2527 }
28 \seealso{
29 \code{\link[graphics]{plot}}, \code{\link[stats]{quantile}},
30 \code{\link[base]{jitter}}, \code{\link[graphics]{rug}}.
31 }
32 \author{
33 Greg Ridgeway \email{gregridgeway@gmail.com}.
34 }
2635 \keyword{aplot}
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/reconstructGBMdata.R
02 \name{reconstructGBMdata}
1 \Rdversion{1.1}
23 \alias{reconstructGBMdata}
34 \title{Reconstruct a GBM's Source Data}
4
5
5 \usage{
6 reconstructGBMdata(x)
7 }
8 \arguments{
9 \item{x}{a \code{\link{gbm.object}} initially fit using \code{\link{gbm}}}
10 }
11 \value{
12 Returns the data used to fit the gbm in a format that can subsequently
13 be used for plots and summaries.
14 }
615 \description{
716 Helper function to reconstitute the data for plots and summaries. This
817 function is not intended for the user to call directly.
918 }
10 \usage{
11 reconstructGBMdata(x)
12 }
13 \arguments{
14 \item{x}{
15 a \code{\link{gbm.object}} initially fit using \code{\link{gbm}}
16 }
17 }
18 \value{
19 Returns a data used to fit the gbm in a format that can subsequently be used
20 for plots and summaries
19 \seealso{
20 \code{\link{gbm}}, \code{\link{gbm.object}}
2121 }
2222 \author{
2323 Harry Southworth
2424 }
25
26 \seealso{
27 \code{\link{gbm}}, \code{\link{gbm.object}}
28 }
29
30 \keyword{ manip }
25 \keyword{manip}
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/relative.influence.R
02 \name{relative.influence}
13 \alias{relative.influence}
24 \alias{permutation.test.gbm}
35 \alias{gbm.loss}
4 \title{ Methods for estimating relative influence }
5 \description{
6 Helper functions for computing the relative influence of each variable in the gbm object.
7 }
6 \title{Methods for estimating relative influence}
87 \usage{
9 relative.influence(object, n.trees, scale., sort.)
8 relative.influence(object, n.trees, scale. = FALSE, sort. = FALSE)
9
1010 permutation.test.gbm(object, n.trees)
11 gbm.loss(y,f,w,offset,dist,baseline, group, max.rank)
11
12 gbm.loss(y, f, w, offset, dist, baseline, group = NULL,
13 max.rank = NULL)
1214 }
1315 \arguments{
14 \item{object}{a \code{gbm} object created from an initial call to \code{\link{gbm}}.}
15 \item{n.trees}{ the number of trees to use for computations. If not provided, the
16 the function will guess: if a test set was used in fitting, the number of
17 trees resulting in lowest test set error will be used; otherwise, if
18 cross-validation was performed, the number of trees resulting in lowest
19 cross-validation error will be used; otherwise, all trees will be used.}
20 \item{scale.}{ whether or not the result should be scaled. Defaults to \code{FALSE}.}
21 \item{sort.}{ whether or not the results should be (reverse) sorted.
22 Defaults to \code{FALSE}.}
23 \item{y,f,w,offset,dist,baseline}{For \code{gbm.loss}: These components are the
24 outcome, predicted value, observation weight, offset, distribution, and comparison
25 loss function, respectively.}
26 \item{group, max.rank}{Used internally when \code{distribution = \'pairwise\'}.}
16 \item{object}{a \code{gbm} object created from an initial call to
17 \code{\link{gbm}}.}
18
19 \item{n.trees}{the number of trees to use for computations. If not provided,
20 the function will guess: if a test set was used in fitting, the number
21 of trees resulting in lowest test set error will be used; otherwise, if
22 cross-validation was performed, the number of trees resulting in lowest
23 cross-validation error will be used; otherwise, all trees will be used.}
24
25 \item{scale.}{whether or not the result should be scaled. Defaults to
26 \code{FALSE}.}
27
28 \item{sort.}{whether or not the results should be (reverse) sorted.
29 Defaults to \code{FALSE}.}
30
31 \item{y, f, w, offset, dist, baseline}{For \code{gbm.loss}: These components are
32 the outcome, predicted value, observation weight, offset, distribution, and
33 comparison loss function, respectively.}
34
35 \item{group, max.rank}{Used internally when \code{distribution =
36 \'pairwise\'}.}
37 }
38 \value{
39 By default, returns an unprocessed vector of estimated relative
40 influences. If the \code{scale.} and \code{sort.} arguments are used,
41 returns a processed version of the same.
42 }
43 \description{
44 Helper functions for computing the relative influence of each variable in
45 the gbm object.
2746 }
2847 \details{
2948 This is not intended for end-user use. These functions offer the different
3049 methods for computing the relative influence in \code{\link{summary.gbm}}.
3150 \code{gbm.loss} is a helper function for \code{permutation.test.gbm}.
3251 }
33 \value{
34 By default, returns an unprocessed vector of estimated relative influences.
35 If the \code{scale.} and \code{sort.} arguments are used, returns a processed
36 version of the same.
52 \references{
53 J.H. Friedman (2001). "Greedy Function Approximation: A Gradient
54 Boosting Machine," Annals of Statistics 29(5):1189-1232.
55
56 L. Breiman (2001).
57 \url{https://www.stat.berkeley.edu/users/breiman/randomforest2001.pdf}.
3758 }
38 \references{
39 J.H. Friedman (2001). "Greedy Function Approximation: A Gradient Boosting
40 Machine," Annals of Statistics 29(5):1189-1232.
41
42 L. Breiman (2001). \href{https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf}{Random Forests}.
43
59 \seealso{
60 \code{\link{summary.gbm}}
4461 }
45 \author{Greg Ridgeway \email{gregridgeway@gmail.com}}
46
47 \seealso{ \code{\link{summary.gbm}} }
48
49 \keyword{ hplot }
62 \author{
63 Greg Ridgeway \email{gregridgeway@gmail.com}
64 }
65 \keyword{hplot}
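A short sketch (assuming fit is a gbm.object and 100 trees are used; names are illustrative):

ri <- relative.influence(fit, n.trees = 100, scale. = TRUE, sort. = TRUE)
ri                                       # scaled and sorted variable influences
# experimental permutation-based alternative:
# permutation.test.gbm(fit, n.trees = 100)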
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/shrink.gbm.R
02 \name{shrink.gbm}
13 \alias{shrink.gbm}
2 \title{ L1 shrinkage of the predictor variables in a GBM }
3 \description{
4 Performs recursive shrinkage in each of the trees in a GBM fit using different shrinkage parameters for each variable.
5 }
4 \title{L1 shrinkage of the predictor variables in a GBM}
65 \usage{
7 shrink.gbm(object,
8 n.trees,
9 lambda = rep(10, length(object$var.names)),
10 ...)
6 shrink.gbm(object, n.trees, lambda = rep(10, length(object$var.names)),
7 ...)
118 }
129 \arguments{
13 \item{object}{ A \code{\link{gbm.object}} }
14 \item{n.trees}{ the number of trees to use }
15 \item{lambda}{ a vector with length equal to the number of variables containing the shrinkage parameter for each variable }
16 \item{\dots}{ other parameters (ignored) }
10 \item{object}{A \code{\link{gbm.object}}.}
11
12 \item{n.trees}{Integer specifying the number of trees to use.}
13
14 \item{lambda}{Vector of length equal to the number of variables containing
15 the shrinkage parameter for each variable.}
16
17 \item{\dots}{Additional optional arguments. (Currently ignored.)}
18 }
19 \value{
20 \item{predF}{Predicted values from the shrunken tree}
21 \item{objective}{The value of the loss function associated with the
22 predicted values} \item{gradient}{A vector with length equal to the number
23 of variables containing the derivative of the objective function with
24 respect to beta, the logit transform of the shrinkage parameter for each
25 variable}
26 }
27 \description{
28 Performs recursive shrinkage in each of the trees in a GBM fit using
29 different shrinkage parameters for each variable.
1730 }
1831 \details{
19 This function is currently experimental. Used in conjunction with a gradient ascent search for inclusion of variables.
32 This function is currently experimental. Used in conjunction with a gradient
33 ascent search for inclusion of variables.
2034 }
21 \value{
22 \item{predF}{Predicted values from the shrunken tree}
23 \item{objective}{The value of the loss function associated with the predicted values}
24 \item{gradient}{A vector with length equal to the number of variables containing the derivative of the objective function with respect to beta, the logit transform of the shrinkage parameter for each variable}
35 \note{
36 Warning: This function is experimental.
2537 }
26 \references{ Hastie, T. J., and Pregibon, D. \href{http://www-stat.stanford.edu/~hastie/Papers/shrinktree.ps}{Shrinking Trees}. AT&T Bell Laboratories Technical Report (March 1990).}
27 \author{ Greg Ridgeway \email{gregridgeway@gmail.com} }
28
29
30 \section{Warning}{This function is experimental.}
31
32 \seealso{ \code{\link{shrink.gbm.pred}}, \code{\link{gbm}} }
33 \keyword{ methods}% at least one, from doc/KEYWORDS
34
38 \references{
39 Hastie, T. J., and Pregibon, D.
40 \url{https://web.stanford.edu/~hastie/Papers/shrink_tree.pdf}. AT&T Bell
41 Laboratories Technical Report (March 1990).
42 }
43 \seealso{
44 \code{\link{shrink.gbm.pred}}, \code{\link{gbm}}
45 }
46 \author{
47 Greg Ridgeway \email{gregridgeway@gmail.com}
48 }
49 \keyword{methods}
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/shrink.gbm.pred.R
02 \name{shrink.gbm.pred}
13 \alias{shrink.gbm.pred}
2 %- Also NEED an '\alias' for EACH other topic documented here.
3 \title{ Predictions from a shrunked GBM }
4 \title{Predictions from a shrunken GBM}
5 \usage{
6 shrink.gbm.pred(object, newdata, n.trees, lambda = rep(1,
7 length(object$var.names)), ...)
8 }
9 \arguments{
10 \item{object}{a \code{\link{gbm.object}}}
11
12 \item{newdata}{dataset for predictions}
13
14 \item{n.trees}{the number of trees to use}
15
16 \item{lambda}{a vector with length equal to the number of variables
17 containing the shrinkage parameter for each variable}
18
19 \item{\dots}{other parameters (ignored)}
20 }
21 \value{
22 A vector with length equal to the number of observations in newdata
23 containing the predictions
24 }
425 \description{
5 Makes predictions from a shrunken GBM model.
26 Makes predictions from a shrunken GBM model.
627 }
7 \usage{
8 shrink.gbm.pred(object,
9 newdata,
10 n.trees,
11 lambda = rep(1, length(object$var.names)),
12 ...)
28 \section{Warning}{
29 This function is experimental
1330 }
1431
15 \arguments{
16 \item{object}{ a \code{\link{gbm.object}} }
17 \item{newdata}{ dataset for predictions }
18 \item{n.trees}{ the number of trees to use }
19 \item{lambda}{ a vector with length equal to the number of variables containing the shrinkage parameter for each variable }
20 \item{\dots}{ other parameters (ignored) }
32 \seealso{
33 \code{\link{shrink.gbm}}, \code{\link{gbm}}
2134 }
22
23 \value{
24 A vector with length equal to the number of observations in newdata containing the predictions
35 \author{
36 Greg Ridgeway \email{gregridgeway@gmail.com}
2537 }
26
27 \author{ Greg Ridgeway \email{gregridgeway@gmail.com} }
28
29 \section{Warning}{This function is experimental}
30
31 \seealso{ \code{\link{shrink.gbm}}, \code{\link{gbm}} }
32
33 \keyword{ methods }
38 \keyword{methods}
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/print.gbm.R
02 \name{summary.gbm}
13 \alias{summary.gbm}
2 \title{ Summary of a gbm object }
3 \description{
4 Computes the relative influence of each variable in the gbm object.
5 }
4 \title{Summary of a gbm object}
65 \usage{
7 \method{summary}{gbm}(object,
8 cBars=length(object$var.names),
9 n.trees=object$n.trees,
10 plotit=TRUE,
11 order=TRUE,
12 method=relative.influence,
13 normalize=TRUE,
14 ...)
6 \method{summary}{gbm}(object, cBars = length(object$var.names),
7 n.trees = object$n.trees, plotit = TRUE, order = TRUE,
8 method = relative.influence, normalize = TRUE, ...)
159 }
1610 \arguments{
1711 \item{object}{a \code{gbm} object created from an initial call to
1812 \code{\link{gbm}}.}
19 \item{cBars}{ the number of bars to plot. If \code{order=TRUE} the only the
20 variables with the \code{cBars} largest relative influence will appear in the
21 barplot. If \code{order=FALSE} then the first \code{cBars} variables will
22 appear in the plot. In either case, the function will return the relative
23 influence of all of the variables.}
24 \item{n.trees}{ the number of trees used to generate the plot. Only the first
13
14 \item{cBars}{the number of bars to plot. If \code{order=TRUE} then only the
15 variables with the \code{cBars} largest relative influence will appear in
16 the barplot. If \code{order=FALSE} then the first \code{cBars} variables
17 will appear in the plot. In either case, the function will return the
18 relative influence of all of the variables.}
19
20 \item{n.trees}{the number of trees used to generate the plot. Only the first
2521 \code{n.trees} trees will be used.}
26 \item{plotit}{ an indicator as to whether the plot is generated. }
27 \item{order}{ an indicator as to whether the plotted and/or returned relative
28 influences are sorted. }
29 \item{method}{ The function used to compute the relative influence.
22
23 \item{plotit}{an indicator as to whether the plot is generated.}
24
25 \item{order}{an indicator as to whether the plotted and/or returned relative
26 influences are sorted.}
27
28 \item{method}{The function used to compute the relative influence.
3029 \code{\link{relative.influence}} is the default and is the same as that
3130 described in Friedman (2001). The other current (and experimental) choice is
32 \code{\link{permutation.test.gbm}}. This method randomly permutes each predictor
33 variable at a time and computes the associated reduction in predictive
34 performance. This is similar to the variable importance measures Breiman uses
35 for random forests, but \code{gbm} currently computes using the entire training
36 dataset (not the out-of-bag observations).}
37 \item{normalize}{ if \code{FALSE} then \code{summary.gbm} returns the
38 unnormalized influence. }
39 \item{...}{ other arguments passed to the plot function. }
31 \code{\link{permutation.test.gbm}}. This method randomly permutes each
32 predictor variable at a time and computes the associated reduction in
33 predictive performance. This is similar to the variable importance measures
34 Breiman uses for random forests, but \code{gbm} currently computes using the
35 entire training dataset (not the out-of-bag observations).}
36
37 \item{normalize}{if \code{FALSE} then \code{summary.gbm} returns the
38 unnormalized influence.}
39
40 \item{...}{other arguments passed to the plot function.}
41 }
42 \value{
43 Returns a data frame where the first component is the variable name
44 and the second is the computed relative influence, normalized to sum to 100.
45 }
46 \description{
47 Computes the relative influence of each variable in the gbm object.
4048 }
4149 \details{
42 For \code{distribution="gaussian"} this returns exactly the reduction
43 of squared error attributable to each variable. For other loss functions this
44 returns the reduction attributeable to each varaible in sum of squared error in
45 predicting the gradient on each iteration. It describes the relative influence
46 of each variable in reducing the loss function. See the references below for
47 exact details on the computation.
48 }
49 \value{
50 Returns a data frame where the first component is the variable name and the
51 second is the computed relative influence, normalized to sum to 100.
50 For \code{distribution="gaussian"} this returns exactly the reduction of
51 squared error attributable to each variable. For other loss functions this
52 returns the reduction attributable to each variable in sum of squared error
53 in predicting the gradient on each iteration. It describes the relative
54 influence of each variable in reducing the loss function. See the references
55 below for exact details on the computation.
5256 }
5357 \references{
54 J.H. Friedman (2001). "Greedy Function Approximation: A Gradient Boosting
55 Machine," Annals of Statistics 29(5):1189-1232.
58 J.H. Friedman (2001). "Greedy Function Approximation: A Gradient
59 Boosting Machine," Annals of Statistics 29(5):1189-1232.
5660
57 L. Breiman (2001).\href{https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf}{Random Forests}.
61 L. Breiman
62 (2001).\url{https://www.stat.berkeley.edu/users/breiman/randomforest2001.pdf}.
5863 }
59 \author{Greg Ridgeway \email{gregridgeway@gmail.com}}
60
61 \seealso{ \code{\link{gbm}} }
62
63 \keyword{ hplot }
64 \seealso{
65 \code{\link{gbm}}
66 }
67 \author{
68 Greg Ridgeway \email{gregridgeway@gmail.com}
69 }
70 \keyword{hplot}
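A short sketch of the two influence methods (assuming fit is a gbm.object; names are illustrative):

ri <- summary(fit, n.trees = 100, plotit = FALSE)     # Friedman's relative influence
ri.perm <- summary(fit, n.trees = 100, plotit = FALSE,
                   method = permutation.test.gbm)     # experimental alternative
head(ri)  # variable names and influences, normalized to sum to 100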
0 % Generated by roxygen2: do not edit by hand
1 % Please edit documentation in R/test.gbm.R
2 \name{test.gbm}
3 \alias{test.gbm}
4 \alias{validate.gbm}
5 \alias{test.relative.influence}
6 \title{Test the \code{gbm} package.}
7 \usage{
8 test.gbm()
9 }
10 \value{
11 An object of class \code{RUnitTestData}. See the help for
12 \code{RUnit} for details.
13 }
14 \description{
15 Run tests on \code{gbm} functions to perform logical checks and
16 reproducibility.
17 }
18 \details{
19 The function uses functionality in the \code{RUnit} package. A fairly small
20 validation suite is executed that checks to see that relative influence
21 identifies sensible variables from simulated data, and that predictions from
22 GBMs with Gaussian, Cox or binomial distributions are sensible.
23 }
24 \note{
25 The test suite is not comprehensive.
26 }
27 \examples{
28
29 # Uncomment the following lines to run - commented out to make CRAN happy
30 #library(RUnit)
31 #val <- validate.gbm()
32 #printHTMLProtocol(val, "gbmReport.html")
33 }
34 \seealso{
35 \code{\link{gbm}}
36 }
37 \author{
38 Harry Southworth
39 }
40 \keyword{models}
+0
-43
man/validate.Rd less more
0 \name{validate.gbm}
1 \alias{validate.gbm}
2 \alias{test.gbm}
3 \alias{test.relative.influence}
4 %- Also NEED an '\alias' for EACH other topic documented here.
5 \title{Test the \code{gbm} package.}
6 \description{
7 Run tests on \code{gbm} functions to perform logical checks and
8 reproducibility.
9 }
10 \usage{
11 validate.gbm()
12 }
13 %- maybe also 'usage' for other objects documented here.
14 \details{
15 The function uses functionality in the \code{RUnit} package.
16 A fairly small validation suite is executed that checks to see that
17 relative influence identifies sensible variables from simulated data,
18 and that predictions from GBMs with Gaussian, Cox or binomial distributions
19 are sensible,
20 }
21 \value{
22 An object of class \code{RUnitTestData}. See the help for \code{RUnit} for
23 details.
24 }
25 \author{
26 Harry Southworth
27 }
28 \note{The test suite is not comprehensive.}
29
30 %% ~Make other sections like Warning with \section{Warning }{....} ~
31
32 \seealso{
33 \code{\link{gbm}}
34 }
35 \examples{
36 # Uncomment the following lines to run - commented out to make CRAN happy
37 #library(RUnit)
38 #val <- validate.texmex()
39 #printHTMLProtocol(val, "texmexReport.html")
40 }
41 \keyword{models}
42
0 #include <R.h>
1 #include <Rinternals.h>
2 #include <stdlib.h> // for NULL
3 #include <R_ext/Rdynload.h>
4
5 /* FIXME:
6 Check these declarations against the C/Fortran source code.
7 */
8
9 /* .Call calls */
10 extern SEXP gbm_fit(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
11 extern SEXP gbm_plot(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
12 extern SEXP gbm_pred(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
13 extern SEXP gbm_shrink_gradient(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
14 extern SEXP gbm_shrink_pred(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
15
16 static const R_CallMethodDef CallEntries[] = {
17 {"gbm_fit", (DL_FUNC) &gbm_fit, 22},
18 {"gbm_plot", (DL_FUNC) &gbm_plot, 10},
19 {"gbm_pred", (DL_FUNC) &gbm_pred, 10},
20 {"gbm_shrink_gradient", (DL_FUNC) &gbm_shrink_gradient, 11},
21 {"gbm_shrink_pred", (DL_FUNC) &gbm_shrink_pred, 10},
22 {NULL, NULL, 0}
23 };
24
25 void R_init_gbm(DllInfo *dll)
26 {
27 R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
28 R_useDynamicSymbols(dll, FALSE);
29 }
66 #include <R.h>
77 #include <Rinternals.h>
88
9 SEXP gbm
9 SEXP gbm_fit
1010 (
1111 SEXP radY, // outcome or response
1212 SEXP radOffset, // offset for f(x), NA for no offset
+0
-26
src/init.c less more
0 #include <R.h>
1 #include <Rinternals.h>
2 #include <stdlib.h> // for NULL
3 #include <R_ext/Rdynload.h>
4
5 /* .Call calls */
6 extern SEXP gbm(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
7 extern SEXP gbm_plot(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
8 extern SEXP gbm_pred(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
9 extern SEXP gbm_shrink_gradient(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
10 extern SEXP gbm_shrink_pred(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
11
12 static const R_CallMethodDef CallEntries[] = {
13 {"gbm", (DL_FUNC) &gbm, 22},
14 {"gbm_plot", (DL_FUNC) &gbm_plot, 10},
15 {"gbm_pred", (DL_FUNC) &gbm_pred, 10},
16 {"gbm_shrink_gradient", (DL_FUNC) &gbm_shrink_gradient, 11},
17 {"gbm_shrink_pred", (DL_FUNC) &gbm_shrink_pred, 10},
18 {NULL, NULL, 0}
19 };
20
21 void R_init_gbm(DllInfo *dll)
22 {
23 R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
24 R_useDynamicSymbols(dll, FALSE);
25 }
1111
1212 #include "locationm.h"
1313 #include <algorithm>
14 #include <Rmath.h> // for fmax2
1514
1615 using namespace std;
1716
189188 }
190189
191190 double dScale0 = 1.4826 * Median(iN, adDiff, adW);
192 dScale0 = fmax2(dScale0, mdEps);
191 dScale0 = fmax(dScale0, mdEps);
193192
194193 // Loop over until the error is low enough
195194 double dErr = 1.0;
202201 for (ii = 0; ii < iN; ii++)
203202 {
204203 double dT = fabs(adX[ii] - dBeta0) / dScale0;
205 dT = fmax2(dT, mdEps);
204 dT = fmax(dT, mdEps);
206205 double dWt = adW[ii] * PsiFun(dT) / dT;
207206
208207 dSumWX += dWt * adX[ii];
0 \documentclass{article}
1
2 \bibliographystyle{plain}
3
4 \newcommand{\EV}{\mathrm{E}}
5 \newcommand{\Var}{\mathrm{Var}}
6 \newcommand{\aRule}{\begin{center} \rule{5in}{1mm} \end{center}}
7
8 \title{Generalized Boosted Models:\\A guide to the gbm package} \author{Greg Ridgeway}
9
10 %\VignetteEngine{knitr::knitr}
11 %\VignetteIndexEntry{Generalized Boosted Models: A guide to the gbm package}
12
13 \newcommand{\mathgbf}[1]{{\mbox{\boldmath$#1$\unboldmath}}}
14
15 \begin{document}
16
17 \maketitle
18
19 Boosting takes on various forms with different programs using different loss functions, different base models, and different optimization schemes. The gbm package takes the approach described in \cite{Friedman:2001} and \cite{Friedman:2002}. Some of the terminology differs, mostly due to an effort to cast boosting terms into more standard statistical terminology (e.g. deviance). In addition, the gbm package implements boosting for models commonly used in statistics but not commonly associated with boosting. The Cox proportional hazard model, for example, is an incredibly useful model and the boosting framework applies quite readily with only slight modification \cite{Ridgeway:1999}. Also, some algorithms implemented in the gbm package differ from the standard implementation. The AdaBoost algorithm \cite{FreundSchapire:1997} has a particular loss function and a particular optimization algorithm associated with it. The gbm implementation of AdaBoost adopts AdaBoost's exponential loss function (its bound on misclassification rate) but uses Friedman's gradient descent algorithm rather than the original one proposed. So the main purpose of this document is to spell out in detail what the gbm package implements.
20
21 \section{Gradient boosting}
22
23 This section essentially presents the derivation of boosting described in \cite{Friedman:2001}. The gbm package also adopts the stochastic gradient boosting strategy, a small but important tweak on the basic algorithm, described in \cite{Friedman:2002}.
24
25 \subsection{Friedman's gradient boosting machine} \label{sec:GradientBoostingMachine}
26
27 \begin{figure}
28 \aRule Initialize $\hat f(\mathbf{x})$ to be a constant, $\hat f(\mathbf{x}) = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\rho)$. \\
29 For $t$ in $1,\ldots,T$ do
30 \begin{enumerate}
31 \item Compute the negative gradient as the working response
32 \begin{equation}
33 z_i = -\frac{\partial}{\partial f(\mathbf{x}_i)} \Psi(y_i,f(\mathbf{x}_i)) \mbox{\Huge $|$}_{f(\mathbf{x}_i)=\hat f(\mathbf{x}_i)}
34 \end{equation}
35 \item Fit a regression model, $g(\mathbf{x})$, predicting $z_i$ from the covariates $\mathbf{x}_i$. \item Choose a gradient descent step size as
36 \begin{equation}
37 \rho = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\hat f(\mathbf{x}_i)+\rho g(\mathbf{x}_i))
38 \end{equation}
39 \item Update the estimate of $f(\mathbf{x})$ as
40 \begin{equation}
41 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \rho g(\mathbf{x})
42 \end{equation}
43 \end{enumerate} \aRule \caption{Friedman's Gradient Boost algorithm} \label{fig:GradientBoost} \end{figure}
44
45 Friedman (2001) and the companion paper Friedman (2002) extended the work of Friedman, Hastie, and Tibshirani (2000) and laid the ground work for a new generation of boosting algorithms. Using the connection between boosting and optimization, this new work proposes the Gradient Boosting Machine.
46
47 In any function estimation problem we wish to find a regression function, $\hat f(\mathbf{x})$, that minimizes the expectation of some loss function, $\Psi(y,f)$, as shown in (\ref{NonparametricRegression1}).
48
49 \begin{eqnarray}
50 \hspace{0.5in}
51 \hat f(\mathbf{x}) &=& \arg \min_{f(\mathbf{x})} \EV_{y,\mathbf{x}} \Psi(y,f(\mathbf{x})) \nonumber \\ \label{NonparametricRegression1}
52 &=& \arg \min_{f(\mathbf{x})} \EV_x \left[ \EV_{y|\mathbf{x}} \Psi(y,f(\mathbf{x})) \Big| \mathbf{x} \right]
53 \end{eqnarray}
54
55 We will focus on finding estimates of $f(\mathbf{x})$ such that \begin{equation}
56 \label{NonparametricRegression2}
57 \hspace{0.5in}
58 \hat f(\mathbf{x}) = \arg \min_{f(\mathbf{x})} \EV_{y|\mathbf{x}} \left[ \Psi(y,f(\mathbf{x}))|\mathbf{x} \right]
59 \end{equation}
60 Parametric regression models assume that $f(\mathbf{x})$ is a function with a finite number of parameters, $\beta$, and estimates them by selecting those values that minimize a loss function (e.g. squared error loss) over a training sample of $N$ observations on $(y,\mathbf{x})$ pairs as in (\ref{eq:Friedman1}).
61 \begin{equation}
62 \label{eq:Friedman1}
63 \hspace{0.5in}
64 \hat\beta = \arg \min_{\beta} \sum_{i=1}^N \Psi(y_i,f(\mathbf{x}_i;\beta))
65 \end{equation}
66 When we wish to estimate $f(\mathbf{x})$ non-parametrically the task becomes more difficult. Again we can proceed similarly to \cite{FHT:2000} and modify our current estimate of $f(\mathbf{x})$ by adding a new function $f(\mathbf{x})$ in a greedy fashion. Letting $f_i = f(\mathbf{x}_i)$, we see that we want to decrease the $N$ dimensional function
67 \begin{eqnarray}
68 \label{EQ:Friedman2}
69 \hspace{0.5in}
70 J(\mathbf{f}) &=& \sum_{i=1}^N \Psi(y_i,f(\mathbf{x}_i)) \nonumber \\
71 &=& \sum_{i=1}^N \Psi(y_i,f_i).
72 \end{eqnarray}
73 The negative gradient of $J(\mathbf{f})$ indicates the direction of the locally greatest decrease in $J(\mathbf{f})$. Gradient descent would then have us modify $\mathbf{f}$ as
74 \begin{equation}
75 \label{eq:Friedman3}
76 \hspace{0.5in}
77 \hat \mathbf{f} \leftarrow \hat \mathbf{f} - \rho \nabla J(\mathbf{f})
78 \end{equation}
79 where $\rho$ is the size of the step along the direction of greatest descent. Clearly, this step alone is far from our desired goal. First, it only fits $f$ at values of $\mathbf{x}$ for which we have observations. Second, it does not take into account that observations with similar $\mathbf{x}$ are likely to have similar values of $f(\mathbf{x})$. Both these problems would have disastrous effects on generalization error. However, Friedman suggests selecting a class of functions that use the covariate information to approximate the gradient, usually a regression tree. This line of reasoning produces his Gradient Boosting algorithm shown in Figure~\ref{fig:GradientBoost}. At each iteration the algorithm determines the direction, the gradient, in which it needs to improve the fit to the data and selects a particular model from the allowable class of functions that is in most agreement with the direction. In the case of squared-error loss, $\Psi(y_i,f(\mathbf{x}_i)) = \sum_{i=1}^N (y_i-f(\mathbf{x}_i))^2$, this algorithm corresponds exactly to residual fitting.
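The following R sketch mirrors Figure~\ref{fig:GradientBoost} for squared-error loss, using rpart as a stand-in base learner; it is purely illustrative and is not how the gbm package itself fits models (gbm uses its own C++ tree engine).

library(rpart)
set.seed(1)
n <- 200
x <- runif(n)
y <- sin(2 * pi * x) + rnorm(n, sd = 0.3)
f.hat <- rep(mean(y), n)              # initialize to the loss-minimizing constant
for (t in 1:100) {
  z <- y - f.hat                      # negative gradient for squared error: the residuals
  g <- rpart(z ~ x, data = data.frame(x = x, z = z), maxdepth = 2)
  f.hat <- f.hat + predict(g)         # for squared error the fitted tree already gives
}                                     # the optimal step, so rho = 1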
80
81 There are various ways to extend and improve upon the basic framework suggested in Figure~\ref{fig:GradientBoost}. For example, Friedman (2001) substituted several choices for $\Psi$ to develop new boosting algorithms for robust regression with least absolute deviation and Huber loss functions. Friedman (2002) showed that a simple subsampling trick can greatly improve predictive performance while simultaneously reducing computation time. Section~\ref{GBMModifications} discusses some of these modifications.
82
83 \section{Improving boosting methods using control of the learning rate, sub-sampling, and a decomposition for interpretation} \label{GBMModifications}
84
85 This section explores the variations of the previous algorithms that have the potential to improve their predictive performance and interpretability. In particular, by controlling the optimization speed or learning rate, introducing low-variance regression methods, and applying ideas from robust regression we can produce non-parametric regression procedures with many desirable properties. As a by-product some of these modifications lead directly into implementations for learning from massive datasets. All these methods take advantage of the general form of boosting
86 \begin{equation}
87 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \EV(z(y,\hat f(\mathbf{x}))|\mathbf{x}).
88 \end{equation} So far we have taken advantage of this form only by substituting in our favorite regression procedure for $\EV_w(z|\mathbf{x})$. I will discuss some modifications to estimating $\EV_w(z|\mathbf{x})$ that have the potential to improve our algorithm.
89
90 \subsection{Decreasing the learning rate} As several authors have phrased slightly differently, ``...boosting, whatever flavor, seldom seems to overfit, no matter how many terms are included in the additive expansion''. This is not true as the discussion to \cite{FHT:2000} points out.
91
92 In the update step of any boosting algorithm we can introduce a learning rate to dampen the proposed move.
93 \begin{equation}
94 \label{eq:shrinkage}
95 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \lambda \EV(z(y,\hat f(\mathbf{x}))|\mathbf{x}).
96 \end{equation}
97 By multiplying the gradient step by $\lambda$ as in equation~\ref{eq:shrinkage} we have control on the rate at which the boosting algorithm descends the error surface (or ascends the likelihood surface). When $\lambda=1$ we return to performing full gradient steps. Friedman (2001) relates the learning rate to regularization through shrinkage.
98
99 The optimal number of iterations, $T$, and the learning rate, $\lambda$, depend on each other. In practice I set $\lambda$ to be as small as possible and then select $T$ by cross-validation. Performance is best when $\lambda$ is as small as possible, with decreasing marginal utility for smaller and smaller $\lambda$. Slower learning rates do not necessarily scale the number of optimal iterations. That is, the fact that the optimal $T$ is 100 iterations when $\lambda=1.0$ does {\it not} necessarily imply that the optimal $T$ is 1000 iterations when $\lambda=0.1$.
100
101 \subsection{Variance reduction using subsampling}
102
103 Friedman (2002) proposed the stochastic gradient boosting algorithm that simply samples uniformly without replacement from the dataset before estimating the next gradient step. He found that this additional step greatly improved performance. We estimate the regression $\EV(z(y,\hat f(\mathbf{x}))|\mathbf{x})$ using a random subsample of the dataset.
104
105 \subsection{ANOVA decomposition}
106
107 Certain function approximation methods are decomposable in terms of a ``functional ANOVA decomposition''. That is, a function is decomposable as
108 \begin{equation}
109 \label{ANOVAdecomp}
110 f(\mathbf{x}) = \sum_j f_j(x_j) + \sum_{jk} f_{jk}(x_j,x_k) + \sum_{jk\ell} f_{jk\ell}(x_j,x_k,x_\ell) + \cdots.
111 \end{equation} This applies to boosted trees. Regression stumps (one split decision trees) depend on only one variable and fall into the first term of \ref{ANOVAdecomp}. Trees with two splits fall into the second term of \ref{ANOVAdecomp} and so on. By restricting the depth of the trees produced on each boosting iteration we can control the order of approximation. Often additive components are sufficient to approximate a multivariate function well; generalized additive models, the na\"{\i}ve Bayes classifier, and boosted stumps, for example, all rely on additive approximations. When the approximation is restricted to first order we can also produce plots of $x_j$ versus $f_j(x_j)$ to demonstrate how changes in $x_j$ might affect changes in the response variable.
112
113 \subsection{Relative influence} Friedman (2001) also develops an extension of a variable's ``relative influence'' for boosted estimates. For tree based methods the approximate relative influence of a variable $x_j$ is
114 \begin{equation}
115 \label{RelInfluence}
116 \hspace{0.5in}
117 \hat J_j^2 = \hspace{-0.1in}\sum_{\mathrm{splits~on~}x_j}\hspace{-0.2in}I_t^2
118 \end{equation} where $I_t^2$ is the empirical improvement by splitting on $x_j$ at that point. Friedman's extension to boosted models is to average the relative influence of variable $x_j$ across all the trees generated by the boosting algorithm.
119
120 \begin{figure}
121 \aRule
122 Select
123 \begin{itemize}
124 \item a loss function (\texttt{distribution})
125 \item the number of iterations, $T$ (\texttt{n.trees})
126 \item the depth of each tree, $K$ (\texttt{interaction.depth})
127 \item the shrinkage (or learning rate) parameter, $\lambda$ (\texttt{shrinkage})
128 \item the subsampling rate, $p$ (\texttt{bag.fraction})
129 \end{itemize}
130 Initialize $\hat f(\mathbf{x})$ to be a constant, $\hat f(\mathbf{x}) = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\rho)$ \\
131 For $t$ in $1,\ldots,T$ do
132 \begin{enumerate}
133 \item Compute the negative gradient as the working response
134 \begin{equation}
135 z_i = -\frac{\partial}{\partial f(\mathbf{x}_i)} \Psi(y_i,f(\mathbf{x}_i)) \mbox{\Huge $|$}_{f(\mathbf{x}_i)=\hat f(\mathbf{x}_i)}
136 \end{equation}
137 \item Randomly select $p\times N$ cases from the dataset
138 \item Fit a regression tree with $K$ terminal nodes, $g(\mathbf{x})=\EV(z|\mathbf{x})$. This tree is fit using only those randomly selected observations
139 \item Compute the optimal terminal node predictions, $\rho_1,\ldots,\rho_K$, as
140 \begin{equation}
141 \rho_k = \arg \min_{\rho} \sum_{\mathbf{x}_i\in S_k} \Psi(y_i,\hat f(\mathbf{x}_i)+\rho)
142 \end{equation}
143 where $S_k$ is the set of $\mathbf{x}$s that define terminal node $k$. Again this step uses only the randomly selected observations.
144 \item Update $\hat f(\mathbf{x})$ as
145 \begin{equation}
146 \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \lambda\rho_{k(\mathbf{x})}
147 \end{equation}
148 where $k(\mathbf{x})$ indicates the index of the terminal node into which an observation with features $\mathbf{x}$ would fall.
149 \end{enumerate}
150 \aRule
151 \caption{Boosting as implemented in \texttt{gbm()}}
152 \label{fig:gbm}
153 \end{figure}
154
155 \section{Common user options}
156
157 This section discusses the options to gbm that most users will need to change or tune.
158
159 \subsection{Loss function}
160
161 The first and foremost choice is \texttt{distribution}. This should be easily dictated by the application. For most classification problems either \texttt{bernoulli} or \texttt{adaboost} will be appropriate, the former being recommended. For continuous outcomes the choices are \texttt{gaussian} (for minimizing squared error), \texttt{laplace} (for minimizing absolute error), and quantile regression (for estimating percentiles of the conditional distribution of the outcome). Censored survival outcomes require \texttt{coxph}. Count outcomes may use \texttt{poisson}, although one might also consider \texttt{gaussian} or \texttt{laplace} depending on the analytical goals.
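As a brief sketch of matching \texttt{distribution} to the outcome type (assuming a data frame \texttt{dat} with illustrative columns \texttt{y01}, \texttt{y}, \texttt{time}, \texttt{status} and predictors \texttt{x1}, \texttt{x2}):

library(survival)   # for Surv()
fit.class <- gbm(y01 ~ x1 + x2, data = dat, distribution = "bernoulli", n.trees = 1000)
fit.surv  <- gbm(Surv(time, status) ~ x1 + x2, data = dat, distribution = "coxph",
                 n.trees = 1000)
fit.q90   <- gbm(y ~ x1 + x2, data = dat, n.trees = 1000,
                 distribution = list(name = "quantile", alpha = 0.9))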
162
163 \subsection{The relationship between shrinkage and number of iterations} The issues that most new users of gbm struggle with are the choice of \texttt{n.trees} and \texttt{shrinkage}. It is important to know that smaller values of \texttt{shrinkage} (almost) always give improved predictive performance. That is, setting \texttt{shrinkage=0.001} will almost certainly result in a model with better out-of-sample predictive performance than setting \texttt{shrinkage=0.01}. However, there are computational costs, both storage and CPU time, associated with setting \texttt{shrinkage} to be low. The model with \texttt{shrinkage=0.001} will likely require ten times as many iterations as the model with \texttt{shrinkage=0.01}, increasing storage and computation time by a factor of 10. Figure~\ref{fig:shrinkViters} shows the relationship between predictive performance, the number of iterations, and the shrinkage parameter. Note that the increase in the optimal number of iterations between two choices for shrinkage is roughly equal to the ratio of the shrinkage parameters. It is generally the case that for small shrinkage parameters, 0.001 for example, there is a fairly long plateau in which predictive performance is at its best. My rule of thumb is to set \texttt{shrinkage} as small as possible while still being able to fit the model in a reasonable amount of time and storage. I usually aim for 3,000 to 10,000 iterations with shrinkage rates between 0.01 and 0.001.
164
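A minimal sketch of this trade-off (assuming a data frame \texttt{dat} with outcome \texttt{y} and predictors \texttt{x1}, \texttt{x2}; the smaller shrinkage typically needs roughly ten times as many trees):

fit.01  <- gbm(y ~ x1 + x2, data = dat, distribution = "gaussian",
               n.trees = 1000, shrinkage = 0.01, cv.folds = 5)
fit.001 <- gbm(y ~ x1 + x2, data = dat, distribution = "gaussian",
               n.trees = 10000, shrinkage = 0.001, cv.folds = 5)
which.min(fit.01$cv.error)    # optimal T at shrinkage = 0.01
which.min(fit.001$cv.error)   # usually about ten times larger at shrinkage = 0.001
min(fit.001$cv.error) <= min(fit.01$cv.error)   # typically TRUE, but only slightly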
165 \begin{figure}[ht] \begin{center} \includegraphics[width=5in]{shrinkage-v-iterations} \end{center} \caption{Out-of-sample predictive performance by number of iterations and shrinkage. Smaller values of the shrinkage parameter offer improved predictive performance, but with decreasing marginal improvement.} \label{fig:shrinkViters} \end{figure}
166
167 \subsection{Estimating the optimal number of iterations} gbm offers three methods for estimating the optimal number of iterations after the gbm model has been fit, an independent test set (\texttt{test}), out-of-bag estimation (\texttt{OOB}), and $v$-fold cross validation (\texttt{cv}). The function \texttt{gbm.perf} computes the iteration estimate.
168
169 Like Friedman's MART software, the independent test set method uses a single holdout test set to select the optimal number of iterations. If \texttt{train.fraction} is set to be less than 1, then only the \textit{first} \texttt{train.fraction}$\times$\texttt{nrow(data)} observations will be used to fit the model. Note that if the data are sorted in a systematic way (such as cases for which $y=1$ come first), then the data should be shuffled before running gbm. Those observations not used in the model fit can be used to get an unbiased estimate of the optimal number of iterations. The downside of this method is that a considerable number of observations are used to estimate the single regularization parameter (number of iterations), leaving a reduced dataset for estimating the entire multivariate model structure. Use \texttt{gbm.perf(...,method="test")} to obtain an estimate of the optimal number of iterations using the held out test set.
170
171 If \texttt{bag.fraction} is set to be greater than 0 (0.5 is recommended), gbm computes an out-of-bag estimate of the improvement in predictive performance. It evaluates the reduction in deviance on those observations not used in selecting the next regression tree. The out-of-bag estimator underestimates the reduction in deviance. As a result, it almost always is too conservative in its selection for the optimal number of iterations. The motivation behind this method was to avoid having to set aside a large independent dataset, which reduces the information available for learning the model structure. Use \texttt{gbm.perf(...,method="OOB")} to obtain the OOB estimate.
172
173 Lastly, gbm offers $v$-fold cross validation for estimating the optimal number of iterations. If \texttt{cv.folds=5} is specified when fitting the gbm model, then gbm will do 5-fold cross validation. gbm will fit five gbm models in order to compute the cross validation error estimate and then will fit a sixth and final gbm model with \texttt{n.trees} iterations using all of the data. The returned model object will have a component labeled \texttt{cv.error}. Note that \texttt{gbm.more} will do additional gbm iterations but will not add to the \texttt{cv.error} component. Use \texttt{gbm.perf(...,method="cv")} to obtain the cross validation estimate.
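A short sketch of the three estimators applied to one fitted model (assuming \texttt{fit} was fit with \texttt{train.fraction} less than 1, \texttt{bag.fraction} greater than 0, and \texttt{cv.folds} greater than 1):

best.test <- gbm.perf(fit, method = "test")   # holdout test set estimate
best.oob  <- gbm.perf(fit, method = "OOB")    # out-of-bag estimate (conservative)
best.cv   <- gbm.perf(fit, method = "cv")     # cross-validation estimate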
174
175 \begin{figure}[ht]
176 \begin{center}
177 \includegraphics[width=5in]{oobperf2}
178 \end{center}
179 \caption{Out-of-sample predictive performance of four methods of selecting the optimal number of iterations. The vertical axis plots performance relative the best. The boxplots indicate relative performance across thirteen real datasets from the UCI repository. See \texttt{demo(OOB-reps)}.}
180 \label{fig:oobperf}
181 \end{figure}
182
183 Figure~\ref{fig:oobperf} compares the three methods for estimating the optimal number of iterations across 13 datasets. The boxplots show each method's performance relative to the best method on that dataset. For most datasets the methods perform similarly; however, 5-fold cross validation is consistently the best of them. OOB, using a 33\% test set, and using a 20\% test set all have datasets for which they perform considerably worse than the best method. My recommendation is to use 5- or 10-fold cross validation if you can afford the computing time. Otherwise you may choose among the other options, knowing that OOB is conservative.
184
185 \section{Available distributions}
186
187 This section gives some of the mathematical detail for each of the distribution options that gbm offers. The gbm engine written in C++ has access to a C++ class for each of these distributions. Each class contains methods for computing the associated deviance, initial value, the gradient, and the constants to predict in each terminal node.
188
189 In the equations shown below, for non-zero offset terms, replace $f(\mathbf{x}_i)$ with $o_i + f(\mathbf{x}_i)$.
190
191 \subsection{Gaussian}
192
193 \begin{tabular}{ll}
194 Deviance & $\displaystyle \frac{1}{\sum w_i} \sum w_i(y_i-f(\mathbf{x}_i))^2$ \\
195 Initial value & $\displaystyle f(\mathbf{x})=\frac{\sum w_i(y_i-o_i)}{\sum w_i}$ \\
196 Gradient & $z_i=y_i - f(\mathbf{x}_i)$ \\
197 Terminal node estimates & $\displaystyle \frac{\sum w_i(y_i-f(\mathbf{x}_i))}{\sum w_i}$
198 \end{tabular}
199
200 \subsection{AdaBoost}
201
202 \begin{tabular}{ll} Deviance & $\displaystyle \frac{1}{\sum w_i} \sum w_i\exp(-(2y_i-1)f(\mathbf{x}_i))$ \\ Initial value & $\displaystyle \frac{1}{2}\log\frac{\sum y_iw_ie^{-o_i}}{\sum (1-y_i)w_ie^{o_i}}$ \\ Gradient & $\displaystyle z_i= -(2y_i-1)\exp(-(2y_i-1)f(\mathbf{x}_i))$ \\ Terminal node estimates & $\displaystyle \frac{\sum (2y_i-1)w_i\exp(-(2y_i-1)f(\mathbf{x}_i))}
203 {\sum w_i\exp(-(2y_i-1)f(\mathbf{x}_i))}$
204 \end{tabular}
205
206 \subsection{Bernoulli}
207
208 \begin{tabular}{ll} Deviance & $\displaystyle -2\frac{1}{\sum w_i} \sum w_i(y_if(\mathbf{x}_i)-\log(1+\exp(f(\mathbf{x}_i))))$ \\ Initial value & $\displaystyle \log\frac{\sum w_iy_i}{\sum w_i(1-y_i)}$ \\ Gradient & $\displaystyle z_i=y_i-\frac{1}{1+\exp(-f(\mathbf{x}_i))}$ \\ Terminal node estimates & $\displaystyle \frac{\sum w_i(y_i-p_i)}{\sum w_ip_i(1-p_i)}$ \\
209 & where $\displaystyle p_i = \frac{1}{1+\exp(-f(\mathbf{x}_i))}$ \\
210 \end{tabular}
211
212 Notes: \begin{itemize} \item For non-zero offset terms, the computation of the initial value requires Newton-Raphson. Initialize $f_0=0$ and iterate $\displaystyle f_0 \leftarrow f_0 + \frac{\sum w_i(y_i-p_i)}{\sum w_ip_i(1-p_i)}$ where $\displaystyle p_i = \frac{1}{1+\exp(-(o_i+f_0))}$. \end{itemize}
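A direct R transcription of that iteration (assuming numeric vectors \texttt{y}, \texttt{w}, and offset \texttt{o}; purely illustrative):

f0 <- 0
for (k in 1:25) {                                    # a few Newton-Raphson steps suffice
  p  <- 1 / (1 + exp(-(o + f0)))
  f0 <- f0 + sum(w * (y - p)) / sum(w * p * (1 - p))
}
f0   # initial value used when a non-zero offset is supplied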
213
214 \subsection{Laplace}
215
216 \begin{tabular}{ll} Deviance & $\frac{1}{\sum w_i} \sum w_i|y_i-f(\mathbf{x}_i)|$ \\ Initial value & $\mbox{median}_w(y)$ \\ Gradient & $z_i=\mbox{sign}(y_i-f(\mathbf{x}_i))$ \\ Terminal node estimates & $\mbox{median}_w(z)$ \end{tabular}
217
218 Notes: \begin{itemize} \item $\mbox{median}_w(y)$ denotes the weighted median, defined as the solution to the equation $\frac{\sum w_iI(y_i\leq m)}{\sum w_i}=\frac{1}{2}$ \item \texttt{gbm()} currently does not implement the weighted median and issues a warning when the user uses weighted data with \texttt{distribution="laplace"}. \end{itemize}
219
220
221 \subsection{Quantile regression}
222
223 Contributed by Brian Kriegler (see \cite{Kriegler:2010}).
224
225 \begin{tabular}{ll} Deviance & $\frac{1}{\sum w_i}
226 \left(\alpha\sum_{y_i>f(\mathbf{x}_i)} w_i(y_i-f(\mathbf{x}_i))\right. +$ \\
227 & \hspace{0.5in}$\left.(1-\alpha)\sum_{y_i\leq f(\mathbf{x}_i)} w_i(f(\mathbf{x}_i)-y_i)\right)$ \\
228 Initial value & $\mathrm{quantile}^{(\alpha)}_w(y)$ \\ Gradient & $z_i=\alpha I(y_i>f(\mathbf{x}_i))-(1-\alpha)I(y_i\leq f(\mathbf{x}_i))$ \\ Terminal node estimates & $\mathrm{quantile}^{(\alpha)}_w(z)$ \end{tabular}
229
230 Notes: \begin{itemize} \item $\mathrm{quantile}^{(\alpha)}_w(y)$ denotes the weighted quantile, defined as the solution to the equation $\frac{\sum w_iI(y_i\leq q)}{\sum w_i}=\alpha$ \item \texttt{gbm()} currently does not implement the weighted median and issues a warning when the user uses weighted data with \texttt{distribution=list(name="quantile")}. \end{itemize}
231
232
233 \subsection{Cox Proportional Hazard}
234
235 \begin{tabular}{ll} Deviance & $-2\sum w_i(\delta_i(f(\mathbf{x}_i)-\log(R_i/w_i)))$\\ Gradient & $\displaystyle z_i=\delta_i - \sum_j \delta_j
236 \frac{w_jI(t_i\geq t_j)e^{f(\mathbf{x}_i)}}
237 {\sum_k w_kI(t_k\geq t_j)e^{f(\mathbf{x}_k)}}$ \\
238 Initial value & 0 \\ Terminal node estimates & Newton-Raphson algorithm \end{tabular}
239
240 \begin{enumerate}
241 \item Initialize the terminal node predictions to 0, $\mathgbf{\rho}=0$
242 \item Let $\displaystyle
243 p_i^{(k)}=\frac{\sum_j I(k(j)=k)I(t_j\geq t_i)e^{f(\mathbf{x}_i)+\rho_k}}
244 {\sum_j I(t_j\geq t_i)e^{f(\mathbf{x}_i)+\rho_k}}$
245 \item Let $g_k=\sum w_i\delta_i\left(I(k(i)=k)-p_i^{(k)}\right)$
246 \item Let $\mathbf{H}$ be a $k\times k$ matrix with diagonal elements
247 \begin{enumerate}
248 \item Set diagonal elements $H_{mm}=\sum w_i\delta_i p_i^{(m)}\left(1-p_i^{(m)}\right)$
249 \item Set off diagonal elements $H_{mn}=-\sum w_i\delta_i p_i^{(m)}p_i^{(n)}$
250 \end{enumerate}
251 \item Newton-Raphson update $\mathgbf{\rho} \leftarrow \mathgbf{\rho} - \mathbf{H}^{-1}\mathbf{g}$
252 \item Return to step 2 until convergence
253 \end{enumerate}
254
255 Notes:
256 \begin{itemize}
257 \item $t_i$ is the survival time and $\delta_i$ is the death indicator.
258 \item $R_i$ denotes the hazard for the risk set, $R_i=\sum_{j=1}^N w_jI(t_j\geq t_i)e^{f(\mathbf{x}_i)}$
259 \item $k(i)$ indexes the terminal node of observation $i$
260 \item For speed, \texttt{gbm()} does only one step of the Newton-Raphson algorithm rather than iterating to convergence. There is no appreciable loss of accuracy since the next boosting iteration will simply correct for the prior iteration's inadequacy.
261 \item \texttt{gbm()} initially sorts the data by survival time. Doing this reduces the computation of the risk set from $O(n^2)$ to $O(n)$ at the cost of a single up front sort on survival time. After the model is fit, the data are then put back in their original order.
262 \end{itemize}
263
264 \subsection{Poisson}
265 \begin{tabular}{ll}
266 Deviance & $\displaystyle -2\frac{1}{\sum w_i} \sum w_i(y_if(\mathbf{x}_i)-\exp(f(\mathbf{x}_i)))$ \\
267 Initial value & $\displaystyle f(\mathbf{x})= \log\left(\frac{\sum w_iy_i}{\sum w_ie^{o_i}}\right)$ \\
268 Gradient & $z_i=y_i - \exp(f(\mathbf{x}_i))$ \\
269 Terminal node estimates & $\displaystyle \log\frac{\sum w_iy_i}{\sum w_i\exp(f(\mathbf{x}_i))}$
270 \end{tabular}
271
272 The Poisson class includes special safeguards so that the most extreme predicted values are $e^{-19}$ and $e^{+19}$. This behavior is consistent with \texttt{glm()}.
273
274 \subsection{Pairwise}
275
276 This distribution implements ranking measures following the
277 \emph{LambdaMart} algorithm \cite{Burges:2010}. Instances belong to
278 \emph{groups}; all pairs of items with different labels, belonging to
279 the same group, are used for training. In \emph{Information Retrieval}
280 applications, groups correspond to user queries,
281 and items to (feature vectors of) documents in the associated match
282 set to be ranked.
283
284 For consistency with typical usage, our goal is to \emph{maximize} one
285 of the \emph{utility} functions listed below. Consider a group with
286 instances $x_1, \dots, x_n$, ordered such that $f(x_1) \geq f(x_2)
287 \geq \dots \geq f(x_n)$; i.e., the \emph{rank} of $x_i$ is $i$, where
288 smaller ranks are preferable. Let $P$ be the set of all ordered pairs
289 such that $y_i > y_j$.
290
291 \begin{enumerate}
292 \item[{\bf Concordance:}] Fraction of concordant (i.e., correctly ordered)
293 pairs. For the special case of binary labels, this is equivalent to
294 the Area under the ROC Curve.
295 $$\left\{ \begin{array}{l l}\frac{\|\{(i,j)\in P |
296 f(x_i)>f(x_j)\}\|}{\|P\|}
297 & P \neq \emptyset\\
298 0 & \mbox{otherwise.}
299 \end{array}\right.
300 $$
301 \item[{\bf MRR:}] Mean reciprocal rank of the highest-ranked positive
302 instance (it is assumed $y_i\in\{0,1\}$):
303 $$\left\{ \begin{array}{l l}\frac{1}{\min\{1 \leq i \leq n |y_i=1\}}
304 & \exists i: \, 1 \leq i \leq n, y_i=1\\
305 0 & \mbox{otherwise.}\end{array}\right.$$
306 \item[{\bf MAP:}] Mean average precision, a generalization of
307 MRR to multiple positive instances:
308 $$\left\{ \begin{array}{l l} \frac{\sum_{1\leq i\leq n | y_i=1} \|\{1\leq j\leq i
309 |y_j=1\}\|\,/\,i}{\|\{1\leq i\leq n | y_i=1\}\|} & \exists i: \,
310 1 \leq i \leq n, y_i=1\\
311 0 & \mbox{otherwise.}\end{array}\right.$$
312 \item[{\bf nDCG:}] Normalized discounted cumulative gain:
313 $$\frac{\sum_{1\leq i\leq n} \log_2(i+1) \, y_i}{\sum_{1\leq i\leq n}
314 \log_2(i+1) \, y'_i},$$ where $y'_1, \dots, y'_n$ is a reordering of $y_1,
315 \dots,y_n$ with $y'_1 \geq y'_2 \geq \dots \geq y'_n$.
316 \end{enumerate}
317
318 The generalization to multiple (possibly weighted) groups is
319 straightforward. Sometimes a cut-off rank $k$ is given for \emph{MRR}
320 and \emph{nDCG}, in which case we replace the outer index $n$ by
321 $\min(n,k)$.
322
323 The initial value for $f(x_i)$ is always zero. We derive the gradient of
324 a cost function whose gradient locally approximates the gradient of
325 the IR measure for a fixed ranking:
326
327 \begin{eqnarray*}
328 \Phi & = & \sum_{(i,j) \in P} \Phi_{ij}\\
329 & = & \sum_{(i,j) \in P} |\Delta Z_{ij}| \log \left( 1 + e^{-(f(x_i) -
330 f(x_j))}\right),
331 \end{eqnarray*}
332 where $|\Delta Z_{ij}|$ is the absolute utility difference when
333 swapping the ranks of $i$ and $j$, while leaving all other instances
334 the same. Define
335 \begin{eqnarray*}
336 \lambda_{ij} & = & \frac{\partial\Phi_{ij}}{\partial f(x_i)}\\
337 & = & - |\Delta Z_{ij}| \frac{1}{1 + e^{f(x_i) - f(x_j)}}\\
338 & = & - |\Delta Z_{ij}| \, \rho_{ij},
339 \end{eqnarray*}
340 with
341 $$ \rho_{ij} = - \frac{\lambda_{ij}}{|\Delta Z_{ij}|} = \frac{1}{1 + e^{f(x_i) - f(x_j)}}.$$
342
343 For the gradient of $\Phi$ with respect to $f(x_i)$, define
344 \begin{eqnarray*}
345 \lambda_i & = & \frac{\partial \Phi}{\partial f(x_i)}\\
346 & = & \sum_{j|(i,j) \in P} \lambda_{ij} - \sum_{j|(j,i) \in P} \lambda_{ji}\\
347 & = & - \sum_{j|(i,j) \in P} |\Delta Z_{ij}| \, \rho_{ij}\\
348 & & \mbox{} + \sum_{j|(j,i) \in P} |\Delta Z_{ji}| \, \rho_{ji}.
349 \end{eqnarray*}
350
351 The second derivative is
352 \begin{eqnarray*}
353 \gamma_i & \stackrel{\mathrm{def}}{=} & \frac{\partial^2\Phi}{\partial f(x_i)^2}\\
354 & = & \sum_{j|(i,j) \in P} |\Delta Z_{ij}| \, \rho_{ij} \, (1-\rho_{ij})\\
355 & & \mbox{} + \sum_{j|(j,i) \in P} |\Delta Z_{ji}| \, \rho_{ji} \, (1-\rho_{ji}).
356 \end{eqnarray*}
357
358 Now consider again all groups with associated weights. For a given terminal node, let $i$
359 range over all contained instances. Then its estimate is
360 $$-\frac{\sum_i v_i\lambda_{i}}{\sum_i v_i \gamma_i},$$ where
361 $v_i=w(\mbox{\em group}(i))/\|\{(j,k)\in P \mid j,k \in \mbox{\em group}(i)\}\|$, i.e., each group's weight divided by its number of training pairs.
362
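The following R sketch spells out the computation of $\rho_{ij}$, $\lambda_i$, $\gamma_i$, and the resulting terminal-node estimate for a single group treated as one node. It assumes the concordance utility, so that $|\Delta Z_{ij}| = 1/\|P\|$ for every pair, and uses made-up labels and scores; it illustrates the formulas above rather than the package's C++ implementation.
\begin{verbatim}
## Toy evaluation of the LambdaMart quantities for one group (illustrative only)
y <- c(1, 1, 0, 0)            # labels
f <- c(0.2, -0.1, 0.4, 0.0)   # current scores f(x_i)
n <- length(y)

## ordered pairs (i, j) with y_i > y_j
P <- which(outer(y, y, ">"), arr.ind = TRUE)

## concordance utility: |Delta Z_ij| = 1 / ||P|| for every pair
dZ  <- rep(1 / nrow(P), nrow(P))
rho <- 1 / (1 + exp(f[P[, 1]] - f[P[, 2]]))      # rho_ij

lam <- gam <- numeric(n)
for (k in seq_len(nrow(P))) {
  i <- P[k, 1]; j <- P[k, 2]
  lam[i] <- lam[i] - dZ[k] * rho[k]              # pair (i, j): i is the higher-labelled member
  lam[j] <- lam[j] + dZ[k] * rho[k]              # same pair, seen from j's side
  gam[i] <- gam[i] + dZ[k] * rho[k] * (1 - rho[k])
  gam[j] <- gam[j] + dZ[k] * rho[k] * (1 - rho[k])
}

## terminal-node estimate if all four instances fell into the same node
## (single group with weight w = 1, so v_i = 1 / ||P|| for every i)
v <- rep(1 / nrow(P), n)
-sum(v * lam) / sum(v * gam)
\end{verbatim}
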
363 In each iteration, instances are reranked according to the preliminary
364 scores $f(x_i)$ to determine the $|\Delta Z_{ij}|$. To avoid ranking
365 bias, ties are broken by adding a small amount of
366 random noise.
367
368
369
370 \bibliography{gbm}
371
372 \end{document}
0 @article{FreundSchapire:1997,
1 author = {Y. Freund and R. E. Schapire},
2 title = {A decision-theoretic generalization of on-line learning and an application to boosting},
3 journal = {Journal of Computer and System Sciences},
4 volume = {55},
5 number = {1},
6 pages = {119--139},
7 year = {1997}
8 }
9
10 @article{Friedman:2001,
11 author = {J. H. Friedman},
12 title = {Greedy Function Approximation: A Gradient Boosting Machine},
13 journal = {Annals of Statistics},
14 volume = {29},
15 number = {5},
16 pages = {1189--1232},
17 year = {2001}
18 }
19
20 @article{Friedman:2002,
21 author = {J. H. Friedman},
22 title = {Stochastic Gradient Boosting},
23 journal = {Computational Statistics and Data Analysis},
24 volume = {38},
25 number = {4},
26 pages = {367--378},
27 year = {2002}
28 }
29
30 @article{FHT:2000,
31 author = {J. H. Friedman and T. Hastie and R. Tibshirani},
32 title = {Additive Logistic Regression: a Statistical View of Boosting},
33 journal = {Annals of Statistics},
34 volume = {28},
35 number = {2},
36 pages = {337--374},
37 year = {2000}
38 }
39
40 @article{Kriegler:2010,
41 author = {B. Kriegler and R. Berk},
42 title = {Small Area Estimation of the Homeless in Los Angeles, An Application of Cost-Sensitive Stochastic Gradient Boosting},
43 journal = {Annals of Applied Statistics},
44 volume = {4},
45 number = {3},
46 pages = {1234--1255},
47 year = {2010}
48 }
49
50 @article{Ridgeway:1999,
51 author = {G. Ridgeway},
52 title = {The state of boosting},
53 journal = {Computing Science and Statistics},
54 volume = {31},
55 pages = {172--181},
56 year = {1999}
57 }
58
59 @techreport{Burges:2010,
60 author = {C. Burges},
61 title = {From RankNet to LambdaRank to LambdaMART: An Overview},
62 institution = {Microsoft Research}, number = {MSR-TR-2010-82},
63 year = {2010}
64 }
Binary diff not shown
0 %%
1 %% This is file `srcltx.sty',
2 %% generated with the docstrip utility.
3 %%
4 %% The original source files were:
5 %%
6 %% srcltx.dtx (with options: `package,latex')
7 %%
8 %% This package is in the public domain. It comes with no guarantees
9 %% and no reserved rights. You can use or modify this package at your
10 %% own risk.
11 %% Originally written by: Aleksander Simonic
12 %% Current maintainer: Stefan Ulrich <stefanulrich@users.sourceforge.net>
13 %%
14 \NeedsTeXFormat{LaTeX2e}
15 \ProvidesPackage{srcltx}[2006/11/12 v1.6 Source specials for inverse search in DVI files]
16 \newif\ifSRCOK \SRCOKtrue
17 \newif\ifsrc@debug@
18 \newif\ifsrc@dviwin@
19 \newif\ifsrc@winedt@\src@winedt@true
20 \newif\ifsrc@everypar@\src@everypar@true
21 \newif\ifsrc@everymath@\src@everymath@true
22 \RequirePackage{ifthen}
23 \DeclareOption{active}{\SRCOKtrue}
24 \DeclareOption{inactive}{\SRCOKfalse}
25 \DeclareOption{nowinedt}{\src@winedt@false}
26 \DeclareOption{debug}{\src@debug@true}
27 \DeclareOption{nopar}{\global\src@everypar@false}
28 \DeclareOption{nomath}{\global\src@everymath@false}
29 \newcommand*\src@maybe@space{}
30 \let\src@maybe@space\space
31 \DeclareOption{dviwin}{\let\src@maybe@space\relax}
32 \ExecuteOptions{active}
33 \ProcessOptions
34 \newcount\src@lastline
35 \global\src@lastline=-1
36 \newcommand*\src@debug{}
37 \def\src@debug#1{\ifsrc@debug@\typeout{DBG: |#1|}\fi}
38 \newcommand*\MainFile{}
39 \def\MainFile{\jobname.tex}
40 \newcommand*\CurrentInput{}
41 \gdef\CurrentInput{\MainFile}
42 \newcommand*\WinEdt{}
43 \def\WinEdt#1{\ifsrc@winedt@\typeout{:#1}\fi}
44 \newcommand\src@AfterFi{}
45 \def\src@AfterFi#1\fi{\fi#1}
46 \AtBeginDocument{%
47 \@ifpackageloaded{soul}{%
48 \let\src@SOUL@\SOUL@
49 \def\SOUL@#1{%
50 \ifSRCOK
51 \SRCOKfalse\src@SOUL@{#1}\SRCOKtrue
52 \else
53 \src@AfterFi\src@SOUL@{#1}%
54 \fi
55 }%
56 }{}%
57 }
58 \newcommand*\srcIncludeHook[1]{\protected@xdef\CurrentInput{#1.tex}}
59 \newcommand*\srcInputHook[1]{%
60 \src@getfilename@with@ext{#1}%
61 }
62 \newcommand*\src@spec{}
63 \def\src@spec{%
64 \ifSRCOK
65 \ifnum\inputlineno>\src@lastline
66 \global\src@lastline=\inputlineno
67 \src@debug{%
68 src:\the\inputlineno\src@maybe@space\CurrentInput}%
69 \special{src:\the\inputlineno\src@maybe@space\CurrentInput}%
70 \fi
71 \fi
72 }
73 \newcommand\src@before@file@hook{}
74 \newcommand\src@after@file@hook{}
75 \def\src@before@file@hook{%
76 \WinEdt{<+ \CurrentInput}%
77 \global\src@lastline=0
78 \ifSRCOK\special{src:1\src@maybe@space\CurrentInput}\fi
79 }
80 \def\src@after@file@hook#1{%
81 \WinEdt{<-}%
82 \global\src@lastline=\inputlineno
83 \global\advance\src@lastline by -1%
84 \gdef\CurrentInput{#1}%
85 \src@spec
86 }
87 \newcommand*\src@fname{}%
88 \newcommand*\src@tempa{}%
89 \newcommand*\src@extensions@path{}%
90 \newcommand*\src@getfilename@with@ext{}%
91 \def\src@extensions@path#1.#2\end{%
92 \ifthenelse{\equal{#2}{}}{%
93 \protected@edef\src@extensions@last{#1}%
94 \let\src@tempa\relax
95 }{%
96 \def\src@tempa{\src@extensions@path#2\end}%
97 }%
98 \src@tempa
99 }
100 \def\src@getfilename@with@ext#1{%
101 \expandafter\src@extensions@path#1.\end
102 \ifthenelse{\equal{\src@extensions@last}{tex}}{%
103 \protected@xdef\CurrentInput{#1}%
104 }{%
105 \protected@xdef\CurrentInput{#1.tex}%
106 }%
107 \PackageInfo{srcltx}{Expanded filename `#1' to `\CurrentInput'}%
108 }
109 \newcommand*\src@include{}
110 \newcommand*\src@@include{}
111 \let\src@include\include
112 \def\include#1{%
113 \src@spec
114 \clearpage
115 \expandafter\src@@include\expandafter{\CurrentInput}{#1}%
116 }%
117 \def\src@@include#1#2{%
118 \srcIncludeHook{#2}%
119 \src@before@file@hook
120 \src@include{#2}%
121 \src@after@file@hook{#1}%
122 }
123 \newcommand*\src@input{}
124 \newcommand*\src@@input{}
125 \newcommand*\src@@@input{}
126 \let\src@input\input
127 \def\input{\src@spec\@ifnextchar\bgroup\src@@input\@@input}%
128 \def\src@@input#1{%
129 \expandafter\src@@@input\expandafter{\CurrentInput}{#1}%
130 }
131 \def\src@@@input#1#2{%
132 \srcInputHook{#2}%
133 \src@before@file@hook
134 \src@input{#2}%
135 \src@after@file@hook{#1}%
136 }
137 \newcommand\Input{}
138 \let\Input\input
139 \ifsrc@everypar@
140 \newcommand*\src@old@everypar{}
141 \let\src@old@everypar\everypar
142 \newtoks\src@new@everypar
143 \let\everypar\src@new@everypar
144 \everypar\expandafter{\the\src@old@everypar}
145 \src@old@everypar{\the\src@new@everypar\src@spec}
146 \fi
147 \ifsrc@everymath@
148 \def\@tempa#1\the\everymath#2\delimiter{{#1\src@spec\the\everymath#2}}
149 \frozen@everymath=\expandafter\@tempa\the\frozen@everymath\delimiter
150 \fi
151 \newcommand*\src@bibliography{}
152 \newcommand*\src@@bibliography{}
153 \let\src@bibliography\bibliography
154 \def\bibliography#1{%
155 \expandafter\src@@bibliography\expandafter{\CurrentInput}{#1}%
156 }
157 \def\src@@bibliography#1#2{%
158 \protected@xdef\CurrentInput{\jobname.bbl}%
159 \src@before@file@hook
160 \src@bibliography{#2}%
161 \src@after@file@hook{#1}%
162 }
163 \newcommand*\src@old@output{}
164 \let\src@old@output\output
165 \newtoks\src@new@output
166 \let\output\src@new@output
167 \output\expandafter{\the\src@old@output}
168 \src@old@output{\SRCOKfalse\the\src@new@output}
169 \endinput
170 %%
171 %% End of file `srcltx.sty'.