diff --git a/src/booster/gradient_boost/gbm.rs b/src/booster/gradient_boost/gbm.rs index 5803940..ce042ef 100644 --- a/src/booster/gradient_boost/gbm.rs +++ b/src/booster/gradient_boost/gbm.rs @@ -57,13 +57,12 @@ use std::ops::ControlFlow; /// // Note that the default tolerance parameter is set as `1 / n_sample`, /// // where `n_sample = data.shape().0` is /// // the number of training examples in `data`. -/// let booster = GBM::init(&sample) -/// .loss(GBMLoss::L1); +/// let booster = GBM::init_with_loss(&sample, GBMLoss::L2); /// /// // Set the weak learner with setting parameters. /// let weak_learner = RegressionTreeBuilder::new(&sample) /// .max_depth(2) -/// .loss(LossType::L1) +/// .loss(LossType::L2) /// .build(); /// /// // Run `GBM` and obtain the resulting hypothesis `f`. @@ -80,7 +79,7 @@ use std::ops::ControlFlow; /// let training_loss = sample.target() /// .into_iter() /// .zip(predictions) -/// .map(|(y, fx)| (y - fx).abs()) +/// .map(|(y, fx)| (y - fx).powi(2)) /// .sum::() /// / n_sample; /// diff --git a/src/common/loss_functions.rs b/src/common/loss_functions.rs index 31a9978..792a24b 100644 --- a/src/common/loss_functions.rs +++ b/src/common/loss_functions.rs @@ -20,10 +20,16 @@ pub trait LossFunction { / n_items as f64 } - /// Gradient vector at current point. + /// Gradient vector at the current point. fn gradient(&self, predictions: &[f64], target: &[f64]) -> Vec; + /// Hessian at the current point. + /// Here, this method assumes that the Hessian is diagonal, + /// so that it returns a diagonal vector. + fn hessian(&self, predictions: &[f64], target: &[f64]) -> Vec; + + /// Best coffecient for the newly-attained hypothesis. fn best_coefficient( &self, @@ -45,6 +51,17 @@ pub enum GBMLoss { /// This loss function is also known as /// **Mean Squared Error (MSE)**. L2, + + + // /// Huber loss with parameter `delta`. + // /// Huber loss maps the given scalar `z` to + // /// `0.5 * z.powi(2)` if `z.abs() < delta`, + // /// `delta * (z.abs() - 0.5 * delta)`, otherwise. 
+ // Huber(f64), + + + // /// Quantile loss + // Quantile(f64), } @@ -53,6 +70,7 @@ impl LossFunction for GBMLoss { match self { Self::L1 => "L1 loss", Self::L2 => "L2 loss", + // Self::Huber(_) => "Huber loss", } } @@ -61,6 +79,14 @@ impl LossFunction for GBMLoss { match self { Self::L1 => (prediction - true_value).abs(), Self::L2 => (prediction - true_value).powi(2), + // Self::Huber(delta) => { + // let diff = (prediction - true_value).abs(); + // if diff < *delta { + // 0.5 * diff.powi(2) + // } else { + // delta * (diff - 0.5 * delta) + // } + // }, } } @@ -73,17 +99,59 @@ impl LossFunction for GBMLoss { match self { Self::L1 => { - predictions.iter() - .zip(target) - .map(|(p, y)| (p - y).signum() / n_sample) + target.iter() + .zip(predictions) + .map(|(y, p)| (y - p).signum()) + .collect() + }, + Self::L2 => { + target.iter() + .zip(predictions) + .map(|(y, p)| p - y) + .collect() + }, + // Self::Huber(delta) => { + // target.iter() + // .zip(predictions) + // .map(|(y, p)| { + // let diff = y - p; + // if diff.abs() < *delta { + // -diff + // } else { + // delta * diff.signum() + // } + // }) + // .collect::>() + // }, + } + } + + + fn hessian(&self, predictions: &[f64], target: &[f64]) -> Vec + { + let n_sample = predictions.len(); + assert_eq!(n_sample as usize, target.len()); + + match self { + Self::L1 => { + std::iter::repeat(0f64) + .take(n_sample) .collect() }, Self::L2 => { - predictions.iter() - .zip(target) - .map(|(p, y)| 2.0 * (p - y) / n_sample) + std::iter::repeat(1f64) + .take(n_sample) .collect() }, + // Self::Huber(delta) => { + // target.iter() + // .zip(predictions) + // .map(|(y, p)| { + // let diff = (y - p).abs(); + // if diff < *delta { 1f64 } else { 0f64 } + // }) + // .collect::>() + // }, } } diff --git a/src/lib.rs b/src/lib.rs index bc7b82c..4c9df33 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -216,7 +216,6 @@ pub use weak_learner::{ RegressionTree, RegressionTreeBuilder, RegressionTreeRegressor, - LossType, }; /// Some useful functions / traits diff --git a/src/prelude.rs b/src/prelude.rs index fee9c03..2f0c9ec 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -64,7 +64,6 @@ pub use crate::weak_learner::{ RegressionTree, RegressionTreeBuilder, RegressionTreeRegressor, - LossType, }; @@ -81,6 +80,7 @@ pub use crate::{ pub use crate::common::{ loss_functions::GBMLoss, + loss_functions::LossFunction, frank_wolfe::FWType, }; diff --git a/src/weak_learner.rs b/src/weak_learner.rs index 502aa2e..c851f96 100644 --- a/src/weak_learner.rs +++ b/src/weak_learner.rs @@ -43,7 +43,6 @@ pub use self::naive_bayes::{ pub use self::regression_tree::{ - LossType, RegressionTree, RegressionTreeBuilder, RegressionTreeRegressor, diff --git a/src/weak_learner/regression_tree.rs b/src/weak_learner/regression_tree.rs index 3dfdaa8..9abcc14 100644 --- a/src/weak_learner/regression_tree.rs +++ b/src/weak_learner/regression_tree.rs @@ -3,9 +3,6 @@ mod regression_tree_algorithm; // This file defines the regression tree regressor. mod regression_tree_regressor; -// This file defines the loss type. -mod loss; - // Regression Tree builder. 
mod builder; @@ -18,5 +15,4 @@ mod train_node; pub use regression_tree_algorithm::RegressionTree; pub use regression_tree_regressor::RegressionTreeRegressor; -pub use loss::LossType; pub use builder::RegressionTreeBuilder; diff --git a/src/weak_learner/regression_tree/bin.rs b/src/weak_learner/regression_tree/bin.rs index 4007dd4..5782d36 100644 --- a/src/weak_learner/regression_tree/bin.rs +++ b/src/weak_learner/regression_tree/bin.rs @@ -18,19 +18,8 @@ const EPS: f64 = 0.001; const NUM_TOLERANCE: f64 = 1e-9; -/// A struct that stores the first/second order derivative information. -#[derive(Clone,Default)] -pub(crate) struct GradientHessian { - pub(crate) grad: f64, - pub(crate) hess: f64, -} - - -impl GradientHessian { - pub(super) fn new(grad: f64, hess: f64) -> Self { - Self { grad, hess } - } -} +type Gradient = f64; +type Hessian = f64; /// Binning: A feature processing. @@ -209,11 +198,13 @@ impl Bins { &self, indices: &[usize], feat: &Feature, - gh: &[GradientHessian], - ) -> Vec<(Bin, GradientHessian)> + gradient: &[Gradient], + hessian: &[Hessian], + ) -> Vec<(Bin, Gradient, Hessian)> { let n_bins = self.0.len(); - let mut packed = vec![GradientHessian::default(); n_bins]; + let mut grad_pack = vec![0f64; n_bins]; + let mut hess_pack = vec![0f64; n_bins]; for &i in indices { let xi = feat[i]; @@ -226,10 +217,10 @@ impl Bins { range.0.start.partial_cmp(&xi).unwrap() }) .unwrap(); - packed[pos].grad += gh[i].grad; - packed[pos].hess += gh[i].hess; + grad_pack[pos] += gradient[i]; + hess_pack[pos] += hessian[i]; } - self.remove_zero_weight_pack_and_normalize(packed) + self.remove_zero_weight_pack_and_normalize(grad_pack, hess_pack) } @@ -254,42 +245,45 @@ impl Bins { /// - fn remove_zero_weight_pack_and_normalize( &self, - pack: Vec, - ) -> Vec<(Bin, GradientHessian)> + grad_pack: Vec, + hess_pack: Vec, + ) -> Vec<(Bin, Gradient, Hessian)> { - let mut iter = self.0.iter().zip(pack); + let mut iter = self.0.iter().zip(grad_pack.into_iter().zip(hess_pack)); - let (prev_bin, mut prev_gh) = iter.next().unwrap(); + let (prev_bin, (mut prev_grad, mut prev_hess)) = iter.next().unwrap(); let mut prev_bin = Bin::new(prev_bin.0.clone()); - let mut iter = iter.filter(|(_, gh)| { - gh.grad != 0.0 || gh.hess != 0.0 + let mut iter = iter.filter(|(_, (grad, hess))| { + *grad != 0.0 || *hess != 0.0 }); // The left-most bin might have zero weight. // In this case, find the next non-zero weight bin and merge. - if prev_gh.grad == 0.0 && prev_gh.hess == 0.0 { - let (next_bin, next_gh) = iter.next().unwrap(); + if prev_grad == 0.0 && prev_hess == 0.0 { + let (next_bin, (next_grad, next_hess)) = iter.next().unwrap(); let start = prev_bin.0.start; let end = next_bin.0.end; prev_bin = Bin::new(start..end); - prev_gh = next_gh; + prev_grad = next_grad; + prev_hess = next_hess; } let mut bin_and_gh = Vec::new(); - for (next_bin, next_gh) in iter { + for (next_bin, (next_grad, next_hess)) in iter { let start = prev_bin.0.start; let end = (prev_bin.0.end + next_bin.0.start) / 2.0; let bin = Bin::new(start..end); - bin_and_gh.push((bin, prev_gh)); + bin_and_gh.push((bin, prev_grad, prev_hess)); prev_bin = Bin::new(next_bin.0.clone()); prev_bin.0.start = end; - prev_gh = next_gh; + prev_grad = next_grad; + prev_hess = next_hess; } - bin_and_gh.push((prev_bin, prev_gh)); + bin_and_gh.push((prev_bin, prev_grad, prev_hess)); bin_and_gh } @@ -310,7 +304,7 @@ impl fmt::Display for Bins { let tail = bins.last() .map(|bin| format!("{bin}")) .unwrap(); - write!(f, "{head}, ... , {tail}") + write!(f, "{head}, ... 
, {tail}") } else { let line = bins.iter() .map(|bin| format!("{}", bin)) @@ -335,7 +329,7 @@ impl fmt::Display for Bin { ' ' }; let start = start.abs(); - format!("{sgn}{start: >.2}") + format!("{sgn}{start: >.1}") }; let end = if self.0.end == f64::MAX { String::from("+Inf") @@ -349,9 +343,9 @@ impl fmt::Display for Bin { ' ' }; let end = end.abs(); - format!("{sgn}{end: >.2}") + format!("{sgn}{end: >.1}") }; - write!(f, "[{start}, {end})") + write!(f, "[{start: >8}, {end: >8})") } } diff --git a/src/weak_learner/regression_tree/builder.rs b/src/weak_learner/regression_tree/builder.rs index e295dc9..d0e1d80 100644 --- a/src/weak_learner/regression_tree/builder.rs +++ b/src/weak_learner/regression_tree/builder.rs @@ -1,6 +1,7 @@ use crate::{Sample, RegressionTree}; use super::bin::*; -use super::loss::*; + +use crate::common::loss_functions::LossFunction; use std::collections::HashMap; @@ -28,7 +29,7 @@ pub const DEFAULT_LAMBDA_L2: f64 = 0.01; /// .build(); /// ``` #[derive(Clone)] -pub struct RegressionTreeBuilder<'a> { +pub struct RegressionTreeBuilder<'a, L> { sample: &'a Sample, /// Number of bins per feature. n_bins: HashMap<&'a str, usize>, @@ -41,11 +42,11 @@ pub struct RegressionTreeBuilder<'a> { lambda_l2: f64, /// Loss function - loss: LossType, + loss: Option, } -impl<'a> RegressionTreeBuilder<'a> { +impl<'a, L> RegressionTreeBuilder<'a, L> { /// Construct a new instance of `RegressionTreeBuilder`. /// By default, /// `RegressionTreeBuilder` sets the parameters as follows; @@ -66,15 +67,15 @@ impl<'a> RegressionTreeBuilder<'a> { let lambda_l2 = DEFAULT_LAMBDA_L2; - let loss = LossType::L2; + let loss = None; Self { sample, n_bins, max_depth, loss, lambda_l2, } } /// Specify the loss type. Default is `LossType::L2`. - pub fn loss(mut self, loss: LossType) -> Self { - self.loss = loss; + pub fn loss(mut self, loss: L) -> Self { + self.loss = Some(loss); self } @@ -108,11 +109,15 @@ impl<'a> RegressionTreeBuilder<'a> { }, } } +} +impl<'a, L> RegressionTreeBuilder<'a, L> + where L: LossFunction, +{ /// Build a `RegressionTree`. /// This method consumes `self`. - pub fn build(self) -> RegressionTree<'a> { + pub fn build(self) -> RegressionTree<'a, L> { let bins = self.sample.features() .iter() .map(|feature| { @@ -123,10 +128,12 @@ impl<'a> RegressionTreeBuilder<'a> { }) .collect::>(); + let loss = self.loss + .expect("failed to get loss function. you need to specify a function that implements `LossFunction` trait"); let n_sample = self.sample.shape().0; let regression_tree = RegressionTree::from_components( - bins, n_sample, self.max_depth, self.lambda_l2, self.loss, + bins, n_sample, self.max_depth, self.lambda_l2, loss, ); diff --git a/src/weak_learner/regression_tree/loss.rs b/src/weak_learner/regression_tree/loss.rs deleted file mode 100644 index a3181b6..0000000 --- a/src/weak_learner/regression_tree/loss.rs +++ /dev/null @@ -1,189 +0,0 @@ -//! Defines some criterions for regression tree. -use rayon::prelude::*; -use serde::{ - Serialize, - Deserialize -}; - -use std::fmt; -use crate::Sample; -use crate::weak_learner::common::type_and_struct::*; - -use super::bin::*; - -use std::collections::HashMap; - -/// The type of loss (error) function. -#[derive(Clone, Copy, Debug, Serialize, Deserialize)] -pub enum LossType { - /// Least Absolute Error - L1, - - - /// Least Squared Error - L2, - - - /// Huber loss with parameter `delta`. - /// Huber loss maps the given scalar `z` to - /// `0.5 * z.powi(2)` if `z.abs() < delta`, - /// `delta * (z.abs() - 0.5 * delta)`, otherwise. 
- Huber(f64), - - - // /// Quantile loss - // Quantile(f64), -} - - -impl fmt::Display for LossType { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let loss = match self { - Self::L1 => "L1 (Least Absolute) loss".to_string(), - Self::L2 => "L2 (Least Squared) loss".to_string(), - Self::Huber(delta) => format!("Huber loss (delta = {delta})"), - }; - write!(f, "{loss}") - } -} - - -impl LossType { - pub(crate) fn gradient_and_hessian( - &self, - targets: &[f64], - predictions: &[f64], - ) -> Vec - { - match self { - Self::L1 => { - targets.iter() - .zip(predictions) - .map(|(y, p)| { - let grad = (y - p).signum(); - GradientHessian::new(grad, 0.0) - }) - .collect::>() - }, - Self::L2 => { - targets.iter() - .zip(predictions) - .map(|(y, p)| { - let grad = p - y; - GradientHessian::new(grad, 1.0) - }) - .collect::>() - }, - - Self::Huber(delta) => { - targets.iter() - .zip(predictions) - .map(|(y, p)| { - let diff = p - y; - let (grad, hess) = if diff.abs() < *delta { - (diff, 1.0) - } else { - (delta * diff.signum(), 0.0) - }; - - GradientHessian::new(grad, hess) - }) - .collect::>() - }, - } - } - - - /// Returns the best splitting rule based on the loss function. - pub(super) fn best_split<'a>( - &self, - bins_map: &HashMap<&'_ str, Bins>, - sample: &'a Sample, - gh: &[GradientHessian], - idx: &[usize], - lambda_l2: f64, - ) -> (&'a str, Threshold) - { - - sample.features() - .par_iter() - .map(|feature| { - let name = feature.name(); - let bin = bins_map.get(name).unwrap(); - let pack = bin.pack(idx, feature, gh); - let (score, threshold) = self.best_split_at(pack, lambda_l2); - - (score, name, threshold) - }) - .max_by(|x, y| x.0.partial_cmp(&y.0).unwrap()) - .map(|(_, name, threshold)| (name, threshold)) - .expect("No feature that maximizes the score.") - } - - - fn best_split_at( - &self, - pack: Vec<(Bin, GradientHessian)>, - lambda_l2: f64, - ) -> (LossValue, Threshold) - { - let mut right_grad_sum = pack.par_iter() - .map(|(_, gh)| gh.grad) - .sum::(); - let mut right_hess_sum = pack.par_iter() - .map(|(_, gh)| gh.hess) - .sum::(); - - - let mut left_grad_sum = 0.0; - let mut left_hess_sum = 0.0; - - - let mut best_score = f64::MIN; - let mut best_threshold = f64::MIN; - - - for (bin, gh) in pack { - left_grad_sum += gh.grad; - left_hess_sum += gh.hess; - right_grad_sum -= gh.grad; - right_hess_sum -= gh.hess; - - - let score = - left_grad_sum.powi(2) / (left_hess_sum + lambda_l2) - + right_grad_sum.powi(2) / (right_hess_sum + lambda_l2); - if best_score < score { - best_score = score; - best_threshold = bin.0.end; - } - } - - (best_score.into(), best_threshold.into()) - } - - - pub(super) fn prediction_and_loss( - &self, - indices: &[usize], - gh: &[GradientHessian], - lambda_l2: f64, - ) -> (Prediction, LossValue) - { - let grad_sum = indices.par_iter() - .map(|&i| gh[i].grad) - .sum::(); - - let hess_sum = indices.par_iter() - .map(|&i| gh[i].hess) - .sum::(); - - let prediction = - grad_sum / (hess_sum + lambda_l2); - let loss_value = -0.5 * grad_sum.powi(2) / (hess_sum + lambda_l2); - - (prediction.into(), loss_value.into()) - } - -} - - diff --git a/src/weak_learner/regression_tree/regression_tree_algorithm.rs b/src/weak_learner/regression_tree/regression_tree_algorithm.rs index b8b700b..b5774f2 100644 --- a/src/weak_learner/regression_tree/regression_tree_algorithm.rs +++ b/src/weak_learner/regression_tree/regression_tree_algorithm.rs @@ -1,15 +1,19 @@ use crate::{Sample, WeakLearner}; use super::bin::*; +use crate::common::loss_functions::LossFunction; + use 
crate::weak_learner::common::{ split_rule::*, + type_and_struct::*, }; +use rayon::prelude::*; + use super::{ node::*, train_node::*, - loss::LossType, regression_tree_regressor::RegressionTreeRegressor, }; @@ -20,6 +24,10 @@ use std::cell::RefCell; use std::collections::HashMap; +type Gradient = f64; +type Hessian = f64; + + /// `RegressionTree` is the factory that generates /// a `RegressionTreeClassifier` for a given distribution over examples. /// @@ -42,7 +50,7 @@ use std::collections::HashMap; /// // Further, this example uses `Criterion::Edge` for splitting rule. /// let tree = RegressionTreeBuilder::new(&sample) /// .max_depth(2) -/// .loss(LossType::L2) +/// .loss(GBMLoss::L2) /// .build(); /// /// let n_sample = sample.shape().0; @@ -59,7 +67,7 @@ use std::collections::HashMap; /// / n_sample as f64; /// println!("loss (train) is: {loss}"); /// ``` -pub struct RegressionTree<'a> { +pub struct RegressionTree<'a, L> { bins: HashMap<&'a str, Bins>, // The maximal depth of the output trees max_depth: usize, @@ -70,23 +78,22 @@ pub struct RegressionTree<'a> { // Regularization parameter lambda_l2: f64, - - // LossType function - loss_type: LossType, + // Loss function + loss_func: L, } -impl<'a> RegressionTree<'a> { +impl<'a, L> RegressionTree<'a, L> { #[inline] pub(super) fn from_components( bins: HashMap<&'a str, Bins>, n_sample: usize, max_depth: usize, lambda_l2: f64, - loss_type: LossType, + loss_func: L, ) -> Self { - Self { bins, n_sample, max_depth, lambda_l2, loss_type, } + Self { bins, n_sample, max_depth, lambda_l2, loss_func, } } @@ -94,15 +101,16 @@ impl<'a> RegressionTree<'a> { fn full_tree( &self, sample: &Sample, - gh: &[GradientHessian], + gradient: &[Gradient], + hessian: &[Hessian], indices: Vec, max_depth: usize, ) -> Rc> { // Compute the best prediction that minimizes the training error // on this node. - let (pred, loss) = self.loss_type.prediction_and_loss( - &indices, gh, self.lambda_l2, + let (pred, loss) = prediction_and_loss( + &indices[..], gradient, hessian, self.lambda_l2, ); @@ -113,8 +121,8 @@ impl<'a> RegressionTree<'a> { // Find the best splitting rule. - let (feature, threshold) = self.loss_type.best_split( - &self.bins, &sample, gh, &indices[..], self.lambda_l2, + let (feature, threshold) = best_split( + &self.bins, sample, gradient, hessian, &indices[..], self.lambda_l2, ); let rule = Splitter::new(feature, threshold); @@ -139,8 +147,8 @@ impl<'a> RegressionTree<'a> { // ----- // At this point, `max_depth > 1` is guaranteed // so that one can grow the tree. 
- let ltree = self.full_tree(sample, gh, lindices, max_depth-1); - let rtree = self.full_tree(sample, gh, rindices, max_depth-1); + let ltree = self.full_tree(sample, gradient, hessian, lindices, max_depth-1); + let rtree = self.full_tree(sample, gradient, hessian, rindices, max_depth-1); TrainNode::branch(rule, ltree, rtree, pred, loss) @@ -148,7 +156,9 @@ impl<'a> RegressionTree<'a> { } -impl<'a> WeakLearner for RegressionTree<'a> { +impl<'a, L> WeakLearner for RegressionTree<'a, L> + where L: LossFunction, +{ type Hypothesis = RegressionTreeRegressor; @@ -165,30 +175,29 @@ impl<'a> WeakLearner for RegressionTree<'a> { let info = Vec::from([ ("# of bins (max)", format!("{n_bins}")), ("Max depth", format!("{}", self.max_depth)), - ("Split criterion", format!("{}", self.loss_type)), + ("Split criterion", format!("{}", self.loss_func.name())), ("Regularization param.", format!("{}", self.lambda_l2)), ]); Some(info) } - fn produce(&self, sample: &Sample, predictions: &[f64]) -> Self::Hypothesis { - let gh = self.loss_type.gradient_and_hessian( - sample.target(), - predictions, - ); - + let gradient = self.loss_func.gradient(predictions, sample.target()); + let hessian = self.loss_func.hessian(predictions, sample.target()); - let indices = (0..self.n_sample).filter(|&i| { - gh[i].grad != 0.0 || gh[i].hess != 0.0 - }) - .collect::>(); + let indices = (0..self.n_sample).collect::>(); - let tree = self.full_tree(sample, &gh, indices, self.max_depth); + let tree = self.full_tree( + sample, + &gradient[..], + &hessian[..], + indices, + self.max_depth, + ); let root = Node::from( Rc::try_unwrap(tree) @@ -200,9 +209,9 @@ impl<'a> WeakLearner for RegressionTree<'a> { } } - - -impl fmt::Display for RegressionTree<'_> { +impl fmt::Display for RegressionTree<'_, L> + where L: LossFunction, +{ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!( f, @@ -214,7 +223,7 @@ impl fmt::Display for RegressionTree<'_> { - Bins:\ ", self.max_depth, - self.loss_type, + self.loss_func.name(), )?; @@ -242,3 +251,99 @@ impl fmt::Display for RegressionTree<'_> { write!(f, "----------") } } + + +/// Returns the best splitting rule based on the loss function. +fn best_split<'a>( + bins_map: &HashMap<&'_ str, Bins>, + sample: &'a Sample, + gradient: &[Gradient], + hessian: &[Hessian], + idx: &[usize], + lambda_l2: f64, +) -> (&'a str, Threshold) +{ + + sample.features() + .par_iter() + .map(|feature| { + let name = feature.name(); + let bin = bins_map.get(name).unwrap(); + let pack = bin.pack(idx, feature, gradient, hessian); + let (score, threshold) = best_split_at(pack, lambda_l2); + + (score, name, threshold) + }) + .max_by(|x, y| x.0.partial_cmp(&y.0).unwrap()) + .map(|(_, name, threshold)| (name, threshold)) + .expect("No feature that maximizes the score.") +} + +/// this code is implemented based on Algorithm 3 of the following paper: +/// Tianqi Chen and Carlos Guestrin. 
+/// XGBoost: A scalable tree boosting system [KDD '16] +fn best_split_at( + pack: Vec<(Bin, Gradient, Hessian)>, + lambda_l2: f64, +) -> (LossValue, Threshold) +{ + let mut right_grad_sum = pack.par_iter() + .map(|(_, grad, _)| grad) + .sum::(); + let mut right_hess_sum = pack.par_iter() + .map(|(_, _, hess)| hess) + .sum::(); + + + let mut left_grad_sum = 0.0; + let mut left_hess_sum = 0.0; + + + let mut best_score = f64::MIN; + let mut best_threshold = f64::MIN; + + + for (bin, grad, hess) in pack { + left_grad_sum += grad; + left_hess_sum += hess; + right_grad_sum -= grad; + right_hess_sum -= hess; + + + let score = + left_grad_sum.powi(2) / (left_hess_sum + lambda_l2) + + right_grad_sum.powi(2) / (right_hess_sum + lambda_l2); + if best_score < score { + best_score = score; + best_threshold = bin.0.end; + } + } + + (best_score.into(), best_threshold.into()) +} + +/// returns the prediction value and the loss value of a leaf. +/// this function is implemented based on Eqs. (5), (6) of the following paper: +/// Tianqi Chen and Carlos Guestrin. +/// XGBoost: A scalable tree boosting system [KDD '16] +fn prediction_and_loss( + indices: &[usize], + gradient: &[Gradient], + hessian: &[Hessian], + lambda_l2: f64, +) -> (Prediction, LossValue) +{ + let grad_sum = indices.par_iter() + .map(|&i| gradient[i]) + .sum::(); + + let hess_sum = indices.par_iter() + .map(|&i| hessian[i]) + .sum::(); + + let prediction = - grad_sum / (hess_sum + lambda_l2); + let loss_value = -0.5 * grad_sum.powi(2) / (hess_sum + lambda_l2); + + (prediction.into(), loss_value.into()) +} + diff --git a/tests/gbm.rs b/tests/gbm.rs index 9bfd1b9..6b221a5 100644 --- a/tests/gbm.rs +++ b/tests/gbm.rs @@ -8,8 +8,9 @@ pub mod gbm_boston { use super::*; #[test] fn boston() { + let file = "california-housing.csv"; let mut path = env::current_dir().unwrap(); - path.push("tests/dataset/california-housing.csv"); + path.push(format!("tests/dataset/{file}")); let sample = SampleReader::new() .file(path) @@ -21,11 +22,10 @@ pub mod gbm_boston { let n_sample = sample.shape().0 as f64; - let mut gbm = GBM::init_with_loss(&sample, GBMLoss::L2) - .loss(GBMLoss::L2); + let mut gbm = GBM::init_with_loss(&sample, GBMLoss::L2); let tree = RegressionTreeBuilder::new(&sample) .max_depth(3) - .loss(LossType::L2) + .loss(GBMLoss::L2) .build(); println!("{tree}"); @@ -40,24 +40,24 @@ pub mod gbm_boston { .zip(&predictions[..]) .map(|(t, p)| (t - p).abs()) .sum::() / n_sample; - println!("L1-Loss (boston_housing.csv, GBM, RegressionTree): {loss}"); + println!("L1-Loss ({file}, GBM, RegressionTree): {loss}"); let loss = target.iter() .copied() .zip(&predictions[..]) - .map(|(t, p)| (t - p).powi(2)) + .map(|(t, p)| (t - p).abs()) .sum::() / n_sample; - println!("L2-Loss (boston_housing.csv, GBM, RegressionTree): {loss}"); + println!("L2-Loss ({file}, GBM, RegressionTree): {loss}"); let mean = target.iter().sum::() / n_sample; let loss = target.iter() .copied() - .map(|y| (y - mean).powi(2)) + .map(|y| (y - mean).abs()) .sum::() / n_sample; - println!("Loss (mean prediction): {loss}"); + println!("Loss ({file}, mean prediction): {loss}"); assert!(true); }
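
A minimal caller-side sketch of the API after this change, based on the doc examples above. It assumes `sample: Sample` has already been loaded (e.g. with `SampleReader`) and that `GBM`, `GBMLoss`, `RegressionTreeBuilder`, and `LossFunction` are imported from the crate prelude; the variable names are illustrative only:

    // GBM now receives its loss function at construction time.
    let booster = GBM::init_with_loss(&sample, GBMLoss::L2);

    // The regression-tree weak learner accepts any type implementing
    // `LossFunction` (here `GBMLoss::L2`) in place of the removed
    // `LossType` enum, so the booster and the weak learner are
    // configured with the same loss type.
    let weak_learner = RegressionTreeBuilder::new(&sample)
        .max_depth(2)
        .loss(GBMLoss::L2)
        .build();

Note that calling `.loss(...)` is now mandatory in practice: the builder stores the loss as an `Option` and `build()` panics via `expect("failed to get loss function ...")` when no loss has been supplied.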