From 9592182ccb0374fba48b5b870ac65f28d09a62f6 Mon Sep 17 00:00:00 2001 From: rmitsuboshi Date: Mon, 13 Jan 2025 13:51:20 +0900 Subject: [PATCH 1/4] minor updates in gbm.rs --- src/booster/gradient_boost/gbm.rs | 17 +++++++++-------- tests/gbm.rs | 6 +++--- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/booster/gradient_boost/gbm.rs b/src/booster/gradient_boost/gbm.rs index 9f36cc9..5803940 100644 --- a/src/booster/gradient_boost/gbm.rs +++ b/src/booster/gradient_boost/gbm.rs @@ -87,7 +87,7 @@ use std::ops::ControlFlow; /// /// println!("Training Loss is: {training_loss}"); /// ``` -pub struct GBM<'a, F> { +pub struct GBM<'a, F, L> { // Training data sample: &'a Sample, @@ -103,7 +103,7 @@ pub struct GBM<'a, F> { // Some struct that implements `LossFunction` trait - loss: GBMLoss, + loss: L, // Max iteration until GBM guarantees the optimality. @@ -122,11 +122,11 @@ pub struct GBM<'a, F> { -impl<'a, F> GBM<'a, F> +impl<'a, F, L> GBM<'a, F, L> { /// Initialize the `GBM`. /// This method sets some parameters `GBM` holds. - pub fn init(sample: &'a Sample) -> Self { + pub fn init_with_loss(sample: &'a Sample, loss: L) -> Self { let n_sample = sample.shape().0; let predictions = vec![0.0; n_sample]; @@ -138,7 +138,7 @@ impl<'a, F> GBM<'a, F> weights: Vec::new(), hypotheses: Vec::new(), - loss: GBMLoss::L2, + loss, max_iter: 100, @@ -150,7 +150,7 @@ impl<'a, F> GBM<'a, F> } -impl<'a, F> GBM<'a, F> { +impl<'a, F, L> GBM<'a, F, L> { /// Returns the maximum iteration /// of the `GBM` to find a combined hypothesis /// that has error at most `tolerance`. @@ -170,15 +170,16 @@ impl<'a, F> GBM<'a, F> { /// Set the Loss Type. - pub fn loss(mut self, loss_type: GBMLoss) -> Self { + pub fn loss(mut self, loss_type: L) -> Self { self.loss = loss_type; self } } -impl Booster for GBM<'_, F> +impl Booster for GBM<'_, F, L> where F: Regressor + Clone, + L: LossFunction, { type Output = WeightedMajority; diff --git a/tests/gbm.rs b/tests/gbm.rs index 78b5c88..9bfd1b9 100644 --- a/tests/gbm.rs +++ b/tests/gbm.rs @@ -9,19 +9,19 @@ pub mod gbm_boston { #[test] fn boston() { let mut path = env::current_dir().unwrap(); - path.push("tests/dataset/boston_housing.csv"); + path.push("tests/dataset/california-housing.csv"); let sample = SampleReader::new() .file(path) .has_header(true) - .target_feature("target") + .target_feature("MedHouseVal") .read() .unwrap(); let n_sample = sample.shape().0 as f64; - let mut gbm = GBM::init(&sample) + let mut gbm = GBM::init_with_loss(&sample, GBMLoss::L2) .loss(GBMLoss::L2); let tree = RegressionTreeBuilder::new(&sample) .max_depth(3) From 1f44b400cc598d519ce83763615a634dd9dcdc74 Mon Sep 17 00:00:00 2001 From: rmitsuboshi Date: Sun, 12 Jan 2025 14:46:42 +0900 Subject: [PATCH 2/4] minor updates --- src/booster/mlpboost.rs | 2 +- src/booster/mlpboost/mlpboost_algorithm.rs | 4 +- src/booster/mlpboost/perturbed_lp_model.rs | 225 ++++++++++++++++++ src/common/checker.rs | 23 +- src/common/frank_wolfe.rs | 1 - src/common/utils.rs | 89 ++----- src/weak_learner/decision_tree/criterion.rs | 52 ++-- .../decision_tree/decision_tree_algorithm.rs | 18 +- 8 files changed, 296 insertions(+), 118 deletions(-) create mode 100644 src/booster/mlpboost/perturbed_lp_model.rs diff --git a/src/booster/mlpboost.rs b/src/booster/mlpboost.rs index 01fb4b3..a7a23f9 100644 --- a/src/booster/mlpboost.rs +++ b/src/booster/mlpboost.rs @@ -3,7 +3,7 @@ pub mod mlpboost_algorithm; #[cfg(not(feature="gurobi"))] -mod lp_model; +mod perturbed_lp_model; #[cfg(feature="gurobi")] mod gurobi_lp_model; 
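For reference: with the PATCH 1/4 change to `gbm.rs` above, `GBM` is now generic over a loss type `L`, and the loss is supplied at construction time through `init_with_loss` instead of being hard-coded to `GBMLoss::L2` inside `init`. A minimal sketch of the new call site follows; the sample-loading lines mirror the updated `tests/gbm.rs`, while the `miniboosts::prelude` import is an assumption about the crate-level re-exports and is not part of these hunks.

```rust
use miniboosts::prelude::*;   // assumed re-exports: `GBM`, `GBMLoss`, `SampleReader`
use std::env;

// Load the regression sample exactly as the updated test does.
let mut path = env::current_dir().unwrap();
path.push("tests/dataset/california-housing.csv");
let sample = SampleReader::new()
    .file(path)
    .has_header(true)
    .target_feature("MedHouseVal")
    .read()
    .unwrap();

// New construction path: the loss is a constructor argument,
// and `.loss(...)` remains available to swap in another value later.
let booster = GBM::init_with_loss(&sample, GBMLoss::L2);
```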
diff --git a/src/booster/mlpboost/mlpboost_algorithm.rs b/src/booster/mlpboost/mlpboost_algorithm.rs index 625cc45..bf3c70f 100644 --- a/src/booster/mlpboost/mlpboost_algorithm.rs +++ b/src/booster/mlpboost/mlpboost_algorithm.rs @@ -4,7 +4,7 @@ //! #[cfg(not(feature="gurobi"))] -use super::lp_model::LPModel; +use super::perturbed_lp_model::LPModel; #[cfg(feature="gurobi")] use super::gurobi_lp_model::LPModel; @@ -271,7 +271,7 @@ impl<'a, F> MLPBoost<'a, F> { // `ub` is the upper-bound of distribution for each example. let ub = 1.0 / self.nu; - let lp_model = RefCell::new(LPModel::init(self.n_sample, ub)); + let lp_model = RefCell::new(LPModel::init(self.eta, self.n_sample, ub)); self.secondary = Some(lp_model); } diff --git a/src/booster/mlpboost/perturbed_lp_model.rs b/src/booster/mlpboost/perturbed_lp_model.rs new file mode 100644 index 0000000..e288470 --- /dev/null +++ b/src/booster/mlpboost/perturbed_lp_model.rs @@ -0,0 +1,225 @@ +use clarabel::{ + algebra::*, + solver::*, +}; + +use rand; +use rand::rngs::StdRng; +use rand::prelude::*; + +use crate::{ + Sample, + common::utils, +}; + +use crate::hypothesis::Classifier; + +use std::iter; + +const SEED: u64 = 7777; + +/// A linear programming model for edge minimization with perturbation. +/// `LPModel` solves the following: +/// +/// ```txt +/// min γ - (1/η) Σ_i [ r_i * d_i ] +/// γ,d +/// s.t. Σ_i d_i y_i h_j (x_i) ≤ γ, ∀j = 1, 2, ..., t +/// Σ_i d_i = 1, +/// d_1, d_2, ..., d_m ≤ 1/ν +/// d_1, d_2, ..., d_m ≥ 0. +/// ``` +/// +/// To solve the problem we build the constraint matrix +/// ```txt +/// # of +/// rows γ d1 ... dm +/// ┏ ┃ ┓ ┏ ┓ +/// 1 ┃ 0 ┃ 1 ... 1 ┃ = ┃ 1 ┃ +/// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +/// ┃ 0 ┃ ┃ ≤ ┃ 0 ┃ +/// ┃ 0 ┃ ┃ ≤ ┃ 0 ┃ +/// ┃ . ┃ (-1) * Identity matrix ┃ . ┃ . ┃ +/// m ┃ . ┃ m x m ┃ . ┃ . ┃ +/// ┃ . ┃ ┃ . ┃ . ┃ +/// ┃ 0 ┃ ┃ ≤ ┃ 0 ┃ +/// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +/// ┃ 0 ┃ ┃ ≤ ┃ 1/ν ┃ +/// ┃ 0 ┃ ┃ ≤ ┃ 1/ν ┃ +/// m ┃ . ┃ Identity matrix ┃ . ┃ . ┃ +/// ┃ . ┃ m x m ┃ . ┃ . ┃ +/// ┃ . ┃ ┃ . ┃ . ┃ +/// ┃ 0 ┃ ┃ ≤ ┃ 1/ν ┃ +/// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +/// ┃ -1 ┃ y_1 h_1(x_1) ... y_m h_1(x_m) ┃ ≤ ┃ 0 ┃ +/// ┃ -1 ┃ y_1 h_2(x_1) ... y_m h_2(x_m) ┃ ≤ ┃ 0 ┃ +/// ┃ . ┃ . ... . ┃ . ┃ . ┃ +/// H ┃ . ┃ . ... . ┃ . ┃ . ┃ +/// ┃ . ┃ . ... . ┃ . ┃ . ┃ +/// ┃ -1 ┃ y_1 h_T(x_1) ... y_m h_T(x_m) ┃ ≤ ┃ 0 ┃ +/// ┗ ┃ ┛ ┗ ┛ +/// +/// # of +/// cols 1 ┃ m +/// ``` +pub(super) struct LPModel { + pub(self) n_examples: usize, // number of columns + pub(self) n_hypotheses: usize, // number of rows + pub(self) margins: Vec>, // margin vectors + pub(self) weights: Vec, // weight on hypothesis + pub(self) rng: StdRng, // Rng + pub(self) dist: Vec, // distribution over examples + pub(self) cap_inv: f64, // the capping parameter, `1/ν.` + pub(self) eta: f64, // regularization parameter +} + + +impl LPModel { + /// Initialize the LP model. + /// arguments. + /// - `size`: Number of variables (Number of examples). + /// - `upper_bound`: Capping parameter. `[1, size]`. + pub(super) fn init(eta: f64, size: usize, upper_bound: f64) -> Self { + let margins = vec![vec![]; size]; + Self { + n_examples: size, + n_hypotheses: 0usize, + rng: rand::SeedableRng::seed_from_u64(SEED), + margins, + weights: Vec::with_capacity(0usize), + dist: Vec::with_capacity(0usize), + cap_inv: upper_bound, + eta, + } + } + + + /// Solve the edge minimization problem + /// over the hypotheses `h1, ..., ht` + /// and outputs the optimal value. 
+ pub(super) fn update( + &mut self, + sample: &Sample, + opt_h: Option<&F> + ) -> Vec + where F: Classifier + { + if let Some(clf) = opt_h { + self.n_hypotheses += 1; + let margins = utils::margins_of_hypothesis(sample, clf); + self.margins.iter_mut() + .zip(margins) + .for_each(|(mvec, yh)| { mvec.push(yh); }); + let constraint_matrix = self.build_constraint_matrix(); + let sense = self.build_sense(); + let rhs = self.build_rhs(); + + + let settings = DefaultSettingsBuilder::default() + .equilibrate_enable(true) + .verbose(false) + .build() + .unwrap(); + let linear = self.build_linear_part_objective(); + let n_variables = self.n_examples + 1; + let zero_mat = CscMatrix::::zeros((n_variables, n_variables)); + let mut solver = DefaultSolver::new( + &zero_mat, + &linear, + &constraint_matrix, + &rhs, + &sense[..], + settings + ); + + solver.solve(); + + let ix = 2 * self.n_examples + 1; + self.weights = solver.solution.z[ix..].to_vec(); + self.dist = solver.solution.x[1..].to_vec(); + } + + self.weights.clone() + } + + + pub(self) fn build_linear_part_objective(&mut self) -> Vec { + std::iter::once(1f64) + .chain((0..self.n_examples).map(|_| { + let r = self.rng.gen::(); + (1f64 / r).ln().ln() / self.eta + })) + .collect() + } + + + /// Build the constraint matrix in the 0-indexed CSC form. + pub(self) fn build_constraint_matrix(&self) -> CscMatrix:: { + let n_rows = 1 + 2*self.n_examples + self.n_hypotheses; + let n_cols = 1 + self.n_examples; + + let mut col_ptr = Vec::new(); + let mut row_val = Vec::new(); + let mut nonzero = Vec::new(); + + // the first index of margin constraints + let gam = 1 + 2 * self.n_examples; + col_ptr.push(0); + row_val.extend(gam..n_rows); + nonzero.extend(iter::repeat(-1f64).take(n_rows - gam)); + + for (j, margins) in (1..).zip(&self.margins) { + col_ptr.push(row_val.len()); + // the sum constraint: `Σ_i d_i = 1` + row_val.push(0); + nonzero.push(1f64); + + // non-negative constraint: `d_i ≥ 0` + row_val.push(j); + nonzero.push(-1f64); + + // capping constraint: `d_i ≤ 1/ν` + row_val.push(self.n_examples + j); + nonzero.push(self.cap_inv); + + // margin constraints of `i`-th column + for (i, &yh) in (0..).zip(margins) { + row_val.push(gam + i); + nonzero.push(yh); + } + } + col_ptr.push(row_val.len()); + + CscMatrix::new( + n_rows, + n_cols, + col_ptr, + row_val, + nonzero, + ) + } + + + /// Build the vector of constraint sense: `[=, ≤, ≥, ...].` + pub(self) fn build_sense(&self) -> Vec> { + let n_ineq = 2*self.n_examples + self.n_hypotheses; + vec![ + ZeroConeT(1), + NonnegativeConeT(n_ineq), + ] + } + + + /// Build the right-hand-side of the constraints. + pub(self) fn build_rhs(&self) -> Vec { + let n_constraints = 1 + 2*self.n_examples + self.n_hypotheses; + let mut rhs = Vec::with_capacity(n_constraints); + rhs.push(1f64); + rhs.extend(iter::repeat(0f64).take(self.n_examples)); + rhs.extend(iter::repeat(self.cap_inv).take(self.n_examples)); + rhs.extend(iter::repeat(0f64).take(self.n_hypotheses)); + rhs + } +} + + diff --git a/src/common/checker.rs b/src/common/checker.rs index c00c018..9e88e6a 100644 --- a/src/common/checker.rs +++ b/src/common/checker.rs @@ -4,6 +4,9 @@ use crate::Sample; +const SIMPLEX_TOLERANCE: f64 = 1e-5; + + /// Check whether the training sample is valid or not. 
#[inline(always)] pub(crate) fn check_sample(sample: &Sample) @@ -26,13 +29,13 @@ pub(crate) fn check_sample(sample: &Sample) #[inline(always)] pub(crate) fn check_nu(nu: f64, n_sample: usize) { let n_sample = n_sample as f64; - assert!((1.0..=n_sample).contains(&nu)); + assert!((1f64..=n_sample).contains(&nu)); } /// Check the stepsize #[inline(always)] pub(crate) fn check_stepsize(size: f64) { - assert!((0.0..=1.0).contains(&size)); + assert!((0f64..=1f64).contains(&size)); } @@ -46,8 +49,18 @@ pub(crate) fn check_capped_simplex_condition( check_nu(nu, length); let sum = slice.iter().sum::(); - assert!((sum - 1.0).abs() < 1e-6, "Got sum = {sum}"); + let diff = (sum - 1f64).abs(); + if diff > SIMPLEX_TOLERANCE { + println!(">> diff is {diff} > {SIMPLEX_TOLERANCE}"); + println!(">> sum = {sum}"); + assert!((sum - 1f64).abs() < SIMPLEX_TOLERANCE, "sum(dist[..]) = {sum}"); + } + assert!((sum - 1f64).abs() < SIMPLEX_TOLERANCE, "sum(dist[..]) = {sum}"); - let ub = 1.0 / nu; - assert!(slice.iter().all(|s| (0.0..=ub).contains(s))); + let ub = 1f64 / nu; + assert!( + slice.iter().all(|s| (0f64..=ub).contains(s)), + "capping constraint is violated!" + ); } + diff --git a/src/common/frank_wolfe.rs b/src/common/frank_wolfe.rs index f4c54ed..7ef873d 100644 --- a/src/common/frank_wolfe.rs +++ b/src/common/frank_wolfe.rs @@ -330,7 +330,6 @@ impl FrankWolfe { // Pairwise update! let max_stepsize = weights[position_of_worst_one]; - // TODO // Find the best stepsize by line-search let local_best_margins = utils::margins_of_hypothesis( sample, &hypotheses[position_of_local_best_one] diff --git a/src/common/utils.rs b/src/common/utils.rs index 93528f2..98b3475 100644 --- a/src/common/utils.rs +++ b/src/common/utils.rs @@ -6,6 +6,7 @@ use rayon::prelude::*; use crate::{Sample, Classifier}; use crate::common::checker; + /// Returns the edge of a single hypothesis for the given distribution. /// Here `edge` is the weighted training loss. /// @@ -167,53 +168,6 @@ pub fn exp_distribution_from_margins( } -// /// Projects the given distribution onto the capped simplex. -// /// Capped simplex with parameter `ν (nu)` is defined as -// /// -// /// ```txt -// /// Δ_{m, ν} := { d ∈ [0, 1/ν]^m | sum( d[i] ) = 1 } -// /// ``` -// /// -// /// That is, each coordinate takes at most `1/ν`. -// /// Specifying `ν = 1` yields the no-capped simplex. -// #[inline(always)] -// pub fn project_distribution_to_capped_simplex( -// nu: f64, -// iter: I, -// ) -> Vec -// where I: Iterator, -// { -// let mut dist: Vec<_> = iter.collect(); -// let n_sample = dist.len(); -// -// // Construct a vector of indices sorted in descending order of `dist`. -// let mut ix = (0..n_sample).collect::>(); -// ix.sort_by(|&i, &j| dist[j].partial_cmp(&dist[i]).unwrap()); -// -// let mut sum = dist.iter().sum::(); -// -// let ub = 1.0 / nu; -// -// -// let mut ix = ix.into_iter().enumerate(); -// for (k, i) in ix.by_ref() { -// let xi = (1.0 - ub * k as f64) / sum; -// if xi * dist[i] <= ub { -// dist[i] = xi * dist[i]; -// for (_, j) in ix { -// dist[j] = xi * dist[j]; -// } -// break; -// } -// sum -= dist[i]; -// dist[i] = ub; -// } -// -// checker::check_capped_simplex_condition(&dist, nu); -// dist -// } - - /// Projects the given logarithmic distribution onto the capped simplex. /// Capped simplex with parameter `ν (nu)` is defined as /// @@ -233,37 +187,12 @@ pub fn project_log_distribution_to_capped_simplex( let mut dist: Vec<_> = iter.collect(); let n_sample = dist.len(); - // Construct a vector of indices sorted in descending order of `dist`. 
+ // Construct a vector of indices `ix.` let mut ix = (0..n_sample).collect::>(); + // sort `ix` in the descending order of `dist`. ix.sort_by(|&i, &j| dist[j].partial_cmp(&dist[i]).unwrap()); - // let mut ln_z = dist[ix[0]]; - // for &i in &ix[1..] { - // let ln_d = dist[i]; - // let small = ln_z.min(ln_d); - // let large = ln_z.max(ln_d); - // ln_z = large + (1f64 + (small - large).exp()).ln(); - // } - - // let v = 1f64 / nu; - // let ln_v = v.ln(); - - // for i in 0..n_sample { - // let ln_xi = (1f64 - v * i as f64).ln() - ln_z; - // let ln_di = dist[ix[i]]; - // if ln_xi + ln_di <= ln_v { - // for j in i..n_sample { - // let ln_dj = dist[ix[j]]; - // dist[ix[j]] = (ln_xi + ln_dj).exp(); - // } - // break; - // } - // dist[ix[i]] = v; - // ln_z = ln_z + (1f64 - (ln_di - ln_z).exp()).ln(); - // } - - // `logsums[k] = ln( sum_{i=0}^{k-1} exp( -η (Aw)i ) ) let mut logsums: Vec = Vec::with_capacity(n_sample); ix.iter().rev() @@ -288,6 +217,18 @@ pub fn project_log_distribution_to_capped_simplex( let mut ix_with_logsum = ix.into_iter().zip(logsums).enumerate(); + // NOTE: + // The following is the efficient projection + // onto the probability simplex capped by `1/ν.` + // This code comes from the paper: + // + // Shai Shalev-Shwartz and Yoram Singer. + // On the equivalence of weak learnability and linear separability: + // new relaxations and efficient boosting algorithms. + // [Journal of Machine Learning 2010] + // + // Note that the parameter `ν` in the paper corresponds to + // `1/nu` in this code. for (i, (i_sorted, logsum)) in ix_with_logsum.by_ref() { let log_xi = (1.0 - ub * i as f64).ln() - logsum; // TODO replace this line by `get_unchecked` diff --git a/src/weak_learner/decision_tree/criterion.rs b/src/weak_learner/decision_tree/criterion.rs index 52aa27c..a4a91d2 100644 --- a/src/weak_learner/decision_tree/criterion.rs +++ b/src/weak_learner/decision_tree/criterion.rs @@ -186,13 +186,13 @@ fn split_by_entropy(pack: Vec<(Bin, LabelToWeight)>) let mut left_weight = LabelToWeight::new(); - let mut left_weight_sum = 0.0; + let mut left_weight_sum = 0f64; let mut right_weight = LabelToWeight::new(); - let mut right_weight_sum = 0.0; + let mut right_weight_sum = 0f64; for (_, mp) in pack.iter() { for (y, w) in mp.iter() { - let entry = right_weight.entry(*y).or_insert(0.0); + let entry = right_weight.entry(*y).or_insert(0f64); *entry += w; right_weight_sum += w; } @@ -203,7 +203,7 @@ fn split_by_entropy(pack: Vec<(Bin, LabelToWeight)>) for (bin, map) in pack { for (y, w) in map { - let entry = left_weight.entry(y).or_insert(0.0); + let entry = left_weight.entry(y).or_insert(0f64); *entry += w; left_weight_sum += w; let entry = right_weight.get_mut(&y).unwrap(); @@ -211,7 +211,7 @@ fn split_by_entropy(pack: Vec<(Bin, LabelToWeight)>) right_weight_sum -= w; } let lp = left_weight_sum / weight_sum; - let rp = (1.0 - lp).max(0.0); + let rp = (1f64 - lp).max(0f64); let left_impurity = entropic_impurity(&left_weight); let right_impurity = entropic_impurity(&right_weight); @@ -244,7 +244,7 @@ fn split_by_edge(pack: Vec<(Bin, LabelToWeight)>) -> (f64, Score) { for (bin, map) in pack { - edge -= 2.0 * map.into_iter() + edge -= 2f64 * map.into_iter() .map(|(y, d)| y as f64 * d) .sum::(); @@ -266,13 +266,13 @@ fn split_by_gini(pack: Vec<(Bin, LabelToWeight)>) -> (f64, Score) { let mut left_weight = LabelToWeight::new(); - let mut left_weight_sum = 0.0; + let mut left_weight_sum = 0f64; let mut right_weight = LabelToWeight::new(); - let mut right_weight_sum = 0.0; + let mut right_weight_sum = 
0f64; for (_, mp) in pack.iter() { for (y, w) in mp.iter() { - let entry = right_weight.entry(*y).or_insert(0.0); + let entry = right_weight.entry(*y).or_insert(0f64); *entry += w; right_weight_sum += w; } @@ -283,7 +283,7 @@ fn split_by_gini(pack: Vec<(Bin, LabelToWeight)>) -> (f64, Score) { for (bin, map) in pack { for (y, w) in map { - let entry = left_weight.entry(y).or_insert(0.0); + let entry = left_weight.entry(y).or_insert(0f64); *entry += w; left_weight_sum += w; let entry = right_weight.get_mut(&y).unwrap(); @@ -291,10 +291,10 @@ fn split_by_gini(pack: Vec<(Bin, LabelToWeight)>) -> (f64, Score) { right_weight_sum -= w; - if *entry <= 0.0 { right_weight.remove(&y); } + if *entry <= 0f64 { right_weight.remove(&y); } } let lp = left_weight_sum / weight_sum; - let rp = (1.0 - lp).max(0.0); + let rp = (1f64 - lp).max(0f64); let left_impurity = gini_impurity(&left_weight); let right_impurity = gini_impurity(&right_weight); @@ -318,26 +318,26 @@ fn split_by_twoing(pack: Vec<(Bin, LabelToWeight)>) -> (f64, Score) { let mut labels = HashSet::new(); for (_, mp) in pack.iter() { for (y, w) in mp.iter() { - let entry = right_weight.entry(*y).or_insert(0.0); + let entry = right_weight.entry(*y).or_insert(0f64); *entry += w; labels.insert(*y); } } - let mut best_score = 0.0; + let mut best_score = 0f64; let mut best_threshold = f64::MIN; for (bin, map) in pack { // Move the weights in a `pack` from right to left. for (y, w) in map { - let entry = left_weight.entry(y).or_insert(0.0); + let entry = left_weight.entry(y).or_insert(0f64); *entry += w; if let Some(entry) = right_weight.get_mut(&y) { *entry -= w; } - if *entry <= 0.0 { right_weight.remove(&y); } + if *entry <= 0f64 { right_weight.remove(&y); } } @@ -358,12 +358,12 @@ fn split_by_twoing(pack: Vec<(Bin, LabelToWeight)>) -> (f64, Score) { #[inline(always)] pub(self) fn entropic_impurity(map: &HashMap) -> f64 { let total = map.values().sum::(); - if total <= 0.0 || map.is_empty() { return 0.0.into(); } + if total <= 0f64 || map.is_empty() { return 0f64.into(); } map.par_iter() .map(|(_, &p)| { let r = p / total; - if r <= 0.0 { 0.0 } else { -r * r.ln() } + if r <= 0f64 { 0f64 } else { -r * r.ln() } }) .sum::() } @@ -373,13 +373,13 @@ pub(self) fn entropic_impurity(map: &HashMap) -> f64 { #[inline(always)] pub(self) fn gini_impurity(map: &HashMap) -> f64 { let total = map.values().sum::(); - if total <= 0.0 || map.is_empty() { return 0.0.into(); } + if total <= 0f64 || map.is_empty() { return 0f64.into(); } let correct = map.par_iter() .map(|(_, &w)| (w / total).powi(2)) .sum::(); - (1.0 - correct).max(0.0) + (1f64 - correct).max(0f64) } @@ -395,17 +395,17 @@ pub(self) fn twoing_score( let pr = right.values().sum::(); let pt = pl + pr; - if pl == 0.0 || pr == 0.0 { return 0.0; } - assert!(pt > 0.0); + if pl == 0f64 || pr == 0f64 { return 0f64; } + assert!(pt > 0f64); - let mut score = 0.0; + let mut score = 0f64; for y in labels { - let l = left.get(y).unwrap_or(&0.0); - let r = right.get(y).unwrap_or(&0.0); + let l = left.get(y).unwrap_or(&0f64); + let r = right.get(y).unwrap_or(&0f64); score += ((l / pl) - (r / pr)).abs(); } - score = score.powi(2) * pl * pr / (2.0 * pt).powi(2); + score = score.powi(2) * pl * pr / (2f64 * pt).powi(2); score } diff --git a/src/weak_learner/decision_tree/decision_tree_algorithm.rs b/src/weak_learner/decision_tree/decision_tree_algorithm.rs index 23b7fb0..80f69da 100644 --- a/src/weak_learner/decision_tree/decision_tree_algorithm.rs +++ b/src/weak_learner/decision_tree/decision_tree_algorithm.rs @@ -58,7 
+58,7 @@ use std::collections::HashMap; /// .criterion(Criterion::Entropy) /// .build(); /// -/// let n_sample = sample.shape().0; +/// let n_sample = sample.shape()f64; /// let dist = vec![1f64 / n_sample as f64; n_sample]; /// let f = tree.produce(&sample, &dist); /// @@ -67,7 +67,7 @@ use std::collections::HashMap; /// let loss = sample.target() /// .into_iter() /// .zip(predictions) -/// .map(|(ty, py)| if *ty == py as f64 { 0.0 } else { 1.0 }) +/// .map(|(ty, py)| if *ty == py as f64 { 0f64 } else { 1f64 }) /// .sum::() /// / n_sample as f64; /// println!("loss (train) is: {loss}"); @@ -116,7 +116,7 @@ impl<'a> DecisionTree<'a> { // If sum of `dist` over `train` is zero, construct a leaf node. - if loss == 0.0 || depth < 1 { + if loss == 0f64 || depth < 1 { return TrainNode::leaf(conf, total_weight, loss); } @@ -194,7 +194,7 @@ impl<'a> WeakLearner for DecisionTree<'a> { { let n_sample = sample.shape().0; - let indices = (0..n_sample).filter(|&i| dist[i] > 0.0) + let indices = (0..n_sample).filter(|&i| dist[i] > 0f64) .collect::>(); assert_ne!(indices.len(), 0); @@ -238,7 +238,7 @@ fn confidence_and_loss(sample: &Sample, dist: &[f64], indices: &[usize]) for &i in indices { let l = target[i] as i64; - let cnt = counter.entry(l).or_insert(0.0); + let cnt = counter.entry(l).or_insert(0f64); *cnt += dist[i]; } @@ -253,13 +253,13 @@ fn confidence_and_loss(sample: &Sample, dist: &[f64], indices: &[usize]) // From the update rule of boosting algorithm, // the sum of `dist` over `indices` may become zero, - let loss = if total > 0.0 { total * (1.0 - (p / total)) } else { 0.0 }; + let loss = if total > 0f64 { total * (1f64 - (p / total)) } else { 0f64 }; // `label` takes value in `{-1, +1}`. - let confidence = if total > 0.0 { - (label as f64 * (2.0 * (p / total) - 1.0)).clamp(-1.0, 1.0) + let confidence = if total > 0f64 { + (label as f64 * (2f64 * (p / total) - 1f64)).clamp(-1f64, 1f64) } else { - (label as f64).clamp(-1.0, 1.0) + (label as f64).clamp(-1f64, 1f64) }; let confidence = Confidence::from(confidence); From 3347cf5c3ba94c9c2203695c1370c9da8a6b31aa Mon Sep 17 00:00:00 2001 From: rmitsuboshi Date: Mon, 13 Jan 2025 13:40:53 +0900 Subject: [PATCH 3/4] remove unused crate --- Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7b9b6f9..5c73264 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,6 @@ all-features = true [dependencies] grb = { version = "2.0.1", optional = true } clarabel = { version = "0.9.0" } -plotters = { version = "0.3.5" } rand = { version = "0.8.5" } rand_distr = { version = "0.4.3" } rayon = { version = "1.10.0" } From 4a156aa722ad8a92b542988b7e1a229468230215 Mon Sep 17 00:00:00 2001 From: rmitsuboshi Date: Mon, 13 Jan 2025 16:59:38 +0900 Subject: [PATCH 4/4] introduce LossFunction trait to gradient boosting and regression tree. from now on, you can use custom loss function. 
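In practical terms, the removed `LossType` enum is no longer needed anywhere in the pipeline: both `GBM` and `RegressionTreeBuilder` now accept any value whose type implements `LossFunction`, with the built-in `GBMLoss` being one such type. Below is a hedged sketch of the resulting end-to-end usage, mirroring the updated `tests/gbm.rs` at the end of this patch; `sample` is loaded as in the earlier sketch, and the `run` call is assumed to be the usual `Booster` entry point, which this patch does not touch.

```rust
// Any custom `L: LossFunction` could replace `GBMLoss::L2` in both calls below.
let tree = RegressionTreeBuilder::new(&sample)
    .max_depth(3)
    .loss(GBMLoss::L2)      // was `.loss(LossType::L2)` before this patch
    .build();

let mut gbm = GBM::init_with_loss(&sample, GBMLoss::L2);
// Obtain the combined hypothesis `f` (assumed `Booster::run`, as in the gbm.rs doc example).
let f = gbm.run(&tree);
```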
--- src/booster/gradient_boost/gbm.rs | 7 +- src/common/loss_functions.rs | 82 +++++++- src/lib.rs | 1 - src/prelude.rs | 2 +- src/weak_learner.rs | 1 - src/weak_learner/regression_tree.rs | 4 - src/weak_learner/regression_tree/bin.rs | 66 +++--- src/weak_learner/regression_tree/builder.rs | 25 ++- src/weak_learner/regression_tree/loss.rs | 189 ------------------ .../regression_tree_algorithm.rs | 171 +++++++++++++--- tests/gbm.rs | 18 +- 11 files changed, 272 insertions(+), 294 deletions(-) delete mode 100644 src/weak_learner/regression_tree/loss.rs diff --git a/src/booster/gradient_boost/gbm.rs b/src/booster/gradient_boost/gbm.rs index 5803940..ce042ef 100644 --- a/src/booster/gradient_boost/gbm.rs +++ b/src/booster/gradient_boost/gbm.rs @@ -57,13 +57,12 @@ use std::ops::ControlFlow; /// // Note that the default tolerance parameter is set as `1 / n_sample`, /// // where `n_sample = data.shape().0` is /// // the number of training examples in `data`. -/// let booster = GBM::init(&sample) -/// .loss(GBMLoss::L1); +/// let booster = GBM::init_with_loss(&sample, GBMLoss::L2); /// /// // Set the weak learner with setting parameters. /// let weak_learner = RegressionTreeBuilder::new(&sample) /// .max_depth(2) -/// .loss(LossType::L1) +/// .loss(LossType::L2) /// .build(); /// /// // Run `GBM` and obtain the resulting hypothesis `f`. @@ -80,7 +79,7 @@ use std::ops::ControlFlow; /// let training_loss = sample.target() /// .into_iter() /// .zip(predictions) -/// .map(|(y, fx)| (y - fx).abs()) +/// .map(|(y, fx)| (y - fx).powi(2)) /// .sum::() /// / n_sample; /// diff --git a/src/common/loss_functions.rs b/src/common/loss_functions.rs index 31a9978..792a24b 100644 --- a/src/common/loss_functions.rs +++ b/src/common/loss_functions.rs @@ -20,10 +20,16 @@ pub trait LossFunction { / n_items as f64 } - /// Gradient vector at current point. + /// Gradient vector at the current point. fn gradient(&self, predictions: &[f64], target: &[f64]) -> Vec; + /// Hessian at the current point. + /// Here, this method assumes that the Hessian is diagonal, + /// so that it returns a diagonal vector. + fn hessian(&self, predictions: &[f64], target: &[f64]) -> Vec; + + /// Best coffecient for the newly-attained hypothesis. fn best_coefficient( &self, @@ -45,6 +51,17 @@ pub enum GBMLoss { /// This loss function is also known as /// **Mean Squared Error (MSE)**. L2, + + + // /// Huber loss with parameter `delta`. + // /// Huber loss maps the given scalar `z` to + // /// `0.5 * z.powi(2)` if `z.abs() < delta`, + // /// `delta * (z.abs() - 0.5 * delta)`, otherwise. 
+ // Huber(f64), + + + // /// Quantile loss + // Quantile(f64), } @@ -53,6 +70,7 @@ impl LossFunction for GBMLoss { match self { Self::L1 => "L1 loss", Self::L2 => "L2 loss", + // Self::Huber(_) => "Huber loss", } } @@ -61,6 +79,14 @@ impl LossFunction for GBMLoss { match self { Self::L1 => (prediction - true_value).abs(), Self::L2 => (prediction - true_value).powi(2), + // Self::Huber(delta) => { + // let diff = (prediction - true_value).abs(); + // if diff < *delta { + // 0.5 * diff.powi(2) + // } else { + // delta * (diff - 0.5 * delta) + // } + // }, } } @@ -73,17 +99,59 @@ impl LossFunction for GBMLoss { match self { Self::L1 => { - predictions.iter() - .zip(target) - .map(|(p, y)| (p - y).signum() / n_sample) + target.iter() + .zip(predictions) + .map(|(y, p)| (y - p).signum()) + .collect() + }, + Self::L2 => { + target.iter() + .zip(predictions) + .map(|(y, p)| p - y) + .collect() + }, + // Self::Huber(delta) => { + // target.iter() + // .zip(predictions) + // .map(|(y, p)| { + // let diff = y - p; + // if diff.abs() < *delta { + // -diff + // } else { + // delta * diff.signum() + // } + // }) + // .collect::>() + // }, + } + } + + + fn hessian(&self, predictions: &[f64], target: &[f64]) -> Vec + { + let n_sample = predictions.len(); + assert_eq!(n_sample as usize, target.len()); + + match self { + Self::L1 => { + std::iter::repeat(0f64) + .take(n_sample) .collect() }, Self::L2 => { - predictions.iter() - .zip(target) - .map(|(p, y)| 2.0 * (p - y) / n_sample) + std::iter::repeat(1f64) + .take(n_sample) .collect() }, + // Self::Huber(delta) => { + // target.iter() + // .zip(predictions) + // .map(|(y, p)| { + // let diff = (y - p).abs(); + // if diff < *delta { 1f64 } else { 0f64 } + // }) + // .collect::>() + // }, } } diff --git a/src/lib.rs b/src/lib.rs index bc7b82c..4c9df33 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -216,7 +216,6 @@ pub use weak_learner::{ RegressionTree, RegressionTreeBuilder, RegressionTreeRegressor, - LossType, }; /// Some useful functions / traits diff --git a/src/prelude.rs b/src/prelude.rs index fee9c03..2f0c9ec 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -64,7 +64,6 @@ pub use crate::weak_learner::{ RegressionTree, RegressionTreeBuilder, RegressionTreeRegressor, - LossType, }; @@ -81,6 +80,7 @@ pub use crate::{ pub use crate::common::{ loss_functions::GBMLoss, + loss_functions::LossFunction, frank_wolfe::FWType, }; diff --git a/src/weak_learner.rs b/src/weak_learner.rs index 502aa2e..c851f96 100644 --- a/src/weak_learner.rs +++ b/src/weak_learner.rs @@ -43,7 +43,6 @@ pub use self::naive_bayes::{ pub use self::regression_tree::{ - LossType, RegressionTree, RegressionTreeBuilder, RegressionTreeRegressor, diff --git a/src/weak_learner/regression_tree.rs b/src/weak_learner/regression_tree.rs index 3dfdaa8..9abcc14 100644 --- a/src/weak_learner/regression_tree.rs +++ b/src/weak_learner/regression_tree.rs @@ -3,9 +3,6 @@ mod regression_tree_algorithm; // This file defines the regression tree regressor. mod regression_tree_regressor; -// This file defines the loss type. -mod loss; - // Regression Tree builder. 
mod builder; @@ -18,5 +15,4 @@ mod train_node; pub use regression_tree_algorithm::RegressionTree; pub use regression_tree_regressor::RegressionTreeRegressor; -pub use loss::LossType; pub use builder::RegressionTreeBuilder; diff --git a/src/weak_learner/regression_tree/bin.rs b/src/weak_learner/regression_tree/bin.rs index 4007dd4..5782d36 100644 --- a/src/weak_learner/regression_tree/bin.rs +++ b/src/weak_learner/regression_tree/bin.rs @@ -18,19 +18,8 @@ const EPS: f64 = 0.001; const NUM_TOLERANCE: f64 = 1e-9; -/// A struct that stores the first/second order derivative information. -#[derive(Clone,Default)] -pub(crate) struct GradientHessian { - pub(crate) grad: f64, - pub(crate) hess: f64, -} - - -impl GradientHessian { - pub(super) fn new(grad: f64, hess: f64) -> Self { - Self { grad, hess } - } -} +type Gradient = f64; +type Hessian = f64; /// Binning: A feature processing. @@ -209,11 +198,13 @@ impl Bins { &self, indices: &[usize], feat: &Feature, - gh: &[GradientHessian], - ) -> Vec<(Bin, GradientHessian)> + gradient: &[Gradient], + hessian: &[Hessian], + ) -> Vec<(Bin, Gradient, Hessian)> { let n_bins = self.0.len(); - let mut packed = vec![GradientHessian::default(); n_bins]; + let mut grad_pack = vec![0f64; n_bins]; + let mut hess_pack = vec![0f64; n_bins]; for &i in indices { let xi = feat[i]; @@ -226,10 +217,10 @@ impl Bins { range.0.start.partial_cmp(&xi).unwrap() }) .unwrap(); - packed[pos].grad += gh[i].grad; - packed[pos].hess += gh[i].hess; + grad_pack[pos] += gradient[i]; + hess_pack[pos] += hessian[i]; } - self.remove_zero_weight_pack_and_normalize(packed) + self.remove_zero_weight_pack_and_normalize(grad_pack, hess_pack) } @@ -254,42 +245,45 @@ impl Bins { /// - fn remove_zero_weight_pack_and_normalize( &self, - pack: Vec, - ) -> Vec<(Bin, GradientHessian)> + grad_pack: Vec, + hess_pack: Vec, + ) -> Vec<(Bin, Gradient, Hessian)> { - let mut iter = self.0.iter().zip(pack); + let mut iter = self.0.iter().zip(grad_pack.into_iter().zip(hess_pack)); - let (prev_bin, mut prev_gh) = iter.next().unwrap(); + let (prev_bin, (mut prev_grad, mut prev_hess)) = iter.next().unwrap(); let mut prev_bin = Bin::new(prev_bin.0.clone()); - let mut iter = iter.filter(|(_, gh)| { - gh.grad != 0.0 || gh.hess != 0.0 + let mut iter = iter.filter(|(_, (grad, hess))| { + *grad != 0.0 || *hess != 0.0 }); // The left-most bin might have zero weight. // In this case, find the next non-zero weight bin and merge. - if prev_gh.grad == 0.0 && prev_gh.hess == 0.0 { - let (next_bin, next_gh) = iter.next().unwrap(); + if prev_grad == 0.0 && prev_hess == 0.0 { + let (next_bin, (next_grad, next_hess)) = iter.next().unwrap(); let start = prev_bin.0.start; let end = next_bin.0.end; prev_bin = Bin::new(start..end); - prev_gh = next_gh; + prev_grad = next_grad; + prev_hess = next_hess; } let mut bin_and_gh = Vec::new(); - for (next_bin, next_gh) in iter { + for (next_bin, (next_grad, next_hess)) in iter { let start = prev_bin.0.start; let end = (prev_bin.0.end + next_bin.0.start) / 2.0; let bin = Bin::new(start..end); - bin_and_gh.push((bin, prev_gh)); + bin_and_gh.push((bin, prev_grad, prev_hess)); prev_bin = Bin::new(next_bin.0.clone()); prev_bin.0.start = end; - prev_gh = next_gh; + prev_grad = next_grad; + prev_hess = next_hess; } - bin_and_gh.push((prev_bin, prev_gh)); + bin_and_gh.push((prev_bin, prev_grad, prev_hess)); bin_and_gh } @@ -310,7 +304,7 @@ impl fmt::Display for Bins { let tail = bins.last() .map(|bin| format!("{bin}")) .unwrap(); - write!(f, "{head}, ... , {tail}") + write!(f, "{head}, ... 
, {tail}") } else { let line = bins.iter() .map(|bin| format!("{}", bin)) @@ -335,7 +329,7 @@ impl fmt::Display for Bin { ' ' }; let start = start.abs(); - format!("{sgn}{start: >.2}") + format!("{sgn}{start: >.1}") }; let end = if self.0.end == f64::MAX { String::from("+Inf") @@ -349,9 +343,9 @@ impl fmt::Display for Bin { ' ' }; let end = end.abs(); - format!("{sgn}{end: >.2}") + format!("{sgn}{end: >.1}") }; - write!(f, "[{start}, {end})") + write!(f, "[{start: >8}, {end: >8})") } } diff --git a/src/weak_learner/regression_tree/builder.rs b/src/weak_learner/regression_tree/builder.rs index e295dc9..d0e1d80 100644 --- a/src/weak_learner/regression_tree/builder.rs +++ b/src/weak_learner/regression_tree/builder.rs @@ -1,6 +1,7 @@ use crate::{Sample, RegressionTree}; use super::bin::*; -use super::loss::*; + +use crate::common::loss_functions::LossFunction; use std::collections::HashMap; @@ -28,7 +29,7 @@ pub const DEFAULT_LAMBDA_L2: f64 = 0.01; /// .build(); /// ``` #[derive(Clone)] -pub struct RegressionTreeBuilder<'a> { +pub struct RegressionTreeBuilder<'a, L> { sample: &'a Sample, /// Number of bins per feature. n_bins: HashMap<&'a str, usize>, @@ -41,11 +42,11 @@ pub struct RegressionTreeBuilder<'a> { lambda_l2: f64, /// Loss function - loss: LossType, + loss: Option, } -impl<'a> RegressionTreeBuilder<'a> { +impl<'a, L> RegressionTreeBuilder<'a, L> { /// Construct a new instance of `RegressionTreeBuilder`. /// By default, /// `RegressionTreeBuilder` sets the parameters as follows; @@ -66,15 +67,15 @@ impl<'a> RegressionTreeBuilder<'a> { let lambda_l2 = DEFAULT_LAMBDA_L2; - let loss = LossType::L2; + let loss = None; Self { sample, n_bins, max_depth, loss, lambda_l2, } } /// Specify the loss type. Default is `LossType::L2`. - pub fn loss(mut self, loss: LossType) -> Self { - self.loss = loss; + pub fn loss(mut self, loss: L) -> Self { + self.loss = Some(loss); self } @@ -108,11 +109,15 @@ impl<'a> RegressionTreeBuilder<'a> { }, } } +} +impl<'a, L> RegressionTreeBuilder<'a, L> + where L: LossFunction, +{ /// Build a `RegressionTree`. /// This method consumes `self`. - pub fn build(self) -> RegressionTree<'a> { + pub fn build(self) -> RegressionTree<'a, L> { let bins = self.sample.features() .iter() .map(|feature| { @@ -123,10 +128,12 @@ impl<'a> RegressionTreeBuilder<'a> { }) .collect::>(); + let loss = self.loss + .expect("failed to get loss function. you need to specify a function that implements `LossFunction` trait"); let n_sample = self.sample.shape().0; let regression_tree = RegressionTree::from_components( - bins, n_sample, self.max_depth, self.lambda_l2, self.loss, + bins, n_sample, self.max_depth, self.lambda_l2, loss, ); diff --git a/src/weak_learner/regression_tree/loss.rs b/src/weak_learner/regression_tree/loss.rs deleted file mode 100644 index a3181b6..0000000 --- a/src/weak_learner/regression_tree/loss.rs +++ /dev/null @@ -1,189 +0,0 @@ -//! Defines some criterions for regression tree. -use rayon::prelude::*; -use serde::{ - Serialize, - Deserialize -}; - -use std::fmt; -use crate::Sample; -use crate::weak_learner::common::type_and_struct::*; - -use super::bin::*; - -use std::collections::HashMap; - -/// The type of loss (error) function. -#[derive(Clone, Copy, Debug, Serialize, Deserialize)] -pub enum LossType { - /// Least Absolute Error - L1, - - - /// Least Squared Error - L2, - - - /// Huber loss with parameter `delta`. - /// Huber loss maps the given scalar `z` to - /// `0.5 * z.powi(2)` if `z.abs() < delta`, - /// `delta * (z.abs() - 0.5 * delta)`, otherwise. 
- Huber(f64), - - - // /// Quantile loss - // Quantile(f64), -} - - -impl fmt::Display for LossType { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let loss = match self { - Self::L1 => "L1 (Least Absolute) loss".to_string(), - Self::L2 => "L2 (Least Squared) loss".to_string(), - Self::Huber(delta) => format!("Huber loss (delta = {delta})"), - }; - write!(f, "{loss}") - } -} - - -impl LossType { - pub(crate) fn gradient_and_hessian( - &self, - targets: &[f64], - predictions: &[f64], - ) -> Vec - { - match self { - Self::L1 => { - targets.iter() - .zip(predictions) - .map(|(y, p)| { - let grad = (y - p).signum(); - GradientHessian::new(grad, 0.0) - }) - .collect::>() - }, - Self::L2 => { - targets.iter() - .zip(predictions) - .map(|(y, p)| { - let grad = p - y; - GradientHessian::new(grad, 1.0) - }) - .collect::>() - }, - - Self::Huber(delta) => { - targets.iter() - .zip(predictions) - .map(|(y, p)| { - let diff = p - y; - let (grad, hess) = if diff.abs() < *delta { - (diff, 1.0) - } else { - (delta * diff.signum(), 0.0) - }; - - GradientHessian::new(grad, hess) - }) - .collect::>() - }, - } - } - - - /// Returns the best splitting rule based on the loss function. - pub(super) fn best_split<'a>( - &self, - bins_map: &HashMap<&'_ str, Bins>, - sample: &'a Sample, - gh: &[GradientHessian], - idx: &[usize], - lambda_l2: f64, - ) -> (&'a str, Threshold) - { - - sample.features() - .par_iter() - .map(|feature| { - let name = feature.name(); - let bin = bins_map.get(name).unwrap(); - let pack = bin.pack(idx, feature, gh); - let (score, threshold) = self.best_split_at(pack, lambda_l2); - - (score, name, threshold) - }) - .max_by(|x, y| x.0.partial_cmp(&y.0).unwrap()) - .map(|(_, name, threshold)| (name, threshold)) - .expect("No feature that maximizes the score.") - } - - - fn best_split_at( - &self, - pack: Vec<(Bin, GradientHessian)>, - lambda_l2: f64, - ) -> (LossValue, Threshold) - { - let mut right_grad_sum = pack.par_iter() - .map(|(_, gh)| gh.grad) - .sum::(); - let mut right_hess_sum = pack.par_iter() - .map(|(_, gh)| gh.hess) - .sum::(); - - - let mut left_grad_sum = 0.0; - let mut left_hess_sum = 0.0; - - - let mut best_score = f64::MIN; - let mut best_threshold = f64::MIN; - - - for (bin, gh) in pack { - left_grad_sum += gh.grad; - left_hess_sum += gh.hess; - right_grad_sum -= gh.grad; - right_hess_sum -= gh.hess; - - - let score = - left_grad_sum.powi(2) / (left_hess_sum + lambda_l2) - + right_grad_sum.powi(2) / (right_hess_sum + lambda_l2); - if best_score < score { - best_score = score; - best_threshold = bin.0.end; - } - } - - (best_score.into(), best_threshold.into()) - } - - - pub(super) fn prediction_and_loss( - &self, - indices: &[usize], - gh: &[GradientHessian], - lambda_l2: f64, - ) -> (Prediction, LossValue) - { - let grad_sum = indices.par_iter() - .map(|&i| gh[i].grad) - .sum::(); - - let hess_sum = indices.par_iter() - .map(|&i| gh[i].hess) - .sum::(); - - let prediction = - grad_sum / (hess_sum + lambda_l2); - let loss_value = -0.5 * grad_sum.powi(2) / (hess_sum + lambda_l2); - - (prediction.into(), loss_value.into()) - } - -} - - diff --git a/src/weak_learner/regression_tree/regression_tree_algorithm.rs b/src/weak_learner/regression_tree/regression_tree_algorithm.rs index b8b700b..b5774f2 100644 --- a/src/weak_learner/regression_tree/regression_tree_algorithm.rs +++ b/src/weak_learner/regression_tree/regression_tree_algorithm.rs @@ -1,15 +1,19 @@ use crate::{Sample, WeakLearner}; use super::bin::*; +use crate::common::loss_functions::LossFunction; + use 
crate::weak_learner::common::{ split_rule::*, + type_and_struct::*, }; +use rayon::prelude::*; + use super::{ node::*, train_node::*, - loss::LossType, regression_tree_regressor::RegressionTreeRegressor, }; @@ -20,6 +24,10 @@ use std::cell::RefCell; use std::collections::HashMap; +type Gradient = f64; +type Hessian = f64; + + /// `RegressionTree` is the factory that generates /// a `RegressionTreeClassifier` for a given distribution over examples. /// @@ -42,7 +50,7 @@ use std::collections::HashMap; /// // Further, this example uses `Criterion::Edge` for splitting rule. /// let tree = RegressionTreeBuilder::new(&sample) /// .max_depth(2) -/// .loss(LossType::L2) +/// .loss(GBMLoss::L2) /// .build(); /// /// let n_sample = sample.shape().0; @@ -59,7 +67,7 @@ use std::collections::HashMap; /// / n_sample as f64; /// println!("loss (train) is: {loss}"); /// ``` -pub struct RegressionTree<'a> { +pub struct RegressionTree<'a, L> { bins: HashMap<&'a str, Bins>, // The maximal depth of the output trees max_depth: usize, @@ -70,23 +78,22 @@ pub struct RegressionTree<'a> { // Regularization parameter lambda_l2: f64, - - // LossType function - loss_type: LossType, + // Loss function + loss_func: L, } -impl<'a> RegressionTree<'a> { +impl<'a, L> RegressionTree<'a, L> { #[inline] pub(super) fn from_components( bins: HashMap<&'a str, Bins>, n_sample: usize, max_depth: usize, lambda_l2: f64, - loss_type: LossType, + loss_func: L, ) -> Self { - Self { bins, n_sample, max_depth, lambda_l2, loss_type, } + Self { bins, n_sample, max_depth, lambda_l2, loss_func, } } @@ -94,15 +101,16 @@ impl<'a> RegressionTree<'a> { fn full_tree( &self, sample: &Sample, - gh: &[GradientHessian], + gradient: &[Gradient], + hessian: &[Hessian], indices: Vec, max_depth: usize, ) -> Rc> { // Compute the best prediction that minimizes the training error // on this node. - let (pred, loss) = self.loss_type.prediction_and_loss( - &indices, gh, self.lambda_l2, + let (pred, loss) = prediction_and_loss( + &indices[..], gradient, hessian, self.lambda_l2, ); @@ -113,8 +121,8 @@ impl<'a> RegressionTree<'a> { // Find the best splitting rule. - let (feature, threshold) = self.loss_type.best_split( - &self.bins, &sample, gh, &indices[..], self.lambda_l2, + let (feature, threshold) = best_split( + &self.bins, sample, gradient, hessian, &indices[..], self.lambda_l2, ); let rule = Splitter::new(feature, threshold); @@ -139,8 +147,8 @@ impl<'a> RegressionTree<'a> { // ----- // At this point, `max_depth > 1` is guaranteed // so that one can grow the tree. 
- let ltree = self.full_tree(sample, gh, lindices, max_depth-1); - let rtree = self.full_tree(sample, gh, rindices, max_depth-1); + let ltree = self.full_tree(sample, gradient, hessian, lindices, max_depth-1); + let rtree = self.full_tree(sample, gradient, hessian, rindices, max_depth-1); TrainNode::branch(rule, ltree, rtree, pred, loss) @@ -148,7 +156,9 @@ impl<'a> RegressionTree<'a> { } -impl<'a> WeakLearner for RegressionTree<'a> { +impl<'a, L> WeakLearner for RegressionTree<'a, L> + where L: LossFunction, +{ type Hypothesis = RegressionTreeRegressor; @@ -165,30 +175,29 @@ impl<'a> WeakLearner for RegressionTree<'a> { let info = Vec::from([ ("# of bins (max)", format!("{n_bins}")), ("Max depth", format!("{}", self.max_depth)), - ("Split criterion", format!("{}", self.loss_type)), + ("Split criterion", format!("{}", self.loss_func.name())), ("Regularization param.", format!("{}", self.lambda_l2)), ]); Some(info) } - fn produce(&self, sample: &Sample, predictions: &[f64]) -> Self::Hypothesis { - let gh = self.loss_type.gradient_and_hessian( - sample.target(), - predictions, - ); - + let gradient = self.loss_func.gradient(predictions, sample.target()); + let hessian = self.loss_func.hessian(predictions, sample.target()); - let indices = (0..self.n_sample).filter(|&i| { - gh[i].grad != 0.0 || gh[i].hess != 0.0 - }) - .collect::>(); + let indices = (0..self.n_sample).collect::>(); - let tree = self.full_tree(sample, &gh, indices, self.max_depth); + let tree = self.full_tree( + sample, + &gradient[..], + &hessian[..], + indices, + self.max_depth, + ); let root = Node::from( Rc::try_unwrap(tree) @@ -200,9 +209,9 @@ impl<'a> WeakLearner for RegressionTree<'a> { } } - - -impl fmt::Display for RegressionTree<'_> { +impl fmt::Display for RegressionTree<'_, L> + where L: LossFunction, +{ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!( f, @@ -214,7 +223,7 @@ impl fmt::Display for RegressionTree<'_> { - Bins:\ ", self.max_depth, - self.loss_type, + self.loss_func.name(), )?; @@ -242,3 +251,99 @@ impl fmt::Display for RegressionTree<'_> { write!(f, "----------") } } + + +/// Returns the best splitting rule based on the loss function. +fn best_split<'a>( + bins_map: &HashMap<&'_ str, Bins>, + sample: &'a Sample, + gradient: &[Gradient], + hessian: &[Hessian], + idx: &[usize], + lambda_l2: f64, +) -> (&'a str, Threshold) +{ + + sample.features() + .par_iter() + .map(|feature| { + let name = feature.name(); + let bin = bins_map.get(name).unwrap(); + let pack = bin.pack(idx, feature, gradient, hessian); + let (score, threshold) = best_split_at(pack, lambda_l2); + + (score, name, threshold) + }) + .max_by(|x, y| x.0.partial_cmp(&y.0).unwrap()) + .map(|(_, name, threshold)| (name, threshold)) + .expect("No feature that maximizes the score.") +} + +/// this code is implemented based on Algorithm 3 of the following paper: +/// Tianqi Chen and Carlos Guestrin. 
+/// XGBoost: A scalable tree boosting system [KDD '16] +fn best_split_at( + pack: Vec<(Bin, Gradient, Hessian)>, + lambda_l2: f64, +) -> (LossValue, Threshold) +{ + let mut right_grad_sum = pack.par_iter() + .map(|(_, grad, _)| grad) + .sum::(); + let mut right_hess_sum = pack.par_iter() + .map(|(_, _, hess)| hess) + .sum::(); + + + let mut left_grad_sum = 0.0; + let mut left_hess_sum = 0.0; + + + let mut best_score = f64::MIN; + let mut best_threshold = f64::MIN; + + + for (bin, grad, hess) in pack { + left_grad_sum += grad; + left_hess_sum += hess; + right_grad_sum -= grad; + right_hess_sum -= hess; + + + let score = + left_grad_sum.powi(2) / (left_hess_sum + lambda_l2) + + right_grad_sum.powi(2) / (right_hess_sum + lambda_l2); + if best_score < score { + best_score = score; + best_threshold = bin.0.end; + } + } + + (best_score.into(), best_threshold.into()) +} + +/// returns the prediction value and the loss value of a leaf. +/// this function is implemented based on Eqs. (5), (6) of the following paper: +/// Tianqi Chen and Carlos Guestrin. +/// XGBoost: A scalable tree boosting system [KDD '16] +fn prediction_and_loss( + indices: &[usize], + gradient: &[Gradient], + hessian: &[Hessian], + lambda_l2: f64, +) -> (Prediction, LossValue) +{ + let grad_sum = indices.par_iter() + .map(|&i| gradient[i]) + .sum::(); + + let hess_sum = indices.par_iter() + .map(|&i| hessian[i]) + .sum::(); + + let prediction = - grad_sum / (hess_sum + lambda_l2); + let loss_value = -0.5 * grad_sum.powi(2) / (hess_sum + lambda_l2); + + (prediction.into(), loss_value.into()) +} + diff --git a/tests/gbm.rs b/tests/gbm.rs index 9bfd1b9..6b221a5 100644 --- a/tests/gbm.rs +++ b/tests/gbm.rs @@ -8,8 +8,9 @@ pub mod gbm_boston { use super::*; #[test] fn boston() { + let file = "california-housing.csv"; let mut path = env::current_dir().unwrap(); - path.push("tests/dataset/california-housing.csv"); + path.push(format!("tests/dataset/{file}")); let sample = SampleReader::new() .file(path) @@ -21,11 +22,10 @@ pub mod gbm_boston { let n_sample = sample.shape().0 as f64; - let mut gbm = GBM::init_with_loss(&sample, GBMLoss::L2) - .loss(GBMLoss::L2); + let mut gbm = GBM::init_with_loss(&sample, GBMLoss::L2); let tree = RegressionTreeBuilder::new(&sample) .max_depth(3) - .loss(LossType::L2) + .loss(GBMLoss::L2) .build(); println!("{tree}"); @@ -40,24 +40,24 @@ pub mod gbm_boston { .zip(&predictions[..]) .map(|(t, p)| (t - p).abs()) .sum::() / n_sample; - println!("L1-Loss (boston_housing.csv, GBM, RegressionTree): {loss}"); + println!("L1-Loss ({file}, GBM, RegressionTree): {loss}"); let loss = target.iter() .copied() .zip(&predictions[..]) - .map(|(t, p)| (t - p).powi(2)) + .map(|(t, p)| (t - p).abs()) .sum::() / n_sample; - println!("L2-Loss (boston_housing.csv, GBM, RegressionTree): {loss}"); + println!("L2-Loss ({file}, GBM, RegressionTree): {loss}"); let mean = target.iter().sum::() / n_sample; let loss = target.iter() .copied() - .map(|y| (y - mean).powi(2)) + .map(|y| (y - mean).abs()) .sum::() / n_sample; - println!("Loss (mean prediction): {loss}"); + println!("Loss ({file}, mean prediction): {loss}"); assert!(true); }
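For reference, the leaf values returned by `prediction_and_loss` in `regression_tree_algorithm.rs` above are the closed-form optima from Eqs. (5)-(6) of the cited XGBoost paper, written here in the same ASCII style the crate uses in its doc comments; `g_i` and `h_i` are the per-example gradient and Hessian entries and `λ` is the `lambda_l2` regularizer.

```txt
G = Σ_{i in leaf} g_i,    H = Σ_{i in leaf} h_i

prediction:  w* = - G / (H + λ)
loss value:  obj(w*) = - (1/2) · G² / (H + λ)
```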