Canopy  1.0
The header-only random forests library
circularRegressor.hpp
Go to the documentation of this file.
1 #ifndef CIRCULARREGRESSOR_HPP
2 #define CIRCULARREGRESSOR_HPP
3 
12 
13 namespace canopy
14 {
15 
// Random forest model that predicts a circular-valued output label.
// Derives from randomForestBase via CRTP, passing vonMisesDistribution for both
// distribution template arguments (the node and output distributions) and
// float as the remaining type argument.
// TNumParams is forwarded unchanged to randomForestBase.
25 template <unsigned TNumParams>
26 class circularRegressor : public randomForestBase<circularRegressor<TNumParams>,float,vonMisesDistribution,vonMisesDistribution,TNumParams>
27 {
28  public:
29 
30  // Methods
31  // -------
    // Construct a forest from a tree count, a level count and an information
    // gain threshold; the threshold defaults to C_DEFAULT_MIN_INFO_GAIN.
    // NOTE(review): info_gain_tresh presumably initialises min_info_gain -
    // confirm against the definition in circularRegressor.tpp.
33  circularRegressor(const int num_trees, const int num_levels, const float info_gain_tresh = C_DEFAULT_MIN_INFO_GAIN);
34 
35  protected:
38 
39  // Methods
40  // -------
41 
    // Initialise a vonMisesDistribution as a node distribution for training.
    // NOTE(review): t and n are presumably the tree and node indices - confirm.
42  void initialiseNodeDist(const int t, const int n);
    // Calculate the impurity of the label set in a single node.
    // The tree and node arguments are unnamed in this declaration.
43  template <class TLabelIterator>
44  float singleNodeImpurity(const TLabelIterator first_label, const std::vector<int>& nodebag, const int /*tree*/, const int /*node*/) const;
    // Preliminary calculations to perform before training begins.
    // The TIdIterator argument is marked unused.
45  template <class TLabelIterator, class TIdIterator>
46  void trainingPrecalculations(const TLabelIterator first_label, const TLabelIterator last_label, const TIdIterator /*unused*/);
    // Find the best way to split training data using the scores of a certain
    // feature. Results are reported through the info_gain and thresh
    // reference parameters.
48  template <class TLabelIterator>
49  void bestSplit(const std::vector<scoreInternalIndexStruct> &data_structs, const TLabelIterator first_label, const int /*tree*/, const int /*node*/, const float initial_impurity,float& info_gain, float& thresh) const;
    // Get the information gain threshold for a given node.
50  float minInfoGain(const int /*tree*/, const int /*node*/) const;
    // Print to a stream a string that lets a human interpret the header
    // information.
51  void printHeaderDescription(std::ofstream& /*stream*/) const;
    // Print the header information specific to the circularRegressor model to
    // a stream.
52  void printHeaderData(std::ofstream& /*stream*/) const;
    // Read the header information specific to the circularRegressor model
    // from a stream.
53  void readHeader(std::ifstream& /*stream*/);
54 
55  // Data
56  // ----
    // Used during training to store pre-calculated sines of the training labels.
57  std::vector<double> sin_precalc;
    // Used during training to store pre-calculated cosines of the training labels.
58  std::vector<double> cos_precalc;
    // If, during training, the best information gain at a node goes below this
    // threshold, a leaf node is declared.
59  float min_info_gain;
60 
61  // Constants
62  // ---------
    // Number of possible splits tested for each feature during training.
63  static constexpr int C_NUM_SPLIT_TRIALS = 100;
    // Default value for the information gain threshold.
64  static constexpr float C_DEFAULT_MIN_INFO_GAIN = 0.1;
65 };
66 
67 } // end of namespace
68 
70 #endif
71 // CIRCULARREGRESSOR_HPP
circularRegressor()
Default constructor.
Definition: circularRegressor.tpp:19
Contains the canopy::vonMisesDistribution class, which is the node and output distribution for the ca...
static constexpr float C_DEFAULT_MIN_INFO_GAIN
Default value for the information gain threshold.
Definition: circularRegressor.hpp:64
std::vector< double > cos_precalc
Used during training to store pre-calculated cosines of the training labels.
Definition: circularRegressor.hpp:58
float minInfoGain(const int, const int) const
Get the information gain threshold for a given node.
Definition: circularRegressor.tpp:325
A distribution that defines the probabilities over a circular-valued label.
Definition: vonMisesDistribution.hpp:28
Implements a random forest regression model to predict a circular-valued output label.
Definition: circularRegressor.hpp:26
void printHeaderDescription(std::ofstream &) const
Prints to a stream a string that allows a human to interpret the header information.
Definition: circularRegressor.tpp:278
Namespace containing the canopy library for random forest models.
Definition: circularRegressor.hpp:13
void readHeader(std::ifstream &)
Read the header information specific to the circularRegressor model from a stream.
Definition: circularRegressor.tpp:308
Contains the declaration of the canopy::randomForestBase class.
std::vector< double > sin_precalc
Used during training to store pre-calculated sines of the training labels.
Definition: circularRegressor.hpp:57
float singleNodeImpurity(const TLabelIterator first_label, const std::vector< int > &nodebag, const int, const int) const
Calculate the impurity of the label set in a single node.
Definition: circularRegressor.tpp:251
void initialiseNodeDist(const int t, const int n)
Initialise a vonMisesDistribution as a node distribution for training.
Definition: circularRegressor.tpp:51
void printHeaderData(std::ofstream &) const
Print the header information specific to the circularRegressor model to a stream. ...
Definition: circularRegressor.tpp:293
void cleanupPrecalculations()
Clean-up of data to perform after training ends.
Definition: circularRegressor.tpp:98
void bestSplit(const std::vector< scoreInternalIndexStruct > &data_structs, const TLabelIterator first_label, const int, const int, const float initial_impurity, float &info_gain, float &thresh) const
Find the best way to split training data using the scores of a certain feature.
Definition: circularRegressor.tpp:144
randomForestBase< circularRegressor< TNumParams >, float, vonMisesDistribution, vonMisesDistribution, TNumParams >::scoreInternalIndexStruct scoreInternalIndexStruct
Forward the definition of the type declared in the randomForestBase class.
Definition: circularRegressor.hpp:37
Base class for random forest models from which all specific models are derived using CRTP...
Definition: randomForestBase.hpp:44
void trainingPrecalculations(const TLabelIterator first_label, const TLabelIterator last_label, const TIdIterator)
Preliminary calculations to perform before training begins.
Definition: circularRegressor.tpp:75
Contains implementations of the methods of the canopy::circularRegressor class.
static constexpr int C_NUM_SPLIT_TRIALS
This is the number of possible splits tested for each feature during training.
Definition: circularRegressor.hpp:63
float min_info_gain
If, during training, the best information gain at a node goes below this threshold, a leaf node is declared.
Definition: circularRegressor.hpp:59