25 template <
unsigned TNumParams>
33 void setClassNames(
const std::vector<std::string>& new_class_names);
34 void getClassNames(std::vector<std::string>& end_class_names)
const;
43 template <
class TLabelIterator>
44 void bestSplit(
const std::vector<scoreInternalIndexStruct> &data_structs,
const TLabelIterator first_label,
const int ,
const int ,
const float initial_impurity,
float& info_gain,
float& thresh)
const;
49 template <
class TLabelIterator>
50 float singleNodeImpurity(
const TLabelIterator first_label,
const std::vector<int>& nodebag,
const int ,
const int )
const;
51 template <
class TLabelIterator,
class TIdIterator>
52 void trainingPrecalculations(
const TLabelIterator first_label,
const TLabelIterator last_label,
const TIdIterator);
float minInfoGain(const int, const int) const
Get the information gain threshold for a given node.
Definition: classifier.tpp:282
static constexpr double C_DEFAULT_MIN_INFO_GAIN
Default value for the information gain threshold.
Definition: classifier.hpp:62
void readHeader(std::ifstream &stream)
Read the header information specific to the classifier model from a stream.
Definition: classifier.tpp:245
Contains implementations of the methods of the canopy::classifier class.
std::vector< std::string > class_names
The names of the classes.
Definition: classifier.hpp:57
Contains the canopy::discreteDistribution class, which is the node and output distribution for the cl...
void cleanupPrecalculations()
Clean-up of data to perform after training ends.
Definition: classifier.tpp:116
void initialiseNodeDist(const int t, const int n)
Initialise a discreteDistribution as a node distribution for training.
Definition: classifier.tpp:78
int getNumberClasses() const
Get the number of classes in the discrete label space of the model.
Definition: classifier.tpp:265
void setClassNames(const std::vector< std::string > &new_class_names)
Set the class name strings.
Definition: classifier.tpp:52
void bestSplit(const std::vector< scoreInternalIndexStruct > &data_structs, const TLabelIterator first_label, const int, const int, const float initial_impurity, float &info_gain, float &thresh) const
Find the best way to split training data using the scores of a certain feature.
Definition: classifier.tpp:160
void raiseNodeTemperature(const double T)
Smooth the distributions in all of the leaf nodes using the softmax function.
Definition: classifier.tpp:303
void getClassNames(std::vector< std::string > &end_class_names) const
Get the class name strings.
Definition: classifier.tpp:65
std::vector< double > xlogx_precalc
Used for storing temporary precalculations of x*log(x) values during training.
Definition: classifier.hpp:58
Namespace containing the canopy library for random forest models.
Definition: circularRegressor.hpp:13
void printHeaderData(std::ofstream &stream) const
Print the header information specific to the classifier model to a stream.
Definition: classifier.tpp:228
Contains the declaration of the canopy::randomForestBase class.
Implements a random forest classifier model to predict a discrete output label.
Definition: classifier.hpp:26
randomForestBase< classifier< TNumParams >, int, discreteDistribution, discreteDistribution, TNumParams >::scoreInternalIndexStruct scoreInternalIndexStruct
Forward the definition of the type declared in the randomForestBase class.
Definition: classifier.hpp:39
A distribution that defines the probabilities over a number of discrete (integer-valued) class labels...
Definition: discreteDistribution.hpp:26
void trainingPrecalculations(const TLabelIterator first_label, const TLabelIterator last_label, const TIdIterator)
Preliminary calculations to perform before training begins.
Definition: classifier.tpp:102
double min_info_gain
If, during training, the best information gain at a node falls below this threshold, a leaf node is declared.
Definition: classifier.hpp:59
Base class for random forest models from which all specific models are derived using CRTP...
Definition: randomForestBase.hpp:44
int n_classes
The number of classes in the discrete label space.
Definition: classifier.hpp:56
float singleNodeImpurity(const TLabelIterator first_label, const std::vector< int > &nodebag, const int, const int) const
Calculate the impurity of the label set in a single node.
Definition: classifier.tpp:200
void printHeaderDescription(std::ofstream &stream) const
Prints to a stream a string that allows a human to interpret the header information.
Definition: classifier.tpp:213
classifier()
Default constructor.
Definition: classifier.tpp:38