Canopy  1.0
The header-only random forests library
discreteDistribution.hpp
Go to the documentation of this file.
1 #ifndef DISCRETEDISTRIBUTION_HPP
2 #define DISCRETEDISTRIBUTION_HPP
3 
11 #include <cmath>
12 #include <vector>
13 #include <fstream>
14 #include <algorithm>
15 
16 namespace canopy
17 {
18 
27 {
28  public:
29  // Methods
30  //--------
31 
37 
43  discreteDistribution(const int num_classes)
44  {
45  initialise(num_classes);
46  }
47 
52  void initialise(const int num_classes)
53  {
54  n_classes = num_classes;
55  prob.resize(n_classes);
56  std::fill(prob.begin(),prob.end(),0.0);
57  }
58 
64  void reset()
65  {
66  std::fill(prob.begin(),prob.end(),0.0);
67  }
68 
75  float pdf(const int x) const
76  {
77  return prob[x];
78  }
79 
85  void normalise()
86  {
87  float sum = 0.0;
88  for(int c = 0; c < n_classes; ++c)
89  sum += prob[c];
90 
91  for(int c = 0; c < n_classes; ++c)
92  prob[c] /= sum;
93  }
94 
101  void printOut(std::ofstream& stream) const
102  {
103  for(int c = 0; c < n_classes - 1; c++)
104  stream << prob[c] << " ";
105  stream << prob[n_classes - 1];
106  }
107 
114  void readIn(std::ifstream& stream)
115  {
116  for(int c = 0; c < n_classes; c++)
117  stream >> prob[c];
118  }
119 
135  void raiseDistributionTemperature(const double T)
136  {
137  if(T > 0.0)
138  {
139  for(int c = 0; c < n_classes; ++c)
140  prob[c] = std::exp(prob[c]/T);
141  normalise();
142  }
143  }
144 
145  // Template methods, declared here and defined after the class body
146  // ------------------------------------------------------------------
147 
148  // Fit the distribution to the labels in [first_label, last_label); the third argument is not used
149  template <class TLabelIterator, class TIdIterator>
150  void fit(TLabelIterator first_label, TLabelIterator last_label, TIdIterator /*unused*/);
151 
152  // Get the probability of label x; the id argument is ignored
153  template <class TId>
154  float pdf(const int x, const TId /*id*/) const;
155 
156  // Adds the per-class values of dist to this distribution's values without normalising; the id argument is ignored
157  template <class TId>
158  void combineWith(const discreteDistribution& dist, const TId /*id*/);
159 
163  friend std::ofstream& operator<< (std::ofstream& stream, const discreteDistribution& dist) { dist.printOut(stream); return stream;}
164 
168  friend std::ifstream& operator>> (std::ifstream& stream, discreteDistribution& dist) { dist.readIn(stream); return stream;}
169 
170  protected:
171  // Data members
172  int n_classes; // The number of discrete classes
173  std::vector<float> prob; // The probability of each class; initialise() gives it n_classes entries
174 
175 };
176 
177 
178 
196 template <class TLabelIterator, class TIdIterator>
197 void discreteDistribution::fit(TLabelIterator first_label, const TLabelIterator last_label, TIdIterator /*unused*/)
198 {
199  const int n_data = std::distance(first_label, last_label);
200 
201  if(n_data == 0)
202  {
203  std::fill(prob.begin(),prob.end(),1.0/float(n_classes));
204  }
205  else
206  {
207  std::fill(prob.begin(),prob.end(),0.0);
208 
209  for( ; first_label != last_label; ++first_label)
210  prob[*first_label] += 1.0;
211 
212  std::for_each(prob.begin(),prob.end(), [=] (float& p) { p /= float(n_data); });
213  }
214 }
215 
225 template<class TId>
226 float discreteDistribution::pdf(const int x, const TId /*id*/) const
227 {
228  return prob[x];
229 }
230 
243 template <class TId>
244 void discreteDistribution::combineWith(const discreteDistribution& dist, const TId /*id*/)
245 {
246  for(int c = 0; c < n_classes; c++)
247  prob[c] += dist.prob[c];
248 }
249 
250 }// end of namespace
251 
252 #endif
253 // DISCRETEDISTRIBUTION_HPP
void reset()
Reset function - return probabilities to zero.
Definition: discreteDistribution.hpp:64
int n_classes
The number of discrete classes.
Definition: discreteDistribution.hpp:172
void printOut(std::ofstream &stream) const
Prints the defining parameters of the distribution to an output filestream.
Definition: discreteDistribution.hpp:101
discreteDistribution(const int num_classes)
Constructor.
Definition: discreteDistribution.hpp:43
void normalise()
Normalise the distribution to ensure it is valid.
Definition: discreteDistribution.hpp:85
void fit(TLabelIterator first_label, TLabelIterator last_label, TIdIterator)
Fit the distribution to a set of labels.
Definition: discreteDistribution.hpp:197
float pdf(const int x) const
Returns the probability of a particular label.
Definition: discreteDistribution.hpp:75
void raiseDistributionTemperature(const double T)
Smooth the distribution using the softmax function.
Definition: discreteDistribution.hpp:135
void readIn(std::ifstream &stream)
Reads the defining parameters of the distribution from a filestream.
Definition: discreteDistribution.hpp:114
Namespace containing the canopy library for random forest models.
Definition: circularRegressor.hpp:13
void initialise(const int num_classes)
Initialise with a certain number of classes and reset probabilities to zero.
Definition: discreteDistribution.hpp:52
friend std::ifstream & operator>>(std::ifstream &stream, discreteDistribution &dist)
Allows the distribution to be read from a file via the streaming operator '>>'...
Definition: discreteDistribution.hpp:168
A distribution that defines the probabilities over a number of discrete (integer-valued) class labels...
Definition: discreteDistribution.hpp:26
discreteDistribution()
Default constructor.
Definition: discreteDistribution.hpp:36
std::vector< float > prob
Vector containing the probabilities of each class.
Definition: discreteDistribution.hpp:173
void combineWith(const discreteDistribution &dist, const TId)
Combine this distribution with a second by summing the probability values, without normalisation...
Definition: discreteDistribution.hpp:244
friend std::ofstream & operator<<(std::ofstream &stream, const discreteDistribution &dist)
Allows the distribution to be written to a file via the streaming operator '<<'.
Definition: discreteDistribution.hpp:163