A decision tree. More...
#include <YaDT.h>
Public Types | |
enum | Options { SET_ABSOLUTE_CORRECTION, SET_WEIGHTED_CORRECTION } |
Other options. More... | |
enum | PruningStrategy { PRUNING_NO, PRUNING_C45, PRUNING_DT } |
Pruning strategy options. More... | |
enum | SplitType { ST_GAIN, ST_GAIN_RATIO } |
Split type of a splitting decision node. More... | |
Public Member Functions | |
void | build (table *maintable, table::subset *subtable, bool evaluate=true, int ff_worker=0) throw (std::runtime_error) |
Build a pruned tree. | |
dtree * | clone () const |
Return a clone of the called object. | |
size_t | depth () const |
Return tree depth. | |
dtree (const std::string &name="my_decision_tree") | |
Constructor. | |
dtree (const dtree &) | |
Copy constructor not defined. | |
double | evaluate (const datasource &ds, std::ostream &output, char sep= '\t') const throw (std::runtime_error) |
Predict classes of unseen cases. | |
double | get_elapsed () const |
Return elapsed time (in secs) taken to build tree. | |
conf_matrix * | get_prediction () |
Return confusion matrix over the training set. | |
const dtree & | operator= (const dtree &) |
Assignment operator not defined. | |
conf_matrix * | predict (table *cases, table::subset *subtable) const throw (std::runtime_error) |
Test classes of unseen cases. | |
conf_matrix * | predict (const datasource &ds) const throw (std::runtime_error) |
Test classes of unseen cases. | |
std::pair< std::string, float > | predict (table *cases, size_t pos, float weight=1) const |
Predict class and confidence of an unseen case stored in a table of the same format as training (except for the class attribute). | |
std::pair< std::string, float > | predict (const std::vector< std::string > &attributes, float weight=1) const |
Predict class and confidence of an unseen case. | |
bool | set_conf_level (float conf_level) |
Set confidence level in simplifying a decision tree. | |
bool | set_min_obj (float min_objects) |
Set minimum weight of cases in sons in order to further split a node during tree building. | |
bool | set_option (Options opt) |
Set other options. | |
bool | set_pruning_strategy (PruningStrategy strategy) |
Set simplification strategy. | |
bool | set_split_type (SplitType st) |
Set split strategy. | |
size_t | size () const |
Return number of tree nodes. | |
void | toBinary (const std::string &filename) |
Binary output. | |
void | toDOT (std::ostream &os=std::cout) const |
Dot output. | |
void | toTEXT (std::ostream &os=std::cout) const |
Textual output. | |
void | toXML (std::ostream &os=std::cout, const conf_matrix *cmTest=NULL) const |
XML output. | |
size_t | training_n_rows () const |
Return number of cases used in building the tree. | |
~dtree () | |
Destructor. | |
Static Public Member Functions | |
static dtree * | fromBinary (const std::string &filename) |
Binary input. |
A decision tree.
The class provides methods for building, simplifying, and evaluating a decision tree.
enum yadt::dtree::Options |
yadt::dtree::dtree | ( | const std::string & | name = "my_decision_tree" |
) |
Constructor.
name | decision tree name. |
yadt::dtree::~dtree | ( | ) |
Destructor.
yadt::dtree::dtree | ( | const dtree & | ) |
Copy constructor not defined.
void yadt::dtree::build | ( | table * | maintable, | |
table::subset * | subtable, | |||
bool | evaluate = true , |
|||
int | ff_worker = 0 | |||
) | throw (std::runtime_error) |
Build a pruned tree.
The method builds a tree and simplifies it.
maintable | a table containing the training set. | |
subtable | the subset of maintable used as training set. NULL value denotes all the table as training set. | |
evaluate | true if a confusion matrix must be also computed. The resulting confusion matrix can be obtained by calling the get_prediction() method. | |
ff_worker | number of workers in multi-core execution. |
dtree* yadt::dtree::clone | ( | ) | const |
Return a clone of the called object.
size_t yadt::dtree::depth | ( | ) | const |
Return tree depth.
double yadt::dtree::evaluate | ( | const datasource & | ds, | |
std::ostream & | output, | |||
char | sep = '\t' | |||
) | const throw (std::runtime_error) |
Predict classes of unseen cases.
The source rst::inpout::stream is required to provide for each case all attributes in the same order as the columns of training set: no class or weights must be provided. Optionally, a further attribute may be provided (typically a key of the case) that is produced in output together with predicted class and confidence.
ds | provider of unseen cases. | |
output | output stream of predictions. | |
sep | column separator in output stream. |
static dtree* yadt::dtree::fromBinary | ( | const std::string & | filename | ) | [static] |
Binary input.
filename | the input filename. |
double yadt::dtree::get_elapsed | ( | ) | const |
Return elapsed time (in secs) taken to build tree.
conf_matrix* yadt::dtree::get_prediction | ( | ) |
Return confusion matrix over the training set.
The method returns NULL if no tree was built or it was built without requiring the computation of a confusion matrix.
std::pair<std::string, float> yadt::dtree::predict | ( | table * | cases, | |
size_t | pos, | |||
float | weight = 1 | |||
) | const |
Predict class and confidence of an unseen case stored in a table of the same format as training (except for the class attribute).
Optionally, a case weight may be provided (which affects confidence of prediction).
cases | a table containing the unseen cases. | |
pos | the case position in the table. | |
weight | case weight. |
std::pair<std::string, float> yadt::dtree::predict | ( | const std::vector< std::string > & | attributes, | |
float | weight = 1 | |||
) | const |
Predict class and confidence of an unseen case.
The attributes of the case are provided as a vector of C++ std::strings in the same order as the columns of training set: no class must be provided. Optionally, a case weight may be provided (which affects confidence of prediction).
attributes | vector of C++ std::string representing case attributes. | |
weight | case weight. |
conf_matrix* yadt::dtree::predict | ( | table * | cases, | |
table::subset * | subtable | |||
) | const throw (std::runtime_error) |
Test classes of unseen cases.
The cases table is required to provide for each case all attributes in the same order as the columns of training set and the actual class of cases: no weights must be provided.
cases | a table containing the unseen cases. | |
subtable | the subset of cases to test. NULL value denotes all the table. |
conf_matrix* yadt::dtree::predict | ( | const datasource & | ds | ) | const throw (std::runtime_error) |
Test classes of unseen cases.
The source rst::inpout::stream is required to provide for each case all attributes in the same order as the columns of training set and then the actual class: no weights must be provided.
ds | provider of unseen cases. |
bool yadt::dtree::set_conf_level | ( | float | conf_level | ) |
Set confidence level in simplifying a decision tree.
The new confidence level must be in the range [0,1].
bool yadt::dtree::set_min_obj | ( | float | min_objects | ) |
Set minimum weight of cases in sons in order to further split a node during tree building.
Default value is 2.0. Any value must be > 0.
min_objects | new minimum weight. |
bool yadt::dtree::set_option | ( | Options | opt | ) |
Set other options.
opt | other option. |
bool yadt::dtree::set_pruning_strategy | ( | PruningStrategy | strategy | ) |
Set simplification strategy.
strategy | new pruning strategy. |
bool yadt::dtree::set_split_type | ( | SplitType | st | ) |
Set split strategy.
st | new split strategy. |
size_t yadt::dtree::size | ( | ) | const |
Return number of tree nodes.
void yadt::dtree::toBinary | ( | const std::string & | filename | ) |
Binary output.
filename | the output filename. |
void yadt::dtree::toDOT | ( | std::ostream & | os = std::cout |
) | const |
Dot output.
void yadt::dtree::toTEXT | ( | std::ostream & | os = std::cout |
) | const |
Textual output.
void yadt::dtree::toXML | ( | std::ostream & | os = std::cout , |
|
const conf_matrix * | cmTest = NULL | |||
) | const |
XML output.
PMML 2.0 compliant.
size_t yadt::dtree::training_n_rows | ( | ) | const |
Return number of cases used in building the tree.