• Main Page
  • Related Pages
  • Namespaces
  • Classes
  • Files
  • File List

YaDT.h

Go to the documentation of this file.
00001 /*
00002                          Salvatore Ruggieri (c), 2002-
00003 */
00004 
00010 #if !defined(_YADT_H__INCLUDED_)
00011 #define _YADT_H__INCLUDED_
00012 
00013 #include <string>
00014 #include <vector>
00015 #include <stdexcept>
00016 #include <ostream> /* standard std::ostream and std::fstream */
00017 #include <fstream> 
00018 #include <iostream>
00019 
00021 namespace yadt
00022 {
00024         const char *get_version();   // Returns a C string; presumably the library version text -- confirm in the implementation.
00026         const int get_build();       // Returns an int; presumably a build number. NOTE(review): the top-level const on a by-value return has no effect for callers.
00028         const char *get_copyright(); // Returns a C string; presumably the copyright notice -- confirm in the implementation.
00029 
00030 
00039         class datasource
00040         {
              // Holds a data-source specification string. The syntax of that string
              // is not defined in this header -- TODO confirm against the implementation.
00041         public:
                      // Builds a datasource from the specification text `specs`.
                      // NOTE(review): not declared explicit, so a std::string converts
                      // implicitly to a datasource wherever one is expected.
00050                 datasource(const std::string &specs);
                      // Destructor; defined outside this header.
00052                 ~datasource();
00053         private:
00054         #ifndef DOXYGEN_SHOULD_SKIP_THIS
                              // dtree and table may access the private members of this class.
                              // The guard hides these declarations when DOXYGEN_SHOULD_SKIP_THIS is defined.
00055                         friend class dtree;
00056                         friend class table;
00057         #endif /* DOXYGEN_SHOULD_SKIP_THIS */
                      // Specification text; presumably a copy of `specs` -- confirm in the .cpp.
00058                 std::string specifications;
00059         };
00060 
00061         // Forward declarations of classes whose full definitions appear later in this header.
00062         class conf_matrix;
00063         class dtree;
00064 
00068         class table
00069         {
              // Named table of cases loaded from a datasource. All state lives behind
              // the opaque pointer `real` declared at the end of the class; its concrete
              // type is not visible in this header.
00070         public:
                      // Creates a table called `name`.
                      // NOTE(review): not explicit, so std::string converts implicitly to table.
00074                 table(const std::string &name);
                      // Destructor; defined outside this header.
00076                 ~table();
                      // Copy constructor (copy depth is decided by the implementation -- not shown here).
00078                 table( const table& );
                      // Copy assignment. NOTE(review): returns a const reference, so the
                      // result of an assignment expression cannot be modified through it.
00080                 const table& operator=( const table& );
                      // Loads the table meta data from `ds`.
                      // The exception specification allows only std::runtime_error to propagate.
                      // NOTE(review): dynamic exception specifications are deprecated in C++11
                      // and removed in C++17.
00082                 void load_meta_data(const datasource &ds)
00083                                  throw(std::runtime_error);
                      // Returns the table name by value.
00085                 std::string get_name() const;
00086 
                      // Loads the table data from `ds`. `unknown` defaults to "?";
                      // presumably the token that marks missing values -- TODO confirm.
                      // Only std::runtime_error may propagate (see note on load_meta_data).
00091                 void load_data(const datasource &ds, const std::string &unknown = "?") 
00092                         throw(std::runtime_error);
                      // Number of columns, as reported by the implementation.
00094                 size_t get_no_columns() const;
                      // Number of rows, as reported by the implementation.
00096                 size_t get_no_rows() const;
                      // Weight associated with position `pos` (presumably a row index -- confirm).
00099                 float get_weight(size_t pos) const;
                      // Class value at position `pos`, returned as a string.
00101                 std::string get_class(size_t pos) const;
00102 
                      // Serializes this table to the file `filename`; the format is not described here.
00106                 void toBinary(const std::string &filename) const; 
                      // Static factory that reads a table from `filename`.
                      // NOTE(review): ownership of the returned pointer is not stated in this
                      // header -- presumably the caller must delete it; confirm.
00110                 static table* fromBinary(const std::string &filename); 
00111 
00112                 /* utility methods */
00113 
                      // Sets the log stream to `new_log`; returns a stream pointer,
                      // presumably the previously installed log -- confirm.
00117                 std::ostream *set_log(std::ostream *new_log);
                      // Sets the verbosity level; returns a size_t, presumably the previous level -- confirm.
00122                 size_t set_verbosity(size_t newverbosity);
                      // Elapsed time as a double; the unit and the measured operation are not stated here.
00126                 double get_elapsed() const;
                      // Writes an XML data dictionary to `os` (std::cout by default).
00128                 void toXML_data_dictionary(std::ostream &os = std::cout) const; 
                      // Writes an XML mining schema to `os` (std::cout by default).
00130                 void toXML_mining_schema(std::ostream &os = std::cout) const; 
00131 
00140                 class subset
00141                 {
                      // Collection of (table position, weight) entries referring to a table.
                      // State is kept behind the opaque pointer `real`.
00142                 public:
                              // Creates a subset (initial contents are set by the implementation).
00144                         subset();
                              // Destructor; defined outside this header.
00146                         ~subset();
                              // Copy constructor.
00148                         subset( const subset& );
                              // Copy assignment; returns a const reference (see note on table::operator=).
00150                         const subset& operator=( const subset& );
                              // Number of entries.
00152                         size_t size() const;
                              // Changes the number of entries to `newsize`.
00154                         void resize(size_t newsize);
                              // Takes a size hint; presumably pre-allocates room for `size` entries -- confirm.
00156                         void reserve(size_t size);
                              // Appends an entry made of `tablepos` and `weight`.
00161                         void push_back(size_t tablepos, float weight);
                              // Returns the entry at `subsetpos` as a pair; presumably
                              // (table position, weight), mirroring set() -- confirm.
                              // NOTE(review): not declared const, so it cannot be called on a const subset.
00166                         std::pair<size_t, float> get(size_t subsetpos);
                              // Overwrites the entry at `subsetpos` with `tablepos` and `weight`.
00172                         void set(size_t subsetpos, size_t tablepos, float weight);
00173 
00174                 private:
00175         #ifndef DOXYGEN_SHOULD_SKIP_THIS
                              // dtree and table may use the private constructor and `real`.
00176                         friend class dtree;
00177                         friend class table;
00178         #endif /* DOXYGEN_SHOULD_SKIP_THIS */
00179 
                              // Private constructor from an opaque pointer; what `actual`
                              // points to is known only to the implementation.
00180                         subset(void *actual);
                              // Opaque pointer to the implementation state.
00181                         void *real;
00182                 };
00183 
                      // The four factories below return a subset pointer.
                      // NOTE(review): ownership of the result is not stated in this header -- confirm.
                      // Presumably a subset covering every row.
00187                 subset* get_wsubset_all() const;
                      // Presumably a subset with the first `n` rows.
00191                 subset* get_wsubset_first_n(size_t n) const;
                      // Presumably a subset of `n` randomly chosen rows.
00196                 subset* get_wsubset_random(size_t n) const;
                      // Presumably the rows of this table that are not in `subtable`.
00202                 subset* get_wsubset_difference(subset *subtable) const;
00203 
00204         private:
00205         #ifndef DOXYGEN_SHOULD_SKIP_THIS
                      // conf_matrix and dtree may access `real` directly.
00206                 friend class conf_matrix;
00207                 friend class dtree;
00208         #endif /* DOXYGEN_SHOULD_SKIP_THIS */
00209 
                      // Opaque pointer to the implementation state.
00210                 void *real;
00211         };
00212 
00227         class conf_matrix 
00228         {
              // Confusion matrix indexed by (actual, predicted). It has no public
              // constructor apart from the copy constructor; new instances come from
              // dtree (a friend) or from clone().
00229         public:
                      // Destructor; defined outside this header.
00231                 ~conf_matrix();
                      // Copy constructor.
00233                 conf_matrix( const conf_matrix& );
                      // Copy assignment. NOTE(review): returns a const reference.
00235                 const conf_matrix& operator=( const conf_matrix& );
                      // Returns a pointer to a copy of this matrix.
                      // NOTE(review): ownership of the result is not stated here --
                      // presumably the caller must delete it; confirm.
00239                 conf_matrix* clone() const;
                      // Size of the matrix; presumably the number of classes -- confirm.
00241                 size_t size() const;
                      // Cell value for the given `actual` and `predicted` indexes.
00243                 float get_element(size_t actual, size_t predicted) const;
                      // Presumably the misclassification percentage -- confirm range and scale.
00248                 float mis_perc() const;
                      // Presumably the total (possibly weighted, hence float) number of cases -- confirm.
00252                 float cases() const;
                      // Elapsed time as a double; unit not stated here.
00254                 double get_elapsed() const;
                      // Writes a text rendering to `os` (std::cout by default). `space`
                      // defaults to 0; presumably an indentation width -- confirm.
00259                 void toTEXT(std::ostream& os = std::cout, size_t space = 0) const;
                      // Writes an XML rendering to `os` (std::cout by default).
00261                 void toXML(std::ostream& os = std::cout) const;
00262 
00263         private:
00264         #ifndef DOXYGEN_SHOULD_SKIP_THIS
                      // dtree may call the private constructors below.
00265                 friend class dtree;
00266         #endif /* DOXYGEN_SHOULD_SKIP_THIS */
00267 
                      // Private constructors, from a table or from another matrix pointer.
                      // Only std::runtime_error may propagate.
                      // NOTE(review): dynamic exception specifications are deprecated in
                      // C++11 and removed in C++17.
00271                 conf_matrix(table *maintable)
00272                         throw(std::runtime_error);
00273                 conf_matrix(const conf_matrix *cm)
00274                         throw(std::runtime_error);
                      // Opaque pointer to the implementation state.
00276                 void *real;
00277         };
00278 
00283         class dtree 
00284         {
              // Decision tree: configured through the set_* methods, built from a
              // table and a subset of it, then used for prediction and export.
              // State is kept behind the opaque pointer `real`.
00285         public:
                      // Creates a tree called `name` ("my_decision_tree" by default).
                      // Because of the default argument this also acts as the default constructor.
                      // NOTE(review): not explicit, so std::string converts implicitly to dtree.
00287                 dtree(const std::string &name="my_decision_tree");
                      // Destructor; defined outside this header.
00289                 ~dtree();
                      // Copy constructor.
00291                 dtree( const dtree& );
                      // Copy assignment. NOTE(review): returns a const reference.
00293                 const dtree& operator=( const dtree& );
                      // Returns a pointer to a copy of this tree.
                      // NOTE(review): ownership of the result is not stated here -- confirm.
00295                 dtree* clone() const;
00296 
                      // Elapsed time as a double; unit and measured operation are not stated here.
00298                 double get_elapsed() const;
                      // Size of the tree; presumably the number of nodes -- confirm.
00300                 size_t size() const;
                      // Depth of the tree.
00302                 size_t depth() const;
                      // Presumably the number of rows used for training -- confirm.
00304                 size_t training_n_rows() const;
00305 
                      // Values accepted by set_pruning_strategy().
00307                 typedef enum {
                              // Presumably: no pruning.
00309                         PRUNING_NO, 
                              // Presumably: C4.5-style pruning -- confirm.
00311                         PRUNING_C45, 
                              // Meaning of "DT" is not given in this header -- TODO confirm.
00313                         PRUNING_DT
00314                 } PruningStrategy;
00315 
                      // Values accepted by set_split_type().
00317                 typedef enum {
                              // Presumably: information gain.
00319                         ST_GAIN,
                              // Presumably: gain ratio.
00321                         ST_GAIN_RATIO
00322                 } SplitType;
00323 
                      // Values accepted by set_option(); their semantics are not described in this header.
00325                 typedef enum {
00327                         SET_ABSOLUTE_CORRECTION,
00329                         SET_WEIGHTED_CORRECTION
00330                 } Options;
00331 
                      // The setters below return bool; presumably true when the new
                      // setting was accepted -- confirm in the implementation.
00335                 bool set_pruning_strategy(PruningStrategy strategy);
00339                 bool set_split_type(SplitType st);
00343                 bool set_option(Options opt);
                      // `min_objects` is a float; presumably a minimum (weighted) case count -- confirm.
00348                 bool set_min_obj(float min_objects);
                      // `conf_level`: presumably the confidence level used when pruning -- confirm range.
00351                 bool set_conf_level(float conf_level);
00352 
                      // Builds the tree from `maintable`, using the rows in `subtable`.
                      // `evaluate` defaults to true and `ff_worker` to 0; their exact effect
                      // is not described here -- TODO confirm.
                      // Only std::runtime_error may propagate.
                      // NOTE(review): dynamic exception specifications are deprecated in
                      // C++11 and removed in C++17.
00363                 void build(table* maintable, table::subset *subtable, bool evaluate = true, int ff_worker = 0)
00364                         throw (std::runtime_error);
                      // Returns a confusion matrix pointer; presumably the one computed while
                      // building. NOTE(review): ownership of the result is not stated -- confirm.
00370                 conf_matrix* get_prediction();
                      // Predicts the cases read from `ds` and returns a confusion matrix pointer.
                      // Only std::runtime_error may propagate.
00377                 conf_matrix* predict(const datasource &ds) const
00378                         throw(std::runtime_error);
                      // Evaluates the cases read from `ds`, writing to `output` with `sep`
                      // (tab by default) as separator. The meaning of the returned double
                      // is not stated here -- TODO confirm.
                      // Only std::runtime_error may propagate.
00389                 double evaluate(const datasource &ds, std::ostream &output, char sep = '\t') const
00390                         throw(std::runtime_error);
                      // Predicts the case at position `pos` of `cases`, with `weight` defaulting to 1.
                      // Returns a (string, float) pair; presumably (predicted class, score) -- confirm.
00399                 std::pair<std::string, float> predict(table* cases, size_t pos, float weight = 1) const;
                      // Same as above for a single case given as attribute values in `attributes`.
00408                 std::pair<std::string, float> predict(const std::vector<std::string> &attributes, float weight = 1) const;
00409                 // predict all cases in a given table
                      // (restricted to the rows in `subtable`); only std::runtime_error may propagate.
00419                 conf_matrix* predict(table* cases, table::subset *subtable) const
00420                         throw(std::runtime_error);
00421 
                      // Writes a text rendering of the tree to `os` (std::cout by default).
00423                 void toTEXT(std::ostream& os = std::cout) const;
                      // Writes the tree to `os`; presumably in Graphviz DOT syntax -- confirm.
00425                 void toDOT(std::ostream& os = std::cout) const;
                      // Writes an XML rendering to `os`; `cmTest` is optional (NULL by default).
                      // NOTE(review): NULL is used although this header does not include
                      // <cstddef> directly; it presumably arrives through another include.
00427                 void toXML(std::ostream &os = std::cout, const conf_matrix *cmTest = NULL) const; 
00428 
                      // Serializes the tree to the file `filename`.
                      // NOTE(review): not const, unlike table::toBinary -- confirm whether intended.
00431                 void toBinary(const std::string &filename);
                      // Static factory that reads a tree from `filename`.
                      // NOTE(review): ownership of the returned pointer is not stated -- confirm.
00435                 static dtree *fromBinary(const std::string &filename);
00436 
00437         private:
                      // Opaque pointer to the implementation state.
00439                 void *real;
00440         };
00441 
00442 } // namespace yadt
00443 
00887 #endif // !defined(_YADT_H__INCLUDED_)

Generated on Tue Oct 19 2010 13:09:32 for YaDT by  doxygen 1.7.1