45#include "EST_cutils.h"
46#include "EST_multistats.h"
48#include "EST_cmd_line.h"
// Forward declaration of the program's main routine; takes the usual
// command-line argument count and argument vector and returns an int.
50static int wagon_test_main(
int argc,
char **
argv);
// wagon_test_main: command-line driver that tests a CART tree against a
// data file, as described by the option summary below.
//
// NOTE(review): this listing is a lossy extract -- every statement carries
// the original file's line number as a prefix, and the lines in between
// (braces, declarations, else branches, exits, returns) are not visible.
// The comments below describe only what the visible fragments show.
//
//   argc, argv  command-line arguments; argv[0] prefixes the diagnostics
//               printed by this routine.
//   returns     int (the return statements are not visible in this extract).
92static int wagon_test_main(
int argc,
char **
argv)
// Both handles start out as NIL: the address of `description` is handed to
// gc_protect() before its first assignment, so it must not be indeterminate.
97 LISP description=NIL,tree=NIL;
// Option summary text; the call that consumes it is not visible here.
104 "Summary: program to test CART models on data\n"+
105 "-desc <ifile> Field description file\n"+
106 "-data <ifile> Datafile, one vector per line\n"+
107 "-tree <ifile> File containing CART tree\n"+
109 " track for vertex indices\n"+
110 "-predict Predict for each vector returning full vector\n"+
111 "-predict_val Predict for each vector returning just value\n"+
112 "-predictee <string>\n"+
113 " name of field to predict (default is first field)\n"+
114 "-heap <int> {210000}\n"+
115 " Set size of Lisp heap, should not normally need\n"+
116 " to be changed from its default\n"+
117 "-o <ofile> File to save output in\n",
// Start SIOD with the heap size taken from the -heap option.
120 siod_init(
al.ival(
"-heap"));
// -desc: register `description` with the collector, then take the first
// item of what vload() returns for the named file.
122 if (
al.present(
"-desc"))
124 gc_protect(&description);
125 description = car(vload(
al.val(
"-desc"),1));
// Diagnostic for a missing description file (the enclosing branch is not
// visible in this extract).
129 cerr <<
argv[0] <<
": no description file specified" <<
endl;
// -tree: take the first item of what vload() returns for the tree file.
133 if (
al.present(
"-tree"))
136 tree = car(vload(
al.val(
"-tree"),1));
// Diagnostic naming the tree file; the guarding condition and the rest of
// this statement are not visible in this extract.
139 cerr <<
argv[0] <<
": no tree found in \"" <<
al.val(
"-tree")
// -data: open the data stream; a non-zero result from open() is reported
// as a failure.
150 if (
al.present(
"-data"))
152 if (data.
open(
al.val(
"-data")) != 0)
154 cerr <<
argv[0] <<
": can't open data file \"" <<
155 al.val(
"-data") <<
"\" for input." <<
endl;
// -track: load the named file into wgn_VertexTrack.
165 if (
al.present(
"-track"))
167 wgn_VertexTrack.
load(
al.val(
"-track"));
// -o: the open call itself is not visible; only its failure diagnostic is.
170 if (
al.present(
"-o"))
174 cerr <<
argv[0] <<
": can't open output file \"" <<
175 al.val(
"-o") <<
"\"" <<
endl;
// -predictee: walk the description, counting fields in `i`, looking for the
// field whose name equals the requested predictee name.
181 if (
al.present(
"-predictee"))
185 wgn_predictee_name =
al.val(
"-predictee");
186 for (l=description,i=0; l != NIL; l=cdr(l),i++)
187 if (streq(wgn_predictee_name,get_c_string(car(car(l)))))
// Report the name the user asked for, not the numeric field index held in
// wgn_predictee.
194 cerr <<
argv[0] <<
": predictee \"" << wgn_predictee_name <<
195 "\" not in description\n";
// Reads the second element of the predictee's description entry as a C
// string; the left-hand side of this statement is not visible here.
199 get_c_string(car(cdr(siod_nth(wgn_predictee,description))));
// -predict and -predict_val both run simple_predict(); they differ only in
// the final flag (FALSE and TRUE respectively).
201 if (
al.present(
"-predict"))
202 simple_predict(data,
wgn_output,tree,description,FALSE);
203 else if (
al.present(
"-predict_val"))
204 simple_predict(data,
wgn_output,tree,description,TRUE);
// Otherwise one of the three test routines runs; the conditions that choose
// between them are not visible in this extract.
207 test_tree_float(data,
wgn_output,tree,description);
210 test_tree_vector(data,
wgn_output,tree,description);
213 test_tree_class(data,
wgn_output,tree,description);
// Fragment that builds one Lisp list of field values, visiting one entry of
// `description` per iteration.
// NOTE(review): presumably the body of get_data_vector() -- the function
// header, the token read and the return are not visible in this extract.
229 for (d=description; d != NIL; d=cdr(d))
// For every field except the first, a newline in the token's whitespace is
// reported as a vector with the wrong number of features.
233 if ((d != description) && (t.whitespace().contains(
"\n")))
235 cerr <<
"wagon_test: unexpected newline within vector " <<
236 t.
row() <<
" wrong number of features" <<
endl;
// Fields whose declared type is "float" or "int" are converted with atof()
// and stored as Lisp floats.
239 if (streq(get_c_string(car(cdr(car(d)))),
"float") ||
240 streq(get_c_string(car(cdr(car(d)))),
"int"))
241 v = cons(flocons(
atof(t.string())),v);
// If the field's first listed value is "_other_" and the token is not among
// the listed values, the symbol _other_ is stored instead of the token.
242 else if ((streq(get_c_string(car(cdr(car(d)))),
"_other_")) &&
243 (siod_member_str(t.string(),cdr(car(d))) == NIL))
244 v = cons(strintern(
"_other_"),v);
// Otherwise the token text itself is interned. Each value is consed onto the
// front of `v`, so `v` is built in reverse field order at this point.
246 v = cons(strintern(t.string()),v);
// Fragment that predicts once per data vector until get_data_vector()
// yields NIL.
// NOTE(review): presumably the body of simple_predict() -- the function
// header and the output statements are not visible in this extract.
258 for (
vector=get_data_vector(data,description);
259 vector != NIL;
vector=get_data_vector(data,description))
261 predict = wagon_vector_predict(tree,
vector,description);
// Two renderings of the prediction: only its last element, or the whole
// prediction. The test that selects between them is not visible here.
263 val = siod_sprint(car(reverse(predict)));
265 val = siod_sprint(predict);
// Fragment that predicts once per data vector and then reports RMSE,
// correlation and mean absolute error.
// NOTE(review): presumably the body of test_tree_float() -- the function
// header and the statements that accumulate x, y, xx, yy and xy are not
// visible in this extract.
279 for (
vector=get_data_vector(data,description);
280 vector != NIL;
vector=get_data_vector(data,description))
282 predict = wagon_vector_predict(tree,
vector,description);
// Correlation computed from means:
//   (mean(xy) - mean(x)*mean(y)) /
//   (sqrt(mean(xx) - mean(x)^2) * sqrt(mean(yy) - mean(y)^2))
// NOTE(review): assumes xx, yy and xy accumulate the squares and the cross
// product of x and y -- confirm. No guard against a zero denominator is
// visible here.
295 cor = (
xy.mean() - (
x.mean()*
y.mean()))/
296 (
sqrt(
xx.mean()-(
x.mean()*
x.mean())) *
297 sqrt(
yy.mean()-(
y.mean()*
y.mean())));
// Summary line written to `output`; its arguments are not visible here.
299 fprintf(
output,
";; RMSE %1.4f Correlation is %1.4f Mean (abs) Error %1.4f (%1.4f)\n",
// Fragment that predicts once per data vector and then reports figures
// derived from H and Q.
// NOTE(review): presumably the body of test_tree_vector() -- the function
// header is not visible in this extract, so the name is inferred from the
// order of the calls in wagon_test_main.
317 for (
vector=get_data_vector(data,description);
318 vector != NIL;
vector=get_data_vector(data,description))
320 predict = wagon_vector_predict(tree,
vector,description);
// Looks up the entry for real_class and reads its second element as a
// float; the remaining arguments of the lookup are not visible here.
323 prob = get_c_float(car(cdr(siod_assoc_str(
real_class,
// Collect the values listed after the field name in the predictee's
// description entry.
332 for (w=cdr(siod_nth(wgn_predictee,description)); w != NIL; w = cdr(w))
333 lex.append(get_c_string(car(w)));
// Trailing arguments of an output call: -(H/Q) and 2 raised to that power.
// NOTE(review): these look like an entropy and a perplexity -- confirm
// against the statements that accumulate H and Q.
338 (-1*(H/
Q)),
pow(2.0,(-1*(H/
Q))));
// Fragment that predicts once per data vector and then reports figures
// derived from H and Q.
// NOTE(review): presumably the body of test_tree_class() -- the function
// header is not visible in this extract, so the name is inferred from the
// order of the calls in wagon_test_main.
355 for (
vector=get_data_vector(data,description);
356 vector != NIL;
vector=get_data_vector(data,description))
358 predict = wagon_vector_predict(tree,
vector,description);
// Looks up the entry for real_class and reads its second element as a
// float; the remaining arguments of the lookup are not visible here.
361 prob = get_c_float(car(cdr(siod_assoc_str(
real_class,
// Collect the values listed after the field name in the predictee's
// description entry.
370 for (w=cdr(siod_nth(wgn_predictee,description)); w != NIL; w = cdr(w))
371 lex.append(get_c_string(car(w)));
// Trailing arguments of an output call: -(H/Q) and 2 raised to that power.
// NOTE(review): these look like an entropy and a perplexity -- confirm
// against the statements that accumulate H and Q.
376 (-1*(H/
Q)),
pow(2.0,(-1*(H/
Q))));
// Fragment of the recursive tree walk that produces a prediction for one
// data vector (the function header is not visible in this extract).
// A node with nothing after its first element ends the recursion; what is
// returned in that case is not visible here.
385 if (cdr(tree) == NIL)
// Fetch the value of the feature named by this node's question; the
// remaining arguments of the call are not visible here.
388 LISP value = find_feature_value(wgn_ques_feature(car(tree)),
// If the question holds for that value, descend into the node's second
// element; otherwise descend into its third element.
391 if (wagon_ask_question(car(tree),value))
393 return wagon_vector_predict(car(cdr(tree)),
vector,description);
396 return wagon_vector_predict(car(cdr(cdr(tree))),
vector,description);
// Fragment that walks `vector` and `description` in step, comparing the
// requested feature name with each description entry's field name.
// NOTE(review): presumably the body of find_feature_value() -- the function
// header and the statement executed on a match are not visible here.
// The loop stops when `vector` runs out; `description` is not tested.
404 for (v=
vector,d=description; v != NIL; v=cdr(v),d=cdr(d))
405 if (streq(
feature,get_c_string(car(car(d)))))
// Diagnostic for a feature name that matches no description entry.
408 cerr <<
"wagon_test: can't find feature \"" <<
feature <<
409 "\" in description" <<
endl;
void close(void)
Close stream.
int open(const EST_String &filename)
Open an EST_TokenStream for a file.
EST_TokenStream & get(EST_Token &t)
Get the next token in the stream.
int row(void) const
Line number in the original EST_TokenStream.
EST_read_status load(const EST_String name, float ishift=0.0, float startt=0.0)