Class MrsCake::DataSet
In: mrscake.rb.c
Parent: Object

A DataSet stores training data (examples of features and their corresponding desired output values). DataSets can be used to train models, which are then used to make predictions.

Methods

add   print   save   train  

Public Instance methods

Adds a row of training data to the model.

[Source]

/* call-seq:
 *   dataset.add({feature1=>value1,feature2=>value2}, output) -> dataset
 *
 * Adds a row of training data to the data set.
 *
 * The first argument is converted to an example with value_to_example();
 * the second argument is converted with value_to_variable() and stored as
 * that example's desired response. Raises ArgumentError when the converted
 * response has type MISSING. Returns the receiver.
 */
static VALUE rb_dataset_add(VALUE cls, VALUE input, VALUE response)
{
    Get_DataSet(dataset,cls);

    example_t*row = value_to_example(input);
    row->desired_response = value_to_variable(response);

    /* A MISSING type after conversion is treated as an invalid response.
     * NOTE(review): `row` is not released on this path -- confirm whether
     * the example needs to be freed before raising. */
    if(MISSING == row->desired_response.type) {
        rb_raise(rb_eArgError, "bad argument to add(): second parameter must be an int or a symbol");
    }

    trainingdata_add_example(dataset->trainingdata, row);
    return cls;
}

Print the dataset to stdout.

[Source]

/* call-seq:
 *   dataset.print() -> dataset
 *
 * Print the dataset to stdout.
 *
 * Hands the wrapped training data to trainingdata_print() and returns
 * the receiver.
 */
static VALUE rb_dataset_print(VALUE cls)
{
    /* Get_DataSet declares `dataset` from the Ruby object in `cls`. */
    Get_DataSet(dataset,cls);
    trainingdata_print(dataset->trainingdata);
    return cls;
}

Save the dataset to a file, using mrscake's internal file format.

[Source]

/* call-seq:
 *   dataset.save(filename) -> dataset
 *
 * Save the dataset to a file, using mrscake's internal file format.
 *
 * +filename+ must be a String (TypeError otherwise) and must not contain
 * embedded NUL bytes (ArgumentError otherwise). Returns the receiver.
 */
static VALUE rb_dataset_save(VALUE cls, VALUE _filename)
{
    Check_Type(_filename, T_STRING);
    /* StringValueCStr (unlike StringValuePtr) guarantees a NUL-terminated
     * buffer and raises on embedded NUL bytes, so the path handed to the
     * C library can never be a silently truncated version of the Ruby string. */
    const char*filename = StringValueCStr(_filename);
    Get_DataSet(dataset,cls);
    trainingdata_save(dataset->trainingdata, filename);
    return cls;
}

Train a classifier

[Source]

/* call-seq:
 *   dataset.train() -> model
 *   dataset.train(model_name) -> model
 *
 * Train a classifier.
 *
 * Without an argument (or with nil) the model is produced by
 * trainingdata_train(); with a +model_name+ String it is produced by
 * trainingdata_train_specific_model(). Raises TypeError if +model_name+
 * is neither nil nor convertible to a String, and ArgumentError if
 * training yields no model.
 */
static VALUE rb_dataset_train(int argc, VALUE* argv, VALUE cls)
{
    Get_DataSet(dataset,cls);
    VALUE model_value = rb_model_allocate(Model);
    Get_Model(model, model_value);

    /* Zero or one optional argument: the name of the model to train. */
    volatile VALUE model_name = Qnil;
    rb_scan_args(argc, argv, "01", &model_name);

    if(NIL_P(model_name)) {
        model->model = trainingdata_train(dataset->trainingdata);
    } else {
        /* Validate before touching the string buffer: RSTRING_PTR on a
         * non-String VALUE (Integer, Symbol, ...) is undefined behaviour.
         * StringValueCStr raises TypeError for such arguments and also
         * rejects embedded NUL bytes. */
        const char*name = StringValueCStr(model_name);
        model->model = trainingdata_train_specific_model(dataset->trainingdata, name);
    }

    if(!model->model) {
        rb_raise(rb_eArgError, "bad (empty?) data");
    }
    return model_value;
}

[Validate]