orf

OCaml Random Forests
IN THIS PACKAGE
Module Orf . RFC
module IntMap = BatMap.Int
module IntSet = BatSet.Int
type features = int IntMap.t
type class_label = int
type sample = features * class_label
type metric =
| Gini
| Shannon
| MCC
type forest

trained Random Forests model

type int_or_float =
| Int of int
| Float of float
val train : int -> Random.State.t -> metric -> int -> int_or_float -> int -> int_or_float -> int -> sample array -> forest

train ncores rng metric ntrees max_features card_features max_samples min_node_size training_set

val predict_one : int -> Random.State.t -> forest -> sample -> class_label * float

(pred_label, pred_proba) = predict_one ncores rng trained_forest sample

val predict_one_margin : int -> Random.State.t -> forest -> sample -> class_label * float * float

(pred_label, pred_proba, pred_margin) = predict_one_margin ncores rng trained_forest sample

val predict_many : int -> Random.State.t -> forest -> sample array -> (class_label * float) array

like predict_one but for an array of samples

val predict_many_margin : int -> Random.State.t -> forest -> sample array -> (class_label * float * float) array

like predict_one_margin but for an array of samples

val predict_OOB : Random.State.t -> forest -> sample array -> (class_label * class_label) array

Use a trained forest to predict on the Out Of Bag (OOB) training set of each tree. The training_set must be provided in the same order as when the model was trained. Can be used to get a reliable model performance estimate, even if you don't have a left-out test set. truth_preds = predict_OOB rng forest training_set

val mcc : class_label -> (class_label * class_label) array -> float

Matthews Correlation Coefficient (MCC). mcc target_class_label truth_preds

val accuracy : (class_label * class_label) array -> float

Percentage of correct predictions. accuracy truth_preds

val roc_auc : class_label -> (class_label * float) array -> class_label array -> float

ROC AUC (area under the ROC curve). roc_auc target_class_label preds true_labels

val drop_OOB : forest -> forest

make trained model forget OOB samples (reduce model size)

type filename = string
val save : filename -> forest -> unit

Save model to file (Marshal). OOB samples are dropped prior to saving the model.

val restore : filename -> forest

Restore model from file (Marshal)

The following are needed to implement RFR

val collect_non_constant_features : (int IntMap.t * 'a) array -> (int * IntSet.t) list
val partition_samples : int -> int -> (int IntMap.t * 'a) array -> (int IntMap.t * 'a) array * (int IntMap.t * 'a) array
val cost_function : ( 'a array -> float ) -> 'a array -> 'a array -> float
val choose_min_cost : Random.State.t -> (float * 'b * 'c * ('d * 'e)) list -> float * 'b * 'c * ('d * 'e)
val array_parmap : int -> ( 'a -> 'b ) -> 'a array -> 'b -> 'b array
val ratio_to_int : int -> int -> string -> int_or_float -> int