Source code for Orange.classification.simple_random_forest
import numpy as np
from Orange.classification import Learner, Model
from Orange.classification.simple_tree import SimpleTreeLearner
__all__ = ['SimpleRandomForestLearner']
[docs]
class SimpleRandomForestLearner(Learner):
    """
    Speed-oriented random forest learner for classification.

    Each tree of the forest is a classification tree induced by
    :obj:`SimpleTreeLearner`.

    Parameters
    ----------
    n_estimators : int, optional (default = 10)
        How many trees the forest contains.

    min_instances : int, optional (default = 2)
        Smallest number of data instances allowed in a leaf. While a tree
        is grown, a split is not made if it would produce a leaf with fewer
        instances than ``min_instances``. Instance counts are weighted.

    max_depth : int, optional (default = 1024)
        Largest depth a tree may reach.

    max_majority : float, optional (default = 1.0)
        Largest allowed proportion of the majority class. Induction stops
        once this is exceeded (only used for classification).

    skip_prob : float or str, optional (default = "sqrt")
        Probability with which a data attribute is skipped.

        - if float, the attribute is skipped with this probability.
        - if "sqrt", then `skip_prob = 1 - sqrt(n_features) / n_features`
        - if "log2", then `skip_prob = 1 - log2(n_features) / n_features`

    seed : int, optional (default = 42)
        Seed for the random generator.
    """

    name = 'simple rf class'

    def __init__(self, n_estimators=10, min_instances=2, max_depth=1024,
                 max_majority=1.0, skip_prob='sqrt', seed=42):
        super().__init__()
        # The settings are only recorded here; nothing is validated or
        # converted at construction time.
        self.n_estimators = n_estimators
        self.min_instances = min_instances
        self.max_depth = max_depth
        self.max_majority = max_majority
        self.skip_prob = skip_prob
        self.seed = seed

    def fit_storage(self, data):
        """Build and return a :obj:`SimpleRandomForestModel` from `data`.

        The model is constructed with this learner (for its settings) and
        the data it should be trained on.
        """
        forest = SimpleRandomForestModel(self, data)
        return forest
class SimpleRandomForestModel(Model):
    """Forest of simple trees whose per-class outputs are averaged.

    Attributes are set in ``__init__``:

    - ``estimators_``: list of fitted trees, one per requested estimator.
    - ``cls_vals``: number of values of the class variable of the
      training data.
    """

    def __init__(self, learner, data):
        """Record the number of class values and train the forest.

        Parameters
        ----------
        learner : object
            Source of the settings read by :meth:`learn`.
        data : object
            Training data; ``data.domain.class_var.values`` must be
            available.
        """
        self.estimators_ = []
        class_values = data.domain.class_var.values
        self.cls_vals = len(class_values)
        self.learn(learner, data)

    def learn(self, learner, data):
        """Fit ``learner.n_estimators`` trees and append them to
        ``estimators_``.

        A single tree learner is created and reused; before every fit its
        seed is set to ``learner.seed`` plus the index of the tree.
        """
        # NOTE(review): the trailing positional ``True`` is presumably the
        # bootstrap flag of SimpleTreeLearner -- confirm against its
        # signature.
        tree_learner = SimpleTreeLearner(
            learner.min_instances,
            learner.max_depth,
            learner.max_majority,
            learner.skip_prob,
            True,
        )
        for tree_index in range(learner.n_estimators):
            tree_learner.seed = learner.seed + tree_index
            fitted_tree = tree_learner(data)
            self.estimators_.append(fitted_tree)

    def predict(self, X):
        """Average the trees' per-class outputs for the rows of `X`.

        Returns
        -------
        tuple
            ``(indices, scores)`` where ``scores`` has one row per row of
            `X` and ``cls_vals`` columns, holding the mean over all trees
            of the second item returned by each tree's ``predict``, and
            ``indices`` is the column index of the largest score per row.
        """
        n_rows = X.shape[0]
        averaged = np.zeros((n_rows, self.cls_vals))
        # Make the array contiguous once here, so that the same conversion
        # is a no-op inside the individual trees.
        X = np.ascontiguousarray(X)
        for estimator in self.estimators_:
            # SimpleTrees have no preprocessors and domain conversion was
            # already handled within this class, so tree.predict() can be
            # called directly instead of going through tree.__call__.
            _, tree_scores = estimator.predict(X)
            averaged += tree_scores
        averaged /= len(self.estimators_)
        return averaged.argmax(axis=1), averaged