Skip to content

Commit

Permalink
Processes for Random Forest (#306)
Browse files Browse the repository at this point in the history
Co-authored-by: clausmichele <[email protected]>
  • Loading branch information
m-mohr and clausmichele authored Mar 9, 2022
1 parent 63e3e9d commit e9bbfa1
Show file tree
Hide file tree
Showing 5 changed files with 222 additions and 3 deletions.
88 changes: 88 additions & 0 deletions proposals/fit_class_random_forest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{
"id": "fit_class_random_forest",
"summary": "Train a random forest classification model",
"description": "Executes the fit of a random forest classification based on the user input of target and predictors. The Random Forest classification model is based on the approach by Breiman (2001).",
"categories": [
"machine learning"
],
"experimental": true,
"parameters": [
{
"name": "predictors",
"description": "The predictors for the classification model as a vector data cube. Aggregated to the features (vectors) of the target input variable.",
"schema": {
"type": "object",
"subtype": "vector-cube"
}
},
{
"name": "target",
"description": "The training sites for the classification model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to be associated with a value to predict (e.g. fractional forest canopy cover).",
"schema": {
"type": "object",
"subtype": "vector-cube"
}
},
{
"name": "training",
"description": "The amount of training data to be used in the classification, given as a fraction. The sampling will be chosen randomly through the data object. The remaining data will be used as test data for the validation.",
"schema": {
"type": "number",
"exclusiveMinimum": 0,
"maximum": 1
}
},
{
"name": "num_trees",
"description": "The number of trees built within the Random Forest classification.",
"optional": true,
"default": 100,
"schema": {
"type": "integer",
"minimum": 1
}
},
{
"name": "mtry",
"description": "Specifies how many split variables will be used at a node. Default value is `null`, which corresponds to the number of predictors divided by 3.",
"optional": true,
"default": null,
"schema": [
{
"type": "integer",
"minimum": 1
},
{
"type": "null"
}
]
},
{
"name": "seed",
"description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.",
"optional": true,
"default": null,
"schema": {
"type": [
"integer",
"null"
]
}
}
],
"returns": {
"description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.",
"schema": {
"type": "object",
"subtype": "ml-model"
}
},
"links": [
{
"href": "https://doi.org/10.1023/A:1010933404324",
"title": "Breiman (2001): Random Forests",
"type": "text/html",
"rel": "about"
}
]
}
88 changes: 88 additions & 0 deletions proposals/fit_regr_random_forest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{
"id": "fit_regr_random_forest",
"summary": "Train a random forest regression model",
"description": "Executes the fit of a random forest regression based on the user input of target and predictors. The Random Forest regression model is based on the approach by Breiman (2001).",
"categories": [
"machine learning"
],
"experimental": true,
"parameters": [
{
"name": "predictors",
"description": "The predictors for the regression model as a vector data cube. Aggregated to the features (vectors) of the target input variable.",
"schema": {
"type": "object",
"subtype": "vector-cube"
}
},
{
"name": "target",
"description": "The training sites for the regression model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to be associated with a value to predict (e.g. fractional forest canopy cover).",
"schema": {
"type": "object",
"subtype": "vector-cube"
}
},
{
"name": "training",
"description": "The amount of training data to be used in the regression, given as a fraction. The sampling will be chosen randomly through the data object. The remaining data will be used as test data for the validation.",
"schema": {
"type": "number",
"exclusiveMinimum": 0,
"maximum": 1
}
},
{
"name": "num_trees",
"description": "The number of trees built within the Random Forest regression.",
"optional": true,
"default": 100,
"schema": {
"type": "integer",
"minimum": 1
}
},
{
"name": "mtry",
"description": "Specifies how many split variables will be used at a node. Default value is `null`, which corresponds to the number of predictors divided by 3.",
"optional": true,
"default": null,
"schema": [
{
"type": "integer",
"minimum": 1
},
{
"type": "null"
}
]
},
{
"name": "seed",
"description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.",
"optional": true,
"default": null,
"schema": {
"type": [
"integer",
"null"
]
}
}
],
"returns": {
"description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.",
"schema": {
"type": "object",
"subtype": "ml-model"
}
},
"links": [
{
"href": "https://doi.org/10.1023/A:1010933404324",
"title": "Breiman (2001): Random Forests",
"type": "text/html",
"rel": "about"
}
]
}
4 changes: 2 additions & 2 deletions proposals/load_ml_model.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"id": "load_ml_model",
"summary": "Load a ML model",
"description": "Loads a machine learning model from a STAC Item.\n\nSuch a model could be trained and saved as part of a previous batch job with processes such as ``save_ml_model()``.",
"description": "Loads a machine learning model from a STAC Item.\n\nSuch a model could be trained and saved as part of a previous batch job with processes such as ``fit_regr_random_forest()`` and ``save_ml_model()``.",
"categories": [
"machine learning",
"import"
Expand Down Expand Up @@ -36,7 +36,7 @@
}
],
"returns": {
"description": "A machine learning model to be used with machine learning processes.",
"description": "A machine learning model to be used with machine learning processes such as ``predict_random_forest()``.",
"schema": {
"type": "object",
"subtype": "ml-model"
Expand Down
42 changes: 42 additions & 0 deletions proposals/predict_random_forest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"id": "predict_random_forest",
"summary": "Predict values from a Random Forest model",
"description": "Applies a Random Forest machine learning model to an array and predicts a value for it.",
"categories": [
"machine learning",
"reducer"
],
"experimental": true,
"parameters": [
{
"name": "data",
"description": "An array of numbers.",
"schema": {
"type": "array",
"items": {
"type": [
"number",
"null"
]
}
}
},
{
"name": "model",
"description": "A model object that can be trained with the processes ``fit_regr_random_forest()`` (regression) and ``fit_class_random_forest()`` (classification).",
"schema": {
"type": "object",
"subtype": "ml-model"
}
}
],
"returns": {
"description": "The predicted value. Returns `null` if any of the given values in the array is a no-data value.",
"schema": {
"type": [
"number",
"null"
]
}
}
}
3 changes: 2 additions & 1 deletion tests/.words
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,5 @@ gdalwarp
Lanczos
sinc
interpolants
Hyndman
Breiman
Hyndman

0 comments on commit e9bbfa1

Please sign in to comment.