From 1c856f87ebd0a618214f89386c65870bbb650b26 Mon Sep 17 00:00:00 2001 From: Michael Louis Date: Wed, 11 Oct 2023 13:05:35 -0400 Subject: [PATCH] Removed conduit from docs --- available-hardware.mdx | 6 +- .../model-ensembles.mdx | 24 - .../persistent-memory.mdx | 34 -- .../processing-functions.mdx | 56 -- .../advanced-functionality/saving-files.mdx | 24 - .../advanced-functionality/test-locally.mdx | 56 -- .../advanced-functionality/using-files.mdx | 87 ---- .../advanced-functionality/using-secrets.mdx | 55 -- .../using-webhook-endpoints.mdx | 58 --- .../examples/huggingface/onnx-conversion.mdx | 114 ----- .../examples/huggingface/transformers.mdx | 173 ------- cerebrium/conduit/examples/onnx.mdx | 213 -------- cerebrium/conduit/examples/pytorch.mdx | 205 -------- cerebrium/conduit/examples/scikit.mdx | 133 ----- cerebrium/conduit/examples/spacy.mdx | 148 ------ cerebrium/conduit/examples/tensorflow.mdx | 133 ----- cerebrium/conduit/examples/xgboost.mdx | 99 ---- cerebrium/conduit/introduction.mdx | 480 ------------------ cerebrium/conduit/model-status.mdx | 35 -- .../multi-gpu-inferencing.mdx | 2 +- installation.mdx | 15 +- introduction.mdx | 5 +- mint.json | 44 +- 23 files changed, 11 insertions(+), 2188 deletions(-) delete mode 100644 cerebrium/conduit/advanced-functionality/model-ensembles.mdx delete mode 100644 cerebrium/conduit/advanced-functionality/persistent-memory.mdx delete mode 100644 cerebrium/conduit/advanced-functionality/processing-functions.mdx delete mode 100644 cerebrium/conduit/advanced-functionality/saving-files.mdx delete mode 100644 cerebrium/conduit/advanced-functionality/test-locally.mdx delete mode 100644 cerebrium/conduit/advanced-functionality/using-files.mdx delete mode 100644 cerebrium/conduit/advanced-functionality/using-secrets.mdx delete mode 100644 cerebrium/conduit/advanced-functionality/using-webhook-endpoints.mdx delete mode 100644 cerebrium/conduit/examples/huggingface/onnx-conversion.mdx delete mode 100644 cerebrium/conduit/examples/huggingface/transformers.mdx delete mode 100644 cerebrium/conduit/examples/onnx.mdx delete mode 100644 cerebrium/conduit/examples/pytorch.mdx delete mode 100644 cerebrium/conduit/examples/scikit.mdx delete mode 100644 cerebrium/conduit/examples/spacy.mdx delete mode 100644 cerebrium/conduit/examples/tensorflow.mdx delete mode 100644 cerebrium/conduit/examples/xgboost.mdx delete mode 100644 cerebrium/conduit/introduction.mdx delete mode 100644 cerebrium/conduit/model-status.mdx diff --git a/available-hardware.mdx b/available-hardware.mdx index 37546adf..220d4377 100644 --- a/available-hardware.mdx +++ b/available-hardware.mdx @@ -28,7 +28,7 @@ We have the following graphics cards available on the platform: _NOTE: The maximum model sizes are calculated as a guideline, assuming that the model is the only thing loaded into VRAM. Longer inputs will result in a smaller maximum model size. Your mileage may vary._ -These GPUs can be selected using the `--hardware` flag when deploying your model on Cortex or by using the `hardware` parameter when deploying your model with the Conduit. +These GPUs can be selected using the `--hardware` flag when deploying your model on Cortex. For more help with deciding which GPU you require, see this section [here](#choosing-a-gpu). _Due to the global shortage of GPUs at the moment, we may not always have the Enterprise edition of your GPU available. In this case, we will deploy to the Workstation edition of the GPU._ @@ -50,7 +50,7 @@ Once again, you only pay for what you need! 
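As a rough back-of-the-envelope check for the model-size guideline above (this is generic arithmetic, not a Cerebrium-specific figure): FP16 weights take about 2 bytes per parameter, so the weights alone need roughly 2 GB of VRAM per billion parameters.

```python
# Back-of-the-envelope VRAM needed for model weights alone (FP16 is ~2 bytes/parameter)
print(7e9 * 2 / 1e9)   # 7B parameters  -> ~14 GB, a tight fit on a 16 GB card
print(13e9 * 2 / 1e9)  # 13B parameters -> ~26 GB, needs a 48 GB card such as an A6000
```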
## Storage We provide you with a persistent storage volume that is attached to your deployment. -You can use this storage volume to store any data that you need to persist between deployments. Accessing your persistent storage is covered in depth for [conduit here](/cerebrium/conduit/advanced-functionality/persistent-memory) and [cortex here](/cerebrium/cortex/advanced-functionality/persistent-storage). +You can use this storage volume to store any data that you need to persist between deployments. Accessing your persistent storage is covered in depth for [cortex here](/cerebrium/cortex/advanced-functionality/persistent-storage). The storage volume is backed by high-performance SSDs so that you can get the best performance possible Pricing for storage is based on the amount of storage you use and is charged per GB per month. @@ -106,7 +106,7 @@ This storage is persistent and will be available to you for as long as you need # Advanced Parameters This section is for those users that have a large volume of requests and want to optimise their deployments for cost and performance. -The following parameters are available to you when deploying your model on Cortex or Conduit. +The following parameters are available to you when deploying your model on Cortex. ## Setting your minimum number of instances after a deployment diff --git a/cerebrium/conduit/advanced-functionality/model-ensembles.mdx b/cerebrium/conduit/advanced-functionality/model-ensembles.mdx deleted file mode 100644 index 8cd9b3e3..00000000 --- a/cerebrium/conduit/advanced-functionality/model-ensembles.mdx +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: "Deploying Model Ensembles" ---- - -With Cerebrium you are also able to deploy a sequence of models simply by -specifying a list of tuples of model types and model files. Currently, this -functionality is supported across all model types, with the caveat that we -assume the models are evaluated in the order they are supplied. If you require -that data be formatted in between models, please use [pre/post processing functions](/cerebrium/advanced-functionality/processing-functions). - -For example, if you have a PyTorch model that takes in a 2D image and outputs a 1D vector, and -you have an XGB model that takes in a 1D vector and outputs a single class -prediction, you can deploy them as a sequence of models by supplying the -following to the `deploy` function: - -```bash -model_flow = [(model_type.TORCH, 'torch.pt'), (model_type.XGBOOST_CLASSIFIER, 'xgb.json')] -endpoint = deploy(model_flow, 'my-flow', "") -``` - -Unfortunately, we don't allow you to integrate endpoints or our prebuilt models into these flows - the benefit of our ensemble -implementation is that everything is run on the same machine so execution is extremely quick, and you don't have multiple network requests. -If you would like more control over implementing model ensembles or to make network requests, we recommend you look at deploying your -own [custom python code](/quickstarts/custom) diff --git a/cerebrium/conduit/advanced-functionality/persistent-memory.mdx b/cerebrium/conduit/advanced-functionality/persistent-memory.mdx deleted file mode 100644 index 9a547001..00000000 --- a/cerebrium/conduit/advanced-functionality/persistent-memory.mdx +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: "Persistent Memory in Conduit Processing Functions" ---- - -There are certain occasions when you want to store data in a pre or post-processing function that you will access in a further function further in the pipeline. 
-For example, you may want to store the number of rows in a dataset in a pre-processing function and then use that number in a post-processing function to calculate a percentage. -To do this, the Cerebrium framework provides a persistent memory store that can access through the use of the `get`, `save` and `delete` functions. - -The persistent memory store is a key-value store that can be used to store data in a processing function and then retrieve that data in a further function in the pipeline. -The persistent memory store is a global store, meaning that all functions in the pipeline can access the same data. However, while you can save any Python object, -you may only save objects created **within** the scope of the function. This means that you cannot save objects created in a parent function, or created in a global scope -of your local interpreter. - -```python -from cerebrium import deploy, model_type, save, get - -# This function will be applied to the input of your model -def pre_process(data, files): - import numpy as np - from PIL import Image - labelled = {d: Image.open(f) for d, f in zip(data, files)} - labels = [d["name"] for d in data] - save("labels", labels) # Save the labels for later - return labelled - -def post_process(result, input_data, files): - import numpy as np - output = np.argmax(result, axis=1) - labels = get("labels") # Get the labels we saved earlier - return {d: l for d, l in zip(labels, output)} - -model_flow = [(model_type.TORCH, 'torch.pt'), (model_type.XGBOOST_CLASSIFIER, 'xgb.json', {"pre": pre_process, "post": post_process})] -endpoint = deploy(model_flow, 'my-flow', "") -``` diff --git a/cerebrium/conduit/advanced-functionality/processing-functions.mdx b/cerebrium/conduit/advanced-functionality/processing-functions.mdx deleted file mode 100644 index acc1caa7..00000000 --- a/cerebrium/conduit/advanced-functionality/processing-functions.mdx +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: "Pre & Post Processing Functions" ---- - -In many cases, you may want to process the data in your single or multi-model flow before returning it to the next flow stage or to the user. To do this, you can supply both a pre-processing and a post-processing function to your flow, `pre` and `post` respectively. -The pre-processing will be applied to the input of the model, while the post-processing function will be applied to the output of your model before it is returned to the next flow stage. -You will **need** to implement both of these functions if you wish to use binary files along with your input data. - -In particular: - -- The pre-processing function should take 1-2 arguments **in order**: the input to your model and any input files as binary data. It should return a single value, which is the pre-processed input to your model. It is crucial that the output of your pre-processing function is the same shape and type as the input of the model in the flow stage. -- The post-processing function should take a 1-3 arguments **in order**: the output of your model, the original input data and the original binary input files. It should return a single value, which is the post-processed output of your model. You should ensure the return shape and type is the same as the input shape and type of the next flow stage. If you are at the end of your flow, you should ensure the return type is one of [list, dict, numpy.array, torch.tensor]. Note if you wish to use files, you should ensure your function signature has 3 arguments, regardless of whether you use the second argument. 
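As a quick reference, the expected signatures look roughly like this (a minimal sketch only; the return values here are placeholders, and the fuller worked example that follows shows the functions in context):

```python
# Sketch of the expected processing-function signatures (illustrative only)
def pre_process(data, files):
    # data: the request input, files: any binary files sent with the request
    return data  # must match the input shape/type of the next flow stage

def post_process(result, input_data, files):
    # result: the model output; input_data/files: the original request contents
    return result  # list, dict, numpy.array or torch.tensor at the end of a flow
```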
- -For example, if you have a model that outputs a 1D vector of probabilities, you may want to return the argmax. You can do this by supplying the following function to the `deploy` function with the last flow stage: - -```python -# This function will be applied to the input of your model -def pre_process(data, files): - # data is a list of input data, files is a list of binary objects - import numpy as np - from PIL import Image - labelled = {d: Image.open(f) for d, f in zip(data, files)} - return labelled - -def post_process(result, input_data, files): - # result is the output of your model, input_data is the original input data from the 1st flow stage, files is the original binary input files - import numpy as np - labels = np.argmax(result, axis=1) - return {d: l for d, l in zip(input_data, labels)} - -model_flow = [(model_type.TORCH, 'torch.pt'), (model_type.XGBOOST_CLASSIFIER, 'xgb.json', {"pre": pre_process, "post": post_process})] -endpoint = deploy(model_flow, 'my-flow', "") -``` - - - If you'd like to use objects across multiple functions, consider using our - [persistent memory store](/cerebrium/advanced-functionality/persistent-memory) - feature. - - -You may use functions inside your processor from the following libraries: - -- `numpy` -- `scikit-learn` -- `torch` -- `pandas` -- `Pillow` -- `transformers` -- `chitra` - We are expanding this list of libraries, so if you have a specific library you would like to use, please let us know! - - - Note that any imports you need to do in your processing functions must be done - inside the function. This is because the function is serialized and sent to - the Cerebrium servers, and the imports will not be available on the server. - diff --git a/cerebrium/conduit/advanced-functionality/saving-files.mdx b/cerebrium/conduit/advanced-functionality/saving-files.mdx deleted file mode 100644 index 7c8dacc6..00000000 --- a/cerebrium/conduit/advanced-functionality/saving-files.mdx +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: "Saving File Output" ---- - -We provide the ability to temporarily store files on our servers and provide a URL to access them. -This is useful for pipelines where output files or other binary data is too big to send in a request as a base64 string. -Using the `upload` function, you can upload a file to our servers and get a URL to access them. - -```python -from cerebrium import upload -def post_process(data, input_data, files): - # This example does 1 data point, but you can do multiple if you wish in a loop - from PIL import Image - image = Image.fromarray(data[0]) - image.save('output.png') - url = upload("output.png") - return url - -from cerebrium import Conduit, model_type -conduit = Conduit('', '', [('', '', {"post": post_process"})]) -conduit.load('./') -conduit.run(data) -conduit.deploy() -``` diff --git a/cerebrium/conduit/advanced-functionality/test-locally.mdx b/cerebrium/conduit/advanced-functionality/test-locally.mdx deleted file mode 100644 index 1703269d..00000000 --- a/cerebrium/conduit/advanced-functionality/test-locally.mdx +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: "Test locally" -description: "Test your Conduit model locally." ---- - -To run your model locally and ensure it is working as intended before -deploying, you can use the `load` and `run` command. The `load` command will -load your pipeline into memory using the paths you specified in `deploy` from a base path, while `run` -will sequentially execute your loaded pipeline. 
- -```python -from cerebrium import deploy, model_type - -conduit = deploy(('', ''), '', '') -conduit.load('./') -conduit.run(data) -``` - -Where `data` is the data you would send to your model. This would usually be -some numerical 2D/3D array for typical models or a list of strings for a language model. You may -feed an `ndarray` or `Tensor` directly into this function. However, if you are -using a custom data pipeline that is expecting another type, you may need to convert your data into the -appropriate format for your model. - -You can also define a Conduit object directly by using the `Conduit` class. Then call the `run` method on the Conduit object to test the model locally, or the `deploy` method to deploy the Conduit's model flow to Cerebrium. -When you use the Conduit object directly, you can specify what hardware you wish your model to run on by using the `hardware` parameter in the `Conduit` constructor. -The `hardware` parameter is an enum that can be one of the following: - -- **hardware.CPU**: This will run your model on a CPU. This is the default option for SKLearn, XGBoost, and SpaCy models. -- **hardware.GPU**: (Deprecated) This will run your model on a T4 GPU. This is the default option for Torch, ONNX, and HuggingFace models. -- **hardware.A10**: (Deprecated) This will run your model on an A10 GPU, which provides 24GB of VRAM. You should use this option if you are using a model that is too large to fit on the 16GB of VRAM that a T4 GPU provides. This will include most large HuggingFace models. -- **hardware.TURING_4000** : A 8GB GPU that is great for lightweight models with less than 3B parameters in FP16. -- **hardware.TURING_5000** : A 16GB GPU that is great for small models with less than 7B parameters in FP16. Most small HuggingFace models can run on this. -- **hardware.AMPERE_A4000** : A 16GB GPU that is great for small models with less than 7B parameters in FP16. Significantly faster than an RTX 4000. Most small HuggingFace models can run on this. -- **hardware.AMPERE_A5000** : A 24GB GPU that is great for medium models with less than 10B parameters in FP16. A great option for almost all HuggingFace models. -- **hardware.AMPERE_A6000** : A 48GB GPU offering a great cost to performance ratio. This is great for medium models with less than 21B parameters in FP16. A great option for almost all HuggingFace models. -- **hardware.A100** : A 80GB GPU offering some of the highest performance available. This is great for large models with less than 18B parameters in FP16. A great option for almost all HuggingFace models especially if inference speed is your priority. - -```python -from cerebrium import Conduit, model_type, hardware -conduit = Conduit( - '', - '', - [('', '')], - hardware=hardware. -) -conduit.load('./') -conduit.run(data) -conduit.deploy() -``` - -Additionally, defining a conduit object directly allows you to add more models to your flow dynamically using `add_model` method. - -```python -conduit.add_model('', '', {}) -``` diff --git a/cerebrium/conduit/advanced-functionality/using-files.mdx b/cerebrium/conduit/advanced-functionality/using-files.mdx deleted file mode 100644 index b7bbddb5..00000000 --- a/cerebrium/conduit/advanced-functionality/using-files.mdx +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: "Sending File Input" ---- - -You may also use files as input data for your model. -This is particularly useful if you have a model that takes in images, audio files, or other binary data. 
-To do this, you will need to supply at least an initial pre-processing function. -You can then use the files in your model as you would any other input data. -It is important to note that `conduit.run` expects the files to be passed in as **binary data** on the cloud, so you will need to open the files and read them in as binary data before passing them to test with `conduit.run` locally. - -```python -def pre_process(data, files): - import numpy as np - from PIL import Image - labelled = {d: Image.open(f) for d, f in zip(data, files)} - return labelled - -from cerebrium import Conduit, model_type -conduit = Conduit('', '', [('', '', {"pre": pre_process"})]) -conduit.load('./') -conduit.run(data, [files]) # files should be binary objects - for images it would the return of Image.open(f) -conduit.deploy() -``` - -### Request and Response with Forms - - - You currently cannot use `model_api_request` with file input. You will need to - use **curl** instead. - - - -```bash Request - curl --location --request POST '' \ - --header 'Authorization: ' \ - --header 'Content-Type: multipart/form-data' \ - -F data='[]' \ - -F files=@ -``` - -The parameters to a form request are similar to a JSON request, however, both the `data` and `files` are passed in as *form data*. - - - This is the Cerebrium API key used to authenticate your request. You can get - it from your Cerebrium dashboard. - - - The content type of your request. Must be multipart/form-data for forms. - - - A stringified list of data points you would like to send to your model. e.g. - for 1 data point of 3 features: '[[1,2,3]]'. - - - A file or list of files you would like to send to your model. The files should - be in the same order as the data points in the `data` field. - - - - -```json Response -{ - "result": [], - "run_id": "", - "run_time_ms": - "prediction_ids": [""] -} -``` - - - -#### Response Parameters - - - The result of your model prediction. - - - The run ID associated with your model predictions. - - - The amount of time if took your model to run down to the millisecond. This is - what we charge you based on. - - - The prediction IDs associated with each of your model predictions. Used to - track your model predictions with monitoring tools. - diff --git a/cerebrium/conduit/advanced-functionality/using-secrets.mdx b/cerebrium/conduit/advanced-functionality/using-secrets.mdx deleted file mode 100644 index dce6a402..00000000 --- a/cerebrium/conduit/advanced-functionality/using-secrets.mdx +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: "Using Secrets" ---- - -You may want to use API keys, passwords or other sensitive information in your application, -but you don't want it stored in your code. -If this is the case, it would be best to make use of our Secrets functionality. -Secrets are stored encrypted on our servers and are only decrypted when your model is run. - -```python -from cerebrium import get_secret - -def predict(item, run_id, logger): - item = Item(**item) - - logger.info(f"Run ID: {run_id}") - - my_secret = get_secret('my-secret') - logger.info("my_secret: " + my_secret) - - return {"result": f"Your secret is {my_secret}"} -``` - -### Managing Secrets - -Secrets are created, updated and deleted in your dashboard. - -![Secrets](/images/secrets_dashboard.png) - -Secrets are shared across all models in your project. - -## Local Development - -When running your model locally, you can use still make use of Secrets. 
-Store them in a file called `secrets.json` or `secrets.yaml` in the root of your project and add them to your .gitignore. -These files will not be uploaded to Cerebrium. - -### secret.yaml - -```yaml -"my-yaml-secret": "this value comes from yaml" -``` - -### secret.json - -```json -{ - "my-json-secret": "this value comes from json" -} -``` - - - Secrets are loaded on model start, you will need to redeploy your model for - changes to take effect. - diff --git a/cerebrium/conduit/advanced-functionality/using-webhook-endpoints.mdx b/cerebrium/conduit/advanced-functionality/using-webhook-endpoints.mdx deleted file mode 100644 index f4fac22b..00000000 --- a/cerebrium/conduit/advanced-functionality/using-webhook-endpoints.mdx +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: "Using Webhook Endpoints for Async Results" ---- - -Webhook endpoints can be used to obtain asynchronous results from your requests. This is useful when your model takes longer than the max timeout `(180 seconds)` or when you don't want to wait for the model to finish processing the request and prefer receiving the response once the processing is completed. - -To use this feature, simply include the `webhook_endpoint` field in your request body, assigning it to your desired URL. As soon as this is done, your requests will return immediately with the `run_id`. After processing, a **POST** request containing the results and `run_id` will be made to the provided webhook_endpoint URL. - - - This feature is available on all model types: custom & pre-built. If you have - any questions on how to use it please reach out to us on Slack, Discord or{" "} - support@cerebrium.ai. - - -## Example curl request - -Here is an example curl request with the `webhook_endpoint` field included: - -```bash -curl --location 'https://run.cerebrium.ai/dreambooth-webhook/predict' \ ---header 'Authorization: ' \ ---header 'Content-Type: application/json' \ ---data '{ - "prompt": "An astronaut riding a horse in space", - "webhook_endpoint": "https://your-webhook-url.com" -}' -``` - -## What to expect - -When your model has finished processing, it will send a POST request to `https://your-webhook-url.com` with the results and the `run_id`. - -The response returned from your initial request will look something like this: - -```json -{ - "run_id": "dummy-run-id-9ac612631200", - "message": "We will return the images to the endpoint provided when the task is complete." -} -``` - -The result sent to the webhook will look something like this: - -```json -{ - "run_id": "dummy-run-id-9ac612631200", - "run_time_ms": 5980.915546417236, - "message": "Successfully generated images", - "result": ["..."] -} -``` - -To test webhook endpoints, https://webhook.site/ provides a great platform. You can create a free URL to receive your POST requests and inspect the results. - - - Make sure that the "webhook_endpoint" provided is able to handle POST requests - and has the appropriate configuration to receive the response data. - diff --git a/cerebrium/conduit/examples/huggingface/onnx-conversion.mdx b/cerebrium/conduit/examples/huggingface/onnx-conversion.mdx deleted file mode 100644 index 7325f00f..00000000 --- a/cerebrium/conduit/examples/huggingface/onnx-conversion.mdx +++ /dev/null @@ -1,114 +0,0 @@ ---- -title: "Convert to Onnx" -description: "This example provides the steps to convert a Hugging Face model to Onnx format in order to deploy on Cerebrium." 
---- - -## Intro - -Cerebrium supports the deployment of Onnx models where ONNX is an open format built to represent machine learning models. ONNX defines -a common set of operators — the building blocks of machine learning and deep learning models — and a common file format to enable AI developers to use models -with a variety of frameworks, tools, runtimes, and compilers. HuggingFace allows you to export transformer models to a serialized format such as Onnx through their -transformers.onnx package or their optimum.exporters.onnx package. - -Hugging Face has a list of ready-made configurations that you can export with one line — you can see the list [here](https://huggingface.co/docs/transformers/serialization). -In this quickstart I am going to show you how to export a LayoutLMv3 model which we will use for token classification — you will see a ready-made configuration exists in the list. -I will be using the notebook [here]() to fine-tune the LayoutLMv3 model - -By the end of this guide, you'll have an API endpoint of a deployed LayoutLMv3 model that can handle any scale -of traffic by running inference on serverless CPU's/GPUs. - -### Project set up - -Before building, you need to set up a Cerebrium account. This is as simple as -starting a new Project in Cerebrium and copying the API key. This will be used -to authenticate all calls for this project. - -### Create a project - -1. Go to [dashboard.cerebrium.ai](https://dashboard.cerebrium.ai) -2. Sign up or Login -3. Navigate to the API Keys page -4. You will need your private API key for deployments. Click the copy button to copy it to your clipboard - -![API Key](/images/cortex/api_keys_private_key.png) - -### Export model to Onnx - -```bash -pip install transformers[onnx] -``` - -Once the transformers Onnx package is installed, you want to convert either one of the existing models on Hugging Face or a fine-tuned model of yours. In this example, -I am exporting a model I fine-tuned with the LayoutLMv3 model as a base. The transformers ONNX model picks up the model format. We have to export feature token-classification to get the logits - -```python -!python -m transformers.onnx --model "/content/test/checkpoint-1000" --feature token-classification --atol 5 onnx/ -``` - -![Conversion](/images/quickstarts/convert-hf-onnx-1.png) - -In the above line of code I am doing the following: - -- Specifying the location of my fine-tuned model checkpoint. This could also be the model ID on Hugging Face such as microsoft/layoutlmv3-base. -- I specify the feature I want to export the model with. Since we are doing classification, I want to get the logits for the model so need to use the model that has that topology. - In this case, it would be token-classification but could be causal-lm etc. -- When you convert a model to Onnx, there could be a minor change in accuracy, and so you need to give a number which is the absolute difference tolerance when validating the model. -- Lastly, we specify the file path of where we would like to store this model. - -### Test model locally and deploy - -Before deploying the model to Cerebrium, I like to test locally, so I know I shouldn't have any problems in production. You could also test the model running OnnxInferenceSession. 
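If you do want that quick check with ONNX Runtime directly, a minimal sketch (assuming `onnxruntime` is installed and the export above produced `onnx/model.onnx`) might look like this:

```python
# Optional sanity check of the exported model with ONNX Runtime
import onnxruntime as ort

session = ort.InferenceSession("onnx/model.onnx", providers=["CPUExecutionProvider"])

# Inspect the input names/shapes the graph expects before wiring up the payload
for graph_input in session.get_inputs():
    print(graph_input.name, graph_input.shape, graph_input.type)
```

You can then call `session.run(None, feed)` with a dict mapping those input names to the numpy arrays produced by the processor in the next step.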
- -First, we prepare the payload - -```python -example = dataset["test"][0] -image = example["image"] -words = example["tokens"] -boxes = example["bboxes"] -word_labels = example["ner_tags"] - -##make sure return tensors is np -encoding = processor(image, words, boxes=boxes, return_tensors="np") -payload = {x: dict(encoding)[x].tolist() for x in dict(encoding).keys()} -``` - -Then we run it locally - -```python -from cerebrium import model_type, Conduit -conduit = Conduit('hf-model', "", [(model_type.ONNX, "onnx/model.onnx")]) -conduit.load('./onnx/') -result = conduit.run([payload]) -``` - -![Local run](/images/quickstarts/convert-hf-onnx-2.png) - -Once we have tested it is giving us the correct output we can deploy the model and start calling from production - -```python -conduit.deploy() -``` - -![Deployed](/images/quickstarts/hf-2.png) - -Your model is now deployed and ready for inference! -Navigate to the dashboard and on the Models page you will see your model. - -You can run inference using `curl`. - -```bash -curl --location --request POST '' \ ---header 'Authorization: ' \ ---header 'Content-Type: application/json' \ ---data-raw '[]' -``` - -Navigate back to the dashboard and click on the name of the model you just -deployed. You will see an API call was made and the inference time. From your -dashboard you can monitor your model, roll back to previous versions and see -traffic. - -With one line of code, your converted model was deployed in seconds with automatic -versioning, monitoring and the ability to scale based on traffic spikes. Try -deploying your own model now or check out our other frameworks. diff --git a/cerebrium/conduit/examples/huggingface/transformers.mdx b/cerebrium/conduit/examples/huggingface/transformers.mdx deleted file mode 100644 index b248a093..00000000 --- a/cerebrium/conduit/examples/huggingface/transformers.mdx +++ /dev/null @@ -1,173 +0,0 @@ ---- -title: "HuggingFace Transformers" -description: "This example provides the steps to deploy a HuggingFace Transformers model -using Cerebrium." ---- - -## Intro - -Cerebrium supports [HuggingFace Transformers](https://huggingface.co/) through the use of [`pipeline`](https://huggingface.co/docs/transformers/main_classes/pipelines). -By using `pipeline` we are able to load both the tokenizer and model easily within the same object. - -Using the Transformers library, you have instant access to pre-trained models for NLP tasks. If you would like to deploy HuggingFace models that aren't supported -by pipeline, I suggest you look at using our [custom python functionality](/quickstarts/custom) - -By the end of this guide, you'll have an API endpoint that can handle any scale -of traffic by running inference on serverless CPUs/GPUs. - -### Project set up - -Before building you need to set up a Cerebrium account. This is as simple as -starting a new Project in Cerebrium and copying the API key. This will be used -to authenticate all calls for this project. - -### Create a project - -1. Go to [dashboard.cerebrium.ai](https://dashboard.cerebrium.ai) -2. Sign up or Login -3. Navigate to the API Keys page -4. You will need your private API key for deployments. Click the copy button to copy it to your clipboard - -![API Key](/images/cortex/api_keys_private_key.png) - -### Develop model - -Now navigate to where your model code is stored. This could be in a notebook or -in a plain `.py` file. 
- -To start, you should install the Cerebrium framework running the following -command in your notebook or terminal - -```bash -pip install --upgrade cerebrium -``` - -Using a HuggingFace model is as simple as importing the `HUGGINGFACE_PIPELINE` model type and passing in the _task_ and _model id_. -In this case, we will use the `EleutherAI/gpt-neo-125M` model to generate text. The call signature follows the **exactly** same format as the [pipeline](https://huggingface.co/docs/transformers/main_classes/pipelines) call, -and so you should specify a minimum of either `task` or `model` in your `model_initialization` parameter. However, you can also pass in any other parameters that you would normally pass to the pipeline call() for the specific model you have chosen. - -```python -from cerebrium import Conduit, model_type, hardware - -# Create a conduit -c = Conduit( - name='my-hf-gpt-model', - api_key='', - flow=[ - (model_type.HUGGINGFACE_PIPELINE, {"task": "text-generation", "model": "EleutherAI/gpt-neo-125M", "max_new_tokens": 100}), - ] -) -``` - -Then, simply call the `deploy` method to deploy your model. HuggingFace models will typically take a few minutes to deploy. - -```python -c.deploy() -``` - -![Deployed Model](/images/quickstarts/hf-2.png) - -Your model is now deployed and ready for inference! -Navigate to the dashboard and on the Models page you will see your model. - -You can run inference using `curl`. - -```bash -curl --location --request POST '' \ ---header 'Authorization: ' \ ---header 'Content-Type: application/json' \ ---data-raw '[]' -``` - -Your input data should be in the same shape and typing that the HF pipeline would accept for the particular model you have chosen. -In this case, we should pass in a string. For input data of `["this is a test"]`, the response will be: - -![ThunderClient Response](/images/quickstarts/hf-3.png) - -Navigate back to the dashboard and click on the name of the model you just -deployed. You will see an API call was made and the inference time. From your -dashboard you can monitor your model, roll back to previous versions and see -traffic. - -With one line of code, your model was deployed in seconds with automatic -versioning, monitoring and the ability to scale based on traffic spikes. Try -deploying your own model now or check out our other frameworks. - -### Adding additional parameters to your pipeline - -In some cases, you may need to parse additional parameters to your pipeline. Say your model is too large and so you need to offload the model as well as quantise it to 8bit. Additionally, access to the model requires your authorization token. Normally, when using the huggingface libraries, you would do this as follows: - -```python -hf_auth_token="" -model_kwargs = { - "torch_dtype": "torch.float16", - "offload_folder":"./offload", - "offload_state_dict":True, - "device_map":"auto", - } - -pipeline( - task="text-generation", - model="mosaicml/mpt-7b", - model_kwargs=model_kwargs, - use_auth_token=hf_auth_token, -) -``` - -With Cerebrium's conduit, we've kept things simple so that you can easily parse any parameters you would typically place in your pipeline(). 
When deploying with the conduit, the implementation of the example above is as easy as: - -```python -model_kwargs = { - "torch_dtype": "torch.float16", - "offload_folder": "./offload", - "offload_state_dict": True, - "device_map": "auto", -} -hf_auth_token = "" - -c = Conduit( - "your-name-for-your-model", - "", - [ - ( - model_type.HUGGINGFACE_PIPELINE, - { - "task": "text-generation", - "model": "mosaicml/mpt-7b", - "model_kwargs": model_kwargs, - "use_auth_token": hf_auth_token, - }, - ), - ], -) -``` - -### Changing runtime parameters - -Sometimes, you may want to change the runtime parameters of your model periodically. For example, you may want to change the number of tokens generated by the GPT-Neo model. -To do this, you can simply change the input to your deployed model to be the following shape from a list: - -```json -{ - "data": "this is a test", - "parameters": { - "max_new_tokens": 100 - } -} -``` - -Therefore, your new `curl` command would be: - -```bash -curl --location --request POST '' \ ---header 'Authorization: ' \ ---header 'Content-Type: application/json' \ ---data-raw '{ - "data": "this is a test", - "parameters": { - "max_new_tokens": 100 - } -}' -``` - -Note, if you would like to include different parameters from the result of a processing function, you should follow the dictionary structure above, and include the parameters you would like to pass in the `parameters` key, rather than the raw input data list. diff --git a/cerebrium/conduit/examples/onnx.mdx b/cerebrium/conduit/examples/onnx.mdx deleted file mode 100644 index 07dcc964..00000000 --- a/cerebrium/conduit/examples/onnx.mdx +++ /dev/null @@ -1,213 +0,0 @@ ---- -title: "Onnx" -description: "This example provides the steps to deploy a simple Onnx model from scratch -using Cerebrium." ---- - -## Intro - -ONNX is an open format built to represent machine learning models. ONNX defines -a common set of operators - the building blocks of machine learning and deep -learning models - and a common file format to enable AI developers to use models -with a variety of frameworks, tools, runtimes, and compilers. - -By the end of this guide, you'll have an API endpoint that can handle any scale -of traffic by running inference on serverless CPUs/GPUs. - -### Project set up - -Before building you need to set up a Cerebrium account. This is as simple as -starting a new Project in Cerebrium and copying the API key. This will be used -to authenticate all calls for this project. - -### Create a project - -1. Go to [dashboard.cerebrium.ai](https://dashboard.cerebrium.ai) -2. Sign up or Login -3. Navigate to the API Keys page -4. You will need your private API key for deployments. Click the copy button to copy it to your clipboard - -![API Key](/images/cortex/api_keys_private_key.png) - -### Develop model - -Now navigate to where your model code is stored. This could be in a notebook or -in a plain `.py` file. - -To start, you should install the Cerebrium framework by running the following -command in your notebook or terminal. You will need the optional dependency -`onnxruntime` to run the model locally. - -```bash -pip install --upgrade cerebrium[onnxruntime] -``` - -If you are on a GPU machine you can also install the GPU version of the -runtime instead. - -```bash -pip install --upgrade cerebrium[onnxruntime-gpu] -``` - -Copy and paste our code below. This creates a simple Convolutional Neural -Network. This code could be replaced by any Pytorch model. Make sure you have -the required libraries installed. 
- -```bash -import torch -import torch.nn as nn -import torchvision -import torchvision.transforms as transforms - -batch_size = 64 -num_classes = 10 -learning_rate = 0.001 -num_epochs = 2 - -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - -# Use transforms.compose method to reformat images for modeling, -# and save to variable all_transforms for later use -all_transforms = transforms.Compose([transforms.Resize((32,32)), - transforms.ToTensor(), - transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], - std=[0.2023, 0.1994, 0.2010]) - ]) -# Create Training dataset -train_dataset = torchvision.datasets.CIFAR10(root = './data', - train = True, - transform = all_transforms, - download = True) - -# Create Testing dataset -test_dataset = torchvision.datasets.CIFAR10(root = './data', - train = False, - transform = all_transforms, - download=True) - -# Instantiate loader objects to facilitate processing -train_loader = torch.utils.data.DataLoader(dataset = train_dataset, - batch_size = batch_size, - shuffle = True) - -test_loader = torch.utils.data.DataLoader(dataset = test_dataset, - batch_size = batch_size, - shuffle = True) - -# Create Neural Network -class ConvNeuralNet(nn.Module): - def __init__(self, num_classes): - super(ConvNeuralNet, self).__init__() - self.conv_layer1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3) - self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3) - self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2) - - self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3) - self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3) - self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2) - - self.fc1 = nn.Linear(1600, 128) - self.relu1 = nn.ReLU() - self.fc2 = nn.Linear(128, num_classes) - - def forward(self, x): - out = self.conv_layer1(x) - out = self.conv_layer2(out) - out = self.max_pool1(out) - - out = self.conv_layer3(out) - out = self.conv_layer4(out) - out = self.max_pool2(out) - - out = out.reshape(out.size(0), -1) - - out = self.fc1(out) - out = self.relu1(out) - out = self.fc2(out) - return out - -model = ConvNeuralNet(num_classes) - -# Create loss function and optimizer -criterion = nn.CrossEntropyLoss() - -optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9) - -total_step = len(train_loader) - -# Train our model -for epoch in range(num_epochs): - for i, (images, labels) in enumerate(train_loader): - images = images.to(device) - labels = labels.to(device) - - outputs = model(images) - loss = criterion(outputs, labels) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - - print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item())) - -with torch.no_grad(): - correct = 0 - total = 0 - for images, labels in train_loader: - images = images.to(device) - labels = labels.to(device) - outputs = model(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels).sum().item() - - print('Accuracy of the network on the {} train images: {} %'.format(50000, 100 * correct / total)) - -# convert to onnx -input_names = ["input"] -output_names = ["output"] -torch.onnx.export( - model, - images, - "pytorch.onnx", - verbose=True, - input_names=input_names, - output_names=output_names, -) -``` - -In the last line of code, you will see we exported the Pytorch model to an Onnx -format. This is all you need to deploy your model to Cerebrium! 
You can then import the `deploy()` function from the Cerebrium framework. I used the CloudPickle function to save my model below. - -```bash -from cerebrium import deploy, model_type -output_flow = deploy((model_type.ONNX, "pytorch.onnx"),"onnx-pytorch", "") -``` - -![Deployed Model](/images/quickstarts/onnx-2.png) - -Your model is now deployed and ready for inference all in under **10 seconds**! -Navigate to the dashboard and on the Models page, you will see your model. - -You can run inference using `curl` - -```bash -curl --location --request POST '' \ ---header 'Authorization: ' \ ---header 'Content-Type: application/json' \ ---data-raw '[]' -``` - -Your input data should be a **Dict** of the input variables you defined when you exported the model to Onnx. So make sure your input objects correspond otherwise you will get an error. The response will be: - -![Onnx Postman Response](/images/quickstarts/onnx-3_1.png) - -Navigate back to the dashboard and click on the name of the model you just -deployed. You will see an API call was made and the inference time. From your dashboard, you can monitor your model, roll back to previous versions and see -traffic. - -![XGB Monitoring](/images/quickstarts/onnx-4.png) - -With one line of code, your model was deployed in seconds with automatic -versioning, monitoring and the ability to scale based on traffic spikes. Try -deploying your own model now or check out our other frameworks. diff --git a/cerebrium/conduit/examples/pytorch.mdx b/cerebrium/conduit/examples/pytorch.mdx deleted file mode 100644 index c29e4984..00000000 --- a/cerebrium/conduit/examples/pytorch.mdx +++ /dev/null @@ -1,205 +0,0 @@ ---- -title: "Pytorch" -description: "This example provides the steps to deploy a simple Pytorch model (CNN) from -scratch using Cerebrium." ---- - -## Intro - -By the end of this guide, you'll have an API endpoint that can handle any scale -of traffic by running inference on serverless CPUs/GPUs. - -## Project set up - -Before building you need to set up a Cerebrium account. This is as simple as -starting a new Project in Cerebrium and copying the API key. This will be used -to authenticate all calls for this project. - -### Create a project - -1. Go to [dashboard.cerebrium.ai](https://dashboard.cerebrium.ai) -2. Sign up or Login -3. Navigate to the API Keys page -4. You will need your private API key for deployments. Click the copy button to copy it to your clipboard - -![API Key](/images/cortex/api_keys_private_key.png) - -### Develop model - -Now navigate to where your model code is stored. This could be in a notebook or -in a plain .py file. - -Install the Cerebrium framework by running the following command in your notebook -or terminal - -```bash -pip install --upgrade cerebrium -``` - -Copy and paste our code below. This creates a simple Convolutional Neural -Network. This code could be replaced by any Pytorch model. Make sure you have -the required libraries installed. 
- -```bash -import torch -import torch.nn as nn -import torchvision -import torchvision.transforms as transforms - -batch_size = 64 -num_classes = 10 -learning_rate = 0.001 -num_epochs = 2 - -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - -# Use transforms.compose method to reformat images for modeling, -# and save to variable all_transforms for later use -all_transforms = transforms.Compose([transforms.Resize((32,32)), - transforms.ToTensor(), - transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], - std=[0.2023, 0.1994, 0.2010]) - ]) -# Create Training dataset -train_dataset = torchvision.datasets.CIFAR10(root = './data', - train = True, - transform = all_transforms, - download = True) - -# Create Testing dataset -test_dataset = torchvision.datasets.CIFAR10(root = './data', - train = False, - transform = all_transforms, - download=True) - -# Instantiate loader objects to facilitate processing -train_loader = torch.utils.data.DataLoader(dataset = train_dataset, - batch_size = batch_size, - shuffle = True) - -test_loader = torch.utils.data.DataLoader(dataset = test_dataset, - batch_size = batch_size, - shuffle = True) - -# Create Neural Network -class ConvNeuralNet(nn.Module): - def __init__(self, num_classes): - super(ConvNeuralNet, self).__init__() - self.conv_layer1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3) - self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3) - self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2) - - self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3) - self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3) - self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2) - - self.fc1 = nn.Linear(1600, 128) - self.relu1 = nn.ReLU() - self.fc2 = nn.Linear(128, num_classes) - - def forward(self, x): - out = self.conv_layer1(x) - out = self.conv_layer2(out) - out = self.max_pool1(out) - - out = self.conv_layer3(out) - out = self.conv_layer4(out) - out = self.max_pool2(out) - - out = out.reshape(out.size(0), -1) - - out = self.fc1(out) - out = self.relu1(out) - out = self.fc2(out) - return out - -model = ConvNeuralNet(num_classes) - -# Create loss function and optimizer -criterion = nn.CrossEntropyLoss() - -optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9) - -total_step = len(train_loader) - -# Train our model -for epoch in range(num_epochs): - for i, (images, labels) in enumerate(train_loader): - images = images.to(device) - labels = labels.to(device) - - outputs = model(images) - loss = criterion(outputs, labels) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - - print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item())) - -with torch.no_grad(): - correct = 0 - total = 0 - for images, labels in train_loader: - images = images.to(device) - labels = labels.to(device) - outputs = model(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels).sum().item() - - print('Accuracy of the network on the {} train images: {} %'.format(50000, 100 * correct / total)) - -## Save with Cloudpickle -import cloudpickle -with open("torch_model.pkl", "wb") as f: - cloudpickle.dump(model, f) - -## Save with TorchScript -scripted_model = torch.jit.script(model) -scripted_model.save("torch_model.pt") -``` - -In the last line of code, there are two ways you can save the model. This is all -you need to deploy your model to Cerebrium! 
You can then import the `deploy()` -function from the Cerebrium framework. I used the CloudPickle function to save -my model below. - -```bash -from cerebrium import deploy, model_type - -endpoint = deploy((model_type.TORCH, "torch_model.pkl"), "torch-model-cp", "") -``` - -![Deployed Model](/images/quickstarts/pytorch-2.png) - -Your model is now deployed and ready for inference all in under **10 seconds**! -Navigate to the dashboard and on the Models page, you will see your model. - -You can run inference using `curl` or the `model_api_request`()` Python helper function. The helper function is useful for large data. - -```bash -curl --location --request POST '' \ ---header 'Authorization: ' \ ---header 'Content-Type: application/json' \ ---data-raw '[]' -``` - -```bash -from cerebrium import model_api_request - -response = model_api_request("") -print(response["data"]) -``` - -![Pytorch Model Response](/images/quickstarts/pytorch-3.png) - -Navigate back to the dashboard and click on the name of the model you just -deployed. You will see an API call was made and the inference time. From your dashboard, you can monitor your model, roll back to previous versions and see -traffic. - -![Pytorch Monitoring](/images/quickstarts/pytorch-4.png) - -With one line of code, your model was deployed in seconds with automatic -versioning, monitoring and the ability to scale based on traffic spikes. Try -deploying your own model now or check out our other frameworks. diff --git a/cerebrium/conduit/examples/scikit.mdx b/cerebrium/conduit/examples/scikit.mdx deleted file mode 100644 index 34b13059..00000000 --- a/cerebrium/conduit/examples/scikit.mdx +++ /dev/null @@ -1,133 +0,0 @@ ---- -title: "Scikit" -description: "This example provides the steps to deploy a simple Sci-Kit Learn model from -scratch using Cerebrium." ---- - -## Intro - -By the end of this guide, you'll have an API endpoint that can handle any scale -of traffic by running inference on serverless CPUs/GPUs. - -## Project set up - -Before building you need to set up a Cerebrium account. This is as simple as -starting a new Project in Cerebrium and copying the API key. This will be used -to authenticate all calls for this project. - -### Create a project - -1. Go to [dashboard.cerebrium.ai](https://dashboard.cerebrium.ai) -2. Sign up or Login -3. Navigate to the API Keys page -4. You will need your private API key for deployments. Click the copy button to copy it to your clipboard - -![API Key](/images/cortex/api_keys_private_key.png) - -### Develop model - -To start, you should install the Cerebrium framework by running the following -command in your notebook or terminal - -```bash -pip install --upgrade cerebrium -``` - -Now navigate to where your model code is stored. This could be in a notebook or a `.py` file. - -Copy and paste our code below. This creates a simple random forest classifier on the Iris dataset. -This code could be replaced by any Sklearn model. Make sure you have -the required libraries installed. - -```bash -from sklearn.datasets import load_iris -from sklearn.ensemble import RandomForestClassifier -import pickle - -iris = load_iris() -X, y = iris.data, iris.target - -rf = RandomForestClassifier() -rf.fit(X, y) - -# Save to pickle -filename = 'iris.pkl' -pickle.dump(rf, open(filename, 'wb')) -``` - -In the last line of code, you will see we pickle the file. This is all -you need to deploy your model to Cerebrium! You can then import the `deploy()` -function from the Cerebrium framework. 
- -```bash -from cerebrium import deploy, model_type - -name_for_your_deployment= "sk-test-model" -endpoint = deploy((model_type.SKLEARN_CLASSIFIER, "iris.pkl"), name_for_your_deployment , "") -``` - -![Deployed Model](/images/quickstarts/scikit-1.png) - - - Your result format will change for the `_classifier` model types. The - `sklearn_classifier` will return a `result` object containing the probability - distribution for the predicted output classes, rather than the argmax of the - distribution. This is to allow you flexibility in how you want to handle the - output of your model for classification. For example, you may want to return - the top 3 predictions for your model, or you may want to return the top 3 - predictions with a minimum probability threshold. This is up to you. - - -Your model is now deployed and ready for inference all in under **10 seconds**! -Navigate to the dashboard and on the Models page, you will see your model. - -You can run inference using `curl` - -```bash -curl --location --request POST '' \ ---header 'Authorization: ' \ ---header 'Content-Type: application/json' \ ---data-raw '[[5.1, 3.5, 1.4, 0.2]]' -``` - -and your response should be: - -![Scikit Postman Response](/images/quickstarts/scikit-2.png) - -Navigate back to the dashboard and click on the name of the model you just -deployed. You will see an API call was made and the inference time. From your dashboard, you can monitor your model, roll back to previous versions and see traffic. - -![Scikit Monitoring](/images/quickstarts/scikit-3.png) - -With one line of code, your model was deployed in seconds with automatic -versioning, monitoring and the ability to scale based on traffic spikes. Try -deploying your own model now or check out our other frameworks. - -## Potential Pitfalls - -During your deployment, you may encounter an error along the lines of: - -```bash -ValueError: Couldn't import 'worker': node array from the pickle has an incompatible dtype: -- expected: {'names': ['left_child', 'right_child', 'feature', ... -- got : [('left_child', ' -``` - -Then, when you deploy your model, modify your conduit deploy line to include the requirements file: - -```python -from cerebrium import Conduit, model_type, hardware - -c = Conduit((model_type.SKLEARN_CLASSIFIER,,"iris.pkl"), '', '', requirements_file='requirements.txt') -c.deploy() - -``` diff --git a/cerebrium/conduit/examples/spacy.mdx b/cerebrium/conduit/examples/spacy.mdx deleted file mode 100644 index 9010085c..00000000 --- a/cerebrium/conduit/examples/spacy.mdx +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: "Spacy" -description: "This example provides the steps to deploy a simple Spacy model (from scratch!) using Cerebrium." ---- - -## Intro - -spaCy is an open-source software library for advanced natural language processing and excels at large-scale information extraction tasks -By the end of this guide you'll have a spaCy API endpoint that can handle any scale of traffic by running inference on serverless CPUs/GPUs. - -## Project set up - -Before building you need to set up a Cerebrium account. This is as simple as -starting a new Project in Cerebrium and copying the API key. This will be used -to authenticate all calls for this project. - -### Create a project - -1. Go to [dashboard.cerebrium.ai](https://dashboard.cerebrium.ai) -2. Sign up or Login -3. Navigate to the API Keys page -4. You will need your private API key for deployments. 
Click the copy button to copy it to your clipboard - -![API Key](/images/cortex/api_keys_private_key.png) - -### Prepare model - -We are going to be creating a custom-named entity recognition (NER) model. -This code could be replaced by any Spacy model. Make sure you have -the required libraries installed - in this case, we are just using **Spacy 3**. -First, let us create our training and evaluation data: - -```json -[ - "Who is Shaka Khan?", - { - "entities": [[7, 17, "PERSON"]] - } -] -``` - -```json -[ - "Who is Michael?", - { - "entities": [[7, 14, "PERSON"]] - } -] -``` - - - Spacy 3 no longer accepts JSON format so we have to convert it to their Spacy - format - - -```python -import srsly -import typer -import warnings -from pathlib import Path - -import spacy -from spacy.tokens import DocBin - - -def convert(lang: str, input_path: Path, output_path: Path): - nlp = spacy.blank(lang) - db = DocBin() - for text, annot in srsly.read_json(input_path): - doc = nlp.make_doc(text) - ents = [] - for start, end, label in annot["entities"]: - span = doc.char_span(start, end, label=label) - if span is None: - msg = f"Skipping entity [{start}, {end}, {label}] in the following text because the character span '{doc.text[start:end]}' does not align with token boundaries:\n\n{repr(text)}\n" - warnings.warn(msg) - else: - ents.append(span) - doc.ents = ents - db.add(doc) - db.to_disk(output_path) - -if __name__ == "__main__": - typer.run(convert) -``` - -Now run this script with the location of your training data and again with your evaluation data. -`!python convert.py en train.json train.spacy` - -When creating a Spacy model we can start with the standard template and edit it. You can run the following code to do this: -`!python -m spacy init config --lang en --pipeline ner config.cfg --force` - -Lastly, we can train our model: -`!python -m spacy train config.cfg --output ./training/ --paths.train train.spacy --paths.dev eval.spacy --training.eval_frequency 10 --training.max_steps 100 --gpu-id -1` - -### Developing model - -To start, you should install the Cerebrium framework by running the following -command in your notebook or terminal - -```bash -pip install --upgrade cerebrium -``` - -One of the main differences with a spaCy model deployed on Cerebrium, is it is **mandatory** to include a post-processing function which uses your trained model to manipulate text. -This post-processing function has to return a list or string to be accepted. This is what will be returned to you via the API. Below, we show a very basic post-processing function -and deploy our spaCy model. - -When you train a spaCy model, it will create a folder with a tokenizer, meta.json, config.cfg etc. You will need to specify the path of this folder when deploying your model. - -```bash -from cerebrium import Conduit, model_type - -###doc is the model returned from spacy.load() -def postSpacy(doc): - test = [] - for token in doc: - test.append(token.text) - return test - -conduit = Conduit("spacy-model" , "", (model_type.SPACY, "path/to/trained/model/", {"post": postSpacy})) -conduit.deploy() -``` - -![Deployed Model](/images/quickstarts/spacy-1.png) - -Your model is now deployed and ready for inference all in under **10 seconds**! -Navigate to the dashboard and on the Models page, you will see your model. 
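If you prefer calling the deployed endpoint from Python rather than the `curl` command shown next, a minimal sketch with the `requests` library might look like this (the endpoint URL and API key are placeholders copied from your dashboard):

```python
# Sketch: call the deployed spaCy endpoint from Python
import requests

response = requests.post(
    "<ENDPOINT>",
    headers={"Authorization": "<API_KEY>", "Content-Type": "application/json"},
    json=["This is us testing the Spacy model"],
)
print(response.json())
```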
- -You can run inference using `curl` - -```bash -curl --location --request POST '' \ ---header 'Authorization: ' \ ---header 'Content-Type: application/json' \ ---data-raw '["This is us testing the Spacy model]' -``` - -The response will be: - -![Spacy Postman Response](/images/quickstarts/spacy-2.png) - -Navigate back to the dashboard and click on the name of the model you just -deployed. You will see an API call was made and the inference time. From your dashboard, you can monitor your model, roll back to previous versions and see traffic. - -With one line of code, your model was deployed in seconds with automatic -versioning, monitoring and the ability to scale based on traffic spikes. Try -deploying your own model now or check out our other frameworks. diff --git a/cerebrium/conduit/examples/tensorflow.mdx b/cerebrium/conduit/examples/tensorflow.mdx deleted file mode 100644 index 3822e121..00000000 --- a/cerebrium/conduit/examples/tensorflow.mdx +++ /dev/null @@ -1,133 +0,0 @@ ---- -title: "Tensorflow" -description: "This example provides the steps to deploy a simple Tensorflow model from scratch -using Cerebrium." ---- - -## Intro - -Cerebrium supports Tensorflow, Keras and TFLite through its support for ONNX - we will convert our model from Tensorflow to ONNX. - -ONNX is an open format built to represent machine learning models. ONNX defines -a common set of operators - the building blocks of machine learning and deep -learning models - and a common file format to enable AI developers to use models -with a variety of frameworks, tools, runtimes, and compilers. - -By the end of this guide, you'll have an API endpoint that can handle any scale -of traffic by running inference on serverless CPUs/GPUs. - -### Project set up - -Before building you need to set up a Cerebrium account. This is as simple as -starting a new Project in Cerebrium and copying the API key. This will be used -to authenticate all calls for this project. - -### Create a project - -1. Go to [dashboard.cerebrium.ai](https://dashboard.cerebrium.ai) -2. Sign up or Login -3. Navigate to the API Keys page -4. You will need your private API key for deployments. Click the copy button to copy it to your clipboard - -![API Key](/images/cortex/api_keys_private_key.png) - -### Develop model - -Now navigate to where your model code is stored. This could be in a notebook or -in a plain `.py` file. - -To start, you should install the Cerebrium framework by running the following -command in your notebook or terminal - -```bash -pip install --upgrade cerebrium -``` - -Copy and paste our code below. This creates a simple 3-layer Neural Network that classifies flowers from the Iris dataset into 1 of 3 classes. -This code could be replaced by any Tensorflow model. -Make sure you have the required libraries installed. 
- -```bash -pip install tensorflow tf2onnx onnx -``` - -```bash -import tensorflow as tf -from tensorflow.keras import layers -import pandas as pd -import numpy as np -from tensorflow.keras import datasets, layers, models -from tensorflow.keras.utils import to_categorical -from sklearn.datasets import load_iris -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split - -iris = load_iris() -X, y = iris.data, iris.target -encoder = LabelEncoder() -y1 = encoder.fit_transform(y) -Y = pd.get_dummies(y1).values - -X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0) - -model = tf.keras.Sequential([ - tf.keras.layers.Dense(10, activation='relu'), - tf.keras.layers.Dense(10, activation='relu'), - tf.keras.layers.Dense(3, activation='softmax') - ]) -model.compile(optimizer='rmsprop', - loss='categorical_crossentropy', - metrics=['accuracy']) - -#Training -model.fit(X_train, y_train, batch_size=50, epochs=100) - -#Convert to ONNX -import tf2onnx -import onnx - -input_signature = [tf.TensorSpec([1, 4], tf.float32, name='x')] -onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature, opset=13) -onnx.save(onnx_model, "model.onnx") -``` - -In the last 3 lines of code, you will see we: - -- Define the input signature of our model. This can be a tf.TensorSpec or a numpy array defining the shape/dtype of the input. Our input for the Iris dataset is a (1,4) array, and we called the input variable 'x'. -- Use the [tf2onnx](https://github.com/onnx/tensorflow-onnx#python-api-reference) library to convert our model to the ONNX format. We define the model we would like to convert, the input signature and the opset. The opset is usually just the latest. -- Save our model to a `.onnx` file which is what we will use to deploy to Cerebrium. - -This is all you need to deploy your model to Cerebrium! You can then import the `deploy()` function from the Cerebrium framework. - -```bash -from cerebrium import deploy, model_type -output_flow = deploy((model_type.ONNX, "model.onnx"),"onnx-tensorflow", "") -``` - -![Deployed Model](/images/quickstarts/tf-onnx-2.png) - -Your model is now deployed and ready for inference all in under **10 seconds**! -Navigate to the dashboard and on the Models page, you will see your model. - -You can run inference using `curl` - -```bash -curl --location --request POST '' \ ---header 'Authorization: ' \ ---header 'Content-Type: application/json' \ ---data-raw '[]' -``` - -Your input data should be a **Dict** of the input variables you defined when you exported the model to Onnx. So make sure your input objects correspond otherwise you will get an error. The response will be: - -![Onnx Postman Response](/images/quickstarts/tf-onnx-3.png) - -Navigate back to the dashboard and click on the name of the model you just -deployed. You will see an API call was made and the inference time. From your dashboard, you can monitor your model, roll back to previous versions and see -traffic. - -![XGB Monitoring](/images/quickstarts/tf-onnx-4.png) - -With one line of code, your model was deployed in seconds with automatic -versioning, monitoring and the ability to scale based on traffic spikes. Try -deploying your own model now or check out our other frameworks. 
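As an optional extra step (not in the original guide), you can sanity-check the exported `model.onnx` locally with `onnxruntime` before deploying. The input name `x` and shape `(1, 4)` below come from the `input_signature` defined above, and the same `{"x": ...}` keying is what the deployed endpoint expects when it asks for a Dict of the exported input variables.

```python
# Optional local check of the exported ONNX model -- assumes `pip install onnxruntime`.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("model.onnx")
sample = np.array([[5.1, 3.5, 1.4, 0.2]], dtype=np.float32)  # one Iris measurement, shape (1, 4)

# The dictionary keys must match the input names chosen in the input_signature ("x" above)
outputs = session.run(None, {"x": sample})
print(outputs[0])  # softmax probabilities over the 3 Iris classes
```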
diff --git a/cerebrium/conduit/examples/xgboost.mdx b/cerebrium/conduit/examples/xgboost.mdx deleted file mode 100644 index 1f38e25f..00000000 --- a/cerebrium/conduit/examples/xgboost.mdx +++ /dev/null @@ -1,99 +0,0 @@ ---- -title: "XGBoost" -description: "This example provides the steps to deploy a simple XGBoost model (from scratch!) using Cerebrium." ---- - -By the end of this guide, you'll have an API endpoint that can handle any scale -of traffic by running inference on serverless CPUs/GPUs. - -## Project set up - -Before building you need to set up a Cerebrium account. This is as simple as -starting a new Project in Cerebrium and copying the API key. This will be used -to authenticate all calls for this project. - -### Create a project - -1. Go to [dashboard.cerebrium.ai](https://dashboard.cerebrium.ai) -2. Sign up or Login -3. Navigate to the API Keys page -4. You will need your private API key for deployments. Click the copy button to copy it to your clipboard - -![API Key](/images/cortex/api_keys_private_key.png) - -### Develop model - -Now navigate to where your model code is stored. This could be in a notebook or -in a plain `.py` file. - -To start, you should install the Cerebrium framework by running the following -command in your notebook or terminal - -```bash -pip install --upgrade cerebrium -``` - -Copy and paste our code below. This creates a simple XGBoost classifier on the Iris dataset. This code could be replaced by any XGBoost model. Make sure you have -the required libraries installed. - -```bash -from sklearn.datasets import load_iris -from xgboost import XGBClassifier - -iris = load_iris() -X, y = iris.data, iris.target - -xgb = XGBClassifier() -xgb.fit(X, y) - -# Save to XGB JSON -xgb.save_model("iris.json") -``` - -In the last line of code, there are two ways you can save the model. This is all -you need to deploy your model to Cerebrium! You can then import the `deploy()` -function from the Cerebrium framework. I used the CloudPickle function to save -my model below. - -```bash -from cerebrium import deploy, model_type - -endpoint = deploy((model_type.XGBOOST_CLASSIFIER, "iris.json"), "xgb-test-model" , "") -``` - -![Deployed Model](/images/quickstarts/xgboost-1.png) - - - Your result format will change for the `_classifier` model types. The - `xgb_classifier` will return a `result` object containing the probability - distribution for the predicted output classes, rather than the argmax of the - distribution. This is to allow you flexibility in how you want to handle the - output of your model for classification. For example, you may want to return - the top 3 predictions for your model, or you may want to return the top 3 - predictions with a minimum probability threshold. This is up to you. - - -Your model is now deployed and ready for inference all in under **10 seconds**! -Navigate to the dashboard and on the Models page, you will see your model. - -You can run inference using `curl` - -```bash -curl --location --request POST '' \ ---header 'Authorization: ' \ ---header 'Content-Type: application/json' \ ---data-raw '[[5.1, 3.5, 1.4, 0.2]]' -``` - -The response will be: - -![XGB Postman Response](/images/quickstarts/xgboost-2.png) - -Navigate back to the dashboard and click on the name of the model you just -deployed. You will see an API call was made and the inference time. From your dashboard, you can monitor your model, roll back to previous versions and see traffic. 
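Because the `xgb_classifier` model type returns the full probability distribution (see the note above), you will typically post-process the response yourself. Below is an illustrative sketch; the `result` payload and class names are placeholders rather than actual endpoint output.

```python
# Illustrative post-processing of a classifier response -- values below are placeholders.
import numpy as np

result = [[0.02, 0.95, 0.03]]                        # probability distribution for one sample
class_names = ["setosa", "versicolor", "virginica"]  # Iris target names

probs = np.asarray(result[0])
best_class = class_names[int(probs.argmax())]        # single hard prediction

# Top-3 predictions, keeping only classes above a minimum probability threshold
top_3 = [
    (class_names[i], float(probs[i]))
    for i in probs.argsort()[::-1][:3]
    if probs[i] >= 0.05
]
print(best_class, top_3)
```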
- -![XGB Monitoring](/images/quickstarts/xgboost-3.png) - -With one line of code, your model was deployed in seconds with automatic -versioning, monitoring and the ability to scale based on traffic spikes. Try -deploying your own model now or check out our other frameworks. diff --git a/cerebrium/conduit/introduction.mdx b/cerebrium/conduit/introduction.mdx deleted file mode 100644 index 8f4c20d6..00000000 --- a/cerebrium/conduit/introduction.mdx +++ /dev/null @@ -1,480 +0,0 @@ ---- -title: "Introduction" -description: "To deploy your machine learning model to Cerebrium takes just 2 lines of -code." ---- - - - We are phasing out support for Python 3.8 at the end of September 2023. If you - are using Python 3.8, please update your dependencies and move your deployment - to Python >= 3.9. - - -The Cerebrium conduit object creates an abstraction for you to deploy a model across any framework seamlessly with just a few lines of code. - -Below is the general layout of the Conduit object: - -```bash -from cerebrium import Conduit, model_type, hardware - -Some model training logic... - -c = Conduit( - name="", - api_key="", - flow=("", ""), - hardware="", -) - -c.deploy() -``` - -In the Conduit object, there are the following parameters: - -- A tuple of the model type and the model file: -- **MODEL_TYPE**: This parameter specifies the type of model you are supplying - Cerebrium and **must** be a `model_type`. This is to ensure that Cerebrium - knows how to handle your model. The current supported model types are: - - **model_type.SKLEARN**: Expects a `.pkl` file (there is no requirement of - the model to be a regressor or classifier). - - **model_type.SKLEARN_CLASSIFIER**: Expects a `.pkl` file (the model must - be a classifier. returns a class probability distribution instead of a - single class prediction) - - **model_type.SKLEARN_PREPROCESSOR**: Expects a `.pkl` file. This is a - special model type that is used to preprocess data with the `.transform` method before it is sent to - the model, such as a scaler or a one-hot encoder. - - **model_type.TORCH**: Expects a `.pkl` file serialized with - `cloudpickle` or a JIT script **Torchscript** `.pt` file. - - **model_type.XGBOOST_REGRESSOR**: Expects a serialized `.pkl` file or a - XGB `.json` file. - - **model_type.XGBOOST_CLASSIFIER**: Expects a serialized `.pkl` file or a - XGB `.json` file. - - **model_type.ONNX**: Expects a serialized `.onnx` file - - **model_type.SPACY**: Expects a folder path of your Spacy model - - **model_type.HUGGINGFACE_PIPELINE**: Expects the task identifier and model identifier of the Hugging face model - - **MODEL_PATH**: The path to your weights. This is either the path to the weights on your local machine or the path to the weights on in the cloud. For huggingface models, this is the Hugginface model identifier e.g. 'meta-llama/Llama-2-13b-chat-hf'. -- **MODEL_NAME**: The name you would like to give your model (alphanumeric, with - hyphens and less than 20 characters). This is a unique identifier for your - model and will be used to call your model in the future. -- **API_KEY**: This is the API key that can be found on your profile. You can - get it [here](https://dashboard.cerebrium.ai/). -- **HARDWARE**: The hardware parameter is a enum that can be one of the following: - - **hardware.CPU**: This will run your model on a CPU. This is the default option for SKLearn, XGBoost, and SpaCy models. - - **hardware.GPU**: (Deprecated) This will run your model on a T4 GPU. 
This is the default option for Torch, ONNX, and HuggingFace models.
-  - **hardware.A10**: (Deprecated) This will run your model on an A10 GPU, which provides 24GB of VRAM. You should use this option if you are using a model that is too large to fit into the 16GB of VRAM that a T4 GPU provides. This will include most large HuggingFace models.
-  - **hardware.TURING_4000**: An 8GB GPU that is great for lightweight models with less than 3B parameters in FP16.
-  - **hardware.TURING_5000**: A 16GB GPU that is great for small models with less than 7B parameters in FP16. Most small HuggingFace models can run on this.
-  - **hardware.AMPERE_A4000**: A 16GB GPU that is great for small models with less than 7B parameters in FP16. Significantly faster than an RTX 4000. Most small HuggingFace models can run on this.
-  - **hardware.AMPERE_A5000**: A 24GB GPU that is great for medium models with less than 10B parameters in FP16. A great option for almost all HuggingFace models.
-  - **hardware.AMPERE_A6000**: A 48GB GPU offering a great cost-to-performance ratio. This is great for medium models with less than 21B parameters in FP16. A great option for almost all HuggingFace models.
-  - **hardware.A100**: A 40GB GPU offering some of the highest performance available. This is great for large models with less than 18B parameters in FP16. A great option for almost all HuggingFace models, especially if inference speed is your priority.
-- **CPU**: This is the number of CPU cores you want to allocate to your model.
-  Optional as it defaults to 2. Can be an integer between 1 and 32.
-- **MEMORY**: This is the number of GB of memory you'd like to allocate to
-  your model. Optional as it defaults to 8.0GB. Depending on your hardware
-  selection, this float can be between 2.0 and 256.0.
-- **NUM_GPUS**: The number of GPUs to use in your deployment. Defaults to 1 but can be an integer between 1 and 8 if you're using a GPU.
-- **COOLDOWN**: The cooldown period, in seconds since the last request completed, before an inactive replica of your deployment is scaled down. Defaults to 60s.
-- **MIN_REPLICAS**: The minimum number of replicas you would like to keep active. Defaults to 0 to allow serverless execution. Can be set \>0 to keep one or more replicas active at all times. The maximum number of replicas depends on your subscription plan.
-- **MAX_REPLICAS**: The maximum number of replicas you would like to allow for your deployment. Useful for cost-sensitive applications when you need to limit the number of replicas that can be created. The maximum number of replicas depends on your subscription plan.
-- **REQUIREMENTS_FILE**: Optional path to a requirements.txt file that will be installed in the deployment environment. This is useful when you need additional libraries or packages to run your model. Defaults to None.
-- **FORCE_REBUILD**: Optional boolean to force a rebuild of the deployment environment. This is useful when you need a clean environment without any of the cached dependencies from previous deployments. Don't worry, your persistent storage is safe. Defaults to False.
-- **PYTHON_VERSION**: Optional. The version of Python you would like to use for your deployment. We support Python 3.8, 3.9, 3.10, and 3.11. If you do not specify a Python version, we default to Python 3.10.
-
-
-  Every unique model name will create a _separate deployment_ with a _separate
-  endpoint_. 
It is important to keep track of the model names you have used so - that you can call the correct model in the future. If you deploy a model with - the same name as a previous model, the previous model will be archived and the - new model will be deployed automatically. This is useful for versioning your - models. - - -Once you've run the `deploy` function, give it a minute, and it should be -deployed - easy-peasy! If your deployment is successful, you will see the -following output: - -``` -✅ Authenticated with Cerebrium! -⬆️ Uploading conduit artifacts... -100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 179k/179k [00:04<00:00, 42.7kB/s] -✅ Conduit artifacts uploaded successfully. -✅ Conduit deployed! -🌍 Endpoint: https://run.cerebrium.ai/v1/YOUR-PROJECT-ID/YOUR-MODEL-NAME/predict -``` - -Our `deploy` function will also return the endpoint of your model directly. This -is the URL that you will use to call your model in the future. - -## API Specification and Helper methods - -You can see an example of the request and response objects for calls made to your models. It should resemble what it is like calling your model locally in your own python environment. - -#### Request Parameters - - -```bash Request - curl --location --request POST '' \ - --header 'Authorization: ' \ - --header 'Content-Type: application/json' \ - --data-raw '[]' -``` - - - - This is the Cerebrium API key used to authenticate your request. You can get - it from your Cerebrium dashboard. - - - The content type of your request. Must be application/json or - multipart/form-data if sending files. - - - A list of data points you would like to send to your model. e.g. for 1 data - point of 3 features: [[1,2,3]]. - - - - -```json Response -{ - "result": [], - "run_id": "", - "run_time_ms": - "prediction_ids": [""] -} -``` - - - -#### Response Parameters - - - The result of your model prediction. - - - The run ID associated with your model predictions. - - - The amount of time if took your model to run down to the millisecond. This is - what we charge you based on. - - - The prediction IDs associated with each of your model predictions. Used to - track your model predictions with monitoring tools. - - -You can test out your model endpoint quickly with our utility function supplied -in Cerebrium, `model_api_request`. - -```bash -from cerebrium import model_api_request -model_api_request(endpoint, data, '') -``` - -The function takes in the following parameters: - -- **endpoint**: The endpoint of your model that was returned by the `deploy` - function. -- **data**: The data you would like to send to your model. You may - feed an `ndarray` or `Tensor` directly into this function. -- **api_key**: This is the Cerebrium API key used to authenticate your request. 
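If you prefer not to use the helper, the same call can be made with any HTTP client. Below is a minimal sketch using `requests`, following the request/response specification above; the endpoint URL and API key are placeholders.

```python
# Minimal sketch of calling a deployed endpoint with `requests`, following the spec above.
# The endpoint URL and API key are placeholders.
import requests

endpoint = "https://run.cerebrium.ai/v1/<YOUR-PROJECT-ID>/<YOUR-MODEL-NAME>/predict"
api_key = "<YOUR-CEREBRIUM-API-KEY>"

response = requests.post(
    endpoint,
    headers={"Authorization": api_key, "Content-Type": "application/json"},
    json=[[5.1, 3.5, 1.4, 0.2]],  # a list of data points, one inner list per prediction
    timeout=60,
)
body = response.json()
print(body["result"], body["run_id"], body["run_time_ms"], body["prediction_ids"])
```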
- -To get started, see how easy it is to deploy any of the frameworks below: - -## Start with a framework - - -} - href="/cerebrium/conduit/examples/pytorch" -> - Deploy a simple Pytorch model (CNN) from scratch - - -image/svg+xml - - - - - - - - - - - - - - - scikit - - - -} href="/cerebrium/conduit/examples/scikit"> - Deploy a simple Sci-Kit Learn model from scratch - - - - - - } - href="/cerebrium/conduit/examples/xgboost" -> - Deploy a simple XGBoost model from scratch - - - - - - - - - - - - - - } - href="/cerebrium/conduit/examples/onnx" -> - Deploy a simple Onnx model from scratch - - - - - - - - } - href="/cerebrium/conduit/examples/tensorflow" -> - Deploy a simple Tensorflow model from scratch - - - - {" "} - {" "} - - } - href="/cerebrium/conduit/examples/spacy" -> - Deploy a simple Spacy model from scratch - - - - - - - - - - - } - href="/cerebrium/conduit/examples/transformers" -> - Deploy a Hugging Face model from scratch - - - diff --git a/cerebrium/conduit/model-status.mdx b/cerebrium/conduit/model-status.mdx deleted file mode 100644 index db4e9c5f..00000000 --- a/cerebrium/conduit/model-status.mdx +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: "Model Status" -description: "Know your model's current state for efficient deployment and management" ---- - -Your Cortex model can be in one of several states at any given time. - -Here's what each status represents: - -- **Active**: The model is successfully deployed and available for use. If your model is sourced from a pre-built setup, it will immediately be in the active state. - -- **Cold**: This state indicates that your model is ready and can be called but will take a few seconds to start up. - -- **Deploying**: The model is currently in the process of being deployed. During this state, your model is not available for use. - -- **Starting**: Your model has been built and is now starting up, none of the replicas are ready yet but should be soon. - -- **Deleted**: This status signifies that there is no deployment associated with the model, which usually means the model has been removed. - -- **Unknown**: This state is assigned when the model's status cannot be determined, which could be due to an issue with retrieving deployment details. - - - Remember, the deployment and startup time is proportional to the model size - (including dependencies). For lightning-fast deployments, start-ups, and - scale-ups, keep your model as small as possible! - - -When you navigate to your Cerebrium dashboard and click on the Models tab, -you will be able to see the current state of your models. - - - If you encounter an 'Unknown' status or you would like to know more, please - reach out to us on Slack, Discord or{" "} - support@cerebrium.ai. - diff --git a/cerebrium/cortex/advanced-functionality/multi-gpu-inferencing.mdx b/cerebrium/cortex/advanced-functionality/multi-gpu-inferencing.mdx index 395e3175..12770a1c 100644 --- a/cerebrium/cortex/advanced-functionality/multi-gpu-inferencing.mdx +++ b/cerebrium/cortex/advanced-functionality/multi-gpu-inferencing.mdx @@ -5,7 +5,7 @@ description: Tips and tricks for multi-GPU inferencing. # Multi-GPU Inferencing -Multi GPU inference on cerebrium is as simple as setting the `num_gpus` parameter when deploying your Cortex or Conduit model. +Multi GPU inference on cerebrium is as simple as setting the `num_gpus` parameter when deploying your Cortex model. 
If you are using Hugging Face Transformers, you just need to set `device_map` to `auto` and the model will be automatically distributed across all available GPUs (as sketched below).
 
 While you can select any type of GPU for your multi-GPU deployment, if you are looking for the best performance possible, we recommend using A100 GPUs as these are connected with NVLink. This means that the GPUs can communicate with each other much faster than with other GPUs.
diff --git a/installation.mdx b/installation.mdx
index 5ade5944..4d2f3e9d 100644
--- a/installation.mdx
+++ b/installation.mdx
@@ -53,20 +53,7 @@ Some important details to note:
 
 Awesome! Let's start creating ML-based applications
 
-## How to navigate our documentation
-
-Cerebrium caters to many different types of businesses and users, so we have created abstractions that suit different users in different situations. Based on your use case, we recommend you look at the starting points below and the examples in each sub-directory.
-
-#### When would you use Conduit over Cortex
-
-- The Conduit is a very high-level abstraction for deploying ML models and gives the end user a very streamlined and easy way to deploy their ML models. It's best suited if you would like to deploy a model file or a HuggingFace model ID to production quickly with minimal setup. We recommend this for simple use cases that have simple pre/post-processing or logging logic. But don't let the simplicity fool you, it can still scale to 10k requests per second.
-- We recommend using Cortex if you would like fine-grained control over your environment, package versions and implementation. Typically our clients use this when they have more complex logic, stricter latency requirements using packages such as DeepSpeed, TensorRT, etc., and are communicating with many external tools.
-
-  
-    You have a model file or Hugging Face model ID and just want to turn it into
-    an API endpoint.
-  
   
     Deploy any custom Python code. Use this if you want very fine-grain control.
@@ -74,7 +61,7 @@ Cerebrium caters to many different types of businesses and users so we have crea
   
     You would like to deploy popular existing models with one click.
diff --git a/introduction.mdx b/introduction.mdx
index 29ceb140..c28928c0 100644
--- a/introduction.mdx
+++ b/introduction.mdx
@@ -28,13 +28,14 @@ weekly based on your feedback. You can send us feedback requests at
 
 ## Our users' favorite features
 
-- \<30 second cold-start times
+- \<20 second cold-start times
 - Wide variety of GPUs
 - Automatic scaling from 1 to 10k requests in \<15s
 - Define pip/conda container environments
 - Secrets manager
 - One-click deploys
-- Monitoring tool integration
+- Persistent Storage
+- Jobs
 
 All of this in just a few lines of code! 
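To illustrate the `device_map` advice from the multi-GPU inferencing section above, here is a minimal sketch of loading a Hugging Face model across several GPUs. It assumes `transformers`, `accelerate` and `torch` are installed in the deployment environment; the model ID is a placeholder.

```python
# Illustrative multi-GPU loading sketch -- assumes transformers + accelerate are installed.
# The model ID is a placeholder; replace it with the model you deploy.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "<your-hf-model-id>"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",          # shard layers across all GPUs requested via num_gpus
    torch_dtype=torch.float16,  # FP16 halves the VRAM needed per GPU
)

inputs = tokenizer("Multi-GPU inference test", return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```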
diff --git a/mint.json b/mint.json index b20a2eda..2255d94e 100644 --- a/mint.json +++ b/mint.json @@ -78,12 +78,12 @@ "cerebrium/cortex/advanced-functionality/persistent-storage", "cerebrium/cortex/advanced-functionality/using-secrets", "cerebrium/cortex/advanced-functionality/async-functions", - "cerebrium/cortex/advanced-functionality/config-files", - "cerebrium/cortex/advanced-functionality/init-cortex-project", "cerebrium/cortex/advanced-functionality/multi-gpu-inferencing", "cerebrium/cortex/advanced-functionality/long-running-tasks", "cerebrium/cortex/advanced-functionality/model-scaling", - "cerebrium/cortex/advanced-functionality/faster-model-loading" + "cerebrium/cortex/advanced-functionality/faster-model-loading", + "cerebrium/cortex/advanced-functionality/config-files" + ] }, { @@ -96,44 +96,6 @@ } ] }, - { - "group": "Conduit", - "pages": [ - "cerebrium/conduit/introduction", - "cerebrium/conduit/model-status", - { - "group": "Advanced functionality", - "pages": [ - "cerebrium/conduit/advanced-functionality/model-ensembles", - "cerebrium/conduit/advanced-functionality/persistent-memory", - "cerebrium/conduit/advanced-functionality/processing-functions", - "cerebrium/conduit/advanced-functionality/saving-files", - "cerebrium/conduit/advanced-functionality/test-locally", - "cerebrium/conduit/advanced-functionality/using-files", - "cerebrium/conduit/advanced-functionality/using-secrets", - "cerebrium/conduit/advanced-functionality/using-webhook-endpoints" - ] - }, - { - "group": "Examples", - "pages": [ - "cerebrium/conduit/examples/onnx", - "cerebrium/conduit/examples/pytorch", - "cerebrium/conduit/examples/scikit", - "cerebrium/conduit/examples/spacy", - "cerebrium/conduit/examples/tensorflow", - "cerebrium/conduit/examples/xgboost", - { - "group": "HuggingFace", - "pages": [ - "cerebrium/conduit/examples/huggingface/transformers", - "cerebrium/conduit/examples/huggingface/onnx-conversion" - ] - } - ] - } - ] - }, { "group": "Fine-tuning", "pages": [