#!/bin/bash
# Script to convert ONNX models to TensorRT engines and start NVIDIA Triton Inference Server.
# Usage:
#   ./start-triton-server.sh <max_batch_size> <opt_batch_size> [--force-build]
#   - max_batch_size: Maximum batch size for the TensorRT engines.
#   - opt_batch_size: Optimal batch size for the TensorRT engines.
#   - Pass --force-build to rebuild the TensorRT engines even if they already exist.
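#
# Example (illustrative values): build engines with a maximum batch size of 16
# and an optimal batch size of 8, rebuilding any existing engines:
#   ./start-triton-server.sh 16 8 --force-build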
# Prerequisites:
# - NVIDIA TensorRT must be installed.
# - NVIDIA Triton Inference Server must be installed and configured.

# Function to calculate the available GPU memory in MiB.
# Note: nvidia-smi prints one line per GPU; head -n 1 restricts the
# calculation to the first GPU so the arithmetic below stays valid.
function get_free_gpu_memory() {
    # Get the total memory and used memory from nvidia-smi
    local total_memory=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n 1 | awk '{print $1}')
    local used_memory=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1 | awk '{print $1}')

    # Calculate free memory
    local free_memory=$((total_memory - used_memory))
    echo "$free_memory"
}
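
# For reference, the queries above print plain MiB values, one line per GPU,
# e.g. (illustrative output for a 24 GiB card):
#   $ nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits
#   24576
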
# Script Flow:
max_batch_size="$1"
opt_batch_size="$2"

# Validate input parameters
if [[ -z $max_batch_size || -z $opt_batch_size ]]; then
    echo "Error: Please provide max_batch_size and opt_batch_size as input parameters."
    exit 1
fi

if [[ $max_batch_size -lt $opt_batch_size ]]; then
    echo "Error: max_batch_size must be greater than or equal to opt_batch_size."
    exit 1
fi

# Download the ONNX models if either one is missing
if [[ ! -f "./models_onnx/yolov7/yolov7_end2end.onnx" || ! -f "./models_onnx/yolov7_qat/yolov7_qat_end2end.onnx" ]]; then
    echo "YOLOv7 ONNX model files not found. Attempting to download..."
    cd ./models_onnx || exit 1
    bash ./download_models.sh
    cd ../ || exit 1
fi

# Check that the ONNX model files exist after the download attempt
if [[ ! -f "./models_onnx/yolov7/yolov7_end2end.onnx" ]]; then
    echo "YOLOv7 ONNX model file not found: ./models_onnx/yolov7/yolov7_end2end.onnx"
    exit 1
fi

if [[ ! -f "./models_onnx/yolov7_qat/yolov7_qat_end2end.onnx" ]]; then
    echo "YOLOv7 QAT ONNX model file not found: ./models_onnx/yolov7_qat/yolov7_qat_end2end.onnx"
    exit 1
fi

# Check if the force-build flag is set
force_build=false
if [[ "$3" == "--force-build" ]]; then
    force_build=true
fi

# Size the TensorRT builder workspace (in MiB) from the currently free GPU memory
workspace=$(get_free_gpu_memory)

# Create the Triton model repository layout (one numbered version directory per model)
mkdir -p ./models/yolov7/1/
mkdir -p ./models/yolov7_qat/1/
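
# Resulting model repository layout once the engines are built and the
# configs are copied below (illustrative):
#   models/
#   ├── yolov7/
#   │   ├── 1/model.plan
#   │   └── config.pbtxt
#   └── yolov7_qat/
#       ├── 1/model.plan
#       └── config.pbtxt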

# Convert the YOLOv7 ONNX model to a TensorRT engine with FP16 precision
# if the force flag is set or the engine does not exist yet
if [[ $force_build == true || ! -f "./models/yolov7/1/model.plan" ]]; then
    /usr/src/tensorrt/bin/trtexec \
        --onnx=./models_onnx/yolov7/yolov7_end2end.onnx \
        --minShapes=images:1x3x640x640 \
        --optShapes=images:${opt_batch_size}x3x640x640 \
        --maxShapes=images:${max_batch_size}x3x640x640 \
        --fp16 \
        --workspace=$workspace \
        --saveEngine=./models/yolov7/1/model.plan

    # Check the return code of trtexec
    if [[ $? -ne 0 ]]; then
        echo "Conversion of YOLOv7 ONNX model to TensorRT engine failed"
        exit 1
    fi
fi

# Convert the YOLOv7 QAT ONNX model to a TensorRT engine with INT8 precision
# if the force flag is set or the engine does not exist yet
# (--fp16 is passed alongside --int8 so layers without Q/DQ nodes can fall back to FP16)
if [[ $force_build == true || ! -f "./models/yolov7_qat/1/model.plan" ]]; then
    /usr/src/tensorrt/bin/trtexec \
        --onnx=./models_onnx/yolov7_qat/yolov7_qat_end2end.onnx \
        --minShapes=images:1x3x640x640 \
        --optShapes=images:${opt_batch_size}x3x640x640 \
        --maxShapes=images:${max_batch_size}x3x640x640 \
        --fp16 \
        --int8 \
        --workspace=$workspace \
        --saveEngine=./models/yolov7_qat/1/model.plan

    # Check the return code of trtexec
    if [[ $? -ne 0 ]]; then
        echo "Conversion of YOLOv7 QAT ONNX model to TensorRT engine failed"
        exit 1
    fi
fi
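
# Optional sanity check (illustrative, not part of the deployment flow): a
# generated engine can be loaded and timed with trtexec, e.g.
#   /usr/src/tensorrt/bin/trtexec --loadEngine=./models/yolov7/1/model.plan \
#       --shapes=images:1x3x640x640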

### Deploy the Triton model configuration files.
config_file=./models/yolov7/config.pbtxt
if [[ ! -f "$config_file" ]]; then
    cp ./models_config/yolov7/config.pbtxt "$config_file"
fi
sed -i "s/max_batch_size: [0-9]*/max_batch_size: $max_batch_size/" "$config_file"
echo "max_batch_size updated to $max_batch_size in $config_file"

config_file=./models/yolov7_qat/config.pbtxt
if [[ ! -f "$config_file" ]]; then
    cp ./models_config/yolov7_qat/config.pbtxt "$config_file"
fi
sed -i "s/max_batch_size: [0-9]*/max_batch_size: $max_batch_size/" "$config_file"
echo "max_batch_size updated to $max_batch_size in $config_file"

# Start Triton Inference Server with the converted models.
# The absolute repository path assumes the script runs inside the Triton
# container with this project mounted at /apps.
/opt/tritonserver/bin/tritonserver \
    --model-repository=/apps/models \
    --disable-auto-complete-config \
    --log-verbose=0
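
# Once the server reports that the models are loaded, readiness can be checked
# from another shell via Triton's standard HTTP health endpoint (default port 8000):
#   curl -s http://localhost:8000/v2/health/ready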