|
1 | 1 | {
|
2 |
| - "BM.GPU.B200.8": { |
3 |
| - "cpu_count": 128, |
4 |
| - "cpu_memory_in_gbs": 4096, |
5 |
| - "gpu_count": 8, |
6 |
| - "gpu_memory_in_gbs": 1440, |
7 |
| - "gpu_type": "B200", |
8 |
| - "quantization": [ |
9 |
| - "fp4", |
10 |
| - "fp8", |
11 |
| - "fp16", |
12 |
| - "bf16", |
13 |
| - "tf32", |
14 |
| - "int8", |
15 |
| - "fp64" |
16 |
| - ], |
17 |
| - "ranking": { |
18 |
| - "cost": 120, |
19 |
| - "performance": 130 |
20 |
| - } |
21 |
| - }, |
22 |
| - "BM.GPU.GB200.4": { |
23 |
| - "cpu_count": 144, |
24 |
| - "cpu_memory_in_gbs": 1024, |
25 |
| - "gpu_count": 4, |
26 |
| - "gpu_memory_in_gbs": 768, |
27 |
| - "gpu_type": "GB200", |
28 |
| - "quantization": [ |
29 |
| - "fp4", |
30 |
| - "fp8", |
31 |
| - "fp6", |
32 |
| - "int8", |
33 |
| - "fp16", |
34 |
| - "bf16", |
35 |
| - "tf32", |
36 |
| - "fp64" |
37 |
| - ], |
38 |
| - "ranking": { |
39 |
| - "cost": 110, |
40 |
| - "performance": 120 |
41 |
| - } |
42 |
| - }, |
43 |
| - "BM.GPU4.8": { |
44 |
| - "cpu_count": 64, |
45 |
| - "cpu_memory_in_gbs": 2048, |
46 |
| - "gpu_count": 8, |
47 |
| - "gpu_memory_in_gbs": 320, |
48 |
| - "gpu_type": "A100", |
49 |
| - "quantization": [ |
50 |
| - "int8", |
51 |
| - "fp16", |
52 |
| - "bf16", |
53 |
| - "tf32" |
54 |
| - ], |
55 |
| - "ranking": { |
56 |
| - "cost": 57, |
57 |
| - "performance": 65 |
58 |
| - } |
59 |
| - }, |
60 |
| - "VM.GPU3.8": { |
61 |
| - "cpu_count": 24, |
62 |
| - "cpu_memory_in_gbs": 768, |
63 |
| - "gpu_count": 8, |
64 |
| - "gpu_memory_in_gbs": 128, |
65 |
| - "gpu_type": "V100", |
66 |
| - "quantization": [ |
67 |
| - "gptq", |
68 |
| - "bitblas", |
69 |
| - "aqlm", |
70 |
| - "bitsandbytes", |
71 |
| - "deepspeedfp", |
72 |
| - "gguf" |
73 |
| - ], |
74 |
| - "ranking": { |
75 |
| - "cost": 56, |
76 |
| - "performance": 46 |
77 |
| - } |
78 |
| - }, |
79 | 2 | "shapes": {
|
80 | 3 | "BM.GPU.A10.4": {
|
81 | 4 | "cpu_count": 64,
|
|
121 | 44 | "performance": 70
|
122 | 45 | }
|
123 | 46 | },
|
| 47 | + "BM.GPU.B200.8": { |
| 48 | + "cpu_count": 128, |
| 49 | + "cpu_memory_in_gbs": 4096, |
| 50 | + "gpu_count": 8, |
| 51 | + "gpu_memory_in_gbs": 1440, |
| 52 | + "gpu_type": "B200", |
| 53 | + "quantization": [ |
| 54 | + "fp4", |
| 55 | + "fp8", |
| 56 | + "fp16", |
| 57 | + "bf16", |
| 58 | + "tf32", |
| 59 | + "int8", |
| 60 | + "fp64" |
| 61 | + ], |
| 62 | + "ranking": { |
| 63 | + "cost": 120, |
| 64 | + "performance": 130 |
| 65 | + } |
| 66 | + }, |
124 | 67 | "BM.GPU.B4.8": {
|
125 | 68 | "cpu_count": 64,
|
126 | 69 | "cpu_memory_in_gbs": 2048,
|
|
143 | 86 | "performance": 60
|
144 | 87 | }
|
145 | 88 | },
|
| 89 | + "BM.GPU.GB200.4": { |
| 90 | + "cpu_count": 144, |
| 91 | + "cpu_memory_in_gbs": 1024, |
| 92 | + "gpu_count": 4, |
| 93 | + "gpu_memory_in_gbs": 768, |
| 94 | + "gpu_type": "GB200", |
| 95 | + "quantization": [ |
| 96 | + "fp4", |
| 97 | + "fp8", |
| 98 | + "fp6", |
| 99 | + "int8", |
| 100 | + "fp16", |
| 101 | + "bf16", |
| 102 | + "tf32", |
| 103 | + "fp64" |
| 104 | + ], |
| 105 | + "ranking": { |
| 106 | + "cost": 110, |
| 107 | + "performance": 120 |
| 108 | + } |
| 109 | + }, |
146 | 110 | "BM.GPU.H100.8": {
|
147 | 111 | "cpu_count": 112,
|
148 | 112 | "cpu_memory_in_gbs": 2048,
|
|
264 | 228 | "performance": 20
|
265 | 229 | }
|
266 | 230 | },
|
| 231 | + "BM.GPU4.8": { |
| 232 | + "cpu_count": 64, |
| 233 | + "cpu_memory_in_gbs": 2048, |
| 234 | + "gpu_count": 8, |
| 235 | + "gpu_memory_in_gbs": 320, |
| 236 | + "gpu_type": "A100", |
| 237 | + "quantization": [ |
| 238 | + "int8", |
| 239 | + "fp16", |
| 240 | + "bf16", |
| 241 | + "tf32" |
| 242 | + ], |
| 243 | + "ranking": { |
| 244 | + "cost": 57, |
| 245 | + "performance": 65 |
| 246 | + } |
| 247 | + }, |
267 | 248 | "VM.GPU.A10.1": {
|
268 | 249 | "cpu_count": 15,
|
269 | 250 | "cpu_memory_in_gbs": 240,
|
|
378 | 359 | "cost": 55,
|
379 | 360 | "performance": 45
|
380 | 361 | }
|
| 362 | + }, |
| 363 | + "VM.GPU3.8": { |
| 364 | + "cpu_count": 24, |
| 365 | + "cpu_memory_in_gbs": 768, |
| 366 | + "gpu_count": 8, |
| 367 | + "gpu_memory_in_gbs": 128, |
| 368 | + "gpu_type": "V100", |
| 369 | + "quantization": [ |
| 370 | + "gptq", |
| 371 | + "bitblas", |
| 372 | + "aqlm", |
| 373 | + "bitsandbytes", |
| 374 | + "deepspeedfp", |
| 375 | + "gguf" |
| 376 | + ], |
| 377 | + "ranking": { |
| 378 | + "cost": 56, |
| 379 | + "performance": 46 |
| 380 | + } |
381 | 381 | }
|
382 | 382 | }
|
383 | 383 | }
|
0 commit comments