|
| 1 | +/* |
| 2 | + * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved. |
| 3 | + */ |
| 4 | + |
| 5 | +#ifndef NCCL_NET_V8_H_ |
| 6 | +#define NCCL_NET_V8_H_ |
| 7 | +#include "net_device.h" |
| 8 | + |
| 9 | +typedef struct { /* Device properties reported through the getProperties() callbacks of ncclNet_v8_t and ncclCollNet_v8_t. */ |
| 10 | + char* name; // Used mostly for logging. |
| 11 | + char* pciPath; // Path to the PCI device in /sys. |
| 12 | + uint64_t guid; // Unique identifier for the NIC chip. Important for |
| 13 | + // cards with multiple PCI functions (Physical or virtual). |
| 14 | + int ptrSupport; // Supported pointer types, presumably a bitmask of: [NCCL_PTR_HOST|NCCL_PTR_CUDA|NCCL_PTR_DMABUF] |
| 15 | + int regIsGlobal; // Set when regMr is not tied to a particular comm (presumably non-zero means "global" - confirm) |
| 16 | + int speed; // Port speed in Mbps. |
| 17 | + int port; // Port number. |
| 18 | + float latency; // Network latency (units are not stated in this header) |
| 19 | + int maxComms; // Maximum number of comms we can create |
| 20 | + int maxRecvs; // Maximum number of grouped receives. |
| 21 | + ncclNetDeviceType netDeviceType; // Network offload type (type presumably declared in net_device.h) |
| 22 | + int netDeviceVersion; // Version number for network offload |
| 23 | +} ncclNetProperties_v8_t; |
| 24 | + |
| 25 | +typedef ncclNetProperties_v8_t ncclNetProperties_t; /* Unversioned name; aliases the v8 properties struct. */ |
| 26 | + |
| 27 | +typedef struct { /* v8 table of callbacks a network plugin provides: setup, connection, memory registration, send/recv and teardown. */ |
| 28 | + // Name of the network (mainly for logs) |
| 29 | + const char* name; |
| 30 | + // Initialize the network. |
| 31 | + ncclResult_t (*init)(ncclDebugLogger_t logFunction); |
| 32 | + // Return the number of adapters. |
| 33 | + ncclResult_t (*devices)(int* ndev); |
| 34 | + // Get various device properties. |
| 35 | + ncclResult_t (*getProperties)(int dev, ncclNetProperties_v8_t* props); |
| 36 | + // Create a receiving object and provide a handle to connect to it. The |
| 37 | + // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged |
| 38 | + // between ranks to create a connection. |
| 39 | + ncclResult_t (*listen)(int dev, void* handle, void** listenComm); |
| 40 | + // Connect to a handle and return a sending comm object for that peer. |
| 41 | + // This call must not block for the connection to be established, and instead |
| 42 | + // should return successfully with sendComm == NULL with the expectation that |
| 43 | + // it will be called again until sendComm != NULL. |
| 44 | + // If *sendDevComm points to a valid object, then NCCL is requesting device offload for this connection. |
| 45 | + ncclResult_t (*connect)(int dev, void* handle, void** sendComm, ncclNetDeviceHandle_v8_t** sendDevComm); |
| 46 | + // Finalize connection establishment after remote peer has called connect. |
| 47 | + // This call must not block for the connection to be established, and instead |
| 48 | + // should return successfully with recvComm == NULL with the expectation that |
| 49 | + // it will be called again until recvComm != NULL. |
| 50 | + // If *recvDevComm points to a valid object, then NCCL is requesting device offload for this connection. |
| 51 | + ncclResult_t (*accept)(void* listenComm, void** recvComm, ncclNetDeviceHandle_v8_t** recvDevComm); |
| 52 | + // Register/Deregister memory. Comm can be either a sendComm or a recvComm. |
| 53 | + // Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA. |
| 54 | + ncclResult_t (*regMr)(void* comm, void* data, size_t size, int type, void** mhandle); |
| 55 | + /* DMA-BUF support: registration variant that additionally takes an offset and an fd (presumably the DMA-BUF file descriptor - confirm) */ |
| 56 | + ncclResult_t (*regMrDmaBuf)(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle); |
| 57 | + ncclResult_t (*deregMr)(void* comm, void* mhandle); |
| 58 | + // Asynchronous send to a peer. |
| 59 | + // May return request == NULL if the call cannot be performed (or would block) |
| 60 | + ncclResult_t (*isend)(void* sendComm, void* data, int size, int tag, void* mhandle, void** request); |
| 61 | + // Asynchronous recv from a peer. |
| 62 | + // May return request == NULL if the call cannot be performed (or would block) |
| 63 | + ncclResult_t (*irecv)(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request); |
| 64 | + // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is |
| 65 | + // visible to the GPU |
| 66 | + ncclResult_t (*iflush)(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request); |
| 67 | + // Test whether a request is complete. If sizes is not NULL, it returns the |
| 68 | + // number of bytes sent/received. |
| 69 | + ncclResult_t (*test)(void* request, int* done, int* sizes); |
| 70 | + // Close and free send/recv comm objects |
| 71 | + ncclResult_t (*closeSend)(void* sendComm); |
| 72 | + ncclResult_t (*closeRecv)(void* recvComm); |
| 73 | + ncclResult_t (*closeListen)(void* listenComm); |
| 74 | + |
| 75 | + // Copy the given mhandle to a dptr in a format usable by this plugin's device code |
| 76 | + ncclResult_t (*getDeviceMr)(void* comm, void* mhandle, void** dptr_mhandle); |
| 77 | + |
| 78 | + // Notify the plugin that a recv has completed by the device |
| 79 | + ncclResult_t (*irecvConsumed)(void* recvComm, int n, void* request); |
| 80 | +} ncclNet_v8_t; |
| 81 | + |
| 82 | + |
| 83 | +typedef struct { /* One buffer part; arrays of these are passed as recvParts/sendParts to iallgather/ireducescatter in ncclCollNet_v8_t. NOTE(review): "SGE" presumably means scatter/gather element - confirm. */ |
| 84 | + void* mhandle; /* presumably a memory handle obtained from regMr/regMrDmaBuf - confirm */ |
| 85 | + void* address; /* address of this part's buffer */ |
| 86 | + uint32_t size; /* size of this part (presumably in bytes - confirm) */ |
| 87 | +} ncclNetSGE_v8_t; |
| 88 | + |
| 89 | +typedef struct { /* v8 table of callbacks for a collective network: setup, group creation, memory registration, collectives and teardown. */ |
| 90 | + // Name of the collective network (mainly for logs) |
| 91 | + const char* name; |
| 92 | + // Initialize the collective network. |
| 93 | + ncclResult_t (*init)(ncclDebugLogger_t logFunction); |
| 94 | + // Return the number of adapters capable of doing collective operations. |
| 95 | + // If ndev returns 0, all other functions might be set to NULL. |
| 96 | + ncclResult_t (*devices)(int* ndev); |
| 97 | + // Get various device properties. |
| 98 | + ncclResult_t (*getProperties)(int dev, ncclNetProperties_v8_t* props); |
| 99 | + // Create a receiving object and provide a handle to connect to it. The |
| 100 | + // handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged |
| 101 | + // between ranks to create connections. |
| 102 | + ncclResult_t (*listen)(int dev, void* handle, void** listenComm); |
| 103 | + // Create a group for collective operations. handles have been created |
| 104 | + // using listen() above. rank indicates caller's rank in the collective network. |
| 105 | + ncclResult_t (*connect)(void* handles[], int nranks, int rank, void* listenComm, void** collComm); |
| 106 | + // Returns whether a reduction operation on a data type is supported. |
| 107 | + // 1 for supported, 0 otherwise. |
| 108 | + ncclResult_t (*reduceSupport)(ncclDataType_t dataType, ncclRedOp_t redOp, int* supported); |
| 109 | + // Register/Deregister memory. Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA. |
| 110 | + ncclResult_t (*regMr)(void* collComm, void* data, size_t size, int type, void** mhandle); |
| 111 | + /* DMA-BUF support: registration variant that additionally takes an offset and an fd (presumably the DMA-BUF file descriptor - confirm) */ |
| 112 | + ncclResult_t (*regMrDmaBuf)(void* collComm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle); |
| 113 | + ncclResult_t (*deregMr)(void* collComm, void* mhandle); |
| 114 | + // Performs an asynchronous allreduce operation on the collective group. |
| 115 | + // May return request == NULL if the call cannot be performed (or would block). |
| 116 | + ncclResult_t (*iallreduce)(void* collComm, void* sendData, void* recvData, int count, |
| 117 | + ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request); |
| 118 | + ncclResult_t (*iallgather)(void* collComm, void* sendData, int nRecvParts, ncclNetSGE_v8_t* recvParts, /* NOTE(review): undocumented in the original; presumably the allgather counterpart of iallreduce, with recvParts holding nRecvParts entries - confirm */ |
| 119 | + size_t bytesPerRank, size_t windowOffset, size_t windowBytes, |
| 120 | + void* sendMhandle, void** request); |
| 121 | + ncclResult_t (*ireducescatter)(void* collComm, int nSendParts, ncclNetSGE_v8_t* sendParts, void* recvData, /* NOTE(review): undocumented in the original; presumably the reduce-scatter counterpart of iallreduce, with sendParts holding nSendParts entries - confirm */ |
| 122 | + size_t bytesPerRank, size_t windowOffset, size_t windowBytes, |
| 123 | + ncclDataType_t dataType, ncclRedOp_t redOp, |
| 124 | + void* recvMhandle, void** request); |
| 125 | + // Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is |
| 126 | + // visible to the GPU |
| 127 | + ncclResult_t (*iflush)(void* collComm, void* data, int size, void* mhandle, void** request); |
| 128 | + // Test whether a request is complete. If size is not NULL, it returns the |
| 129 | + // number of bytes sent/received. |
| 130 | + ncclResult_t (*test)(void* request, int* done, int* size); |
| 131 | + // Close and free collective comm objects |
| 132 | + ncclResult_t (*closeColl)(void* collComm); |
| 133 | + ncclResult_t (*closeListen)(void* listenComm); |
| 134 | +} ncclCollNet_v8_t; |
| 135 | + |
| 136 | + |
| 137 | +#endif // end include guard |
0 commit comments