Skip to content

Commit

Permalink
plugin update to nccl-2.20
Browse files Browse the repository at this point in the history
  • Loading branch information
bureddy committed Feb 16, 2024
1 parent 3ff78de commit dfcd775
Show file tree
Hide file tree
Showing 8 changed files with 874 additions and 433 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ AC_ARG_WITH([verbs],
AC_CHECK_HEADER( [infiniband/verbs.h], [],[AC_MSG_FAILURE([ibverbs header files not found])])
AC_CHECK_LIB([ibverbs], [ibv_get_device_list], [],[AC_MSG_FAILURE([libibverbs not found]);])

AC_CHECK_DECLS([IBV_ACCESS_RELAXED_ORDERING, IBV_QPF_GRH_REQUIRED, ibv_reg_dmabuf_mr], [], [],
AC_CHECK_DECLS([IBV_ACCESS_RELAXED_ORDERING, IBV_QPF_GRH_REQUIRED, ibv_reg_dmabuf_mr, ibv_query_ece, ibv_set_ece], [], [],
[[#include <infiniband/verbs.h>]])

# check for ucx
Expand Down
2 changes: 2 additions & 0 deletions include/ibvwrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ static inline ncclResult_t wrap_ibv_poll_cq(struct ibv_cq *cq, int num_entries,
ncclResult_t wrap_ibv_create_qp(struct ibv_qp **ret, struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr);
ncclResult_t wrap_ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask);
ncclResult_t wrap_ibv_destroy_qp(struct ibv_qp *qp);
ncclResult_t wrap_ibv_query_ece(struct ibv_qp *qp, struct ibv_ece *ece, int* supported);
ncclResult_t wrap_ibv_set_ece(struct ibv_qp *qp, struct ibv_ece *ece, int* supported);
ncclResult_t wrap_ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr);
ncclResult_t wrap_ibv_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr);
ncclResult_t wrap_ibv_event_type_str(char **ret, enum ibv_event_type event);
Expand Down
56 changes: 34 additions & 22 deletions include/p2p_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,18 +44,27 @@ struct ncclIbMrCache {
int capacity, population;
};

// Upper bound on the number of IB devices grouped into one merged device
// (it sizes the devs[] array and the merged name buffer below).
#define NCCL_IB_MAX_DEVS_PER_NIC 2
// Size in bytes of a merged device name: NCCL_IB_MAX_DEVS_PER_NIC names of
// MAXNAMESIZE bytes each, plus one extra byte per device for the '+' characters.
// The whole replacement list is parenthesized so the macro still evaluates as a
// single value when it is used inside a larger expression.
#define MAX_MERGED_DEV_NAME ((MAXNAMESIZE*NCCL_IB_MAX_DEVS_PER_NIC)+NCCL_IB_MAX_DEVS_PER_NIC)
// One merged device: a group of up to NCCL_IB_MAX_DEVS_PER_NIC entries of
// ncclIbDevs that share a single combined name.
struct ncclIbMergedDev {
  int ndevs;                          // presumably the number of valid entries in devs[] -- TODO confirm
  int devs[NCCL_IB_MAX_DEVS_PER_NIC]; // Points to an index in ncclIbDevs
  int speed;                          // NOTE(review): units and aggregation rule are not visible here -- confirm
  char devName[MAX_MERGED_DEV_NAME];  // Up to NCCL_IB_MAX_DEVS_PER_NIC * name size, and a character for each '+'
} __attribute__((aligned(64)));

struct ncclIbRequest {
struct ncclIbVerbs* verbs;
struct ncclIbNetCommBase* base;
int type;
int events;
struct ncclSocket* sock;
struct ncclIbGidInfo* gidInfo;
int events[NCCL_IB_MAX_DEVS_PER_NIC];
struct ncclIbNetCommDevBase* devBases[NCCL_IB_MAX_DEVS_PER_NIC];
int nreqs;
union {
struct {
int size;
void* data;
uint32_t lkey;
uint32_t lkeys[NCCL_IB_MAX_DEVS_PER_NIC];
int offset;
} send;
struct {
Expand All @@ -64,56 +73,57 @@ struct ncclIbRequest {
};
};

struct ncclIbVerbs {
int dev;
struct ibv_pd* pd; // duplicate of ncclIbDevs[dev].pd
// Retain local RoCE address for error logging
// Holds one link-layer byte together with the local GID (a libibverbs union ibv_gid).
struct ncclIbGidInfo {
uint8_t link_layer; // presumably an IBV_LINK_LAYER_* value for the local port -- TODO confirm
union ibv_gid localGid; // local GID kept alongside the link layer
};

typedef struct ncclIbNetCommDevBase {
int ibDevN;
struct ibv_pd* pd;
struct ibv_cq* cq;
uint64_t pad[1];
struct ncclIbRequest reqs[MAX_REQUESTS];
};
struct ncclIbGidInfo gidInfo;
} ncclIbNetCommDevBase;

typedef struct ncclIbDev {
pthread_mutex_t lock;
int device;
uint64_t guid;
uint8_t port;
uint8_t portNum;
uint8_t link;
uint8_t isSharpDev;
int speed;
struct ibv_context* context;
int pdRefs;
struct ibv_pd* pd;
struct ncclIbVerbs verbs;
char devName[MAXNAMESIZE];
char *pciPath;
int realPort;
int maxQp;
struct ncclIbMrCache mrCache;
int ar; // ADAPTIVE_ROUTING
} __attribute__((aligned(64))) nccl_ib_dev_t;
struct ibv_port_attr portAttr;
} __attribute__((aligned(64))) ncclIbDev;

// NOTE(review): presumably the highest port number tracked per device; verify against the users of MAX_IB_PORT
#define MAX_IB_PORT 15
// A device name paired with a 16-bit per-port field.
// NOTE(review): looks like one entry of a user-supplied device/port filter -- confirm against the code that fills it.
struct userIbDev {
char devName[MAXNAMESIZE]; // device name buffer of MAXNAMESIZE bytes
uint16_t port_en; // presumably a bitmask of enabled ports -- TODO confirm
};

// Number of elements in each of the device tables declared below.
#define MAX_IB_DEVS 32
// NOTE(review): unlike the two declarations that follow, this one has no `extern`,
// so every translation unit including this header emits its own tentative
// definition of ncclIbMergedDevs. That fails to link when common symbols are
// disabled (-fno-common). Consider `extern` here plus a single definition in one
// .c file -- confirm where the array is meant to be defined before changing it.
struct ncclIbMergedDev ncclIbMergedDevs[MAX_IB_DEVS];
// Table of IB devices; defined in another translation unit.
extern struct ncclIbDev ncclIbDevs[MAX_IB_DEVS];
// NOTE(review): the element type is struct ncclIbDev although struct userIbDev is
// declared in this header -- confirm which type is intended for userIbDevs.
extern struct ncclIbDev userIbDevs[MAX_IB_DEVS];
/* Detect whether GDR can work on a given NIC with the current CUDA device
* Returns :
* ncclSuccess : GDR works
* ncclSystemError : no module or module loaded but not supported by GPU */
ncclResult_t nccl_p2p_gdr_support(int dev);
ncclResult_t nccl_p2p_gdr_support();

ncclResult_t nccl_p2p_dmabuf_support(int dev);

ncclResult_t nccl_p2p_ib_pci_path(nccl_ib_dev_t *devs, int num_devs, char* dev_name, char** path, int* real_port);
ncclResult_t nccl_p2p_ib_pci_path(ncclIbDev *devs, int num_devs, char* dev_name, char** path, int* real_port);

ncclResult_t nccl_p2p_ib_get_properties(nccl_ib_dev_t *devs, int dev, ncclNetProperties_t* props);
ncclResult_t nccl_p2p_ib_get_properties(ncclIbDev *devs, int dev, ncclNetProperties_t* props);

ncclResult_t nccl_p2p_ib_init(int *num_devs, nccl_ib_dev_t *ncclIbDevs, char *ncclIbIfName, union ncclSocketAddress *ncclIbIfAddr, pthread_t *ncclIbAsyncThread, ncclDebugLogger_t logFunction);
ncclResult_t nccl_p2p_ib_init(int *num_devs, ncclIbDev *ncclIbDevs, char *ncclIbIfName, union ncclSocketAddress *ncclIbIfAddr, pthread_t *ncclIbAsyncThread, ncclDebugLogger_t logFunction);

/* Convert value returned by ibv_query_port to actual link width */
int nccl_p2p_ib_width(int width);
Expand All @@ -125,6 +135,8 @@ int64_t ncclParamSharpMaxComms();

int64_t ncclParamIbMergeVfs();

int64_t ncclParamIbMergeNics();

int ncclIbRelaxedOrderingCapable(void);

nccl_p2p_plugin_t nccl_p2p_get_plugin_type();
Expand Down
Loading

0 comments on commit dfcd775

Please sign in to comment.