Skip to content

Commit def3745

Browse files
RobsonRLemos, stanleytsang-amd, lawruble13, eidenyoshida, ex-rzr
authored
Merge back hotfixes from 5.7 (#385) (#391)
* Enable gfx94x build targets (#353) --------- * 5.7 cherry pick - Benchmark perf. improvements for discrete distributions (#379) * Remove workaround with hipGridDim_x * hipBlockDim_x It makes no difference anymore on ROCm >= 5.5, probably after switching to code object v5. * Use restrict with tables of discrete distributions This change improves performance in device API benchmarks for most engines. --------- --------- Co-authored-by: Stanley Tsang <[email protected]> Co-authored-by: Lauren Wrubleski <[email protected]> Co-authored-by: Eiden Yoshida <[email protected]> Co-authored-by: Anton Gorenko <[email protected]>
1 parent ac9d6e7 commit def3745

File tree

2 files changed

+30
-22
lines changed

2 files changed

+30
-22
lines changed

benchmark/benchmark_rocrand_kernel.cpp

+1-10
Original file line numberDiff line numberDiff line change
@@ -88,16 +88,7 @@ void generate_kernel(GeneratorState * states,
8888
const Extra extra)
8989
{
9090
const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x;
91-
92-
// Using gridDim.x * blockDim.x should actually be a performance improvement; however, this kernel
93-
// just so happens to use an unfortunate amount of registers that the changes introduced in
94-
// https://github.com/llvm/llvm-project/commit/ba0d079c7aa52bc0ae860d16dd4a33b0dc5cfff7,
95-
// cause adverse code generation that degrades performance.
96-
#ifdef USE_HIP_CPU
97-
const unsigned int stride = gridDim.x * blockDim.x;
98-
#else
99-
const unsigned int stride = hipGridDim_x * hipBlockDim_x;
100-
#endif
91+
const unsigned int stride = gridDim.x * blockDim.x;
10192

10293
GeneratorState state = states[state_id];
10394
unsigned int index = state_id;

library/include/rocrand/rocrand_discrete.h

+29-12
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.
1+
// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved.
22
//
33
// Permission is hereby granted, free of charge, to any person obtaining a copy
44
// of this software and associated documentation files (the "Software"), to deal
@@ -57,16 +57,25 @@
5757
namespace rocrand_device {
5858
namespace detail {
5959

60-
FQUALIFIERS unsigned int discrete_alias(const double x, const rocrand_discrete_distribution_st& dis)
60+
FQUALIFIERS unsigned int discrete_alias(const double x,
61+
const unsigned int size,
62+
const unsigned int offset,
63+
const unsigned int* __restrict__ alias,
64+
const double* __restrict__ probability)
6165
{
6266
// Calculate value using Alias table
6367

6468
// x is [0, 1)
65-
const double nx = dis.size * x;
66-
const double fnx = floor(nx);
67-
const double y = nx - fnx;
69+
const double nx = size * x;
70+
const double fnx = floor(nx);
71+
const double y = nx - fnx;
6872
const unsigned int i = static_cast<unsigned int>(fnx);
69-
return dis.offset + (y < dis.probability[i] ? i : dis.alias[i]);
73+
return offset + (y < probability[i] ? i : alias[i]);
74+
}
75+
76+
FQUALIFIERS unsigned int discrete_alias(const double x, const rocrand_discrete_distribution_st& dis)
77+
{
78+
return discrete_alias(x, dis.size, dis.offset, dis.alias, dis.probability);
7079
}
7180

7281
FQUALIFIERS
@@ -94,17 +103,20 @@ FQUALIFIERS unsigned int discrete_alias(const unsigned long long int
94103
return discrete_alias(x, dis);
95104
}
96105

97-
FQUALIFIERS unsigned int discrete_cdf(const double x, const rocrand_discrete_distribution_st& dis)
106+
FQUALIFIERS unsigned int discrete_cdf(const double x,
107+
const unsigned int size,
108+
const unsigned int offset,
109+
const double* __restrict__ cdf)
98110
{
99111
// Calculate value using binary search in CDF
100112

101113
unsigned int min = 0;
102-
unsigned int max = dis.size - 1;
114+
unsigned int max = size - 1;
103115
do
104116
{
105117
const unsigned int center = (min + max) / 2;
106-
const double p = dis.cdf[center];
107-
if (x > p)
118+
const double p = cdf[center];
119+
if(x > p)
108120
{
109121
min = center + 1;
110122
}
@@ -113,9 +125,14 @@ FQUALIFIERS unsigned int discrete_cdf(const double x, const rocrand_discrete_dis
113125
max = center;
114126
}
115127
}
116-
while (min != max);
128+
while(min != max);
129+
130+
return offset + min;
131+
}
117132

118-
return dis.offset + min;
133+
FQUALIFIERS unsigned int discrete_cdf(const double x, const rocrand_discrete_distribution_st& dis)
134+
{
135+
return discrete_cdf(x, dis.size, dis.offset, dis.cdf);
119136
}
120137

121138
FQUALIFIERS

0 commit comments

Comments
 (0)