Skip to content

Commit bb0a5ba

Browse files
Merge pull request #232 from LibRapid/test
Updates to benchmarks and some small performance improvements
2 parents 5cbe7a0 + 421039a commit bb0a5ba

15 files changed

+1214
-1224
lines changed

CMakeLists.txt

+5-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ option(LIBRAPID_CODE_COV "Compile LibRapid C++ with Coverage" OFF)
3737
option(LIBRAPID_STRICT "Force all warnings into errors (use with caution)" OFF)
3838
option(LIBRAPID_QUIET "Hide warnings generated WITHIN LibRapid's source" OFF)
3939

40+
option(LIBRAPID_USE_PRECOMPILED_HEADER "Use precompiled headers to speed up compilation" OFF)
41+
4042
option(LIBRAPID_GET_FFTW "Clone and use FFTW -- WARNING: SEE DOCUMENTATION" OFF)
4143
option(LIBRAPID_GET_BLAS "Download pre-built OpenBLAS binaries and use them" OFF)
4244
option(LIBRAPID_GET_MULTIPREC "Download generic multiprecision libraries, as opposed to trying to find one on the system" OFF)
@@ -106,7 +108,9 @@ target_compile_definitions(${module_name} PUBLIC LIBRAPID_SOURCE="${LIBRAPID_SOU
106108
# clang-format on
107109

108110
# Precompiled Headers
109-
target_precompile_headers(${module_name} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/librapid/include/librapid/core/librapidPch.hpp")
111+
if (LIBRAPID_USE_PRECOMPILED_HEADER)
112+
target_precompile_headers(${module_name} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/librapid/include/librapid/core/librapidPch.hpp")
113+
endif ()
110114

111115
# Extract system information
112116
set(IS_LINUX OFF)

docs/source/cmakeIntegration.md

+19
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,25 @@ DEFAULT: OFF
6161
Disable all warnings from LibRapid. This is useful if you are using LibRapid as a dependency and want a cleaner
6262
compilation output. Warnings should be minimal in the first place, but this option is provided just in case.
6363

64+
### ``LIBRAPID_USE_PRECOMPILED_HEADER``
65+
66+
```
67+
DEFAULT: OFF
68+
```
69+
70+
Enable precompiled headers when compiling LibRapid. This can speed up compilation, but
71+
can lead to some strange build errors, which is why it is disabled by default.
72+
73+
:::{warning}
74+
One such build error occurs on some macOS systems with GCC. The resulting error is something along the lines of:
75+
76+
```
77+
Unknown flag -Xarch_amd64
78+
```
79+
80+
If you encounter this error, try disabling ``LIBRAPID_USE_PRECOMPILED_HEADER``.
81+
:::
82+
6483
### ``LIBRAPID_GET_FFTW``
6584

6685
```

docs/source/index.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ Getting Started <gettingStarted>
6868
CMake Integration <cmakeIntegration>
6969
API Reference <apiReference>
7070
Tutorials <tutorials>
71-
Performance and Benchmarks <performance/performance>
71+
Performance <performance/performance>
72+
Benchmark Results <BenchmarkResults/BenchmarkResults>
7273
Caution <caution>
7374
```
7475

examples/templateCMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ set(LIBRAPID_STRICT OFF) # Enable all warnings and treat them as errors
3030

3131
set(LIBRAPID_QUIET OFF) # Silence all warnings
3232

33+
set(LIBRAPID_USE_PRECOMPILED_HEADER OFF) # Use precompiled headers to accelerate
34+
# compilation at the cost of stability
35+
# on some systems.
36+
3337
set(LIBRAPID_USE_BLAS ON) # Attempt to use a BLAS library -- if not found,
3438
# LibRapid falls back to less optimized routines
3539

librapid/include/librapid/array/arrayContainer.hpp

+26-22
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ namespace librapid {
7373
struct IsArrayContainer : std::false_type {};
7474

7575
template<typename ShapeType, typename StorageScalar>
76-
struct IsArrayContainer<array::ArrayContainer<ShapeType, StorageScalar>> : std::true_type {};
76+
struct IsArrayContainer<array::ArrayContainer<ShapeType, StorageScalar>> : std::true_type {
77+
};
7778

7879
LIBRAPID_DEFINE_AS_TYPE(typename StorageScalar,
7980
array::ArrayContainer<Shape COMMA StorageScalar>);
@@ -93,7 +94,6 @@ namespace librapid {
9394
using Scalar = typename StorageType::Scalar;
9495
using Packet = typename typetraits::TypeInfo<Scalar>::Packet;
9596
using Backend = typename typetraits::TypeInfo<ArrayContainer>::Backend;
96-
using Iterator = detail::ArrayIterator<GeneralArrayView<ArrayContainer, ShapeType>>;
9797

9898
using DirectSubscriptType = typename detail::SubscriptType<StorageType>::Direct;
9999
using DirectRefSubscriptType = typename detail::SubscriptType<StorageType>::Ref;
@@ -330,19 +330,19 @@ namespace librapid {
330330

331331
/// \brief Return an iterator to the beginning of the array container
332332
/// \return Iterator
333-
LIBRAPID_ALWAYS_INLINE Iterator begin() const noexcept;
333+
LIBRAPID_ALWAYS_INLINE auto begin() const noexcept;
334334

335335
/// \brief Return an iterator to the end of the array container
336336
/// \return Iterator
337-
LIBRAPID_ALWAYS_INLINE Iterator end() const noexcept;
337+
LIBRAPID_ALWAYS_INLINE auto end() const noexcept;
338338

339339
/// \brief Return an iterator to the beginning of the array container
340340
/// \return Iterator
341-
LIBRAPID_ALWAYS_INLINE Iterator begin();
341+
LIBRAPID_ALWAYS_INLINE auto begin();
342342

343343
/// \brief Return an iterator to the end of the array container
344344
/// \return Iterator
345-
LIBRAPID_ALWAYS_INLINE Iterator end();
345+
LIBRAPID_ALWAYS_INLINE auto end();
346346

347347
template<typename T, typename Char, typename Ctx>
348348
void str(const fmt::formatter<T, Char> &format, char bracket, char separator,
@@ -765,12 +765,11 @@ namespace librapid {
765765
ArrayContainer<ShapeType_, StorageType_>::packet(size_t index) const -> Packet {
766766
auto ptr = LIBRAPID_ASSUME_ALIGNED(m_storage.begin());
767767

768-
#if defined(LIBRAPID_NATIVE_ARCH) && !defined(LIBRAPID_OSX)
769-
// On MacOS (and other platforms??) we cannot use aligned loads in arrays due to one
770-
// annoying edge case. Normally, all SIMD loads will be aligned to a 64-byte boundary.
771-
// Say, however, this array is a sub-array of a larger array. If the outer dimension
772-
// of the larger array does not result in a 64-byte alignment, the data of *this* array
773-
// will not be correctly aligned, hence causing a segfault.
768+
#if defined(LIBRAPID_NATIVE_ARCH)
769+
LIBRAPID_ASSERT(
770+
reinterpret_cast<uintptr_t>(ptr) % typetraits::TypeInfo<Scalar>::packetWidth == 0,
771+
"ArrayContainer::packet called on unaligned storage");
772+
774773
return xsimd::load_aligned(ptr + index);
775774
#else
776775
return xsimd::load_unaligned(ptr + index);
@@ -786,10 +785,15 @@ namespace librapid {
786785
template<typename ShapeType_, typename StorageType_>
787786
LIBRAPID_ALWAYS_INLINE void
788787
ArrayContainer<ShapeType_, StorageType_>::writePacket(size_t index, const Packet &value) {
788+
auto ptr = LIBRAPID_ASSUME_ALIGNED(m_storage.begin());
789+
789790
#if defined(LIBRAPID_NATIVE_ARCH)
790-
value.store_aligned(m_storage.begin() + index);
791+
LIBRAPID_ASSERT(
792+
reinterpret_cast<uintptr_t>(ptr) % typetraits::TypeInfo<Scalar>::packetWidth == 0,
793+
"ArrayContainer::packet called on unaligned storage");
794+
value.store_aligned(ptr + index);
791795
#else
792-
value.store_unaligned(m_storage.begin() + index);
796+
value.store_unaligned(ptr + index);
793797
#endif
794798
}
795799

@@ -881,24 +885,24 @@ namespace librapid {
881885

882886
template<typename ShapeType_, typename StorageType_>
883887
LIBRAPID_ALWAYS_INLINE auto ArrayContainer<ShapeType_, StorageType_>::begin() const noexcept
884-
-> Iterator {
885-
return Iterator(GeneralArrayView(*this), 0);
888+
-> auto {
889+
return detail::ArrayIterator(createGeneralArrayView(*this), 0);
886890
}
887891

888892
template<typename ShapeType_, typename StorageType_>
889893
LIBRAPID_ALWAYS_INLINE auto ArrayContainer<ShapeType_, StorageType_>::end() const noexcept
890-
-> Iterator {
891-
return Iterator(GeneralArrayView(*this), m_shape[0]);
894+
-> auto {
895+
return detail::ArrayIterator(createGeneralArrayView(*this), m_shape[0]);
892896
}
893897

894898
template<typename ShapeType_, typename StorageType_>
895-
LIBRAPID_ALWAYS_INLINE auto ArrayContainer<ShapeType_, StorageType_>::begin() -> Iterator {
896-
return Iterator(GeneralArrayView(*this), 0);
899+
LIBRAPID_ALWAYS_INLINE auto ArrayContainer<ShapeType_, StorageType_>::begin() -> auto {
900+
return detail::ArrayIterator(createGeneralArrayView(*this), 0);
897901
}
898902

899903
template<typename ShapeType_, typename StorageType_>
900-
LIBRAPID_ALWAYS_INLINE auto ArrayContainer<ShapeType_, StorageType_>::end() -> Iterator {
901-
return Iterator(GeneralArrayView(*this), m_shape[0]);
904+
LIBRAPID_ALWAYS_INLINE auto ArrayContainer<ShapeType_, StorageType_>::end() -> auto {
905+
return detail::ArrayIterator(createGeneralArrayView(*this), m_shape[0]);
902906
}
903907

904908
template<typename ShapeType_, typename StorageType_>

librapid/include/librapid/array/arrayIterator.hpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ namespace librapid::detail {
55
template<typename T>
66
class ArrayIterator {
77
public:
8-
using IndexType = int64_t;
8+
using IndexType = size_t;
99

1010
/// Default constructor should never be used
1111
ArrayIterator() = delete;
@@ -14,6 +14,8 @@ namespace librapid::detail {
1414

1515
explicit LIBRAPID_ALWAYS_INLINE ArrayIterator(const T &array, IndexType index);
1616

17+
explicit LIBRAPID_ALWAYS_INLINE ArrayIterator(T &&array, IndexType index);
18+
1719
/// Copy an ArrayIterator object (const)
1820
/// \param other The ArrayIterator to copy
1921
LIBRAPID_ALWAYS_INLINE ArrayIterator(const ArrayIterator &other) = default;
@@ -53,6 +55,10 @@ namespace librapid::detail {
5355
LIBRAPID_ALWAYS_INLINE ArrayIterator<T>::ArrayIterator(const T &array, IndexType index) :
5456
m_array(array), m_index(index) {}
5557

58+
template<typename T>
59+
LIBRAPID_ALWAYS_INLINE ArrayIterator<T>::ArrayIterator(T &&array, IndexType index) :
60+
m_array(std::move(array)), m_index(index) {}
61+
5662
template<typename T>
5763
LIBRAPID_ALWAYS_INLINE ArrayIterator<T> &ArrayIterator<T>::operator++() {
5864
++m_index;

librapid/include/librapid/array/assignOps.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ namespace librapid {
328328
::librapid::detail::LibRapidType::Scalar,
329329
int> = 0>
330330
LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto dataSourceExtractor(const T &obj) {
331-
return obj.storage().begin().get();
331+
return obj.storage().begin();
332332
}
333333

334334
template<typename T, typename std::enable_if_t<typetraits::TypeInfo<T>::type ==
@@ -518,7 +518,7 @@ namespace librapid {
518518
std::make_index_sequence<argSize>(),
519519
filename,
520520
kernelName,
521-
reinterpret_cast<Scalar *>(lhs.storage().begin().get()),
521+
reinterpret_cast<Scalar *>(lhs.storage().begin()),
522522
function);
523523
}
524524
} // namespace detail

librapid/include/librapid/array/linalg/arrayMultiply.hpp

+30-5
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,14 @@ namespace librapid {
106106
/// \return Class of the array multiplication
107107
LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE MatmulClass matmulClass() const;
108108

109+
LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE ShapeType calculateShape() const;
110+
109111
/// \brief Determine the shape of the result
110112
/// \return Shape of the result
111113
LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE ShapeType shape() const;
112114

115+
LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE size_t size() const;
116+
113117
/// \brief Determine the number of dimensions of the result
114118
/// \return Number of dimensions of the result
115119
LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE int64_t ndim() const;
@@ -173,6 +177,9 @@ namespace librapid {
173177
ScalarA m_alpha; // Scaling factor for A
174178
TypeB m_b; // Second array
175179
ScalarB m_beta; // Scaling factor for B
180+
181+
ShapeType m_shape;
182+
size_t m_size;
176183
};
177184

178185
template<typename ShapeTypeA, typename StorageTypeA, typename ShapeTypeB,
@@ -182,23 +189,26 @@ namespace librapid {
182189
TypeB &&b, Beta beta) :
183190
m_transA(transA),
184191
m_transB(transB), m_a(std::forward<TypeA>(a)), m_alpha(static_cast<ScalarA>(alpha)),
185-
m_b(std::forward<TypeB>(b)), m_beta(static_cast<ScalarB>(beta)) {}
192+
m_b(std::forward<TypeB>(b)), m_beta(static_cast<ScalarB>(beta)),
193+
m_shape(calculateShape()), m_size(m_shape.size()) {}
186194

187195
template<typename ShapeTypeA, typename StorageTypeA, typename ShapeTypeB,
188196
typename StorageTypeB, typename Alpha, typename Beta>
189197
ArrayMultiply<ShapeTypeA, StorageTypeA, ShapeTypeB, StorageTypeB, Alpha,
190198
Beta>::ArrayMultiply(TypeA &&a, TypeB &&b) :
191199
m_transA(false),
192200
m_transB(false), m_a(std::forward<TypeA>(a)), m_alpha(1),
193-
m_b(std::forward<TypeB>(b)), m_beta(0) {}
201+
m_b(std::forward<TypeB>(b)), m_beta(0), m_shape(calculateShape()),
202+
m_size(m_shape.size()) {}
194203

195204
template<typename ShapeTypeA, typename StorageTypeA, typename ShapeTypeB,
196205
typename StorageTypeB, typename Alpha, typename Beta>
197206
ArrayMultiply<ShapeTypeA, StorageTypeA, ShapeTypeB, StorageTypeB, Alpha,
198207
Beta>::ArrayMultiply(bool transA, bool transB, TypeA &&a, TypeB &&b) :
199208
m_transA(transA),
200209
m_transB(transB), m_a(std::forward<TypeA>(a)), m_alpha(1),
201-
m_b(std::forward<TypeB>(b)), m_beta(0) {}
210+
m_b(std::forward<TypeB>(b)), m_beta(0), m_shape(calculateShape()),
211+
m_size(m_shape.size()) {}
202212

203213
template<typename ShapeTypeA, typename StorageTypeA, typename ShapeTypeB,
204214
typename StorageTypeB, typename Alpha, typename Beta>
@@ -268,8 +278,8 @@ namespace librapid {
268278

269279
template<typename ShapeTypeA, typename StorageTypeA, typename ShapeTypeB,
270280
typename StorageTypeB, typename Alpha, typename Beta>
271-
auto ArrayMultiply<ShapeTypeA, StorageTypeA, ShapeTypeB, StorageTypeB, Alpha, Beta>::shape()
272-
const -> ShapeType {
281+
auto ArrayMultiply<ShapeTypeA, StorageTypeA, ShapeTypeB, StorageTypeB, Alpha,
282+
Beta>::calculateShape() const -> ShapeType {
273283
const auto &shapeA = m_a.shape();
274284
const auto &shapeB = m_b.shape();
275285
MatmulClass matmulClass = this->matmulClass();
@@ -294,6 +304,21 @@ namespace librapid {
294304
return {1};
295305
}
296306

307+
template<typename ShapeTypeA, typename StorageTypeA, typename ShapeTypeB,
308+
typename StorageTypeB, typename Alpha, typename Beta>
309+
auto ArrayMultiply<ShapeTypeA, StorageTypeA, ShapeTypeB, StorageTypeB, Alpha, Beta>::shape()
310+
const -> ShapeType {
311+
return m_shape;
312+
}
313+
314+
template<typename ShapeTypeA, typename StorageTypeA, typename ShapeTypeB,
315+
typename StorageTypeB, typename Alpha, typename Beta>
316+
auto
317+
ArrayMultiply<ShapeTypeA, StorageTypeA, ShapeTypeB, StorageTypeB, Alpha, Beta>::size() const
318+
-> size_t {
319+
return m_size;
320+
}
321+
297322
template<typename ShapeTypeA, typename StorageTypeA, typename ShapeTypeB,
298323
typename StorageTypeB, typename Alpha, typename Beta>
299324
auto

librapid/include/librapid/array/linalg/transpose.hpp

+2-4
Original file line numberDiff line numberDiff line change
@@ -441,8 +441,6 @@ namespace librapid {
441441
using ArrayType = TransposeType;
442442
using BaseType = typename std::decay_t<TransposeType>;
443443
using Scalar = typename typetraits::TypeInfo<BaseType>::Scalar;
444-
using Reference = BaseType &;
445-
using ConstReference = const BaseType &;
446444
using ShapeType = typename BaseType::ShapeType;
447445
using Backend = typename typetraits::TypeInfo<BaseType>::Backend;
448446

@@ -624,8 +622,8 @@ namespace librapid {
624622
else {
625623
if (m_inputShape.ndim() == 2) {
626624
int64_t blockSize = global::cacheLineSize / sizeof(Scalar);
627-
auto *__restrict outPtr = out.storage().begin().get();
628-
auto *__restrict inPtr = m_array.storage().begin().get();
625+
auto *__restrict outPtr = out.storage().begin();
626+
auto *__restrict inPtr = m_array.storage().begin();
629627
detail::cuda::transposeImpl(
630628
outPtr, inPtr, m_inputShape[0], m_inputShape[1], m_alpha, blockSize);
631629
} else {

0 commit comments

Comments
 (0)