diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..7c3f72a81 --- /dev/null +++ b/.clang-format @@ -0,0 +1,97 @@ +--- +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: true +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: true +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Stroustrup +BreakBeforeTernaryOperators: true +#BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializersBeforeComma: true +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 2 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: true +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +IncludeCategories: + - Regex: '^<.*\.h>' + Priority: 1 + - Regex: '^<.*' + Priority: 2 + - Regex: '.*' + Priority: 3 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IndentCaseLabels: true +IndentWidth: 2 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: false +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +#Standard: Auto +Standard: Cpp11 +TabWidth: 8 +UseTab: Never +... + diff --git a/dash/include/dash/algorithm/Copy.h b/dash/include/dash/algorithm/Copy.h index 368d58ed9..bb0d79fa5 100644 --- a/dash/include/dash/algorithm/Copy.h +++ b/dash/include/dash/algorithm/Copy.h @@ -475,41 +475,6 @@ dash::Future copy_async_impl( // ========================================================================= // Local to Global // ========================================================================= - -/** - * Blocking implementation of \c dash::copy (local to global) without - * optimization for local subrange. - */ -template < - typename ValueType, - class GlobOutputIt > -GlobOutputIt copy_impl( - ValueType * in_first, - ValueType * in_last, - GlobOutputIt out_first) -{ - DASH_LOG_TRACE("dash::copy_impl()", - "l_in_first:", in_first, - "l_in_last:", in_last, - "g_out_first:", out_first.pos()); - - auto num_elements = std::distance(in_first, in_last); - dart_storage_t ds = dash::dart_storage(num_elements); - DASH_ASSERT_RETURNS( - dart_put_blocking( - out_first.dart_gptr(), - in_first, - ds.nelem, - ds.dtype), - DART_OK); - - auto out_last = out_first + num_elements; - DASH_LOG_TRACE("dash::copy_impl >", - "g_out_last:", out_last.dart_gptr()); - - return out_last; -} - /** * Asynchronous implementation of \c dash::copy (local to global) without * optimization for local subrange. @@ -527,6 +492,13 @@ dash::Future copy_async_impl( "l_in_last:", in_last, "g_out_first:", out_first.dart_gptr()); + + auto num_copy_elem = std::distance(in_first, in_last); + + if (num_copy_elem < 1) { + return dash::Future([=]() mutable { return out_first; }); + } + // Accessed global pointers to be flushed: #ifdef DASH__ALGORITHM__COPY__USE_FLUSH std::vector req_handles; @@ -534,38 +506,57 @@ dash::Future copy_async_impl( std::vector req_handles; #endif - auto num_copy_elem = std::distance(in_first, in_last); - auto src_ptr = in_first; - auto dest_gptr = out_first.dart_gptr(); + auto nremaining = num_copy_elem; + auto pattern = out_first.pattern(); + while (nremaining) { + // global index to local unit and index + auto local_pos = pattern.local(out_first.pos()); + // number of elements in unit + auto local_size = pattern.local_extents(team_unit_t{local_pos.unit}); + + auto dest_gptr = out_first.dart_gptr(); + + auto lsize = local_size[0]; + using lsize_t = decltype(lsize); + + num_copy_elem = std::min({lsize, + static_cast(nremaining)}); + + dart_storage_t ds = dash::dart_storage(num_copy_elem); #ifdef DASH__ALGORITHM__COPY__USE_FLUSH - dart_storage_t ds = dash::dart_storage(num_copy_elem); - if (dart_put( - dest_gptr, - src_ptr, - ds.nelem, - ds.dtype) - != DART_OK) { - DASH_LOG_ERROR("dash::copy_async_impl", "dart_put failed"); - DASH_THROW( - dash::exception::RuntimeError, "dart_put failed"); - } - req_handles.push_back(dest_gptr); + if (dart_put( + dest_gptr, + in_first, + ds.nelem, + ds.dtype) + != DART_OK) { + DASH_LOG_ERROR("dash::copy_async_impl", "dart_put failed"); + DASH_THROW( + dash::exception::RuntimeError, "dart_put failed"); + } + req_handles.push_back(dest_gptr); #else - dart_handle_t put_handle; - dart_storage_t ds = dash::dart_storage(num_copy_elem); - DASH_ASSERT_RETURNS( - dart_put_handle( - dest_gptr, - src_ptr, - ds.nelem, - ds.dtype, - &put_handle), - DART_OK); - if (put_handle != NULL) { - req_handles.push_back(put_handle); - } + dart_handle_t put_handle; + DASH_ASSERT_RETURNS( + dart_put_handle( + dest_gptr, + in_first, + ds.nelem, + ds.dtype, + &put_handle), + DART_OK); + if (put_handle != NULL) { + req_handles.push_back(put_handle); + } #endif + std::advance(in_first, num_copy_elem); + + nremaining = std::distance(in_first, in_last); + + if (nremaining) std::advance(out_first, num_copy_elem); + } + #ifdef DASH_ENABLE_TRACE_LOGGING for (auto gptr : req_handles) { DASH_LOG_TRACE("dash::copy_async_impl", " req_handle:", gptr); @@ -610,6 +601,34 @@ dash::Future copy_async_impl( return result; } +/** + * Blocking implementation of \c dash::copy (local to global) without + * optimization for local subrange. + */ +template < + typename ValueType, + class GlobOutputIt > +GlobOutputIt copy_impl( + ValueType * in_first, + ValueType * in_last, + GlobOutputIt out_first) +{ + DASH_LOG_TRACE("dash::copy_impl()", + "l_in_first:", in_first, + "l_in_last:", in_last, + "g_out_first:", out_first.pos()); + + + auto fut = copy_async_impl(in_first, in_last, out_first); + auto out_last = fut.get(); + + DASH_LOG_TRACE("dash::copy_impl >", + "g_out_last:", out_last.dart_gptr()); + + return out_last; +} + + } // namespace internal @@ -867,7 +886,7 @@ ValueType * copy( auto total_copy_elem = in_last - in_first; // Instead of testing in_first.local() and in_last.local(), this test for - // a local-only range only requires one call to in_first.local() which + // a local-only range only requires one call to in_first.local() which // increases throughput by ~10% for local ranges. if (num_local_elem == total_copy_elem) { // Entire input range is local: @@ -1113,10 +1132,11 @@ GlobOutputIt copy( // Copy to remote elements succeeding the local subrange: if (g_l_offset_end < out_h_last.pos()) { DASH_LOG_TRACE("dash::copy", "copy to global succeeding local subrange"); + out_last = dash::internal::copy_impl( - in_first + l_elem_offset + num_local_elem, - in_last, - out_first + num_local_elem); + in_first + l_elem_offset + num_local_elem, + in_last, + out_first + num_local_elem); } } else { // All elements in output range are remote diff --git a/dash/test/algorithm/CopyTest.cc b/dash/test/algorithm/CopyTest.cc index c27b60f70..752d00723 100644 --- a/dash/test/algorithm/CopyTest.cc +++ b/dash/test/algorithm/CopyTest.cc @@ -438,16 +438,20 @@ TEST_F(CopyTest, AsyncLocalToGlobPtr) auto block_offset = (dash::myid() + 1) % dash::size(); auto global_offset = block_offset * num_elem_per_unit; - using glob_it_t = decltype(array.begin()); - using glob_ptr_t = typename glob_it_t::pointer; + //using glob_it_t = decltype(array.begin()); + //using glob_ptr_t = typename glob_it_t::pointer; + // + auto globIt = array.begin(); + /* glob_ptr_t gptr_dest = static_cast( array.begin() + global_offset); LOG_MESSAGE("CopyTest.AsyncLocalToGlobPtr: call copy_async"); + */ auto copy_fut = dash::copy_async(local_range, local_range + num_elem_per_unit, - gptr_dest); + globIt + global_offset); // Blocks until remote completion: LOG_MESSAGE("CopyTest.AsyncLocalToGlobPtr: call fut.wait"); @@ -803,6 +807,27 @@ TEST_F(CopyTest, AsyncGlobalToLocalBlock) } } +TEST_F(CopyTest, CArrayToDashArray) +{ + dash::Array arr(100); + + if (dash::myid() == 0) { + int buf[100]; + std::iota(buf, buf + 100, 0); + // copy local buffer to global array + auto it_out = dash::copy(buf, buf + 100, arr.begin()); + DASH_LOG_DEBUG_VAR("CopyTest.CArrayToDashArray", it_out); + EXPECT_EQ_U(arr.end(), it_out); + } + + arr.barrier(); + + if (dash::myid() == 0) { + for (size_t idx = 0; idx < 100; ++idx) { + EXPECT_EQ_U(idx, static_cast(arr[idx])); + } + } +} #if 0 // TODO TEST_F(CopyTest, AsyncAllToLocalVector)