From 9c5fc3572d784efc4f59ec74464ccbb3169ac8a6 Mon Sep 17 00:00:00 2001 From: Clement Cheung Date: Wed, 17 Nov 2021 14:49:54 -0800 Subject: [PATCH] Port stats test Summary: Also fixes an undefined behavior in our code: ``` #0 0xca4b378 in facebook::fboss::phy::BaldEagle::fwSerdesParamsEyes(facebook::fboss::MdioController::FullyLockedMdio&, facebook::fboss::phy::BaldEagle::LaneMode, facebook::fboss::LaneID)::$_46::operator()(int) const (/tmp/setup_link_test_bin-6.5.21+0xca4b378) #1 0xca3d48c in facebook::fboss::phy::BaldEagle::fwSerdesParamsEyes(facebook::fboss::MdioController::FullyLockedMdio&, facebook::fboss::phy::BaldEagle::LaneMode, facebook::fboss::LaneID) (/tmp/setup_link_test_bin-6.5.21+0xca3d48c) #2 0xca3a8b1 in facebook::fboss::phy::BaldEagle::getPortInfo(facebook::fboss::Port const*, std::vector > const&, std::vector > const&) (/tmp/setup_link_test_bin-6.5.21+0xca3a8b1) #3 0x72b0706 in facebook::fboss::YampPhyInterfaceHandler::getPortInfo(int, facebook::fboss::Port const*, facebook::fboss::phy::PhyPortConfig) (/tmp/setup_link_test_bin-6.5.21+0x72b0706) #4 0x71c15d6 in std::_Function_handler::_M_invoke(std::_Any_data const&) (/tmp/setup_link_test_bin-6.5.21+0x71c15d6) #5 0x71a0578 in facebook::fboss::Pim::runOnPimThreadAfterDelay(std::function, unsigned int)::'lambda'()::operator()() const::'lambda'()::operator()() const (/tmp/setup_link_test_bin-6.5.21+0x71a0578) #6 0x1073e68e in folly::TimeoutManager::CobTimeouts::CobTimeout::timeoutExpired() (/tmp/setup_link_test_bin-6.5.21+0x1073e68e) #7 0x107182ef in folly::AsyncTimeout::libeventCallback(int, short, void*) (/tmp/setup_link_test_bin-6.5.21+0x107182ef) #8 0x14b144f7 in event_process_active (/tmp/setup_link_test_bin-6.5.21+0x14b144f7) #9 0x14b14717 in event_base_loop (/tmp/setup_link_test_bin-6.5.21+0x14b14717) #10 0x1071e2b3 in folly::EventBase::loopBody(int, bool) (/tmp/setup_link_test_bin-6.5.21+0x1071e2b3) #11 0x1071d780 in folly::EventBase::loop() (/tmp/setup_link_test_bin-6.5.21+0x1071d780) #12 0x107216fd in folly::EventBase::loopForever() (/tmp/setup_link_test_bin-6.5.21+0x107216fd) #13 0x7fc324218660 in execute_native_thread_routine (/usr/local/fbcode/platform009/lib/libstdc++.so.6+0xd9660) #14 0x7fc3240da20b in start_thread (/usr/local/fbcode/platform009/lib/libpthread.so.0+0x920b) #15 0x7fc323feb16e in clone (/usr/local/fbcode/platform009/lib/libc.so.6+0x11816e) UndefinedBehaviorSanitizer: float-cast-overflow fboss/lib/phy/facebook/credo/yamp/BaldEagle.cpp:1455:22 in ``` Reviewed By: birdsoup Differential Revision: D32162784 fbshipit-source-id: f12a5d5a4f1dd2e9a2b7cef89fb6c268e2d4e587 --- fboss/agent/test/link_tests/LinkTest.h | 7 ++ fboss/agent/test/link_tests/PhyInfoTest.cpp | 5 - fboss/agent/test/link_tests/PortStatsTest.cpp | 110 ++++++++++++++++++ fboss/lib/phy/PhyManager.cpp | 4 +- 4 files changed, 120 insertions(+), 6 deletions(-) create mode 100644 fboss/agent/test/link_tests/PortStatsTest.cpp diff --git a/fboss/agent/test/link_tests/LinkTest.h b/fboss/agent/test/link_tests/LinkTest.h index dc2b67d379bf8..793f5e5f15e94 100644 --- a/fboss/agent/test/link_tests/LinkTest.h +++ b/fboss/agent/test/link_tests/LinkTest.h @@ -15,6 +15,13 @@ DECLARE_bool(skip_xphy_programming); namespace facebook::fboss { +using namespace std::chrono_literals; + +// When we switch to use qsfp_service to collect stats(PhyInfo), default stats +// collection frequency is 60s. Give the maximum check time 5x24=120s here. +constexpr auto kSecondsBetweenXphyInfoCollectionCheck = 5s; +constexpr auto kMaxNumXphyInfoCollectionCheck = 24; + class LinkTest : public AgentTest { protected: void SetUp() override; diff --git a/fboss/agent/test/link_tests/PhyInfoTest.cpp b/fboss/agent/test/link_tests/PhyInfoTest.cpp index a069879fcc6e4..cec49df0f9457 100644 --- a/fboss/agent/test/link_tests/PhyInfoTest.cpp +++ b/fboss/agent/test/link_tests/PhyInfoTest.cpp @@ -13,13 +13,8 @@ using namespace ::testing; using namespace facebook::fboss; -using namespace std::chrono_literals; static constexpr int kSecondsBetweenSnapshots = 20; -// When we switch to use qsfp_service to collect stats(PhyInfo), default stats -// collection frequency is 60s. Give the maximum check time 5x24=120s here. -static constexpr auto kSecondsBetweenXphyInfoCollectionCheck = 5s; -static constexpr auto kMaxNumXphyInfoCollectionCheck = 24; namespace { void validatePhyInfo( diff --git a/fboss/agent/test/link_tests/PortStatsTest.cpp b/fboss/agent/test/link_tests/PortStatsTest.cpp new file mode 100644 index 0000000000000..ead2e1d2c3e45 --- /dev/null +++ b/fboss/agent/test/link_tests/PortStatsTest.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2004-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + * + */ + +#include +#include +#include "common/stats/ThreadCachedServiceData.h" +#include "fboss/agent/test/link_tests/LinkTest.h" +#include "fboss/lib/CommonUtils.h" + +using namespace ::testing; +using namespace facebook; +using namespace facebook::fboss; + +class PortStatsTest : public LinkTest { + public: + // unordered_map> + using XphyPortStats = + std::unordered_map>; + + std::optional getXphyPortStats(bool logErrors = false); + void verifyXphyPortStats( + const XphyPortStats& before, + const XphyPortStats& after); +}; + +std::optional PortStatsTest::getXphyPortStats( + bool fatalErrors) { + XphyPortStats portStats; + + tcData().publishStats(); + + for (const auto& port : getCabledPorts()) { + auto portName = getPortName(port); + auto result = portStats.try_emplace(portName); + auto iter = result.first; + auto& portStat = iter->second; + + for (const auto side : {"system", "line"}) { + for (const auto stat : {"fec_uncorrectable", "fec_correctable"}) { + auto counterName = folly::to(side, ".", stat, ".sum"); + auto fullCounterName = + folly::to(portName, ".xphy.", counterName); + if (!tcData().hasCounter(fullCounterName)) { + if (fatalErrors) { + EXPECT_TRUE(tcData().hasCounter(fullCounterName)) + << fullCounterName << " absent"; + } + return std::nullopt; + } + portStat[counterName] = tcData().getCounter(fullCounterName); + } + } + } + + return portStats; +} + +void PortStatsTest::verifyXphyPortStats( + const XphyPortStats& before, + const XphyPortStats& after) { + for (const auto& port : getCabledPorts()) { + auto portName = getPortName(port); + + for (const auto side : {"system", "line"}) { + auto counterName = folly::to(side, ".fec_uncorrectable.sum"); + EXPECT_EQ( + before.at(portName).at(counterName), + after.at(portName).at(counterName)) + << after.at(portName).at(counterName) - + before.at(portName).at(counterName) + << " uncorrectable FEC codewords on port " << portName; + + counterName = folly::to(side, ".fec_correctable.sum"); + EXPECT_TRUE( + before.at(portName).at(counterName) <= + after.at(portName).at(counterName)) + << "Negative corrected codewords on port " << portName; + } + } +} + +TEST_F(PortStatsTest, xphySanity) { + std::optional portStatsBefore; + try { + checkWithRetry( + [this, &portStatsBefore] { + return (portStatsBefore = getXphyPortStats()); + }, + kMaxNumXphyInfoCollectionCheck /* retries */, + kSecondsBetweenXphyInfoCollectionCheck /* retry period */); + } catch (const FbossError&) { + // one last try with full logging + portStatsBefore = getXphyPortStats(true); + ASSERT_TRUE(portStatsBefore.has_value()) + << "Never has complete xphy port stats"; + } + + // sleep override + std::this_thread::sleep_for(std::chrono::seconds( + kSecondsBetweenXphyInfoCollectionCheck * kMaxNumXphyInfoCollectionCheck)); + + verifyXphyPortStats(*portStatsBefore, *getXphyPortStats(true)); +} diff --git a/fboss/lib/phy/PhyManager.cpp b/fboss/lib/phy/PhyManager.cpp index f0ac265bb941c..632de9292f0c9 100644 --- a/fboss/lib/phy/PhyManager.cpp +++ b/fboss/lib/phy/PhyManager.cpp @@ -510,13 +510,15 @@ void PhyManager::updateStatsLocked( auto* xphy = getExternalPhyLocked(wLockedCache); // If the xphy doesn't support either port or prbs stats, no-op if (!xphy->isSupported(phy::ExternalPhy::Feature::PORT_STATS) && + !xphy->isSupported(phy::ExternalPhy::Feature::PORT_INFO) && !xphy->isSupported(phy::ExternalPhy::Feature::PRBS_STATS)) { return; } auto pimID = getPhyIDInfo(xphyID).pimID; auto evb = getPimEventBase(pimID); - if (xphy->isSupported(phy::ExternalPhy::Feature::PORT_STATS)) { + if (xphy->isSupported(phy::ExternalPhy::Feature::PORT_STATS) || + xphy->isSupported(phy::ExternalPhy::Feature::PORT_INFO)) { if (wLockedCache->ongoingStatCollection.has_value() && !wLockedCache->ongoingStatCollection->isReady()) { XLOG(DBG4) << "XPHY Port Stat collection for Port:" << portID