Skip to content

Commit

Permalink
GH-45190: [C++][Compute] Add rank_percentile function
Browse files Browse the repository at this point in the history
  • Loading branch information
pitrou committed Jan 20, 2025
1 parent 61c82ea commit 4fd918a
Show file tree
Hide file tree
Showing 6 changed files with 428 additions and 119 deletions.
13 changes: 13 additions & 0 deletions cpp/src/arrow/compute/api_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ using compute::DictionaryEncodeOptions;
using compute::FilterOptions;
using compute::NullPlacement;
using compute::RankOptions;
using compute::RankPercentileOptions;

template <>
struct EnumTraits<FilterOptions::NullSelectionBehavior>
Expand Down Expand Up @@ -151,6 +152,10 @@ static auto kRankOptionsType = GetFunctionOptionsType<RankOptions>(
DataMember("sort_keys", &RankOptions::sort_keys),
DataMember("null_placement", &RankOptions::null_placement),
DataMember("tiebreaker", &RankOptions::tiebreaker));
// Options type descriptor for RankPercentileOptions. Each DataMember pairs a
// string name with a pointer to the matching public field of the class
// (sort_keys, null_placement, factor). The RankPercentileOptions constructor
// passes this object to its FunctionOptions base.
// NOTE(review): presumably the listed members drive generic handling of the
// options (e.g. comparison / serialization) -- confirm against
// GetFunctionOptionsType before relying on it.
static auto kRankPercentileOptionsType = GetFunctionOptionsType<RankPercentileOptions>(
DataMember("sort_keys", &RankPercentileOptions::sort_keys),
DataMember("null_placement", &RankPercentileOptions::null_placement),
DataMember("factor", &RankPercentileOptions::factor));
// Options type descriptor for PairwiseOptions; `periods` is the only data
// member listed. The PairwiseOptions constructor passes this object to its
// FunctionOptions base.
static auto kPairwiseOptionsType = GetFunctionOptionsType<PairwiseOptions>(
DataMember("periods", &PairwiseOptions::periods));
static auto kListFlattenOptionsType = GetFunctionOptionsType<ListFlattenOptions>(
Expand Down Expand Up @@ -228,6 +233,14 @@ RankOptions::RankOptions(std::vector<SortKey> sort_keys, NullPlacement null_plac
tiebreaker(tiebreaker) {}
constexpr char RankOptions::kTypeName[];

// Builds percentile-rank options from explicit sort keys.
//
// `sort_keys` is taken by value and moved into the member of the same name.
// `null_placement` and `factor` are stored exactly as given; the constructor
// body is empty, so no validation happens here. The FunctionOptions base is
// initialized with internal::kRankPercentileOptionsType.
RankPercentileOptions::RankPercentileOptions(std::vector<SortKey> sort_keys,
NullPlacement null_placement, double factor)
: FunctionOptions(internal::kRankPercentileOptionsType),
sort_keys(std::move(sort_keys)),
null_placement(null_placement),
factor(factor) {}
// Namespace-scope definition of the static constexpr member declared in the
// class, matching what this file does for the other options classes.
constexpr char RankPercentileOptions::kTypeName[];

// Builds pairwise options. `periods` is stored as given (the constructor body
// is empty, so no validation happens here) and the FunctionOptions base is
// initialized with internal::kPairwiseOptionsType.
PairwiseOptions::PairwiseOptions(int64_t periods)
: FunctionOptions(internal::kPairwiseOptionsType), periods(periods) {}
// Namespace-scope definition of the static constexpr member declared in the
// class, matching what this file does for the other options classes.
constexpr char PairwiseOptions::kTypeName[];
Expand Down
24 changes: 24 additions & 0 deletions cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,30 @@ class ARROW_EXPORT RankOptions : public FunctionOptions {
Tiebreaker tiebreaker;
};

/// \brief Options for computing percentile ranks
///
/// Bundles the ordering to rank by, the placement of nulls and NaNs, and a
/// multiplicative factor applied to the output.
class ARROW_EXPORT RankPercentileOptions : public FunctionOptions {
 public:
  /// Name identifying this options type.
  static constexpr const char kTypeName[] = "RankPercentileOptions";

  /// \brief Construct from an explicit list of sort keys
  ///
  /// Every argument is optional: by default there are no sort keys, nulls are
  /// placed at the end, and the factor is 1.0.
  explicit RankPercentileOptions(std::vector<SortKey> sort_keys = {},
                                 NullPlacement null_placement = NullPlacement::AtEnd,
                                 double factor = 1.0);

  /// \brief Convenience constructor for array inputs
  ///
  /// Delegates to the sort-key constructor with a single SortKey built from an
  /// empty string and `order`.
  explicit RankPercentileOptions(SortOrder order,
                                 NullPlacement null_placement = NullPlacement::AtEnd,
                                 double factor = 1.0)
      : RankPercentileOptions(std::vector<SortKey>{SortKey("", order)}, null_placement,
                              factor) {}

  /// \brief Return options built with every constructor default
  static RankPercentileOptions Defaults() { return RankPercentileOptions{}; }

  /// Column key(s) to order by, together with the direction for each key.
  std::vector<SortKey> sort_keys;
  /// Placement of nulls and NaNs: at the start or at the end.
  NullPlacement null_placement;
  /// Factor applied to the output.
  /// A value of 1.0 gives results in (0, 1); 100.0 gives percentages, etc.
  double factor;
};

/// \brief Partitioning options for NthToIndices
class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
public:
Expand Down
Loading

0 comments on commit 4fd918a

Please sign in to comment.