Skip to content

Commit 79e36e2

Browse files
committed
[SPARK-19827][R][FOLLOWUP] spark.ml R API for PIC
## What changes were proposed in this pull request?

Follow-up style fixes to PIC in R; see apache#23072

## How was this patch tested?

Existing tests.

Closes apache#23292 from srowen/SPARK-19827.2.

Authored-by: Sean Owen <[email protected]>
Signed-off-by: Sean Owen <[email protected]>
1 parent bd8da37 commit 79e36e2

File tree

3 files changed

+10
-12
lines changed

3 files changed

+10
-12
lines changed

R/pkg/R/mllib_clustering.R

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -621,38 +621,35 @@ setMethod("write.ml", signature(object = "LDAModel", path = "character"),
621621
#'
622622
#' A scalable graph clustering algorithm. Users can call \code{spark.assignClusters} to
623623
#' return a cluster assignment for each input vertex.
624-
#'
625-
# Run the PIC algorithm and returns a cluster assignment for each input vertex.
624+
#' Runs the PIC algorithm and returns a cluster assignment for each input vertex.
626625
#' @param data a SparkDataFrame.
627626
#' @param k the number of clusters to create.
628-
#' @param initMode the initialization algorithm.
627+
#' @param initMode the initialization algorithm; "random" or "degree".
629628
#' @param maxIter the maximum number of iterations.
630629
#' @param sourceCol the name of the input column for source vertex IDs.
631630
#' @param destinationCol the name of the input column for destination vertex IDs.
632631
#' @param weightCol weight column name. If this is not set or \code{NULL},
633632
#' we treat all instance weights as 1.0.
634633
#' @param ... additional argument(s) passed to the method.
635634
#' @return A dataset that contains columns of vertex id and the corresponding cluster for the id.
636-
#' The schema of it will be:
637-
#' \code{id: Long}
638-
#' \code{cluster: Int}
635+
#' The schema of it will be: \code{id: integer}, \code{cluster: integer}
639636
#' @rdname spark.powerIterationClustering
640-
#' @aliases assignClusters,PowerIterationClustering-method,SparkDataFrame-method
637+
#' @aliases spark.assignClusters,SparkDataFrame-method
641638
#' @examples
642639
#' \dontrun{
643640
#' df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0),
644641
#' list(1L, 2L, 1.0), list(3L, 4L, 1.0),
645642
#' list(4L, 0L, 0.1)),
646643
#' schema = c("src", "dst", "weight"))
647-
#' clusters <- spark.assignClusters(df, initMode="degree", weightCol="weight")
644+
#' clusters <- spark.assignClusters(df, initMode = "degree", weightCol = "weight")
648645
#' showDF(clusters)
649646
#' }
650647
#' @note spark.assignClusters(SparkDataFrame) since 3.0.0
651648
setMethod("spark.assignClusters",
652649
signature(data = "SparkDataFrame"),
653650
function(data, k = 2L, initMode = c("random", "degree"), maxIter = 20L,
654651
sourceCol = "src", destinationCol = "dst", weightCol = NULL) {
655-
if (!is.numeric(k) || k < 1) {
652+
if (!is.integer(k) || k < 1) {
656653
stop("k should be a number with value >= 1.")
657654
}
658655
if (!is.integer(maxIter) || maxIter <= 0) {

R/pkg/R/mllib_fpm.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,8 @@ setMethod("write.ml", signature(object = "FPGrowthModel", path = "character"),
183183
#' @return A complete set of frequent sequential patterns in the input sequences of itemsets.
184184
#' The returned \code{SparkDataFrame} contains columns of sequence and corresponding
185185
#' frequency. The schema of it will be:
186-
#' \code{sequence: ArrayType(ArrayType(T))} (T is the item type)
187-
#' \code{freq: Long}
186+
#' \code{sequence: ArrayType(ArrayType(T))}, \code{freq: integer}
187+
#' where T is the item type
188188
#' @rdname spark.prefixSpan
189189
#' @aliases findFrequentSequentialPatterns,PrefixSpan,SparkDataFrame-method
190190
#' @examples

examples/src/main/r/ml/powerIterationClustering.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0),
3030
list(4L, 0L, 0.1)),
3131
schema = c("src", "dst", "weight"))
3232
# assign clusters
33-
clusters <- spark.assignClusters(df, k=2L, maxIter=20L, initMode="degree", weightCol="weight")
33+
clusters <- spark.assignClusters(df, k = 2L, maxIter = 20L,
34+
initMode = "degree", weightCol = "weight")
3435

3536
showDF(arrange(clusters, clusters$id))
3637
# $example off$

0 commit comments

Comments (0)