Skip to content

Commit 79e36e2

Browse files
committed
[SPARK-19827][R][FOLLOWUP] spark.ml R API for PIC
## What changes were proposed in this pull request?

Follow-up style fixes to PIC in R; see apache#23072

## How was this patch tested?

Existing tests.

Closes apache#23292 from srowen/SPARK-19827.2.

Authored-by: Sean Owen <[email protected]>
Signed-off-by: Sean Owen <[email protected]>
1 parent bd8da37 commit 79e36e2

File tree

3 files changed

+10
-12
lines changed

3 files changed

+10
-12
lines changed

R/pkg/R/mllib_clustering.R

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -621,38 +621,35 @@ setMethod("write.ml", signature(object = "LDAModel", path = "character"),
621621
#'
622622
#' A scalable graph clustering algorithm. Users can call \code{spark.assignClusters} to
623623
#' return a cluster assignment for each input vertex.
624-
#'
625-
# Run the PIC algorithm and returns a cluster assignment for each input vertex.
624+
#' Runs the PIC algorithm and returns a cluster assignment for each input vertex.
626625
#' @param data a SparkDataFrame.
627626
#' @param k the number of clusters to create.
628-
#' @param initMode the initialization algorithm.
627+
#' @param initMode the initialization algorithm; "random" or "degree".
629628
#' @param maxIter the maximum number of iterations.
630629
#' @param sourceCol the name of the input column for source vertex IDs.
631630
#' @param destinationCol the name of the input column for destination vertex IDs.
632631
#' @param weightCol weight column name. If this is not set or \code{NULL},
633632
#' we treat all instance weights as 1.0.
634633
#' @param ... additional argument(s) passed to the method.
635634
#' @return A dataset that contains columns of vertex id and the corresponding cluster for the id.
636-
#' The schema of it will be:
637-
#' \code{id: Long}
638-
#' \code{cluster: Int}
635+
#' The schema of it will be: \code{id: integer}, \code{cluster: integer}
639636
#' @rdname spark.powerIterationClustering
640-
#' @aliases assignClusters,PowerIterationClustering-method,SparkDataFrame-method
637+
#' @aliases spark.assignClusters,SparkDataFrame-method
641638
#' @examples
642639
#' \dontrun{
643640
#' df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0),
644641
#' list(1L, 2L, 1.0), list(3L, 4L, 1.0),
645642
#' list(4L, 0L, 0.1)),
646643
#' schema = c("src", "dst", "weight"))
647-
#' clusters <- spark.assignClusters(df, initMode="degree", weightCol="weight")
644+
#' clusters <- spark.assignClusters(df, initMode = "degree", weightCol = "weight")
648645
#' showDF(clusters)
649646
#' }
650647
#' @note spark.assignClusters(SparkDataFrame) since 3.0.0
651648
setMethod("spark.assignClusters",
652649
signature(data = "SparkDataFrame"),
653650
function(data, k = 2L, initMode = c("random", "degree"), maxIter = 20L,
654651
sourceCol = "src", destinationCol = "dst", weightCol = NULL) {
655-
if (!is.numeric(k) || k < 1) {
652+
if (!is.integer(k) || k < 1) {
656653
stop("k should be a number with value >= 1.")
657654
}
658655
if (!is.integer(maxIter) || maxIter <= 0) {

R/pkg/R/mllib_fpm.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,8 @@ setMethod("write.ml", signature(object = "FPGrowthModel", path = "character"),
183183
#' @return A complete set of frequent sequential patterns in the input sequences of itemsets.
184184
#' The returned \code{SparkDataFrame} contains columns of sequence and corresponding
185185
#' frequency. The schema of it will be:
186-
#' \code{sequence: ArrayType(ArrayType(T))} (T is the item type)
187-
#' \code{freq: Long}
186+
#' \code{sequence: ArrayType(ArrayType(T))}, \code{freq: integer}
187+
#' where T is the item type
188188
#' @rdname spark.prefixSpan
189189
#' @aliases findFrequentSequentialPatterns,PrefixSpan,SparkDataFrame-method
190190
#' @examples

examples/src/main/r/ml/powerIterationClustering.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ df <- createDataFrame(list(list(0L, 1L, 1.0), list(0L, 2L, 1.0),
3030
list(4L, 0L, 0.1)),
3131
schema = c("src", "dst", "weight"))
3232
# assign clusters
33-
clusters <- spark.assignClusters(df, k=2L, maxIter=20L, initMode="degree", weightCol="weight")
33+
clusters <- spark.assignClusters(df, k = 2L, maxIter = 20L,
34+
initMode = "degree", weightCol = "weight")
3435

3536
showDF(arrange(clusters, clusters$id))
3637
# $example off$

0 commit comments

Comments (0)