diff --git a/src/main/scala/com/cloudera/sparkts/TimeSeriesRDD.scala b/src/main/scala/com/cloudera/sparkts/TimeSeriesRDD.scala index ebadc95e..cf93106d 100644 --- a/src/main/scala/com/cloudera/sparkts/TimeSeriesRDD.scala +++ b/src/main/scala/com/cloudera/sparkts/TimeSeriesRDD.scala @@ -28,6 +28,7 @@ import org.apache.spark.SparkContext._ import org.apache.spark.{Partition, Partitioner, SparkContext, TaskContext} import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix, RowMatrix} +import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.rdd.RDD import org.apache.spark.util.StatCounter @@ -281,7 +282,9 @@ class TimeSeriesRDD(val index: DateTimeIndex, parent: RDD[(String, Vector[Double /** * Converts a TimeSeriesRDD into a distributed IndexedRowMatrix, useful to take advantage * of Spark MLlib's statistic functions on matrices in a distributed fashion. This is only - * supported for cases with a uniform time series index + * supported for cases with a uniform time series index. See + * [[http://spark.apache.org/docs/latest/mllib-data-types.html]] for more information on the + * matrix data structure. * @param nPartitions number of partitions, default to -1, which represents the same number * as currently used for the TimeSeriesRDD * @return an equivalent IndexedRowMatrix @@ -306,9 +309,11 @@ class TimeSeriesRDD(val index: DateTimeIndex, parent: RDD[(String, Vector[Double /** * Converts a TimeSeriesRDD into a distributed RowMatrix, note that indices in * a RowMatrix are not significant, and thus this is a valid operation regardless - * of the type of time index + * of the type of time index. See + * [[http://spark.apache.org/docs/latest/mllib-data-types.html]] for more information on the + * matrix data structure. * @param nPartitions - * @return + * @return an equivalent RowMatrix */ def toRowMatrix(nPartitions: Int = -1): RowMatrix = { val instants = this.toInstants(nPartitions)