// Sample words "love1" .. "love12", explicitly split into 3 partitions.
val info: RDD[String] = sc.parallelize(
  List(
    "love1", "love2", "love3", "love4", "love5", "love6",
    "love7", "love8", "love9", "love10", "love11", "love12"
  ),
  numSlices = 3
)

// (name, score) pairs. No partition count is given here, so the
// SparkContext's default parallelism applies.
val rdd: RDD[(String, Double)] = sc.parallelize(
  List(
    "zhangsan" -> 66.5,
    "lisi" -> 33.2,
    "zhangsan" -> 66.7,
    "lisi" -> 33.4,
    "zhangsan" -> 66.8,
    "wangwu" -> 29.8
  )
)
// Group the words by the element at index 4 of `split("")`.
// NOTE(review): on Java 8+ `split("")` yields one piece per character, so
// "love1", "love10", "love11" and "love12" would all share the key "1" —
// confirm that grouping on the fifth character is the intent.
val result: RDD[(String, Iterable[String])] = info.groupBy { word =>
  val pieces = word.split("")
  pieces(4)
}
result.foreach(group => println(group))
// Split the (name, score) pairs into two groups keyed by whether the
// score is strictly greater than 34.
val resultBy: RDD[(Boolean, Iterable[(String, Double)])] = rdd.groupBy { pair =>
  val score = pair._2
  score > 34
}
resultBy.foreach(group => println(group))
// Collect every score recorded for the same name under a single key.
val resultByKey: RDD[(String, Iterable[Double])] = rdd.groupByKey()

// Print each name with its grouped scores, both as the Iterable that
// groupByKey produced and as a materialised List.
// NOTE(review): RDD.foreach runs wherever the partitions are processed, so
// outside local mode this output presumably lands in executor logs rather
// than on the driver console — confirm against the deployment mode.
resultByKey.foreach { case (name, scores) =>
  val scoreList: List[Double] = scores.toList
  println(s"name = $name,iterable = $scores,list = $scoreList")
}
|