// Inverted index, as commonly used in search engines.
Class InvertedIndex{
public static void main(String [] args){
SparkConfig conf = new SparkConf().setAppName("invertedIndex")
.set("spark.serializer","org.apache.spark.serializer.JavaSerializer")
.set("spark.akka.frameSize","256")
.set("spark.ui.port","4071");
SparkContext sc = new SparkContext(conf);
TextFile inputfile = new TextFile(arg[0]);
JavaRDD result = sc.textFile(inputfile).map(x=>x.split("\t")).map(x=>(x[0],x[1])).
map(x=>x._2.split(" ").map(y=>(y,x._1))).flatMap(x=>x).reduceByKey((x,y)=>x+"|"+y)
result.collect.foreach(println);
sc.stop();
}
}