CodeCollection2018
8/13/2019 - 6:48 AM

倒排索引

搜索引擎中常用的倒排索引（inverted index）

Class InvertedIndex{
    public static void main(String [] args){
        SparkConfig  conf = new SparkConf().setAppName("invertedIndex")
        .set("spark.serializer","org.apache.spark.serializer.JavaSerializer")
        .set("spark.akka.frameSize","256")
        .set("spark.ui.port","4071");
        SparkContext sc = new SparkContext(conf);
        TextFile inputfile = new TextFile(arg[0]);
        JavaRDD result = sc.textFile(inputfile).map(x=>x.split("\t")).map(x=>(x[0],x[1])).
        map(x=>x._2.split(" ").map(y=>(y,x._1))).flatMap(x=>x).reduceByKey((x,y)=>x+"|"+y)
        result.collect.foreach(println);
        sc.stop();
    }
}