ayaniimi213
4/10/2013 - 3:01 AM

Download and decode WebGraph format graphs

Download and decode WebGraph format graphs

#!/bin/bash

#
# Download the files that make up one WebGraph dataset.
# Dataset catalogue: http://law.di.unimi.it/datasets.php
#
# Usage:
#   % download.sh http://data.law.di.unimi.it/webdata/cnr-2000/cnr-2000
#
# For the given base URL this fetches <base>.properties, <base>.graph and
# <base>.md5sums into the current directory.
#
# Exit status: 0 if every download succeeded, 1 if at least one failed,
# 2 if no base URL was given.
#

set -u

if (( $# < 1 )); then
    printf 'Usage: %s BASE_URL\n' "${0##*/}" >&2
    exit 2
fi

base_url=$1
status=0

for ext in .properties .graph .md5sums; do
    # -c resumes a partially downloaded file instead of starting over.
    # A failed download does not stop the remaining ones; it is only
    # remembered so that the script can report it through its exit status.
    if ! wget -c -- "${base_url}${ext}"; then
        printf 'download failed: %s\n' "${base_url}${ext}" >&2
        status=1
    fi
done

exit "$status"
#!/bin/sh

#
# Decode a WebGraph graph into a tab-separated edge list.
# http://webgraph.dsi.unimi.it/
#
# Run this from the directory that contains WebGraphDecoder.java and the
# libraries it needs (presumably the WebGraph jars and their dependencies):
# every non-hidden entry of the current directory is put on the classpath.
#
# Usage (example):
#
#   % ls ~/Desktop/WebGraph
#   enron-nat.graph  enron-nat.properties
#
#   % ./decode.sh ~/Desktop/WebGraph/enron-nat
#
#   % ls ~/Desktop/WebGraph
#   enron-nat.graph  enron-nat.offsets     enron-nat.tsv
#   enron-nat.obl    enron-nat.properties
#

# Stop at the first failing step (e.g. do not run the decoder when javac
# failed) and treat unset variables as errors.
set -eu

if [ "$#" -lt 1 ]; then
    printf 'Usage: %s GRAPH_BASENAME\n' "${0##*/}" >&2
    exit 2
fi

graph=$1

# Join the directory entries with ':' using a glob rather than parsing the
# output of ls, so names containing spaces or commas stay intact and no
# GNU-specific ls options are required.
CP=
for entry in *; do
    [ -e "$entry" ] || continue   # unmatched glob in an empty directory
    CP="${CP:+$CP:}$entry"
done

javac -cp "$CP" WebGraphDecoder.java

# The BVGraph run creates the .offsets and .obl files shown in the usage
# example above; the decoder then writes the edge list to "$graph.tsv".
java -cp "$CP" it.unimi.dsi.webgraph.BVGraph -o -O -L "$graph"
java -cp "$CP:." WebGraphDecoder "$graph"
import it.unimi.dsi.fastutil.ints.IntArrayFIFOQueue;
import it.unimi.dsi.fastutil.ints.IntArrays;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.webgraph.GraphClassParser;
import it.unimi.dsi.webgraph.ImmutableGraph;
import it.unimi.dsi.webgraph.LazyIntIterator;

import java.io.*;
import java.util.*;

/**
 * Dumps a WebGraph graph as a tab-separated edge list.
 *
 * <p>The graph is loaded with {@code ImmutableGraph.load(basename)} and every
 * arc is written to {@code basename + ".tsv"} as one
 * {@code source<TAB>target} line. The vertex count, the arc count reported by
 * the graph and the number of lines actually written are printed to standard
 * output.
 */
public class WebGraphDecoder {
  /**
   * Entry point.
   *
   * @param arg {@code arg[0]} is the basename of the graph to decode
   * @throws Exception if loading the graph or writing the output file fails
   */
  static public void main(String arg[]) throws Exception {
    if (arg.length < 1) {
      System.err.println("Usage: java WebGraphDecoder <graph-basename>");
      System.exit(1);
    }

    ImmutableGraph graph = ImmutableGraph.load(arg[0]);

    // A long counter, so graphs with more than Integer.MAX_VALUE arcs are
    // still counted correctly.
    long num_e = 0;

    // try-with-resources flushes and closes the file even when an exception
    // is thrown while iterating over the graph.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(arg[0] + ".tsv"))) {
      int num_v = graph.numNodes();
      System.out.printf("Vertices: %d\n", num_v);
      System.out.printf("Edges: %d\n", graph.numArcs());

      for (int v = 0; v < num_v; ++v) {
        LazyIntIterator successors = graph.successors(v);
        // Look the outdegree up once per vertex instead of once per arc.
        int degree = graph.outdegree(v);
        for (int i = 0; i < degree; ++i) {
          int w = successors.nextInt();
          bw.write(Integer.toString(v));
          bw.write("\t");
          bw.write(Integer.toString(w));
          bw.write("\n");
          ++num_e;
        }
      }
    }

    // Reported only after the file has been closed successfully.
    System.out.printf("Output Edges: %d\n", num_e);
  }
}