ray-c
2/6/2013 - 6:16 PM

Avro append-to-existing file example with the DataFileWriter.appendTo(…) API.

Avro append-to-existing file example with the DataFileWriter.appendTo(…) API.

package com.cloudera.example;

import java.io.IOException;
import java.io.OutputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.file.SeekableInput;
import org.apache.avro.mapred.FsInput;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Example: write an Avro data file to HDFS, close it, then reopen the same
 * file and append further records with
 * {@code DataFileWriter.appendTo(SeekableInput, OutputStream)}.
 *
 * <p>The program writes two {@link Sample} records to {@code /sample.avro},
 * closes the file, and then appends two more records to it.
 */
public class DFWAppendTest {
  /**
   * Minimal record type; its Avro schema is derived by reflection from the
   * single {@code foo} field.
   */
  public static class Sample {
    CharSequence foo;

    /**
     * @param bar value stored in the {@code foo} field
     */
    public Sample(CharSequence bar) {
      this.foo = bar;
    }
  }

  /**
   * Runs the example against the file system at {@code hdfs://localhost}.
   *
   * @param args ignored
   * @throws Exception if any file system or Avro operation fails
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://localhost");
    conf.setInt("dfs.replication", 1);
    FileSystem fs = FileSystem.get(conf);

    Schema sample = ReflectData.get().getSchema(Sample.class);
    ReflectDatumWriter<Sample> rdw = new ReflectDatumWriter<DFWAppendTest.Sample>(
        Sample.class);
    // A single writer object is reused for both phases. create(...) and
    // appendTo(...) return the writer they were invoked on, so no second
    // reference is kept.
    DataFileWriter<Sample> writer = new DataFileWriter<DFWAppendTest.Sample>(rdw);
    Path filePath = new Path("/sample.avro");

    writeInitialRecords(fs, filePath, writer, sample);
    appendMoreRecords(fs, conf, filePath, writer);
  }

  /**
   * Creates (or overwrites) {@code filePath} and writes the first two records.
   *
   * @param fs       file system to create the file on
   * @param filePath destination of the Avro data file
   * @param writer   writer that is not currently open on any file
   * @param schema   schema to store in the new file's header
   * @throws IOException if the file cannot be created or written
   */
  private static void writeInitialRecords(FileSystem fs, Path filePath,
      DataFileWriter<Sample> writer, Schema schema) throws IOException {
    OutputStream out = fs.create(filePath);
    try {
      writer.create(schema, out);
      writer.append(new Sample("Eggs"));
      writer.append(new Sample("Spam"));
      writer.close();
    } finally {
      // Also runs when a write above fails, so the HDFS stream is not leaked.
      // On the success path this repeats the explicit stream close the
      // original example performed after closing the writer.
      out.close();
    }
  }

  /**
   * Reopens the existing file at {@code filePath} and appends two more records.
   *
   * @param fs       file system holding the file
   * @param conf     configuration used to open the file for reading its header
   * @param filePath existing Avro data file to extend
   * @param writer   writer that has been closed after the initial write
   * @throws IOException if the file cannot be opened, read, or appended to
   */
  private static void appendMoreRecords(FileSystem fs, Configuration conf,
      Path filePath, DataFileWriter<Sample> writer) throws IOException {
    // Same open order as before: append stream first, then the reader input.
    OutputStream aout = fs.append(filePath);
    try {
      FsInput existing = new FsInput(filePath, conf);
      try {
        writer.appendTo(existing, aout);
        writer.append(new Sample("Monty"));
        writer.append(new Sample("Python"));
        writer.close();
      } finally {
        // The input handed to appendTo(...) was previously never closed.
        existing.close();
      }
    } finally {
      aout.close();
    }
  }
}