MapReduce 自訂檔案輸出格式 (2)

1 篇文章 / 0 new
author
MapReduce 自訂檔案輸出格式 (2)
利用 Hadoop 提供的 MultipleOutputs 來自訂輸出檔名的前置名稱: 原本的 part-r-00000 會變成 自訂名稱-r-00000, 其中 -r-00000 的部分無法變更, 這與透過 OutputFormat 自訂檔名時可以完全控制檔名不同. 但做法很簡單, 重點在於變更 reduce 的 write 動作.
Reducer 重點部份
/**
 * Reducer that emits the largest value seen for each key.
 *
 * <p>Records are written through {@link MultipleOutputs} rather than the
 * regular {@code context.write}, so the key's string form is used as the
 * output file name prefix.
 */
public class MaxReducerMultiOutput extends Reducer<Text, IntWritable, Text, IntWritable> {

    /** Writer created once per task in {@link #setup} and closed in {@link #cleanup}. */
    private MultipleOutputs<Text, IntWritable> namedOutputs;

    /** Creates the {@link MultipleOutputs} writer bound to this task's context. */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        this.namedOutputs = new MultipleOutputs<Text, IntWritable>(context);
    }

    /**
     * Finds the maximum of {@code values} and writes it under a file whose
     * prefix is the key itself.
     *
     * @param key     grouping key; also used as the output file name prefix
     * @param values  all values collected for {@code key}
     * @param context task context (not written to directly)
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int largest = Integer.MIN_VALUE;
        for (IntWritable candidate : values) {
            int current = candidate.get();
            if (current > largest) {
                largest = current;
            }
        }

        // Arguments: key, value, output file name prefix (must not be 'part').
        String filePrefix = key.toString();
        IntWritable result = new IntWritable(largest);
        namedOutputs.write(key, result, filePrefix);
    }

    /** Closes the {@link MultipleOutputs} writer when the task finishes. */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        namedOutputs.close();
    }
}
Mapper
public class MaxMapper extends Mapper<LongWritable, Text, Text, IntWritable> {<br />
    @Override<br />
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {<br />
        String line = value.toString();<br />
        String year = line.substring(0, 4);<br />
        int data = Integer.parseInt(line.substring(11, 15));<br />
        context.write(new Text(year), new IntWritable(data));<br />
    }<br />
}

Driver
/**
 * Driver that configures and submits the job wiring {@code MaxMapper} to
 * {@code MaxReducerMultiOutput}.
 *
 * <p>Usage: {@code MaxDriverMultiOutput [inputPaths [outputPath]]}. When an
 * argument is omitted the original defaults {@code /in} and {@code /out}
 * are used.
 */
public class MaxDriverMultiOutput extends Configured implements Tool {

    /** Input path(s) used when no first argument is given. */
    private static final String DEFAULT_INPUT_PATHS = "/in";

    /** Output directory used when no second argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "/out";

    /**
     * Builds the job, submits it, and waits for it to finish.
     *
     * @param args optional {@code [inputPaths, outputPath]}; {@code inputPaths}
     *             is passed to {@code FileInputFormat.addInputPaths}
     * @return 0 when the job completes successfully, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        String inputPaths = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATHS;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf = getConf();
        Job job = new Job(conf, "MultipleOutputs");
        // Tells Hadoop which jar to ship to the cluster; without it the tasks
        // cannot load the mapper and reducer classes.
        job.setJarByClass(MaxDriverMultiOutput.class);

        job.setMapperClass(MaxMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // This reducer produces the custom "<prefix>-r-xxxxx" output file names.
        job.setReducerClass(MaxReducerMultiOutput.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // With LazyOutputFormat, an output file is not created when there is
        // no data to write to it.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

        FileInputFormat.addInputPaths(job, inputPaths);
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Runs the driver through {@link ToolRunner} and exits with its status code. */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new MaxDriverMultiOutput(), args);
        System.exit(exitCode);
    }
}
資料內容
1950-12-04 7999
1950-12-05 1234
...
關鍵字: 
Free Web Hosting