Tags: hadoop, top algorithm in mapreduce, top algorithm
Hadoop reading-notes series: http://blog.csdn.net/caicongyang/article/category/2166855
Find the maximum value in a given file. The idea: each map task tracks the local maximum of its input split and emits it once in cleanup(); a single reduce task then takes the maximum of those per-mapper values.
TopApp.java
package suanfa;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * <p>
 * Title: TopApp.java
 * Package suanfa
 * </p>
 * <p>
 * Description: find the maximum value among 10 million numbers
 * </p>
 * @author Tom.Cai
 * @created 2014-12-2 10:28:33 PM
 * @version V1.0
 */
public class TopApp {
    private static final String INPUT_PATH = "hdfs://192.168.80.100:9000/top_input";
    private static final String OUT_PATH = "hdfs://192.168.80.100:9000/top_out";

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Delete the output directory if it already exists; otherwise the job fails.
        final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        final Path outPath = new Path(OUT_PATH);
        if (fileSystem.exists(outPath)) {
            fileSystem.delete(outPath, true);
        }

        final Job job = new Job(conf, TopApp.class.getSimpleName());
        // Ship the jar containing this class when running on a cluster.
        job.setJarByClass(TopApp.class);
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(NullWritable.class);
        FileOutputFormat.setOutputPath(job, outPath);
        job.waitForCompletion(true);
    }

    static class MyMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable> {
        // Running maximum for this map task. Start from the smallest possible long,
        // not Long.MAX_VALUE, or no input value could ever exceed it.
        long max = Long.MIN_VALUE;

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            long temp = Long.parseLong(value.toString());
            if (temp > max) {
                max = temp;
            }
        }

        // Emit only the local maximum, once per map task, after all records are seen.
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            context.write(new LongWritable(max), NullWritable.get());
        }
    }

    static class MyReducer extends Reducer<LongWritable, NullWritable, LongWritable, NullWritable> {
        long max = Long.MIN_VALUE;

        @Override
        protected void reduce(LongWritable key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // Each incoming key is one mapper's local maximum; keep the largest.
            if (key.get() > max) {
                max = key.get();
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            context.write(new LongWritable(max), NullWritable.get());
        }
    }
}
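For completeness, here is a minimal sketch of how the test input could be generated. The class name, file name, and record count are assumptions for illustration, not from the original post; it writes one random long per line, which is exactly the format MyMapper expects (Long.parseLong on each line of a TextInputFormat split).
GenerateTopInput.java
package suanfa;

import java.io.BufferedWriter;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/** Hypothetical helper: writes random numbers, one per line, into HDFS as job input. */
public class GenerateTopInput {
    public static void main(String[] args) throws Exception {
        // Same input directory that TopApp reads from.
        String dir = "hdfs://192.168.80.100:9000/top_input";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI(dir), conf);
        Random random = new Random();
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(fs.create(new Path(dir + "/data.txt"))))) {
            // 10 million records, matching the post's description; negative values
            // are fine because max starts at Long.MIN_VALUE.
            for (int i = 0; i < 10_000_000; i++) {
                writer.write(Long.toString(random.nextLong()));
                writer.newLine();
            }
        }
    }
}
Once the input is in place, package the classes into a jar, submit it with hadoop jar, and the single reducer's output file under /top_out will contain one line: the global maximum.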
Everyone is welcome to discuss and learn together!
If you find this useful, save it!
Record and share, so that you and I grow together! Feel free to check out my other posts; my blog: http://blog.csdn.net/caicongyang
Original post: http://blog.csdn.net/caicongyang/article/details/41686243