码迷,mamicode.com
首页 > 编程语言 > 详细

2018-07-29期 MapReduce实现对字符串进行排序

时间:2018-07-30 11:20:38      阅读:166      评论:0      收藏:0      [点我收藏+]

标签:set   ble   input   latch   spl   event   tmp   port   方法   

package cn.sjq.mr.sort.number;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.junit.Test;

/**

* MapReduce字符串排序,字符串排序是按照数据字典顺序进行排序

* 这里所有的类采用匿名内部类实现

* @author songjq

*

*/

public class StringSort {

/**

* 对字符串进行排序Mapper类

* @author songjq

*

*/

static class StringSortMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

private Text tkey = new Text();

@Override

protected void map(LongWritable k1, Text v1, Context context)

throws IOException, InterruptedException {

String line = v1.toString();

//分词 格式:Wait Events Statistics

String[] split = StringUtils.split(line," ");

for(int i=0;i<split.length;i++) {

tkey.set(split[i]);

context.write(tkey, NullWritable.get());

}

}

}

/**

* MapReduce提交job类

* 这里采用MapReduce默认的比较器进行字符串的升序排序

* 执行结果:

* Activity

Advisory

Buffer

Cache

Cache

Dictionary

Events

IO

Instance

...

* @throws Exception

*/

@Test

public void StringSortJob() throws Exception {

Job job = Job.getInstance(new Configuration());

job.setJarByClass(StringSort.class);

job.setMapperClass(StringSortMapper.class);

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(NullWritable.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(NullWritable.class);

//采用MapReduce默认排序规则

//job.setSortComparatorClass(cls);

FileInputFormat.setInputPaths(job, new Path("D:\\test\\tmp\\sort\\Strings.data"));

FileOutputFormat.setOutputPath(job, new Path("D:\\test\\tmp\\sort\\out5"));

job.waitForCompletion(true);

}

/**

* 自定义StringSortMyComparator比较器,继承Text.Comparator重写compare方法,实现对字符串降序排序

* @author songjq

*

*/

static class StringSortMyComparator extends Text.Comparator{

//只需要在super前面加 - 即可实现降序排序

@Override

public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {

return -  super.compare(b1, s1, l1, b2, s2, l2);

}

}

/**

* MapReduce提交job类

* 这里采用自定义比较器StringSortMyComparator实现对字符串的降序排序

* 执行结果:

* Latch

Instance

IO

Events

Dictionary

Cache

Cache

Buffer

Advisory

Activity

...

* @throws Exception

*/

@Test

public void StringSortUseMyComparatorJob() throws Exception {

Job job = Job.getInstance(new Configuration());

job.setJarByClass(StringSort.class);

job.setMapperClass(StringSortMapper.class);

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(NullWritable.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(NullWritable.class);

//采用自定义比较器实现对字符串降序排序

job.setSortComparatorClass(StringSortMyComparator.class);

FileInputFormat.setInputPaths(job, new Path("D:\\test\\tmp\\sort\\Strings.data"));

FileOutputFormat.setOutputPath(job, new Path("D:\\test\\tmp\\sort\\out6"));

job.waitForCompletion(true);

}

}


2018-07-29期 MapReduce实现对字符串进行排序

标签:set   ble   input   latch   spl   event   tmp   port   方法   

原文地址:http://blog.51cto.com/2951890/2151971

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!