Hadoop MapReduce Programming: Counting Second-Hand Houses

Published: 2021-10-21 00:43:53


Requirement: count the number of second-hand houses in each district of Shanghai, with Pudong's houses computed by a dedicated Reduce task.


Analysis: because Pudong's count has to be computed separately, a custom partitioner is needed and the number of ReduceTasks must be set to 2.
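Each input record is a single CSV line with the columns community name, layout, area, district, floor/orientation, total price, unit price, and year built; the district is the fourth field, which is what the Mapper extracts. A made-up sample line (the values below are purely illustrative, not real data) looks like:

小区A,2室1厅,89,浦东,中层/朝南,520万,5.8万/平,2010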
Implementation:


package com.miao.secondhouse;

import com.miao.partition.UserPartition;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * @ClassName SecondHouseNum
 * @Description TODO Count second-hand houses per Shanghai district; Pudong is handled by a dedicated Reduce task
 * @Date 2021-04-27 17:47:53
 * @Create By Miao
 */
public class SecondHouseNum extends Configured implements Tool {

    // Build, configure, and submit the Job
    public int run(String[] args) throws Exception {
        /**
         * step1: build the Job
         */
        // Instantiate a MapReduce Job object
        Job job = Job.getInstance(this.getConf(), "second house num");
        // Specify the class used to locate the job jar
        job.setJarByClass(SecondHouseNum.class);

        /**
         * step2: configure the Job
         */
        // Input: configure the input
        // Specify the InputFormat class
        job.setInputFormatClass(TextInputFormat.class); // optional; TextInputFormat is the default
        // Specify the input source, i.e. the path of the file containing the second-hand house records
        Path inputPath = new Path("D:\\Study\\idea\\MavenProject\\secondhouse.csv");
        TextInputFormat.setInputPaths(job, inputPath);

        // Map: configure the Map phase
        job.setMapperClass(SecondMapper.class);         // Mapper class to invoke
        job.setMapOutputKeyClass(Text.class);           // type of K2
        job.setMapOutputValueClass(IntWritable.class);  // type of V2

        // Shuffle: configure the Shuffle phase
        job.setPartitionerClass(UserPartition.class);   // custom partitioner

        // Reduce: configure the Reduce phase
        job.setReducerClass(SecondReducer.class);       // Reducer class to invoke
        job.setOutputKeyClass(Text.class);              // type of K3
        job.setOutputValueClass(IntWritable.class);     // type of V3
        job.setNumReduceTasks(2);                       // number of ReduceTasks, default is 1

        // Output: configure the output
        // Specify the OutputFormat class
        job.setOutputFormatClass(TextOutputFormat.class); // TextOutputFormat is the default
        // Set the output path where the computed counts will be written
        Path outputPath = new Path("D:\\Study\\idea\\MavenProject\\output\\three");
        // If the output path already exists, delete it
        FileSystem fs = FileSystem.get(this.getConf());
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        TextOutputFormat.setOutputPath(job, outputPath);

        /**
         * step3: submit the Job
         */
        return job.waitForCompletion(true) ? 0 : -1;
    }


    // Program entry point
    public static void main(String[] args) throws Exception {
        // Build the configuration object
        Configuration conf = new Configuration();
        // Use ToolRunner to invoke this class's run method
        int status = ToolRunner.run(conf, new SecondHouseNum(), args);
        // Exit with the job status
        System.exit(status);
    }

    public static class SecondMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Key2: the district name
        Text outputKey = new Text();
        // Value2: a constant 1 for each record
        IntWritable outputValue = new IntWritable(1);

        // Input columns: community name, layout, area, district, floor/orientation, total price, unit price, year built
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Extract the district (the 4th column)
            String region = value.toString().split(",")[3];
            // Use the district as Key2
            this.outputKey.set(region);
            // Emit (district, 1)
            context.write(this.outputKey, this.outputValue);
        }
    }

    public static class SecondReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Value3: the total count for a district
        IntWritable outputValue = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum the 1s for this district
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            this.outputValue.set(sum);
            context.write(key, this.outputValue);
        }
    }
}
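
Note that the input and output paths above are hardcoded local Windows paths, which only works when the job is run locally from the IDE. As a sketch of an alternative (not in the original code, and the jar name below is hypothetical), the corresponding lines in run() could instead read the paths from the command-line arguments that ToolRunner already passes through:

        // Sketch (assumption): take the paths from the command line instead of hardcoding them,
        // with args[0] = input file and args[1] = output directory, e.g.
        //   hadoop jar secondhouse.jar com.miao.secondhouse.SecondHouseNum /data/secondhouse.csv /output/three
        Path inputPath = new Path(args[0]);
        TextInputFormat.setInputPaths(job, inputPath);

        Path outputPath = new Path(args[1]);
        FileSystem fs = FileSystem.get(this.getConf());
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        TextOutputFormat.setOutputPath(job, outputPath);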


Custom partitioner


Implementation:


package com.miao.partition;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * @ClassName UserPartition
 * @Description TODO Custom partitioner
 * extends Partitioner<Text, IntWritable>
 * The Key2 and Value2 types must be specified, and the overridden getPartition method must use the same types
 * @Date 2021-04-27 17:47:53
 * @Create By Miao
 */

public class UserPartition extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text k2, IntWritable v2, int numPartition) {
        String region = k2.toString();
        // Send Pudong (浦东) to partition 0; every other district to partition 1
        if ("浦东".equals(region)) {
            return 0;
        } else {
            return 1;
        }
    }
}
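
Because getPartition returns 0 for 浦东 and 1 for every other district, the first ReduceTask (which writes part-r-00000) receives only Pudong's records while the second (part-r-00001) receives all other districts; the return value must stay below the numReduceTasks value of 2 set in the driver. A quick local sanity check of that mapping, using a hypothetical helper class that is not part of the original post, could look like this:

package com.miao.partition;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

// Hypothetical helper class for a quick local check of UserPartition (illustrative only).
public class UserPartitionCheck {
    public static void main(String[] args) {
        UserPartition partition = new UserPartition();
        // "浦东" goes to partition 0, i.e. the ReduceTask that writes part-r-00000
        System.out.println(partition.getPartition(new Text("浦东"), new IntWritable(1), 2)); // prints 0
        // any other district goes to partition 1, i.e. part-r-00001
        System.out.println(partition.getPartition(new Text("徐汇"), new IntWritable(1), 2)); // prints 1
    }
}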
