Ubuntu Map/Reduce
data
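(The input directory ./data was shown as a screenshot in the original post. A minimal sample file, assuming contents like the example traced in the mapper comment below, would be:)

java oracle java oracle hadoop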

WordCountReducer.java
package com.sist.map;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    // Reused output object; avoids allocating a new IntWritable for every key
    private IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the 1s that the shuffle phase grouped under this word
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}
WordCountMapper.java
package com.sist.map;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/*
 * Input line: java oracle java oracle hadoop
 *
 * <Map>
 * -> java   1
 *    oracle 1
 *    java   1
 *    oracle 1
 *    hadoop 1
 *
 * <Shuffle>
 * -> sort & merge
 *    hadoop 1
 *    java   1,1
 *    oracle 1,1
 *
 * <Reduce>
 * -> hadoop 1
 *    java   2
 *    oracle 2
 *
 * (See the plain-Java trace after this class.)
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // INPUT key:  byte offset of the line   INPUT value:  the line text
    // OUTPUT key: word                      OUTPUT value: 1
    // Reused output objects: the count is always 1; word is reset per token
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split the line on whitespace and emit (word, 1) for each token
        StringTokenizer st = new StringTokenizer(value.toString());
        while (st.hasMoreTokens()) {
            word.set(st.nextToken());
            context.write(word, one);
        }
    }
}
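The comment block above traces one input line through map, shuffle, and reduce. A plain-Java sketch of that same flow, with no Hadoop involved (the class and variable names here are illustrative only):

import java.util.*;

public class WordCountTrace {
    public static void main(String[] args) {
        String line = "java oracle java oracle hadoop";

        // <Map> emit (word, 1) for every token
        List<Map.Entry<String, Integer>> mapped = new ArrayList<>();
        for (String token : line.split("\\s+")) {
            mapped.add(new AbstractMap.SimpleEntry<>(token, 1));
        }

        // <Shuffle> sort & merge: group the values under each key, keys in sorted order
        TreeMap<String, List<Integer>> grouped = new TreeMap<>();
        for (Map.Entry<String, Integer> e : mapped) {
            grouped.computeIfAbsent(e.getKey(), k -> new ArrayList<>()).add(e.getValue());
        }

        // <Reduce> sum the grouped values
        for (Map.Entry<String, List<Integer>> e : grouped.entrySet()) {
            int sum = 0;
            for (int v : e.getValue()) {
                sum += v;
            }
            System.out.println(e.getKey() + "\t" + sum); // hadoop 1, java 2, oracle 2
        }
    }
}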
WordCount.java
package com.sist.map;

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    public static void main(String[] args) {
        try {
            // Hadoop refuses to run if the output directory already exists,
            // so delete it first. In standalone mode output goes to the local
            // filesystem; this absolute path corresponds to the ./output path below.
            String path = "/home/sist/bigDataStudy/MapReduceProject/output";
            File file = new File(path);
            if (file.exists()) {
                System.out.println("output directory exists; deleting");
                File[] list = file.listFiles();
                for (File f : list) {
                    f.delete();
                }
                file.delete();
            }

            Configuration conf = new Configuration();
            // Hadoop => Job (new Job(conf, name) is deprecated in Hadoop 2.x,
            // where Job.getInstance(conf, name) is preferred)
            Job job = new Job(conf, "WordCount");
            // Jar containing the job classes
            job.setJarByClass(WordCount.class);
            // Mapper class
            job.setMapperClass(WordCountMapper.class);
            // Reducer class
            job.setReducerClass(WordCountReducer.class);
            // Output (key, value) types
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // Input directory to read from
            FileInputFormat.addInputPath(job, new Path("./data"));
            // Output directory for the results
            FileOutputFormat.setOutputPath(job, new Path("./output"));
            // Submit the job and wait for it to finish
            job.waitForCompletion(true);
        } catch (Exception ex) {
            System.out.println(ex.getMessage());
        }
    }
}
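To run the job from the command line, a sketch assuming Hadoop standalone mode and that the project has been exported as WordCount.jar (the jar name is hypothetical):

hadoop jar WordCount.jar com.sist.map.WordCount

The ./data and ./output paths are resolved relative to the working directory the command is launched from, so run it from the project root.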
Result
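With the sample input assumed above, the output file (by Hadoop's output convention, typically ./output/part-r-00000) would contain:

hadoop	1
java	2
oracle	2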
