MapReduce
MovieMapper.java
package com.sist.hadoop;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class MovieMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
private final IntWritable one = new IntWritable(1);
private Text result = new Text();
String[] feel = { “사랑”,”로맨스”,”매력”,”즐거움”,”스릴”,
“소름”,”긴장”,”공포”,”유머”,”웃음”,”개그”,
“행복”,”전율”,”경이”,”우울”,”절망”,”신비”,
“여운”,”희망”,”긴박”,”감동”,”감성”,”휴머니즘”,
“자극”,”재미”,”액션”,”반전”,”비극”,”미스테리”,
“판타지”,”꿈”,”설레임”,”흥미”,”풍경”,”일상”,
“순수”,”힐링”,”눈물”,”그리움”,”호러”,”충격”,”잔혹”,
“드라마”,”판타지”,”공포”,”멜로”,”애정”,
“로맨스”,”모험”,”느와르”,”다큐멘터리”,
“코미디”,”미스터리”,”범죄”,”SF”,”액션”,”애니메이션” };
Pattern[] pattern = new Pattern[feel.length];
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
throws IOException, InterruptedException {
for( int i=0; i<feel.length; i++ ){
pattern[i] = Pattern.compile( feel[i] );
}
Matcher[] matcher = new Matcher[feel.length];
for( int i=0; i<feel.length; i++){
matcher[i] = pattern[i].matcher( value.toString() );
while( matcher[i].find() ){
result.set( feel[i] ); //String 을 Hadoop.Text로 전환
context.write( result, one );
}
}
}
}
MovieDriver.java
package com.sist.hadoop;
import java.util.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.*;
public class MovieDriver {
public static void main(String[] args) throws Exception{
File dir = new File( “./output” );
if( dir.exists() ){
File[] files = dir.listFiles();
for(File f:files){
f.delete(); // -rf
}
dir.delete(); // rm
}
System.out.println(“11111111111111”);
//Hadoop
Configuration conf = new Configuration();
System.out.println(“2222”);
//JOB : 작업지시
Job job = new Job( conf, “WordCount” );
System.out.println(“333”);
//장비
job.setMapperClass( MovieMapper.class );
job.setReducerClass( MovieReducer.class );
System.out.println(“444”);
//결과값
job.setOutputKeyClass( Text.class );
job.setOutputValueClass( IntWritable.class );
System.out.println(“555”);
//작업하는 내용
FileInputFormat.addInputPath(job, new Path(“/home/sist/bigdataStudy/.metadata/.plugins/org.eclipse.wst.server.core/tmp0/wtpwebapps/MovieMapReduceProject/desc.txt”));
System.out.println(“666”);
FileOutputFormat.setOutputPath(job, new Path(“./output”));
System.out.println(“777”);
//실행
job.waitForCompletion( true );
}
}
MovieReducer.java
package com.sist.hadoop;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer that adds up all integer counts received for a key and writes a
 * single {@code (key, total)} pair.
 */
public class MovieReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /** Reusable output value holding the total for the current key. */
    private IntWritable total = new IntWritable();

    /**
     * Sums every value associated with {@code key} and emits the result.
     *
     * @param key     the key whose values are being aggregated
     * @param values  the counts emitted for this key
     * @param context sink for the aggregated {@code (key, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
        int running = 0;
        for (IntWritable count : values) {
            // get() unwraps the IntWritable into a plain int
            running = running + count.get();
        }
        total.set(running);
        context.write(key, total);
    }
}