MapReduce Example : WordCount3
Purpose:
___________
I want to treat "love" and "like" as positive words, and "hate" and "dislike" as negative words.
I then want to find the number of positive words and the number of negative words in the input.
Input HDFS file:
_________________
mydir/comment.txt
___________________________
I like hadoop I love hadoop
I dislike java and I hate java
I love hadoop
________________________________
Output of the program:
________________________________
Negative 2
Positive 3
_______________________________
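Before looking at the MapReduce version, here is a minimal stand-alone Java sketch of the same classification rule applied to the sample input above. The class name SentimentCountSketch is made up purely for illustration; the real job below applies the identical logic inside a Mapper and a Reducer.

import java.util.StringTokenizer;

// Hypothetical stand-alone illustration of the classification rule.
public class SentimentCountSketch {
    public static void main(String[] args) {
        String[] lines = {
            "I like hadoop I love hadoop",
            "I dislike java and I hate java",
            "I love hadoop"
        };
        int positive = 0, negative = 0;
        for (String line : lines) {
            StringTokenizer token = new StringTokenizer(line);
            while (token.hasMoreTokens()) {
                String word = token.nextToken();
                if (word.equals("like") || word.equals("love"))
                    positive++;                      // positive word found
                else if (word.equals("dislike") || word.equals("hate"))
                    negative++;                      // negative word found
            }
        }
        System.out.println("Negative " + negative);  // prints: Negative 2
        System.out.println("Positive " + positive);  // prints: Positive 3
    }
}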
Program : WordCount3.java
_________________________________
package my.map.red.app;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount3
{
    public static class MapForWordCount extends Mapper<LongWritable, Text, Text, IntWritable>
    {
        @Override
        public void map(LongWritable key, Text value, Context con) throws IOException, InterruptedException
        {
            String line = value.toString();
            StringTokenizer token = new StringTokenizer(line);
            while (token.hasMoreTokens())
            {
                String status = "";
                String word = token.nextToken();
                if (word.equals("like") || word.equals("love"))
                    status = "Positive";
                if (word.equals("dislike") || word.equals("hate"))
                    status = "Negative";
                Text outputKey = new Text(status);
                IntWritable outputValue = new IntWritable(1);
                // emit (status, 1) only for words that matched one of the four keywords
                if (status.equals("Positive") || status.equals("Negative"))
                    con.write(outputKey, outputValue);
            }
        } // end of map()
    } // end of Mapper class
    /*
    The mapper emits one (status, 1) pair per matched word. After the shuffle-and-sort
    phase, the reducer receives the values grouped by key:
    <Negative, <1, 1>>
    <Positive, <1, 1, 1>>
    */
    public static class ReduceForWordCount extends Reducer<Text, IntWritable, Text, IntWritable>
    {
        @Override
        public void reduce(Text status, Iterable<IntWritable> values, Context con) throws IOException, InterruptedException
        {
            int sum = 0;
            for (IntWritable value : values)
            {
                sum += value.get();
            }
            con.write(status, new IntWritable(sum));
        } // end of reduce()
    } // end of Reducer class
    /*
    output of the reducer:
    Negative 2
    Positive 3
    */
    // job definition
    public static void main(String[] args) throws Exception
    {
        Configuration c = new Configuration();
        String[] files = new GenericOptionsParser(c, args).getRemainingArgs();
        Path input = new Path(files[0]);
        Path output = new Path(files[1]);
        Job j = new Job(c, "wordcount");
        j.setJarByClass(WordCount3.class);
        j.setMapperClass(MapForWordCount.class);
        // the reducer also works as a combiner here because summing counts is associative and commutative
        j.setCombinerClass(ReduceForWordCount.class);
        j.setReducerClass(ReduceForWordCount.class);
        j.setOutputKeyClass(Text.class);
        j.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(j, input);
        FileOutputFormat.setOutputPath(j, output);
        System.exit(j.waitForCompletion(true) ? 0 : 1);
    } // end of main()
} // end of WordCount3 class
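As an optional sanity check before submitting the job to a cluster, the mapper and reducer can be exercised locally. The sketch below is only an illustration: it assumes the Apache MRUnit library is available on the classpath, and the test class name WordCount3Test is made up for this example.

package my.map.red.app;

import java.util.Arrays;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;

// Hypothetical local test of WordCount3's mapper and reducer using MRUnit.
public class WordCount3Test {
    public static void main(String[] args) throws Exception {
        // the mapper should emit one (Positive, 1) pair per positive word in the line
        MapDriver.newMapDriver(new WordCount3.MapForWordCount())
                 .withInput(new LongWritable(0), new Text("I like hadoop I love hadoop"))
                 .withOutput(new Text("Positive"), new IntWritable(1))
                 .withOutput(new Text("Positive"), new IntWritable(1))
                 .runTest();

        // the reducer should sum the grouped 1s for a key
        ReduceDriver.newReduceDriver(new WordCount3.ReduceForWordCount())
                    .withInput(new Text("Positive"),
                               Arrays.asList(new IntWritable(1), new IntWritable(1), new IntWritable(1)))
                    .withOutput(new Text("Positive"), new IntWritable(3))
                    .runTest();
    }
}

On a real cluster, the compiled classes are typically packaged into a jar and submitted with the hadoop jar command, passing the input file (mydir/comment.txt above) and a not-yet-existing output directory as the two arguments read in main().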