MapReduce example: number of words per line


We also offer online and classroom training.
We also provide support for POCs (proofs of concept).
author: Bharat (sree ram)
contact : 04042026071
_______________________________________________________________________

This program finds the number of words in each line of the input.

input hdfs file:

    mydir/file1.txt
_______________________________

hadoop execution model is mapreduce
mapreduce is a backend business process logic
advantage of mapreduce is it will not sort raw data
but output of the mapper will be sorted

________________________________________________

o/p by this program:

  line1  5
  line2  7
  line3  10
  line4  8

__________________________________________________

 package my.map.red.app;

 import java.io.IOException;
 import java.util.StringTokenizer;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 // GenericOptionsParser lives in org.apache.hadoop.util (not org.apache.hadoop.mapreduce.util).
 import org.apache.hadoop.util.GenericOptionsParser;

 public class LineWordsCount
 {
    public static class MapForLineWordsCount extends Mapper<LongWritable, Text, Text,

IntWritable>
    {
           int lineno = 1;
           public void map(LongWritable key, Text value, Context con) throws IOException,

InterruptedException
           {
               String line = value.toString();

               StringTokenizer token = new StringTokenizer(line);
               while(token.hasMoreTokens())
               {
                  String word = token.nextToken();
                  String l = "line"+lineno;
                  Text outputKey = new Text(l);
                  IntWritable outputValue = new IntWritable(1);
                  con.write(outputKey, outputValue);
               }

               lineno++;

            } // end of map()
    } //end of Mapper Class

  /*
     output of the mapper phase :
   
      <line1, <1,1,1,1,1>>
      <line2, <1,1,1,1,1,1,1>>
      <line3, <1,1,1,1,1,1,1,1,1,1>>
      <line4, <1,1,1,1,1,1,1,1>>

  */

   public static class ReduceForLineWordsCount extends Reducer<Text, IntWritable, Text,

IntWritable>
   {
          public void reduce(Text line, Iterable<IntWritable> values, Context con) throws

IOException, InterruptedException
          {

               int sum = 0;

               for(IntWritable value : values)
               {

                   sum += value.get();

               }

               con.write(line , new IntWritable(sum));

          } // end of reduce()
   } // end of Reducer class
/*

 output of the reducer

   line1 5
   line2 7
   line3 10
   line4 8

*/

 // job definition

   public static void  main(String[] args) throws Exception
   {

           Configuration c = new Configuration();

           String[] files = new GenericOptionsParser(c, args).getRemainingArgs();

           Path input = new Path(files[0]);

           Path output = new Path(files[1]);

           Job j = new Job(c, "Linewordscount");

           j.setJarByClass(LineWordsCount.class);

           j.setMapperClass(MapForLineWordsCount.class);

           j.setCombinerClass(ReduceForLineWordsCount.class);

           j.setReducerClass(ReduceForLineWordsCount.class);

           j.setOutputKeyClass(Text.class);

           j.setOutputValueClass(IntWritable.class);

           FileInputFormat.addInputPath(j, input);

           FileOutputFormat.setOutputPath(j, output);

           System.exit(j.waitForCompletion(true) ? 0:1);

   } // end of main()

} end of main class

4 comments:

MapReduce example : WordCount3

MapReduce Training in Hyderabad | Online MapReduce Training in India | MapReduce Training in Ameerpet



We also offer online and classroom training.
We also provide support for POCs (proofs of concept).
author: Bharat (sree ram)
contact : 09640892992
_____________________________________________________________________

 MapReduce Example : WordCount3


purpose:
___________

   I want to treat "love" and "like" as positive words and "hate" and "dislike" as negative words.
Now I want to find the number of positive words and the number of negative words.

Input hdfs file:
_________________

    mydir/comment.txt
___________________________
I like hadoop I love hadoop
I dislike java and I hate java
I love hadoop
________________________________

o/p of the program :
________________________________
  Negative 2
  Positive 3
_______________________________

Program : WordCount3.java
_________________________________

 package my.map.red.app;

 import java.io.IOException;

 import java.util.StringTokenizer;

 import org.apache.hadoop.conf.Configuration;

 import org.apache.hadoop.fs.Path;

 import org.apache.hadoop.io.Text;

 import org.apache.hadoop.io.IntWritable;

 import org.apache.hadoop.io.LongWritable;

 import org.apache.hadoop.mapreduce.Mapper;

 import org.apache.hadoop.mapreduce.Reducer;

 import org.apache.hadoop.mapreduce.Job;

 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 import org.apache.hadoop.util.GenericOptionsParser;

 public class WordCount3
 {
    public static class MapForWordCount extends Mapper<LongWritable, Text, Text, IntWritable>
    {
           public void map(LongWritable key, Text value, Context con) throws IOException, InterruptedException
           {
               String line = value.toString();

               StringTokenizer token = new StringTokenizer(line);

               while(token.hasMoreTokens())
               {

                  String status = new String();

                  String word = token.nextToken();

                  if (word.matches("like") || word.matches("love"))

                       status="Positive";

                  if (word.matches("dislike") || word.matches("hate"))

                        status="Negative";

                  Text outputKey = new Text(status);

                  IntWritable outputValue = new IntWritable(1);

                  if (status.matches("Positive") || status.matches("Negative"))

                      con.write(outputKey, outputValue);
               }
            } // end of map()
    } //end of Mapper Class

  /*
     output of the mapper phase :

       <Negative , <1,1>>
       <Positive, <1,1,1>>

  */
 
   public static class ReduceForWordCount extends Reducer<Text, IntWritable, Text, IntWritable>
   {
          public void reduce(Text status, Iterable<IntWritable> values, Context con) throws IOException, InterruptedException
          {

               int sum = 0;

               for(IntWritable value : values)
               {

                   sum += value.get();

               }

               con.write(status, new IntWritable(sum));

          } // end of reduce()
   } // end of Reducer class
/*

 output of the reducer

   Nagative 2
   Positive 3

*/

 // job definition

   public static void  main(String[] args) throws Exception
   {

           Configuration c = new Configuration();

           String[] files = new GenericOptionsParser(c, args).getRemainingArgs();

           Path input = new Path(files[0]);

           Path output = new Path(files[1]);

           Job j = new Job(c, "wordcount");

           j.setJarByClass(WordCount3.class);

           j.setMapperClass(MapForWordCount.class);

           j.setCombinerClass(ReduceForWordCount.class);

           j.setReducerClass(ReduceForWordCount.class);

           j.setOutputKeyClass(Text.class);

           j.setOutputValueClass(IntWritable.class);

           FileInputFormat.addInputPath(j, input);

           FileOutputFormat.setOutputPath(j, output);

           System.exit(j.waitForCompletion(true) ? 0:1);

   } // end of main()

} end of main class

1 comments:

MapReduce Example : WordCount2



We also offer online and classroom training.
We also provide support for POCs (proofs of concept).
author: Bharat (sree ram)
contact : 09640892992
________________________________________________________________________________
 MapReduce Example : WordCount2


purpose:
___________
  I want to find the word frequency for only the required words (here, "like" and "love").


Input hdfs file:
_________________

    mydir/comment.txt
___________________________
I like hadoop I love hadoop
I dislike java and I hate java
I love hadoop
________________________________

o/p of the program :
________________________________

   like  1
   love  2
_______________________________

Program : WordCount2.java
_________________________________

 package my.map.red.app;

 import java.io.IOException;

 import java.util.StringTokenizer;

 import org.apache.hadoop.conf.Configuration;

 import org.apache.hadoop.fs.Path;

 import org.apache.hadoop.io.Text;

 import org.apache.hadoop.io.IntWritable;

 import org.apache.hadoop.io.LongWritable;

 import org.apache.hadoop.mapreduce.Mapper;

 import org.apache.hadoop.mapreduce.Reducer;

 import org.apache.hadoop.mapreduce.Job;

 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 import org.apache.hadoop.util.GenericOptionsParser;

 public class WordCount2
 {
    public static class MapForWordCount extends Mapper<LongWritable, Text, Text, IntWritable>
    {
           public void map(LongWritable key, Text value, Context con) throws IOException, InterruptedException
           {
               String line = value.toString();

               StringTokenizer token = new StringTokenizer(line);

               while(token.hasMoreTokens())
               {

                  String status = new String();

                  String word = token.nextToken();

                  if (word.matches("like") || word.matches("love"))  
         {
                  Text outputKey = new Text(word);

               IntWritable outputValue = new IntWritable(1);

                        con.write(outputKey, outputValue);
                  }
            } // end of map()
    } //end of Mapper Class

  /*
  sample     output of the mapper phase :

       <like , <1>>
        <love , <1,1>>

  */
 
   public static class ReduceForWordCount extends Reducer<Text, IntWritable, Text, IntWritable>
   {
          public void reduce(Text word, Iterable<IntWritable> values, Context con) throws IOException, InterruptedException
          {

               int sum = 0;

               for(IntWritable value : values)
               {

                   sum += value.get();

               }

               con.write(word, new IntWritable(sum));

          } // end of reduce()
   } // end of Reducer class
/*

sample output of the reducer

   like  1
   love  2

*/

 // job definition

   public static void  main(String[] args) throws Exception
   {

           Configuration c = new Configuration();

           String[] files = new GenericOptionsParser(c, args).getRemainingArgs();

           Path input = new Path(files[0]);

           Path output = new Path(files[1]);

           Job j = new Job(c, "wordcount");

           j.setJarByClass(WordCount2.class);

           j.setMapperClass(MapForWordCount.class);

           j.setCombinerClass(ReduceForWordCount.class);

           j.setReducerClass(ReduceForWordCount.class);

           j.setOutputKeyClass(Text.class);

           j.setOutputValueClass(IntWritable.class);

           FileInputFormat.addInputPath(j, input);

           FileOutputFormat.setOutputPath(j, output);

           System.exit(j.waitForCompletion(true) ? 0:1);

   } // end of main()

} end of main class

0 comments:

MapReduce Example : WordCount



We also offer online and classroom training.
We also provide support for POCs (proofs of concept).
author: Bharat (sree ram)
contact : 04042026071
_______________________________________________________________


 MapReduce Example : WordCount


purpose:
___________
  I want to find each word's occurrence count (frequency).


Input hdfs file:
_________________

    mydir/comment.txt
___________________________
I like hadoop I love hadoop
I dislike java and I hate java
I love hadoop
________________________________

o/p of the program :
________________________________
  I 5
  and 1
  dislike 1
  hadoop 3
  hate 1
  java 2
  like 1
  love 2

_______________________________

Program : WordCount.java
_________________________________

 package my.map.red.app;

 import java.io.IOException;

 import java.util.StringTokenizer;

 import org.apache.hadoop.conf.Configuration;

 import org.apache.hadoop.fs.Path;

 import org.apache.hadoop.io.Text;

 import org.apache.hadoop.io.IntWritable;

 import org.apache.hadoop.io.LongWritable;

 import org.apache.hadoop.mapreduce.Mapper;

 import org.apache.hadoop.mapreduce.Reducer;

 import org.apache.hadoop.mapreduce.Job;

 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 import org.apache.hadoop.util.GenericOptionsParser;

 public class WordCount
 {
    public static class MapForWordCount extends Mapper<LongWritable, Text, Text, IntWritable>
    {
           public void map(LongWritable key, Text value, Context con) throws IOException, InterruptedException
           {
               String line = value.toString();

               StringTokenizer token = new StringTokenizer(line);

               while(token.hasMoreTokens())
               {

                  String status = new String();

                  String word = token.nextToken();

                  Text outputKey = new Text(word);

                  IntWritable outputValue = new IntWritable(1);

                  con.write(outputKey, outputValue);
               }
            } // end of map()
    } //end of Mapper Class

  /*
  sample     output of the mapper phase :

       <I , <1,1,1,1,1>>
       <hadoop, <1,1,1>>

  */
 
   public static class ReduceForWordCount extends Reducer<Text, IntWritable, Text, IntWritable>
   {
          public void reduce(Text word, Iterable<IntWritable> values, Context con) throws IOException, InterruptedException
          {

               int sum = 0;

               for(IntWritable value : values)
               {

                   sum += value.get();

               }

               con.write(word, new IntWritable(sum));

          } // end of reduce()
   } // end of Reducer class
/*

sample output of the reducer

   hadoop 3
   I 5

*/

 // job definition

   public static void  main(String[] args) throws Exception
   {

           Configuration c = new Configuration();

           String[] files = new GenericOptionsParser(c, args).getRemainingArgs();

           Path input = new Path(files[0]);

           Path output = new Path(files[1]);

           Job j = new Job(c, "wordcount");

           j.setJarByClass(WordCount.class);

           j.setMapperClass(MapForWordCount.class);

           j.setCombinerClass(ReduceForWordCount.class);

           j.setReducerClass(ReduceForWordCount.class);

           j.setOutputKeyClass(Text.class);

           j.setOutputValueClass(IntWritable.class);

           FileInputFormat.addInputPath(j, input);

           FileOutputFormat.setOutputPath(j, output);

           System.exit(j.waitForCompletion(true) ? 0:1);

   } // end of main()

} end of main class

1 comments: