MapReduce example: number of words on each line

author: Bharat (sree ram)
This program counts the number of words on each line of the input file.

Input HDFS file:


hadoop execution model is mapreduce
mapreduce is a backend business process logic
advantage of mapreduce is it will not sort raw data
but output of the mapper will be sorted


Output of this program:

  line1  5
  line2  7
  line3  10
  line4  8




 import java.io.IOException;
 import java.util.StringTokenizer;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.util.GenericOptionsParser;

 public class LineWordsCount
    public static class MapForLineWordsCount extends Mapper<LongWritable, Text, Text,

           int lineno = 1;
           public void map(LongWritable key, Text value, Context con) throws IOException,

               String line = value.toString();

               StringTokenizer token = new StringTokenizer(line);
                  String word = token.nextToken();
                  String l = "line"+lineno;
                  Text outputKey = new Text(l);
                  IntWritable outputValue = new IntWritable(1);
                  con.write(outputKey, outputValue);


            } // end of map()
    } //end of Mapper Class

     output of the mapper phase :
      <line1, <1,1,1,1,1>>
      <line2, <1,1,1,1,1,1,1>>
      <line3, <1,1,1,1,1,1,1,1,1,1>>
      <line4, <1,1,1,1,1,1,1,1>>


   public static class ReduceForLineWordsCount extends Reducer<Text, IntWritable, Text,

          public void reduce(Text line, Iterable<IntWritable> values, Context con) throws

IOException, InterruptedException

               int sum = 0;

               for(IntWritable value : values)

                   sum += value.get();


               con.write(line , new IntWritable(sum));

          } // end of reduce()
   } // end of Reducer class

 output of the reducer

   line1 5
   line2 7
   line3 10
   line4 8


 // job definition

   public static void  main(String[] args) throws Exception

           Configuration c = new Configuration();

           String[] files = new GenericOptionsParser(c, args).getRemainingArgs();

           Path input = new Path(files[0]);

           Path output = new Path(files[1]);

           Job j = new Job(c, "Linewordscount");







           FileInputFormat.addInputPath(j, input);

           FileOutputFormat.setOutputPath(j, output);

           System.exit(j.waitForCompletion(true) ? 0:1);

   } // end of main()

} end of main class


  1. Thanks for the code, but I didn't get the output in a sorted manner. How can I achieve that?

  2. Here the mapper's output is (LongWritable, Text) and the reducer's input is (Text, IntWritable), so this will fail with an IOException when the jar is executed.


