Good morning. I am trying to solve the following problem: "Count the number of vowels and consonants" using the MapReduce technique on the Hadoop platform.
My question is the following: when I execute the algorithm, I get this output:
Consonantes 1
Consonantes 1
Consonantes 1
Consonantes 1
Consonantes 1
Consonantes 1
Consonantes 1
Consonantes 1
Consonantes 1
Consonantes 1
Consonantes 1
Consonantes 1
Vocales 1
Vocales 1
Vocales 1
Vocales 1
Vocales 1
Vocales 1
Vocales 1
Vocales 1
Vocales 1
Vocales 1
The text I analyze is the following: "hello hello as you walk." If I manually count the lines of the output, the number of vowels and consonants is correct, that is, 12 consonants and 10 vowels. However, this is not the output I expect from the Reduce step.
Below is the solution I have written for the problem. It may not be very efficient. Any comments will be appreciated. Thanks.
public class Vocales {
public static class MapperV extends Mapper<Object, Text, Text, IntWritable>{
public void map(Object key, Text value, Mapper.Context context) throws IOException, InterruptedException {
StringTokenizer itPalabras = new StringTokenizer(value.toString());
caracter c = new caracter();
while(itPalabras.hasMoreTokens()){
String cadena = itPalabras.nextToken();
char [] caracter = cadena.toCharArray();
for(int i=0; i<cadena.length(); i++){
if(c.esVocal(caracter[i])){
context.write(new Text ("Vocales"), new IntWritable(1));
}else{
if(c.esConsonante(caracter[i])){
context.write(new Text("Consonantes"),new IntWritable(1));
}
}
}
}
}
}
public static class ReducerV extends Reducer<Text, IntWritable, Text, IntWritable>{
public void reduce(Text key, Iterable<IntWritable> values, Reducer.Context context) throws IOException, InterruptedException {
int countVocales = 0, countConsonantes = 0;
for(IntWritable val : values){
if(key.toString().equals("Vocales")) countVocales += val.get();
else countConsonantes += val.get();
}
Text v = new Text("total de "+key.toString()+": ");
Text c = new Text("total de "+key.toString()+": ");
context.write(v , new IntWritable(countVocales));
context.write(c , new IntWritable(countConsonantes));
}
}
/**
 * Configures the vowel/consonant counting job, submits it and exits the JVM
 * with status 0 when the job succeeds or 1 when it fails.
 *
 * @param args args[0] is the input path, args[1] is the output path
 * @throws IOException            propagated from job setup or submission
 * @throws ClassNotFoundException propagated from waitForCompletion
 * @throws InterruptedException   propagated from waitForCompletion
 */
public static void main(String args[]) throws IOException, ClassNotFoundException, InterruptedException{
    final Job trabajo = Job.getInstance(new Configuration(), "Job-Eje2");
    trabajo.setJarByClass(Vocales.class);

    // Map stage and its intermediate key/value types.
    trabajo.setMapperClass(MapperV.class);
    trabajo.setMapOutputKeyClass(Text.class);
    trabajo.setMapOutputValueClass(IntWritable.class);

    // Reduce stage and the final key/value types.
    trabajo.setReducerClass(ReducerV.class);
    trabajo.setOutputKeyClass(Text.class);
    trabajo.setOutputValueClass(IntWritable.class);

    // Input and output locations come from the command line.
    final Path entrada = new Path(args[0]);
    FileInputFormat.addInputPath(trabajo, entrada);
    final Path salida = new Path(args[1]);
    FileOutputFormat.setOutputPath(trabajo, salida);

    final boolean exito = trabajo.waitForCompletion(true);
    int codigoSalida;
    if (exito) {
        codigoSalida = 0;
    } else {
        codigoSalida = 1;
    }
    System.exit(codigoSalida);
}
/**
 * Helper that classifies a single character as a vowel or a consonant.
 */
public static class caracter {

    /**
     * Tells whether a character is one of the vowels a, e, i, o, u.
     * The character is lower-cased first so upper- and lower-case vowels
     * are treated the same.
     *
     * @param c the character to test
     * @return true if c is a, e, i, o or u in either case, false otherwise
     */
    public boolean esVocal(char c) {
        switch (Character.toLowerCase(c)) {
            case 'a':
            case 'e':
            case 'i':
            case 'o':
            case 'u':
                return true;
            default:
                return false;
        }
    }

    /**
     * Tells whether a character is a consonant, i.e. a letter according to
     * Character.isLetter that esVocal does not accept.
     *
     * @param c the character to test
     * @return true if c is a letter and not a vowel, false otherwise
     */
    public boolean esConsonante(char c) {
        if (!Character.isLetter(c)) {
            return false;
        }
        return !esVocal(c);
    }
}
}