Java Tutorials
Building Hadoop MapReduce Jobs In Java
Building XML With Java And DOM
Creating Java Servlets With NetBeans
Basic File Manipulation With Java
Introduction To Java

Building Hadoop MapReduce Jobs In Java

Sample data subset

These are a few lines that can be stored in a single file and used as the job's input when running Hadoop in standalone or pseudo-cluster mode. They were taken directly from the census data on the Amazon EBS volume.

"07","050","00","01","007","","","","","","","","","A","Bibb County, Alabama",20826,10745,10081,1449,1530,1454,1407,1422,3248,3177,2725,1093,908,1324,755,334,34.7,15540,8031,7509,14702,2924,2413,1000,1413,20719,15966,4624,49,17,8,1,5,0,1,1,1,2,1,1,0,0,61,107,16064,4651,115,25,4,76,20826,210,113,7,0,90,20616,15870,20826,19587,7421,4331,6120,4597,1174,586,541,269,1239,1024,215,7421,5581,2554,4331,1912,939,505,1840,1638,700,2859,1778,2.64,3.08,8345,7421,924,122,1.4,9.3,7421,5951,1470,2.70,2.39
"07","050","00","02","050","","","","","","","","","S","Bethel Census Area, Alaska",16006,8500,7506,1605,1906,1886,1468,1065,2156,2464,1699,599,327,517,250,64,25.3,9629,5104,4525,8909,1042,831,419,412,15389,2006,61,13114,168,7,12,21,2,116,1,9,9,6,0,2,1,31,617,2526,111,13680,217,23,77,16006,140,82,5,1,52,15866,1958,16006,15765,4226,2123,7299,5469,1407,752,710,363,241,194,47,4226,3175,2157,2123,1531,642,385,1051,839,117,2472,667,3.73,4.41,5188,4226,962,508,2.0,6.2,4226,2581,1645,4.16,3.06
"07","050","00","04","007","","","","","","","","","A","Gila County, Arizona",51335,25249,26086,3116,3578,3946,3413,2117,4726,6704,7051,3351,3174,5748,3426,985,42.3,38445,18651,19794,36795,12111,10159,4677,5482,50411,39951,197,6630,220,60,39,46,29,18,12,16,28,6,10,6,6,3385,924,40763,257,7060,304,54,3854,51335,8546,6791,68,12,1675,42789,35391,51335,50404,20140,11103,13712,10782,3214,1728,2235,1001,931,828,103,20140,14090,5306,11103,3594,2174,1266,6050,5203,2473,6174,7000,2.50,2.99,28189,20140,8049,5725,3.2,11.3,20140,15858,4282,2.47,2.61
"07","050","00","05","007","","","","","","","","","A","Benton County, Arkansas",153406,75686,77720,11616,11423,11301,10473,9169,21910,23177,18193,7417,6754,12199,7682,2092,35.3,112585,54805,57780,106655,25977,21973,9867,12106,150615,139399,629,2531,1673,414,148,155,59,124,503,270,130,27,29,13,61,6253,2791,142028,817,4196,2012,209,7002,153406,13469,9596,223,71,3579,139937,133094,153406,151275,58212,36675,44561,37555,6145,2411,5682,2258,2131,1152,979,58212,43474,20023,36675,15761,4778,3068,14738,12292,4945,21588,14796,2.60,3.01,64281,58212,6069,1731,2.6,8.0,58212,42005,16207,2.61,2.57
"07","050","00","06","007","","","","","","","","","A","Butte County, California",203171,99546,103625,11637,13409,14704,17101,19648,23087,27249,26809,9527,7944,15207,12630,4219,35.8,154404,74247,80157,141860,36728,32056,13597,18459,195248,171728,2816,3866,6752,511,637,500,611,181,192,4120,296,119,40,60,77,9790,7923,178739,3873,7271,8349,695,12756,203171,21339,17134,391,127,3687,181832,162564,203171,197327,79566,37130,53903,43521,8697,3583,18031,5016,5844,1630,4214,79566,49386,22571,37130,14929,8879,5619,30180,21636,8826,24810,22122,2.48,3.02,85523,79566,5957,1350,2.1,5.2,79566,48336,31230,2.48,2.48
"07","050","00","08","007","","","","","","","","","A","Archuleta County, Colorado",9898,5016,4882,531,662,799,735,402,940,1639,1806,697,509,805,300,73,40.8,7391,3687,3704,7109,1442,1178,612,566,9641,8743,35,139,31,7,5,3,10,3,1,2,3,2,0,1,0,690,257,8993,55,234,40,6,833,9898,1659,472,4,4,1179,8239,7927,9898,9814,3980,2381,2711,2326,327,147,415,198,84,61,23,3980,2872,1257,2381,919,325,233,1108,878,238,1346,826,2.47,2.89,6212,3980,2232,1456,4.0,11.0,3980,3057,923,2.48,2.41
"07","060","00","09","001","08980","","","","","","","","A","Brookfield town, Fairfield County, Connecticut",15664,7617,8047,1023,1335,1271,905,516,1570,2998,2664,1014,684,973,536,175,39.2,11376,5452,5924,11024,2066,1684,710,974,15540,14926,119,11,388,136,114,52,21,28,11,26,0,0,0,0,0,96,124,15039,146,35,443,4,127,15664,372,61,85,16,210,15292,14666,15664,15586,5572,3797,5208,4100,521,157,488,199,78,0,78,5572,4367,2176,3797,1905,433,216,1205,961,361,2284,1177,2.80,3.18,5781,5572,209,122,0.5,1.4,5572,4960,612,2.88,2.16
"08","160","00","10","","","01400","","","","","","","A","Arden village, Delaware",474,232,242,15,27,27,22,10,43,71,112,31,27,41,34,14,46.5,388,183,205,381,102,89,43,46,465,451,4,2,8,5,2,0,0,1,0,0,0,0,0,0,0,0,9,460,4,3,15,1,0,474,11,3,0,1,7,463,440,474,474,229,89,113,83,20,3,23,13,0,0,0,229,123,49,89,37,26,8,106,88,23,52,65,2.07,2.80,243,229,14,0,1.7,3.4,229,173,56,2.23,1.59

Complete Code

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

public class Gender {
    
    private static String genderCheck = "female";
    
    public static class Map extends MapReduceBase implements Mapper {
        private final static IntWritable one = new IntWritable(1);
        private Text locText = new Text();
        
        public void map(LongWritable key, Text value, OutputCollector output, Reporter reporter) throws IOException {
            String line = value.toString();
            String location = line.split(",")[14] + "," + line.split(",")[15];
            long male = 0L;
            long female = 0L;
            if (line.split(",")[17].matches("\d+") && line.split(",")[18].matches("\d+")) {
                male = Long.parseLong(line.split(",")[17]);
                female = Long.parseLong(line.split(",")[18]);
            }
            long diff = male - female;
            locText.set(location);
            if (Gender.genderCheck.toLowerCase().equals("female") && diff < 0) {
                output.collect(locText, new LongWritable(diff * -1L));
            }
            else if (Gender.genderCheck.toLowerCase().equals("male") && diff > 0) {
                output.collect(locText, new LongWritable(diff));                
            }
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(Gender.class);
        conf.setJobName("gender");
        conf.setOutputKeyClass(Text.class); 
        conf.setOutputValueClass(LongWritable.class);
        conf.setMapperClass(Map.class);

        if (args.length != 3) {
            System.out.println("Usage:");
            System.out.println("[male/female] /path/to/2kh/files /path/to/output");
            System.exit(1);
        }

        if (!args[0].equalsIgnoreCase("male") && !args[0].equalsIgnoreCase("female")) {
            System.out.println("first argument must be male or female");
            System.exit(1);
        }         
        Gender.genderCheck = args[0];

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(args[1]));
        FileOutputFormat.setOutputPath(conf, new Path(args[2]));
        JobClient.runJob(conf);
    }

}

Building Hadoop MapReduce Jobs In Java <<  1 2
New Content