Use MapReduce to count word frequencies.
https://hadoop.apache.org/docs/r1.2.1/mapred_tutorial.html#Example%3A+WordCount+v1.0
Have you met this question in a real interview?
Yes
Example
chunk1: "Google Bye GoodBye Hadoop code"
chunk2: "lintcode code Bye"
Get MapReduce result:
Bye: 2
GoodBye: 1
Google: 1
Hadoop: 1
code: 2
lintcode: 1
/**
* Definition of Input:
* template<class T>
* class Input {
* public:
* bool done();
* // Returns true if the iteration has no more elements, false otherwise.
* void next();
* // Move to the next element in the iteration
* // Runtime error if the iteration has no more elements
* T value();
* // Get the current element, Runtime error if
* // the iteration has no more elements
* }
*/
/**
 * Mapper stage of the word-count job.
 *
 * For every chunk of text supplied by the input iterator, emits one
 * (word, 1) pair per space-separated word through the framework's
 * `output` function (void output(string &key, int value)).
 */
class WordCountMapper: public Mapper {
public:
    /**
     * Emits (word, 1) for each non-empty, space-separated word of each chunk.
     *
     * @param input iterator over text chunks; it is advanced until
     *              done() reports true.
     */
    void Map(Input<string>* input) {
        while (!input->done()) {
            // Kept non-const: output() takes its key by non-const reference.
            vector<string> words = split(input->value(), " ");
            for (string& word : words) {
                output(word, 1);
            }
            input->next();
        }
    }

private:
    /**
     * Splits `value` on every occurrence of `delim`, dropping empty tokens.
     *
     * Leading, trailing and repeated delimiters therefore never produce an
     * empty-string "word" that would be counted by the job.
     *
     * @param value text to tokenize.
     * @param delim separator string; if empty, `value` is returned as a
     *              single token (or nothing when `value` is empty too).
     * @return the non-empty tokens in their original order.
     */
    vector<string> split(const string& value, const string& delim) {
        vector<string> words;

        if (delim.empty()) {
            // find("") matches at the search position itself, so the scan
            // below would never advance; treat the input as one token.
            if (!value.empty()) {
                words.push_back(value);
            }
            return words;
        }

        // size_type (not int) so comparisons with npos/length() are exact
        // and positions are not truncated on long inputs.
        string::size_type last = 0;
        string::size_type index = 0;
        while ((index = value.find(delim, last)) != string::npos) {
            if (index > last) {  // skip empty token between adjacent delimiters
                words.push_back(value.substr(last, index - last));
            }
            last = index + delim.length();
        }
        if (last < value.length()) {
            words.push_back(value.substr(last));
        }
        return words;
    }
};
/**
 * Reducer stage of the word-count job.
 *
 * Adds up every partial count delivered for a key and emits the total
 * through the framework's `output` function
 * (void output(string &key, int value)).
 */
class WordCountReducer: public Reducer {
public:
    /**
     * Sums all values for `key` and outputs (key, sum).
     *
     * @param key   the word whose counts are being combined.
     * @param input iterator over the partial counts for `key`; it is
     *              advanced until done() reports true.
     */
    void Reduce(string &key, Input<int>* input) {
        int total = 0;
        for (; !input->done(); input->next()) {
            total += input->value();
        }
        output(key, total);
    }
};