Use MapReduce to build an inverted index for the given documents.
/**
* Definition of Input:
* template<class T>
* class Input {
* public:
* bool done();
* // Returns true if the iteration has no more elements, false otherwise.
* void next();
* // Move to the next element in the iteration
* // Runtime error if the iteration has no more elements
* T value();
* // Get the current element, Runtime error if
* // the iteration has no more elements
* }
* Definition of Document:
* class Document {
* public:
* int id; // document id
* string content; // document content
* }
*/
/**
 * Mapper stage of the inverted-index job.
 *
 * For every document supplied by the input iterator, emits one
 * (word, document id) pair per non-empty, space-separated token of the
 * document's content. A word that occurs several times in a document is
 * emitted once per occurrence.
 */
class InvertedIndexMapper: public Mapper {
public:
    /**
     * Walks the input until done() reports true and emits a (word, id)
     * pair through output() for each non-empty token of each document.
     *
     * @param input iterator over the documents to index.
     */
    void Map(Input<Document>* input) {
        // Please directly use func 'output' to output
        // the results into output buffer.
        // void output(string &key, int value);
        while (!input->done()) {
            // Read the current document once instead of once per word.
            const auto& doc = input->value();
            vector<string> words = split(doc.content, " ");
            for (string& word : words) {
                // Consecutive delimiters produce empty tokens; skip them.
                if (!word.empty()) {
                    output(word, doc.id);
                }
            }
            input->next();
        }
    }

private:
    /**
     * Splits value on every occurrence of delim.
     *
     * Tokens between adjacent delimiters are returned as empty strings;
     * an empty segment after the final delimiter is not returned.
     *
     * @param value text to split.
     * @param delim separator; when empty, value is returned as a single
     *              token (or no token if value is empty as well).
     * @return the tokens in their original order.
     */
    vector<string> split(const string& value, string delim) {
        vector<string> words;
        if (delim.empty()) {
            // find() with an empty pattern never advances, so bail out early.
            if (!value.empty()) {
                words.push_back(value);
            }
            return words;
        }
        // Use the string's own index type so comparison with npos is exact.
        string::size_type last = 0;
        string::size_type index;
        while ((index = value.find(delim, last)) != string::npos) {
            words.push_back(value.substr(last, index - last));
            last = index + delim.length();
        }
        if (last < value.length()) {
            words.push_back(value.substr(last));
        }
        return words;
    }
};
class InvertedIndexReducer: public Reducer {
public:
void Reduce(string &key, Input<int>* input) {
// Write your code here
// Please directly use func 'output' to output
// the results into output buffer.
// void output(string &key, vector<int> &value);
vector<int> ids;
int lastid = -1;
while (!input->done()) {
if (lastid != input->value())
ids.push_back(input->value());
lastid = input->value();
input->next();
}
output(key, ids);
}
};