Use MapReduce to build an inverted index for the given documents.

/**
 * Definition of Input:
 * template<class T>
 * class Input {
 * public:
 *     bool done(); 
 *         // Returns true if the iteration has no more elements, false otherwise.
 *     void next();
 *         // Move to the next element in the iteration
 *         // Runtime error if the iteration has no more elements
 *     T value();
 *        // Get the current element, Runtime error if
 *        // the iteration has no more elements
 * }
 * Definition of Document:
 * class Document {
 * public:
 *     int id; // document id
 *     string content; // document content
 * }
 */
class InvertedIndexMapper: public Mapper {
public:
    void Map(Input<Document>* input) {
        // Write your code here
        // Please directly use func 'output' to output 
        // the results into output buffer.
        // void output(string &key, int value);
     while (!input->done()) {
            vector<string> words = split(input->value().content, " ");
            for (string& word : words) {
                if (!word.empty())
                    output(word, input->value().id);
            }
            input->next();
        }
    }

private:
    vector<string> split(const string& value, string delim) {
        vector<string> words;
        int last = 0, index;
        while ((index = value.find(delim, last)) != string::npos) {
            words.push_back(value.substr(last, index - last));
            last = index + delim.length();
        }
        if (last < value.length()) {
            words.push_back(value.substr(last, value.length() - last));
        }
        return words;
    }
};


class InvertedIndexReducer: public Reducer {
public:
    void Reduce(string &key, Input<int>* input) {
        // Write your code here
        // Please directly use func 'output' to output 
        // the results into output buffer.
        // void output(string &key, vector<int> &value);
        vector<int> ids;
        int lastid = -1;
        while (!input->done()) {
            if (lastid != input->value())
                ids.push_back(input->value());
            lastid = input->value();
            input->next();
        }
        output(key, ids);
    }
};

results matching ""

    No results matching ""