Create an inverted index from the given documents.

Notice

Ensure that data does not include punctuation.

Have you met this question in a real interview?

Yes

Example

Given a list of documents with id and content. (class Document)

[
  {
    "id": 1,
    "content": "This is the content of document 1 it is very short"
  },
  {
    "id": 2,
    "content": "This is the content of document 2 it is very long bilabial bilabial heheh hahaha ..."
  }
]

Return an inverted index (a HashMap whose key is a word and whose value is the list of ids of the documents containing that word).

{
   "This": [1, 2],
   "is": [1, 2],
   ...
}
/**
 * Definition of Document:
 * class Document {
 * public:
 *     int id;
 *     string content;
 * };
 */
class Solution {
public:
    /**
     * @param docs a list of documents
     * @return an inverted index
     */
    map<string, vector<int>> invertedIndex(vector<Document>& docs) {
        // Write your code here
        map<string, vector<int> > res;
        for (auto& doc : docs ) {
            vector<string> words = getWords(doc.content);
            for (auto& word : words) {
                if (find(res[word].begin(), res[word].end(), doc.id) == res[word].end()) {
                    res[word].push_back(doc.id);
                }
            }
        }

        return res;
    }

private:
    vector<string> getWords(string& content) {
        vector<string> words;
        int start = 0, end = 0;
        for( ; end <= content.length(); ++end) {
            if (end < content.length() && content[end] != ' ') {
                continue;
            }
            string word = content.substr(start, end - start);
            if (!word.empty()) {
                words.push_back(word);
            }
            start = end + 1;
        }

        return words;
    }
};

results matching ""

    No results matching ""