Create an inverted index from the given documents.
Notice
Ensure that data does not include punctuation.
Have you met this question in a real interview?
Yes
Example
Given a list of documents, each with an id and a content string (class Document):
[
{
"id": 1,
"content": "This is the content of document 1 it is very short"
},
{
"id": 2,
"content": "This is the content of document 2 it is very long bilabial bilabial heheh hahaha ..."
}
]
Return an inverted index (a HashMap whose keys are words and whose values are lists of the ids of the documents containing that word).
{
"This": [1, 2],
"is": [1, 2],
...
}
/**
* Definition of Document:
* class Document {
* public:
* int id;
* string content;
* }
*/
class Solution {
public:
/**
* @param docs a list of documents
* @return an inverted index
*/
map<string, vector<int>> invertedIndex(vector<Document>& docs) {
// Write your code here
map<string, vector<int> > res;
for (auto& doc : docs ) {
vector<string> words = getWords(doc.content);
for (auto& word : words) {
if (find(res[word].begin(), res[word].end(), doc.id) == res[word].end()) {
res[word].push_back(doc.id);
}
}
}
return res;
}
private:
vector<string> getWords(string& content) {
vector<string> words;
int start = 0, end = 0;
for( ; end <= content.length(); ++end) {
if (end < content.length() && content[end] != ' ') {
continue;
}
string word = content.substr(start, end - start);
if (!word.empty()) {
words.push_back(word);
}
start = end + 1;
}
return words;
}
};