LeetCode 527. Word Abbreviation

时间：2019-07-08 10:14:51 阅读：87 评论：0 收藏：0 [点我收藏+]

Greedy

先生成abbreviation，如果有重复，对所有重复的prefix再添加一个字符，直到没有重复为止。

class Solution {
public:
    vector<string> wordsAbbreviation(vector<string>& dict) {
        int n=dict.size();
        vector<string> res(n);
        vector<int> prefix(n,0); //prefix[i] = to what index is the prefix
        
        for (int i=0;i<n;++i){
            res[i] = abbrev(dict[i],prefix[i]);
        }
        
        for (int i=0;i<n;++i){ 
            while (true){
                vector<int> dupes;
                for (int j=i+1;j<n;++j){
                    if (res[i]==res[j])
                        dupes.push_back(j);
                }
                if (dupes.empty()) break;
                dupes.push_back(i);
                
                // add one more character
                for (int k:dupes){
                    res[k] = abbrev(dict[k],++prefix[k]);
                }
            }
        }
        
        return res;
    }
    
    string abbrev(string &word, int i){ // word[0~i] is prefix
        int len=word.size();
        // prefix has i+1 char, plus one number, plus last char
        if ((i+1)+1+1 >= len) return word;
        return word.substr(0,i+1) + to_string(len-(i+1)-1) + word[len-1];
    }
};

记dict长度为n，每个word平均长度为m。

外侧for循环O(n)，while循环最多O(m)，

内侧：j的for循环O(n) + k的for循环最多也是O(n) * abbrev的O(m)。

时间复杂度 O(m^2 * n^2)

Group + Longest Common Prefix

上述方法为了去重，需要反复遍历dict。但其实很多word是完全无关的，因此我们可以先根据3位的abbreviation分group，再对每个group操作。但是最坏情况下，时间复杂度依旧和上述方法一样。

这里更好的处理方法是对每个group中的word按照字母顺序排序，这样对于word[i]，一定不会重复的前缀一定要比下式大：

max { lcp(word[i],word[i-1]), lcp(word[i],word[i+1]) }

class Solution {
public:
    vector<string> wordsAbbreviation(vector<string>& dict) {
        int n=dict.size();
        unordered_map<string,vector<pair<string,int>>> m; // abbrev -> vector of (word,index)
        vector<string> res(n);
        
        // group by initial abbreviation
        for (int i=0;i<n;++i){
            string word=dict[i];
            string ab=abbrev(word,0);
            m[ab].push_back({word,i});
        }
        
        for (const auto &x:m){
            auto group=x.second;
            sort(group.begin(),group.end());
            
            vector<int> prefix(group.size(),0); //prefix[i] = to what index is the prefix
            for (int i=1;i<group.size();++i){
                int lcp=longestCommonPrefix(group[i-1].first,group[i].first);
                prefix[i] = lcp;
                prefix[i-1] = max(lcp,prefix[i-1]);
            }
            for (int i=0;i<group.size();++i){
                res[group[i].second] = abbrev(group[i].first,prefix[i]);
            }
        }

        return res;
    }
    
    string abbrev(string &word, int i){ // word[0~i] is prefix
        int len=word.size();
        // prefix has i+1 char, plus one number, plus last char
        if ((i+1)+1+1 >= len) return word;
        return word.substr(0,i+1) + to_string(len-(i+1)-1) + word[len-1];
    }
    
    // return the number of longest common prefix
    int longestCommonPrefix(string a, string b){
        int i=0;
        while (i<a.size() && i<b.size() && a[i]==b[i])
            ++i;
        return i;
    }
};

时间复杂度比较难分析，主要由sort时间复杂度决定。

sort时间复杂度应该是 O(m*nlogn)，nlogn次比较，每次比较两个长度为m的字符串要 O(m)。

https://cs.stackexchange.com/questions/47771/a-list-of-n-strings-lexicographic-order-using-the-merge-sort-algorithm-the-wors

Group + Trie

本题也可以group后用Trie做。对于每个group建Trie树，然后找每个word的最短prefix即可。可以通过给TrieNode里加一个count，遍历一个word的时候，如果对应的TrieNode的count为1，说明到此为止的prefix一定是distinct的。

struct TrieNode{
    int count;
    vector<TrieNode *> next;
    TrieNode():count(0),next(26,NULL){}
};

class Solution {
public:
    vector<string> wordsAbbreviation(vector<string>& dict) {
        int n=dict.size();
        unordered_map<string,vector<pair<string,int>>> m; // abbrev -> vector of (word,index)
        vector<string> res(n);
        
        // group by initial abbreviation
        for (int i=0;i<n;++i){
            string word=dict[i];
            string ab=abbrev(word,0);
            m[ab].push_back({word,i});
        }
        
        for (const auto &x:m){
            auto group=x.second;
            TrieNode *root=new TrieNode();
            for (pair<string,int> y:group){
                string word=y.first;
                TrieNode *p=root;
                for (char ch:word){
                    if (p->next[ch-‘a‘]==NULL)
                        p->next[ch-‘a‘] = new TrieNode();
                    p = p->next[ch-‘a‘];
                    ++p->count;
                }
                
            }
            for (pair<string,int> y:group){
                string word=y.first;
                TrieNode *p=root;
                int i;
                for (i=0;i<word.size();++i){
                    p = p->next[word[i]-‘a‘];
                    if (p->count==1) break; // find distinct prefix
                }
                res[y.second] = abbrev(word,i);
            }
        }

        return res;
    }
    
    string abbrev(string &word, int i){ // word[0~i] is prefix
        int len=word.size();
        // prefix has i+1 char, plus one number, plus last char
        if ((i+1)+1+1 >= len) return word;
        return word.substr(0,i+1) + to_string(len-(i+1)-1) + word[len-1];
    }
};

时间复杂度 O(mn)

LeetCode 527. Word Abbreviation

原文：https://www.cnblogs.com/hankunyan/p/11149050.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)