首页 > 编程语言 > 详细

后缀数组

时间:2020-04-02 15:44:05      阅读:69      评论:0      收藏:0      [点我收藏+]

1. New Distinct Substrings

题目链接:spoj - SUBST1

题意:给你一个串,求他的子串个数

思路:每个子串都是某个后缀的前缀,所以计算每个后缀的贡献即可,按照后缀suffix(sa[1]),suffix(sa[2])...的顺序计算,当加入suffix(sa[k])时,本应产生n-sa[k]+1个前缀,但是有height[k]个前缀在前面已经计算过了,所以suffix(sa[k])对答案的贡献就是n-sa[k]+1-height[k]

技术分享图片
#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

const int N = 50010;

int wa[N], wb[N], wv[N], wss[N];
int n, T, rak[N], height[N], cal[N], sa[N];
char s[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

int main()
{
    scanf("%d", &T);
    while (T--) {
        scanf("%s", s + 1);
        int n = strlen(s + 1);
        for (int i = 1; i <= n; i++) cal[i] = s[i];
        cal[n + 1] = 0;
        da(cal + 1, sa, n + 1, 150);
        calc(cal + 1, sa, n);
        int res = 0;
        for (int i = 1; i <= n; i++)
            res = res + n - sa[i] + 1 - height[i];
        printf("%d\n", res);
    }
    return 0;
}
View Code

2. Substring

题目链接:hdu - 5769

题意:给你一个串和一个字符x,求包含字符x的子串个数

思路:和上题思路一样,计算每个后缀的贡献,不同的是子串中需要包含字符x,所以我们可以先预处理出每个位置后面离它最近的一个字符x的位置pos[],当加入suffix(sa[k])时,我们只能从pos[sa[k]]开始取前缀,所以suffix(sa[k])对答案的贡献就是n-sa[k]+1-max(height[k],pos[sa[k]]-sa[k])

技术分享图片
#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

typedef long long ll;

const int N = 700010;

int wa[N], wb[N], wv[N], wss[N];
int rak[N], height[N], cal[N], sa[N];
int T, icas, pos[N];
char s[N], ch[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

int main()
{
    scanf("%d", &T);
    while (T--) {
        scanf("%s%s", ch + 1, s + 1);
        int n = strlen(s + 1);
        for (int i = 1; i <= n; i++) cal[i] = s[i];
        cal[n + 1] = 0;
        da(cal + 1, sa, n + 1, 200);
        calc(cal + 1, sa, n);
        int p = n + 1;
        for (int i = n; i >= 1; i--) {
            if (s[i] == ch[1]) p = i;
            pos[i] = p;
        }
        ll res = 0;
        for (int i = 1; i <= n; i++) {
            res = res + n - sa[i] + 1;
            res -= max(height[i], pos[sa[i]] - sa[i]);
        }
        printf("Case #%d: %lld\n", ++icas, res);
    }
    return 0;
}
View Code

3. Musical Theme

题目链接:poj - 1743

题意:给你一个串,找出最长、不可重叠、重复的子串

思路:二分答案,将问题转换为判定型问题,判断是否存在两个子串长度为k且不重叠,将排序后的后缀分为若干组,每组内所有元素的height大于等于k,那么每组内任意两个后缀的最长公共前缀长度都大于等于k,此时如果有一组内sa的最大值与最小值之差大于等于k,那么就一定存在两个子串长度为k且不重叠

技术分享图片
#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

const int N = 20010;
const int INF = 0x3f3f3f3f;

int wa[N], wb[N], wv[N], wss[N], a[N];
int n, rak[N], height[N], cal[N], sa[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

bool check(int mid)
{
    int imin = sa[1], imax = sa[1];
    for (int i = 2; i <= n; i++) {
        if (height[i] >= mid) {
            imin = min(imin, sa[i]);
            imax = max(imax, sa[i]);
        }
        else {
            if (imax - imin > mid) return true;
            imin = imax = sa[i];
        }
    }
    return imax - imin > mid;
}

int main()
{
    while (scanf("%d", &n) && 0 != n) {
        for (int i = 1; i <= n; i++) scanf("%d", &a[i]);
        for (int i = 1; i <= n; i++) {
            cal[i] = a[i] - a[i - 1];
            cal[i] += 90;
        }
        cal[n + 1] = 0;
        da(cal + 1, sa, n + 1, 200);
        calc(cal + 1, sa, n);
        int l = 0, r = n;
        while (l < r) {
            int mid = (l + r) / 2;
            if (check(mid)) l = mid + 1;
            else r = mid;
        }
        printf("%d\n", l >= 5 ? l : 0);
    }
    return 0;
}
View Code

4. Milk Patterns

题目链接:poj - 3261

题意:给你一个数组和一个数k,求出最长、可以重叠、重复k次的子串

思路:二分答案,将问题转换为判定型问题,判断是否存在k个子串长度为L且不重叠,将排序后的后缀分为若干组,每组内所有元素的height大于等于L,此时如果存在一组组内后缀的数目大于等于k,那么就一定存在k个子串长度为L且不重叠

技术分享图片
#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

const int N = 1000010;

int wa[N], wb[N], wv[N], wss[N], a[N];
int n, k, rak[N], height[N], cal[N], sa[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

bool check(int mid)
{
    int cnt = 1;
    for (int i = 2; i <= n; i++) {
        if (height[i] >= mid) cnt++;
        else cnt = 1;
        if (cnt >= k) return true;
    }
    return false;
}

int main()
{
    scanf("%d%d", &n, &k);
    for (int i = 1; i <= n; i++) {
        scanf("%d", &cal[i]);
        cal[i]++;
    }
    cal[n + 1] = 0;
    da(cal + 1, sa, n + 1, N);
    calc(cal + 1, sa, n);
    int l = 1, r = n;
    while (l < r) {
        int mid = (l + r + 1) / 2;
        if (check(mid)) l = mid;
        else r = mid - 1;
    }
    printf("%d\n", l);
    return 0;
}
View Code

5. Life Forms

题目链接:poj - 3294

题意:给你n个字符串,求在大于n/2个字符串中含有的最长公共子串

思路:将n个字符串拼接起来,中间用一个从没出现过且不相同的字符隔开,求出后缀数组后二分答案,将排序后的后缀分为若干组,判断每一组内的后缀是否出现在大于n/2个不同的字符串中(标记某个字符串是否出现过,统计不同字符串的数量)

技术分享图片
#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

typedef long long ll;

const int N = 101500;
const int M = 110;

int wa[N], wb[N], wv[N], wss[N];
int rak[N], height[N], cal[N], sa[N];
int n, len, belong[N];
bool f[M];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

bool check(int k)
{
    memset(f, false, sizeof(f));
    int cnt = 0;
    for (int i = 1; i <= len; i++) {
        if (height[i] >= k) {
            if (0 != belong[sa[i]] && false == f[belong[sa[i]]]) {
                cnt++;
                f[belong[sa[i]]] = true;
            }
        }
        else {
            memset(f, false, sizeof(f));
            if (0 != belong[sa[i]]) {
                cnt = 1;
                f[belong[sa[i]]] = true;
            }
            else cnt = 0;
        }
        if (cnt > n / 2) return true;
    }
    return false;
}

int main()
{
    while (scanf("%d", &n) && 0 != n) {
        int b = 125;
        len = 0;
        for (int i = 1; i <= n; i++) {
            char s[10 * M];
            scanf("%s", s + 1);
            for (int k = 1; k <= strlen(s + 1); k++) {
                cal[++len] = s[k];
                belong[len] = i;
            }
            if (i != n) {
                cal[++len] = b++;
                belong[len] = 0;
            }
        }
        cal[len + 1] = 0;
        belong[len + 1] = 0;
        da(cal + 1, sa, len + 1, 250);
        calc(cal + 1, sa, len);
        int l = 0, r = len;
        while (l < r) {
            int mid = (l + r + 1) / 2;
            if (check(mid)) l = mid;
            else r = mid - 1;
        }
        if (0 == l) {
            printf("?\n\n");
            continue;
        }
        memset(f, false, sizeof(f));
        int cnt = 0;
        for (int i = 1; i <= len; i++) {
            if (height[i] >= l) {
                if (0 != belong[sa[i]] && false == f[belong[sa[i]]]) {
                    cnt++;
                    f[belong[sa[i]]] = true;
                }
            }
            else {
                if (cnt > n / 2) {
                    for (int j = 0; j < l; j++) printf("%c", char(cal[sa[i - 1] + j]));
                    printf("\n");
                }
                memset(f, false, sizeof(f));
                if (0 != belong[sa[i]]) {
                    cnt = 1;
                    f[belong[sa[i]]] = true;
                }
                else cnt = 0;
            }
        }
        printf("\n");
    }
    return 0;
}
View Code

6. Relevant Phrases of Annihilation

题目链接:spoj - PHRASES

题意:给你n个字符串,求在每个字符串中不重叠的至少出现2次的最长子串

思路:将n个字符串拼接起来,中间用一个从没出现过且不相同的字符隔开,求后缀数组后二分答案,将排序后的后缀分为若干组,判断是否有一组后缀在每个原来的字符串中至少出现两次,并且在每个原来的字符串中,后缀的起始位置的最大值与最小值之差是否不小于当前二分的长度

技术分享图片
#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

const int N = 100500;
const int M = 10010;
const int INF = 0x3f3f3f3f;

int wa[N], wb[N], wv[N], wss[N];
int rak[N], height[N], cal[N], sa[N];
int n, T, len, belong[N];
int f[12], imin[12], imax[12];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

bool check(int k)
{
    memset(f, 0, sizeof(f));
    memset(imin, INF, sizeof(imin));
    memset(imax, 0, sizeof(imax));
    int cnt = 0;
    for (int i = 1; i <= len; i++) {
        if (height[i] >= k) {
            if (0 != belong[sa[i]] && 0 == f[belong[sa[i]]]) cnt++;
            f[belong[sa[i]]]++;
            imin[belong[sa[i]]] = min(imin[belong[sa[i]]], sa[i]);
            imax[belong[sa[i]]] = max(imax[belong[sa[i]]], sa[i]);
        }
        else {
            memset(f, 0, sizeof(f));
            memset(imin, INF, sizeof(imin));
            memset(imax, 0, sizeof(imax));
            if (0 != belong[sa[i]]) {
                cnt = 1;
                f[belong[sa[i]]]++;
                imin[belong[sa[i]]] = min(imin[belong[sa[i]]], sa[i]);
                imax[belong[sa[i]]] = max(imax[belong[sa[i]]], sa[i]);
            }
            else cnt = 0;
        }
        if (cnt == n) {
            bool flag = true;
            for (int i = 1; i <= n; i++) {
                if (f[belong[sa[i]]] < 2) flag = false;
                if (imax[i] - imin[i] < k) flag = false;
            }
            if (flag) return true;
        }
    }
    return false;
}

int main()
{
    scanf("%d", &T);
    while (T--) {
        scanf("%d", &n);
        len = 0;
        int b = 125;
        for (int i = 1; i <= n; i++) {
            char s[M];
            scanf("%s", s + 1);
            for (int k = 1; k <= strlen(s + 1); k++) {
                cal[++len] = s[k];
                belong[len] = i;
            }
            if (i != n) cal[++len] = ++b;
        }
        cal[len + 1] = 0;
        da(cal + 1, sa, len + 1, 150);
        calc(cal + 1, sa, len);
        int l = 0, r = len;
        while (l < r) {
            int mid = (l + r + 1) / 2;
            if (check(mid)) l = mid;
            else r = mid - 1;
        }
        printf("%d\n", l);
    }
    return 0;
}
View Code

7. Long Long Message

题目链接:poj - 2774

题意:给你两个串,求两个串的最长公共子串

思路:将第二个串写在第一个串的后面,中间用一个没有出现过的字符隔开,求这个新字符串的后缀数组,当sa[i-1]和sa[i]不属于同一个字符串时,height[i]的最大值就是答案

技术分享图片
#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

const int N = 200010;

int wa[N], wb[N], wv[N], wss[N];
int n, T, rak[N], height[N], cal[N], sa[N];
char a[N], b[N];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

int main()
{
    scanf("%s%s", a + 1, b + 1);
    int la = strlen(a + 1), lb = strlen(b + 1);
    for (int i = 1; i <= la; i++) cal[i] = a[i];
    cal[la + 1] = 0;
    for (int i = 1; i <= lb; i++) cal[i + la + 1] = b[i];
    int n = la + lb + 1;
    cal[n + 1] = 0;
    da(cal + 1, sa, n + 1, 150);
    calc(cal + 1, sa, n);
    int res = 0;
    for (int i = 2; i <= n; i++) {
        if (sa[i - 1] < la + 1 && sa[i] > la + 1) res = max(res, height[i]);
        else if (sa[i - 1] > la + 1 && sa[i] < la + 1) res = max(res, height[i]);
    }
    printf("%d\n", res);
    return 0;
}
View Code

8. Substrings

题目链接:poj - 1226

题意:给你n个串,求顺序出现或者逆序出现在每个字符串的最长子串

思路:将每个字符串反过来写一遍,再将这2*n个字符串拼接起来,中间用一个从没出现过且不相同的字符隔开,求出后缀数组后二分答案,判断是否有一组后缀在每个原来的字符串或反转后

的字符串中出现,注意特判n=1的情况

技术分享图片
#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>

using namespace std;

const int N = 20500;
const int M = 110;
const int INF = 0x3f3f3f3f;

int wa[N], wb[N], wv[N], wss[N];
int rak[N], height[N], cal[N], sa[N];
int n, T, len, belong[N];
bool f[M];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

bool check(int k)
{
    memset(f, false, sizeof(f));
    int cnt = 0;
    for (int i = 1; i <= len; i++) {
        if (height[i] >= k) {
            if (0 != belong[sa[i]] && false == f[belong[sa[i]]]) {
                cnt++;
                f[belong[sa[i]]] = true;
            }
        }
        else {
            memset(f, false, sizeof(f));
            if (0 != belong[sa[i]]) {
                cnt = 1;
                f[belong[sa[i]]] = true;
            }
            else cnt = 0;
        }
        if (cnt == n) return true;
    }
    return false;
}

int main()
{
    scanf("%d", &T);
    while (T--) {
        scanf("%d", &n);
        if (1 == n) {
            char s[M];
            scanf("%s", s + 1);
            printf("%d\n", strlen(s + 1));
            continue;
        }
        len = 0;
        int b = 125;
        for (int i = 1; i <= n; i++) {
            char s[M];
            scanf("%s", s + 1);
            for (int k = 1; k <= strlen(s + 1); k++) {
                cal[++len] = s[k] + 1;
                belong[len] = i;
            }
            cal[++len] = ++b;
            for (int k = strlen(s + 1); k >= 1; k--) {
                cal[++len] = s[k] + 1;
                belong[len] = i;
            }
            if (i != n) cal[++len] = ++b;
        }
        cal[len + 1] = 0;
        da(cal + 1, sa, len + 1, 350);
        calc(cal + 1, sa, len);
        int l = 0, r = len;
        while (l < r) {
            int mid = (l + r + 1) / 2;
            if (check(mid)) l = mid;
            else r = mid - 1;
        }
        printf("%d\n", l);
    }
    return 0;
}
View Code

9. Mr. Panda and Fantastic Beasts

题目链接:Gym - 101194F

题意:给你n个字符串,找到第一个字符串字典序最小的最短子串,使得这个子串不是其他字符串的子串

思路:将n个字符串拼接起来,中间用一个从没出现过且不相同的字符隔开,求出后缀数组后,对所有来自第一个字符串的后缀进行判断,找到它左边第一个不是来自第一个串的后缀,求lcp为x,找到它右边第一个不是来自第一个串的后缀,求lcp为y,那么显然该后缀至少取前max(x,y)+1个字符做为子串,对每个后缀都进行一次判断,取最小值即可,注意需要判断max(x,y)+1是否超过了第一个字符串后缀的长度

技术分享图片
#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <cmath>

using namespace std;

const int N = 300010;
const int M = 250010;
const int INF = 0x3f3f3f3f;

int wa[N], wb[N], wv[N], wss[N];
int rak[N], height[N], cal[N], sa[N];
int rmq[N], imin[N][30];
int n, len, T, belong[N], lef[N], rig[N], len1, icas;
char s[M];

bool cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wss[x[i]]] = i;
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wss[i] = 0;
        for (i = 0; i < n; i++) wss[wv[i]]++;
        for (i = 1; i < m; i++) wss[i] += wss[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wss[wv[i]]] = y[i];
        for (swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            if (cmp(y, sa[i - 1], sa[i], j)) x[sa[i]] = p - 1;
            else x[sa[i]] = p++;
        }
    }
}

void calc(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rak[sa[i]] = i;
    for (i = 0; i < n; height[rak[i++]] = k)
        for (k ? k-- : 0, j = sa[rak[i] - 1]; r[i + k] == r[j + k]; k++);
    for (i = n; i; i--) {
        rak[i] = rak[i - 1];
        sa[i]++;
    }
}

void init(int n)
{
    memset(imin, 0, sizeof(imin));
    for (int i = 1; i <= n; i++) imin[i][0] = height[i];
    int l = int(log((double)n) / log(2.0));
    for (int j = 1; j <= l; j++) {
        for (int i = 1; i + (1 << (j - 1)) <= n; i++) {
            imin[i][j] = min(imin[i][j - 1], imin[i + (1 << (j - 1))][j - 1]);
        }
    }
}

int ask(int l, int r)
{
    int k = int(log(double(r - l + 1)) / log(2.0));
    return min(imin[l][k], imin[r - (1 << k) + 1][k]);
}

int lcp(int a, int b)
{
    int ra = rak[a], rb = rak[b];
    if (ra > rb) swap(ra, rb);
    return ask(ra + 1, rb);
}

int main()
{
    scanf("%d", &T);
    while (T--) {
        scanf("%d", &n);
        int b = 125;
        len = 0;
        for (int i = 1; i <= n; i++) {
            scanf("%s", s + 1);
            if (1 == i) len1 = strlen(s + 1);
            for (int k = 1; k <= strlen(s + 1); k++) {
                cal[++len] = s[k];
                belong[len] = i;
            }
            if (i != n) cal[++len] = ++b;
        }
        cal[len + 1] = 0;
        da(cal + 1, sa, len + 1, 50200);
        calc(cal + 1, sa, len);
        int last = -1;
        for (int i = 1; i <= len; i++) {
            if (0 == belong[sa[i]]) continue;
            if (1 == belong[sa[i]]) lef[i] = last;
            else last = i;
        }
        last = -1;
        for (int i = len; i >= 1; i--) {
            if (0 == belong[sa[i]]) continue;
            if (1 == belong[sa[i]]) rig[i] = last;
            else last = i;
        }
        init(len);
        int rl = INF, pos = 0;
        for (int i = 1; i <= len; i++) {
            if (1 != belong[sa[i]] || 0 == belong[sa[i]]) continue;
            int L = lef[i], R = rig[i], lc = 0;
            if (-1 != L) lc = max(lcp(sa[L], sa[i]), lc);
            if (-1 != R) lc = max(lcp(sa[R], sa[i]), lc);
            if (len1 - sa[i] + 1 > lc) {
                int t = lc + 1;
                if (t < rl) {
                    rl = t;
                    pos = sa[i];
                }
            }
        }
        printf("Case #%d: ", ++icas);
        if (INF == rl) printf("Impossible\n");
        else {
            for (int i = 0; i < rl; i++) printf("%c", char(cal[pos + i]));
            printf("\n");
        }
    }
    return 0;
}
View Code

 

后缀数组

原文:https://www.cnblogs.com/zzzzzzy/p/12616006.html

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!