码迷,mamicode.com
首页 > 编程语言 > 详细

SPOJ694--- DISUBSTR - Distinct Substrings(后缀数组)

时间:2015-03-31 22:31:39      阅读:163      评论:0      收藏:0      [点我收藏+]

标签:后缀数组

Given a string, we need to find the total number of its distinct substrings.
Input

T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000
Output

For each test case output one number saying the number of distinct substrings.
Example

Sample Input:
2
CCCCC
ABABA

Sample Output:
5
9

Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.

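求不同子串的个数:每一个子串都是某个后缀的前缀。把所有后缀按字典序排好(即后缀数组 sa),从最小的后缀开始依次加入。按下标从 1 开始计,后缀 suffix(sa[i]) 的长度为 n - sa[i] + 1,因此它带来 n - sa[i] + 1 个前缀(子串);其中有 height[i] 个已经在之前出现过(height[i] 是它与排名前一位的后缀的 LCP 长度),是重复的,需要去掉。
所以每加入一个后缀,新增 n - sa[i] + 1 - height[i] 个不同子串,把所有后缀的贡献累加起来就是答案。(下面的代码下标从 0 开始,对应的式子是 size - sa[i] - height[i - 1]。)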

/*************************************************************************
    > File Name: SPOJ694.cpp
    > Author: ALex
    > Mail: zchao1995@gmail.com 
    > Created Time: Tue 31 Mar 2015 18:27:46
 ************************************************************************/

#include <functional>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <cstdlib>
#include <queue>
#include <stack>
#include <map>
#include <bitset>
#include <set>
#include <vector>

using namespace std;

// General-purpose constants and type aliases. None of them is referenced by
// the code visible in this file.
const double pi = acos(-1.0);   // pi, obtained as acos(-1)
const int inf = 0x3f3f3f3f;     // 1061109567; presumably a "large int" sentinel
const double eps = 1e-15;       // presumably a floating-point comparison tolerance
typedef long long LL;
// NOTE(review): despite the name, PLL is pair<int, int>, not a pair of LL.
typedef pair <int, int> PLL;

/**
 * Fixed-capacity suffix array (prefix doubling with counting sort) together
 * with the LCP ("height") table, used to count the distinct substrings of a
 * single string.
 *
 * Call order: clear(), insert() for every symbol, getsa(m), getheight(),
 * then countDistinct() or solve().
 *
 * Preconditions that are NOT checked at run time:
 *   - every inserted symbol is in the range 1 .. m - 1, where m is the value
 *     passed to getsa(); 0 is reserved for the sentinel appended by getsa();
 *   - m <= N, because the bucket table has N entries.
 */
class SuffixArray
{
    public:
        // Number of entries in every table. One entry is reserved for the
        // sentinel, so at most N - 1 symbols can be stored.
        static const int N = 1010;
        int init[N];    // input symbols, followed by the 0 sentinel
        int X[N];       // rank/key buffer used by getsa()
        int Y[N];       // second buffer used by getsa()
        int Rank[N];    // Rank[p] = index of suffix p inside sa[] (getheight())
        int sa[N];      // suffix start positions in sorted order; sa[0] is
                        // the sentinel suffix (position == size)
        int height[N];  // height[k] = LCP length of suffixes sa[k], sa[k + 1]
        int buc[N];     // counting-sort buckets
        int size;       // number of stored symbols (sentinel excluded)

        // Start empty so that an instance is usable even before clear().
        SuffixArray() : size(0)
        {
        }

        // Forget the current string; the tables are simply overwritten by
        // the next getsa()/getheight() run.
        void clear()
        {
            size = 0;
        }

        // Append one symbol. Symbols beyond the capacity of N - 1 are
        // ignored instead of being written past the end of init[].
        void insert(int n)
        {
            if (size >= N - 1)
            {
                return;
            }
            init[size++] = n;
        }

        // True when positions a and b have equal keys in r both at the
        // position itself and l places further on. Reading r[a + l] stays
        // in range because equal first keys mean neither l-symbol prefix
        // contains the (unique) sentinel.
        bool cmp(int *r, int a, int b, int l)
        {
            return (r[a] == r[b] && r[a + l] == r[b + l]);
        }

        // Build sa[] for the stored symbols plus a trailing 0 sentinel.
        // m is an exclusive upper bound on the symbol values.
        void getsa(int m = 256)
        {
            init[size] = 0;
            int l, p, *x = X, *y = Y, n = size + 1;

            // Counting sort of all suffixes by their first symbol.
            for (int i = 0; i < m; ++i)
            {
                buc[i] = 0;
            }
            for (int i = 0; i < n; ++i)
            {
                ++buc[x[i] = init[i]];
            }
            for (int i = 1; i < m; ++i)
            {
                buc[i] += buc[i - 1];
            }
            for (int i = n - 1; i >= 0; --i)
            {
                sa[--buc[x[i]]] = i;
            }

            // Each round doubles the compared prefix length; p is the number
            // of distinct ranks and the loop stops once all n are distinct.
            for (l = 1, p = 1; l <= n && p < n; m = p, l *= 2)
            {
                // y = suffixes ordered by their second key (rank at +l).
                // Suffixes too short to have one come first.
                p = 0;
                for (int i = n - l; i < n; ++i)
                {
                    y[p++] = i;
                }
                for (int i = 0; i < n; ++i)
                {
                    if (sa[i] >= l)
                    {
                        y[p++] = sa[i] - l;
                    }
                }

                // Stable counting sort of y by the first key x[].
                for (int i = 0; i < m; ++i)
                {
                    buc[i] = 0;
                }
                for (int i = 0; i < n; ++i)
                {
                    ++buc[x[y[i]]];
                }
                for (int i = 1; i < m; ++i)
                {
                    buc[i] += buc[i - 1];
                }
                for (int i = n - 1; i >= 0; --i)
                {
                    sa[--buc[x[y[i]]]] = y[i];
                }

                // Old ranks move to y; new ranks are written into x, with
                // equal (first, second) key pairs sharing a rank.
                std::swap(x, y);
                x[sa[0]] = 0;
                p = 1;
                for (int i = 1; i < n; ++i)
                {
                    x[sa[i]] = cmp(y, sa[i - 1], sa[i], l) ? p - 1 : p++;
                }
            }
        }

        // Fill Rank[] and height[] from sa[]; getsa() must have run first.
        // h is carried from suffix i to suffix i + 1, dropping by at most
        // one, so the symbol comparisons are not restarted from zero.
        void getheight()
        {
            int h = 0, n = size;
            for (int i = 0; i <= n; ++i)
            {
                Rank[sa[i]] = i;
            }
            height[0] = 0;
            for (int i = 0; i < n; ++i)
            {
                if (h > 0)
                {
                    --h;
                }
                // j is the suffix sorted immediately before suffix i.
                int j = sa[Rank[i] - 1];
                for (; i + h < n && j + h < n && init[i + h] == init[j + h]; ++h);
                height[Rank[i] - 1] = h;
            }
        }

        // Number of distinct non-empty substrings. Suffix sa[i] contributes
        // size - sa[i] prefixes, of which height[i - 1] already occur as
        // prefixes of the preceding suffix. Requires getsa() + getheight().
        int countDistinct() const
        {
            int total = 0;
            for (int i = 1; i <= size; ++i)
            {
                total += size - sa[i] - height[i - 1];
            }
            return total;
        }

        // Print countDistinct() followed by a newline on stdout.
        void solve()
        {
            printf("%d\n", countDistinct());
        }
}SA;

// Input buffer for one test string. The "%1009s" width used in main() must
// stay one less than this size to leave room for the terminating NUL.
char str[1010];

/**
 * Reads the number of test cases, then one whitespace-delimited string per
 * case, and prints the number of distinct substrings of each string on its
 * own line. Stops early if the input ends or is malformed.
 */
int main()
{
    int t;
    // Without a valid case count there is nothing to process.
    if (scanf("%d", &t) != 1)
    {
        return 0;
    }
    // "> 0" keeps a negative count from turning into a runaway loop.
    while (t-- > 0)
    {
        // Bounded read: a longer token can no longer overflow str. Stop at
        // end of input instead of reprocessing the previous string.
        if (scanf("%1009s", str) != 1)
        {
            break;
        }
        SA.clear();
        int len = strlen(str);
        int maxs = 0;
        for (int i = 0; i < len; ++i)
        {
            // Convert through unsigned char so that bytes >= 0x80 do not
            // become negative symbols (and negative bucket indices) on
            // platforms where char is signed.
            int symbol = static_cast<unsigned char>(str[i]);
            SA.insert(symbol);
            maxs = max(maxs, symbol);
        }
        // The alphabet bound must be strictly greater than every symbol.
        SA.getsa(maxs + 1);
        SA.getheight();
        SA.solve();
    }
    return 0;
}

SPOJ694--- DISUBSTR - Distinct Substrings(后缀数组)

标签:后缀数组

原文地址:http://blog.csdn.net/guard_mine/article/details/44783569

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!