标签:style blog color 使用 ar for 2014 art div
给定一个query和一个text,均由小写字母组成。要求在text中找出以同样的顺序连续出现在query中的最长连续字母序列的长度。例如, query为“acbac”,text为“acaccbabb”,那么text中的“cba”为最长的连续出现在query中的字母序列,因此,返回结果应该为其长度3。
// Reports whether pattern P occurs as a contiguous substring of text T,
// using the Rabin-Karp rolling hash.
//
// T: the text to search in.
// P: the pattern to search for.
// Returns true if P occurs in T (an empty P always matches), false otherwise.
// A pattern longer than the text can never match and is rejected up front,
// before any character of T is read.
bool RabinKarpMatch(const string& T, const string& P)
{
    static const int d = 128;      // radix of the polynomial hash
    static const int q = 6999997;  // modulus of the polynomial hash

    const size_t n = T.length();
    const size_t m = P.length();
    if (m > n)
        return false;  // also keeps the T[i] reads below inside the text
    if (m == 0)
        return true;

    // Hash arithmetic is done in long long so that intermediate products
    // cannot overflow; bytes are read as unsigned char so that characters
    // outside the ASCII range never produce negative remainders.
    long long h = 1;  // h = d^(m-1) mod q
    for (size_t i = 1; i < m; ++i)
        h = (h * d) % q;

    // Hash of the pattern and of the first window T[0..m-1].
    long long p = 0;
    long long t = 0;
    for (size_t i = 0; i < m; ++i)
    {
        p = (p * d + static_cast<unsigned char>(P[i])) % q;
        t = (t * d + static_cast<unsigned char>(T[i])) % q;
    }

    for (size_t s = 0; ; ++s)
    {
        // Equal hashes are only a hint: confirm with a real comparison.
        if (t == p && T.compare(s, m, P) == 0)
            return true;
        if (s + m >= n)
            break;  // the window starting at s was the last one
        // Roll the window: remove T[s], append T[s + m].
        const long long leading = (static_cast<unsigned char>(T[s]) * h) % q;
        t = (d * ((t + q - leading) % q) + static_cast<unsigned char>(T[s + m])) % q;
    }
    return false;
}
// Brute-force longest common substring: returns the length of the longest
// contiguous piece of `query` that also occurs contiguously in `text`
// (0 when either string is empty). Candidate pieces are looked up in `text`
// with RabinKarpMatch.
//
// `query` may be longer than `text`; candidates are capped at the text
// length, so RabinKarpMatch is never handed a pattern longer than the text.
size_t GetLargestCommomSubLen(const string& text, const string& query)
{
    const size_t query_len = query.length();
    const size_t text_len = text.length();
    if (text_len == 0 || query_len == 0)
        return 0;

    size_t max_len = 0;
    // Once fewer than max_len + 1 characters remain after `start`, no
    // candidate from there can improve the answer.
    for (size_t start = 0; start + max_len < query_len; ++start)
    {
        size_t limit = query_len - start;
        if (limit > text_len)
            limit = text_len;  // nothing longer than the text can occur in it

        // If query[start, start+len) is absent from text, every longer piece
        // with the same start is absent too. So only lengths beyond the
        // current best need testing, and the first miss ends this start.
        for (size_t len = max_len + 1; len <= limit; ++len)
        {
            if (!RabinKarpMatch(text, query.substr(start, len)))
                break;
            max_len = len;
        }
    }
    return max_len;
}
// Dynamic-programming longest common substring: returns the length of the
// longest run of characters that appears contiguously in both `text` and
// `query`, or 0 when either string is empty.
//
// Uses a full text.length() x query.length() table.
int GetLongestCommSubstrLen(const string& text, const string& query)
{
    const int rows = text.length();
    const int cols = query.length();
    if (rows == 0 || cols == 0)
        return 0;

    // table[r][c] is the length of the common run that ends exactly at
    // text[r] and query[c]; cells for mismatching characters stay 0.
    vector<vector<int>> table(rows, vector<int>(cols, 0));
    int best = 0;
    for (int r = 0; r < rows; ++r)
    {
        for (int c = 0; c < cols; ++c)
        {
            if (text[r] != query[c])
                continue;
            // A match extends the run ending on the upper-left diagonal;
            // on the first row or column there is nothing to extend.
            const int run = (r > 0 && c > 0) ? table[r - 1][c - 1] + 1 : 1;
            table[r][c] = run;
            if (best < run)
                best = run;
        }
    }
    return best;
}
// The table-based DP trades space for time, but its space cost can still be
// reduced and is not as bad as it first looks. Just as computing a Fibonacci
// number only needs the two preceding terms, the recurrence
// L[i][j] = L[i-1][j-1] + 1 only ever reads the previous row, which has
// already been computed. Two rows of storage are therefore enough: after each
// new row is finished it is swapped into the "previous row" slot.
//
// Returns the length of the longest run of characters that appears
// contiguously in both `text` and `query`, or 0 when either string is empty.
int GetLongestCommSubstrLen(const string& text, const string& query)
{
    const int text_len = static_cast<int>(text.length());
    const int query_len = static_cast<int>(query.length());
    if (text_len == 0 || query_len == 0)
        return 0;

    // prev[j] / cur[j]: length of the common run ending at query[j] and at
    // the previous / current text character.
    vector<int> prev(query_len, 0);
    vector<int> cur(query_len, 0);
    int longest = 0;
    for (int i = 0; i < text_len; ++i)
    {
        for (int j = 0; j < query_len; ++j)
        {
            if (text[i] == query[j])
            {
                // prev is all zeros while i == 0, so only j needs a guard.
                cur[j] = (j > 0 ? prev[j - 1] : 0) + 1;
                // Track the maximum for every cell, including the first row
                // and the first column.
                if (longest < cur[j])
                    longest = cur[j];
            }
            else
            {
                // The row buffer is reused, so a mismatch must overwrite the
                // value left from two rows back; otherwise it would be picked
                // up as a diagonal predecessor after the swap.
                cur[j] = 0;
            }
        }
        prev.swap(cur);
    }
    return longest;
}
// Longest common substring problem (method 1: brute force + RK matching; method 2: DP + space optimization)
标签:style blog color 使用 ar for 2014 art div
原文地址:http://blog.csdn.net/u012333003/article/details/39082981