后缀自动机二·重复旋律5

题目链接:HihoCoder - 1445

题意:求一个串所有不同子串的个数

思路:文本串S的后缀自动机是一部恰好只能识别S全部子串的机器,所以统计所有不同子串的个数,只需要将每个状态内子串的个数相加即可,而每个状态内子串的个数为sam[i].longest-sam[i].shortest+1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include <iostream>
#include <algorithm>
#include <cstring>
#include <cstdio>

/* 64-bit integer used for the substring count. */
typedef long long ll;

/* Capacity of the input buffer s; the state pool sam holds twice as many entries. */
const int N = 1000010;
/* Number of transition slots per state; main maps characters with s[i] - 'a'. */
const int M = 26;

/* One state of the suffix automaton. */
struct node
{
    int trans[M]; /* trans[c]: state reached on symbol c, or -1 when there is no such transition */
    int slink;    /* link followed when a transition is missing; -1 on the initial state */
    int sst;      /* length of the shortest substring that belongs to this state */
    int lst;      /* length of the longest substring that belongs to this state */
};

/* Number of states allocated so far; newnode() fills sam[n] and then increments n. */
int n;
/* Input string; main reads it starting at s[1]. */
char s[N];
/* Pool of automaton states, indexed by state id. */
node sam[N << 1];

/*
 * Claim the next free slot of the state pool and initialise it.
 *
 * sst, lst - shortest / longest substring length stored in the state
 * trans    - transition table to copy, or null to start with every slot at -1
 * slink    - link target stored in the state
 *
 * Returns the index of the new state.
 */
int newnode(int sst, int lst, int *trans, int slink)
{
    const int id = n++;

    if (!trans)
        memset(sam[id].trans, -1, sizeof(sam[id].trans));
    else
        memcpy(sam[id].trans, trans, sizeof(sam[id].trans));
    sam[id].slink = slink;
    sam[id].lst = lst;
    sam[id].sst = sst;
    return id;
}

/*
 * Extend the automaton by one symbol.
 *
 * c - symbol index (used directly as an index into trans)
 * u - state that represented the whole string before this call
 *
 * Returns the state that represents the whole string after appending c.
 */
int insert(int c, int u)
{
    const int cur = newnode(-1, sam[u].lst + 1, 0, -1);
    int p = u;

    /* Walk the link chain: every state without a c-transition now leads to cur. */
    for (; p != -1 && sam[p].trans[c] == -1; p = sam[p].slink)
        sam[p].trans[c] = cur;

    /* Fell off the chain: c was never seen, so cur hangs directly off state 0. */
    if (p == -1) {
        sam[cur].slink = 0;
        sam[cur].sst = 1;
        return cur;
    }

    const int q = sam[p].trans[c];

    /* q's longest string is exactly one symbol longer than p's: q can be linked as is. */
    if (sam[q].lst == sam[p].lst + 1) {
        sam[cur].slink = q;
        sam[cur].sst = sam[q].lst + 1;
        return cur;
    }

    /* Otherwise split q: the copy keeps the strings of length up to sam[p].lst + 1. */
    const int clone = newnode(-1, sam[p].lst + 1, sam[q].trans, sam[q].slink);
    const int cut = sam[clone].lst + 1;
    sam[q].slink = clone;
    sam[q].sst = cut;
    sam[cur].slink = clone;
    sam[cur].sst = cut;

    /* Redirect the c-transitions that pointed at q along the rest of the chain. */
    for (; p != -1 && sam[p].trans[c] == q; p = sam[p].slink)
        sam[p].trans[c] = clone;

    sam[clone].sst = sam[sam[clone].slink].lst + 1;
    return cur;
}

int main()
{
scanf("%s", s + 1);
int u = newnode(0, 0, 0, -1), len = strlen(s + 1);
for (int i = 1; i <= len; i++)
u = insert(s[i] - 'a', u);
ll res = 0;
for (int i = 1; i < n; i++) {
ll t = sam[i].lst - sam[i].sst + 1;
res += t;
}
printf("%lld", res);
return 0;
}

后缀自动机三·重复旋律6

题目链接:HihoCoder - 1449

题意:给你一个串,求出长度为k的子串中出现次数最多的子串的出现次数,求出所有k的答案

思路:假设现在已经求出后缀自动机中每个状态endpos的大小,res[1], res[2], … res[length(s)]一定单调不增(因为一个长度为i的子串出现了k次, 则它的长度为i-1、i-2…1的子串也都至少出现了k次),所以我们只需要更新后缀自动机每个状态中longest对应的答案(即res[sam[i].longest] = max(res[sam[i].longest], sam[i].endpos))后,从后向前用res[i] = max(res[i], res[i+1])更新答案即可

现在考虑如何求出后缀自动机中每个状态endpos的大小,我们站在sam[i].slink的角度,将起点做为根节点,将绿色节点表示向自动机中插入字符时分裂出来的节点,粉色节点表示成其他节点,对于粉色的节点,父节点的endpos大小恰好等于所有子节点们的endpos大小之和再加1,而对于绿色节点,父节点的endpos大小恰好等于所有子节点们的endpos大小之和,由于叶子节点只可能为粉色,所以叶子节点的endpos大小为1,利用slink反向建树,dfs求出所有状态的endpos即可

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#include <iostream>
#include <algorithm>
#include <cstring>
#include <cstdio>
#include <cmath>

using namespace std;

/* 64-bit integer used for occurrence counts and answers. */
typedef long long ll;

/* Capacity of s, head, imax and res; sam holds 2N states and edge 2N entries. */
const int N = 2000010;
/* Number of transition slots per state; main maps characters with s[i] - 'a'. */
const int M = 26;

/* One state of the suffix automaton, extended with an occurrence counter. */
struct node
{
    int trans[M]; /* trans[c]: state reached on symbol c, or -1 when there is no such transition */
    int slink;    /* link followed when a transition is missing; -1 on the initial state */
    int sst;      /* length of the shortest substring that belongs to this state */
    int lst;      /* length of the longest substring that belongs to this state */
    int flag;     /* value passed to newnode(): 1 for states made by insert() for a new symbol, 0 for split copies */
    ll edpos;     /* starts equal to flag; dfs() adds the totals of the states linked below */
};

/* One entry of the linked adjacency lists built by add_edge(). */
struct Edge {
    int to;  /* node this entry points at */
    int nex; /* index of the next entry in the same list; 0 ends the list */
};

/*
 * n    - number of states allocated so far (next free index in sam)
 * cnt  - number of adjacency entries in use; entry ids start at 1 because 0 terminates a list
 * head - head[u] is the first adjacency entry of node u, 0 if it has none
 * NOTE(review): head and edge are sized for about N nodes while sam can hold 2N
 * states; this is only safe while the input is at most roughly N/2 characters -
 * confirm against the problem limits.
 */
int n, cnt, head[N];
/*
 * res[k] - best occurrence count found for length k (filled in main)
 * NOTE(review): imax is not referenced anywhere in the visible code.
 */
ll imax[N], res[N];
/* Input string; main reads it starting at s[1]. */
char s[N];
/* Pool of automaton states, indexed by state id. */
node sam[N << 1];
/* Storage for adjacency entries; build() adds two per non-initial state. */
Edge edge[2 * N];

/* Prepend a directed entry u -> v to u's adjacency list. */
void add_edge(int u, int v)
{
    const int id = ++cnt; /* ids start at 1; 0 is the list terminator */
    edge[id].nex = head[u];
    edge[id].to = v;
    head[u] = id;
}

/*
 * Turn the slink pointers into an undirected tree: every state other than
 * state 0 is connected to its slink target in both directions.
 */
void build()
{
    for (int v = 1; v < n; ++v) {
        const int parent = sam[v].slink;
        if (parent == -1)
            continue;
        add_edge(v, parent);
        add_edge(parent, v);
    }
}

void dfs(int u, int fa)
{
for (int i = head[u]; 0 != i; i = edge[i].nex) {
int v = edge[i].to;
if (v == fa) continue;
dfs(v, u);
sam[u].edpos += sam[v].edpos;
}
}

/*
 * Claim the next free slot of the state pool and initialise it.
 *
 * sst, lst - shortest / longest substring length stored in the state
 * trans    - transition table to copy, or null to start with every slot at -1
 * slink    - link target stored in the state
 * flag     - stored in the state and also used as its initial edpos
 *
 * Returns the index of the new state.
 */
int newnode(int sst, int lst, int *trans, int slink, int flag)
{
    const int id = n++;

    if (!trans)
        memset(sam[id].trans, -1, sizeof(sam[id].trans));
    else
        memcpy(sam[id].trans, trans, sizeof(sam[id].trans));
    sam[id].edpos = flag;
    sam[id].flag = flag;
    sam[id].slink = slink;
    sam[id].lst = lst;
    sam[id].sst = sst;
    return id;
}

/*
 * Extend the automaton by one symbol.
 *
 * c - symbol index (used directly as an index into trans)
 * u - state that represented the whole string before this call
 *
 * The state created for the new symbol gets flag 1; a split copy gets flag 0.
 * Returns the state that represents the whole string after appending c.
 */
int insert(int c, int u)
{
    const int cur = newnode(-1, sam[u].lst + 1, 0, -1, 1);
    int p = u;

    /* Walk the link chain: every state without a c-transition now leads to cur. */
    for (; p != -1 && sam[p].trans[c] == -1; p = sam[p].slink)
        sam[p].trans[c] = cur;

    /* Fell off the chain: c was never seen, so cur hangs directly off state 0. */
    if (p == -1) {
        sam[cur].slink = 0;
        sam[cur].sst = 1;
        return cur;
    }

    const int q = sam[p].trans[c];

    /* q's longest string is exactly one symbol longer than p's: q can be linked as is. */
    if (sam[q].lst == sam[p].lst + 1) {
        sam[cur].slink = q;
        sam[cur].sst = sam[q].lst + 1;
        return cur;
    }

    /* Otherwise split q: the copy keeps the strings of length up to sam[p].lst + 1. */
    const int clone = newnode(-1, sam[p].lst + 1, sam[q].trans, sam[q].slink, 0);
    const int cut = sam[clone].lst + 1;
    sam[q].slink = clone;
    sam[q].sst = cut;
    sam[cur].slink = clone;
    sam[cur].sst = cut;

    /* Redirect the c-transitions that pointed at q along the rest of the chain. */
    for (; p != -1 && sam[p].trans[c] == q; p = sam[p].slink)
        sam[p].trans[c] = clone;

    sam[clone].sst = sam[sam[clone].slink].lst + 1;
    return cur;
}

/*
 * Read one string and print, for every length k from 1 to its length, the
 * largest occurrence count among substrings of length k (one value per line).
 */
int main()
{
    scanf("%s", s + 1);

    int last = newnode(0, 0, 0, -1, 1); /* state 0: the initial state */
    const int len = strlen(s + 1);
    for (int pos = 1; pos <= len; ++pos)
        last = insert(s[pos] - 'a', last);

    build();
    dfs(0, -1);

    /* Each state contributes its count at the length of its longest string. */
    for (int id = 1; id < n; ++id) {
        const int at = sam[id].lst;
        if (res[at] < sam[id].edpos)
            res[at] = sam[id].edpos;
    }

    /* A shorter length is at least as frequent as any longer one. */
    for (int k = len - 1; k >= 1; --k) {
        if (res[k] < res[k + 1])
            res[k] = res[k + 1];
    }

    for (int k = 1; k <= len; ++k)
        printf("%lld\n", res[k]);
    return 0;
}

后缀自动机四·重复旋律7

题目链接:HihoCoder - 1457

题意:给你很多十进制串,求出所有串中不同子串的和(即把十进制子串看成数字,然后求和,允许有前导0),答案模1000000007

思路:先考虑只有一个串的情况,对这个串建立sam,设每个节点中子串的和为sum,我们站在trans的角度考虑这个问题,利用dp的思想,每读入一个数字,则下一个节点的sum应该加上(这个节点的sum×10 + 读入的数字×这个节点的子串个数),显然可以用拓扑排序来解决这个问题

再来考虑多个串的情况,和后缀数组类似,我们将所有字符串拼接起来,中间用一个没有出现过的字符隔开,对这个新的字符串建立sam,和上面的分析一样,我们站在trans的角度来求解,但需要考虑到一个节点中的某些子串可能含有这个没有出现过的字符,这部分子串是不对答案有贡献的,所以现在需要解决的是如何维护每个节点内真实的子串个数vnum, 其实一个节点中如果有某个子串s的话,则从自动机0开始逐个喂入s的字符的话,则最终自动机将停止在这个节点,即读入子串的过程其实就是在自动机上画出路径的过程,所以一个节点中的每个子串和这些从0出发通过trans跳转到达此节点的路径一一对应,所以再用一个拓扑排序来维护vnum即可

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#include <iostream>
#include <algorithm>
#include <cstring>
#include <cstdio>
#include <stack>

using namespace std;

/* 64-bit integer used for sums and substring counts. */
typedef long long ll;

/* Capacity of s, cal and deg; sam holds 2N states. */
const int N = 2000010;
/* Transition slots per state: indices 0-9 are digits, index 10 is the separator main puts between strings. */
const int M = 11;
/* Modulus applied to every accumulated sum. */
const ll mod = 1000000007;

/* One state of the suffix automaton, extended with the values computed by topsort() and solve(). */
struct node {
    int trans[M]; /* trans[c]: state reached on symbol c, or -1 when there is no such transition */
    int slink;    /* link followed when a transition is missing; -1 on the initial state */
    int sst;      /* length of the shortest substring that belongs to this state */
    int lst;      /* length of the longest substring that belongs to this state */
    ll sum;       /* filled by solve(): accumulated value of the counted substrings, modulo mod */
    ll vnum;      /* filled by topsort(): number of paths from state 0 that avoid symbol 10 */
};

/* Pool of automaton states, indexed by state id. */
node sam[N << 1];
/*
 * T   - number of strings still to read (main counts it down)
 * n   - number of states allocated so far (next free index in sam)
 * cal - concatenated symbol sequence, 1-based; digits as 0-9, separators as 10
 * len - number of symbols stored in cal
 * deg - remaining in-degree of each state during the topological passes
 * NOTE(review): deg is indexed by state id but has N entries while sam has 2N;
 * safe only while fewer than N states are created - confirm against the limits.
 */
int T, n, cal[N], len, deg[N];
/* Buffer for the string being read; filled from s[1]. */
char s[N];

/*
 * Claim the next free slot of the state pool and initialise its structural fields.
 *
 * sst, lst - shortest / longest substring length stored in the state
 * trans    - transition table to copy, or null to start with every slot at -1
 * slink    - link target stored in the state
 *
 * sum and vnum are not touched here. Returns the index of the new state.
 */
int newnode(int sst, int lst, int *trans, int slink)
{
    const int id = n++;

    if (!trans)
        memset(sam[id].trans, -1, sizeof(sam[id].trans));
    else
        memcpy(sam[id].trans, trans, sizeof(sam[id].trans));
    sam[id].slink = slink;
    sam[id].lst = lst;
    sam[id].sst = sst;
    return id;
}

/*
 * Extend the automaton by one symbol.
 *
 * c - symbol index (used directly as an index into trans)
 * u - state that represented the whole sequence before this call
 *
 * Returns the state that represents the whole sequence after appending c.
 */
int insert(int c, int u)
{
    const int cur = newnode(-1, sam[u].lst + 1, 0, -1);
    int p = u;

    /* Walk the link chain: every state without a c-transition now leads to cur. */
    for (; p != -1 && sam[p].trans[c] == -1; p = sam[p].slink)
        sam[p].trans[c] = cur;

    /* Fell off the chain: c was never seen, so cur hangs directly off state 0. */
    if (p == -1) {
        sam[cur].slink = 0;
        sam[cur].sst = 1;
        return cur;
    }

    const int q = sam[p].trans[c];

    /* q's longest string is exactly one symbol longer than p's: q can be linked as is. */
    if (sam[q].lst == sam[p].lst + 1) {
        sam[cur].slink = q;
        sam[cur].sst = sam[q].lst + 1;
        return cur;
    }

    /* Otherwise split q: the copy keeps the strings of length up to sam[p].lst + 1. */
    const int clone = newnode(-1, sam[p].lst + 1, sam[q].trans, sam[q].slink);
    const int cut = sam[clone].lst + 1;
    sam[q].slink = clone;
    sam[q].sst = cut;
    sam[cur].slink = clone;
    sam[cur].sst = cut;

    /* Redirect the c-transitions that pointed at q along the rest of the chain. */
    for (; p != -1 && sam[p].trans[c] == q; p = sam[p].slink)
        sam[p].trans[c] = clone;

    sam[clone].sst = sam[sam[clone].slink].lst + 1;
    return cur;
}

void topsort()
{
for (int i = 0; i < n; i++) {
for (int j = 0; j < M; j++) {
if (-1 == sam[i].trans[j]) continue;
deg[sam[i].trans[j]]++;
}
}
stack<int> st;
st.push(0);
sam[0].vnum = 1;
while (!st.empty()) {
int t = st.top();
st.pop();
for (int i = 0; i < M; i++) {
int to = sam[t].trans[i];
if (-1 == to) continue;
if (10 != i) sam[to].vnum += sam[t].vnum;
if (0 == --deg[to]) st.push(to);
}
}
}

/*
 * Fill sum for every state and return the total over all states except state 0,
 * modulo mod.
 *
 * Following a digit edge d from a state appends d to each counted string, so
 * the target gains sum * 10 + vnum * d. Edges labelled 10 (the separator)
 * contribute nothing. Relies on vnum having been filled beforehand.
 */
ll solve()
{
    for (int from = 0; from < n; ++from) {
        for (int c = 0; c < M; ++c) {
            const int to = sam[from].trans[c];
            if (to != -1)
                ++deg[to];
        }
    }

    stack<int> ready;
    ready.push(0);

    while (!ready.empty()) {
        const int cur = ready.top();
        ready.pop();
        for (int c = 0; c < M; ++c) {
            const int to = sam[cur].trans[c];
            if (to == -1)
                continue;
            if (c != 10) {
                const ll gain = sam[cur].sum * 10 + sam[cur].vnum * c;
                sam[to].sum = (sam[to].sum + gain) % mod;
            }
            --deg[to];
            if (deg[to] == 0)
                ready.push(to);
        }
    }

    ll total = 0;
    for (int id = 1; id < n; ++id)
        total = (total + sam[id].sum) % mod;
    return total;
}

/*
 * Read T digit strings, join them into one symbol sequence with symbol 10
 * between consecutive strings, build the automaton over that sequence and
 * print the value returned by solve().
 */
int main()
{
    scanf("%d", &T);
    while (T-- != 0) {
        scanf("%s", s + 1);
        const int L = strlen(s + 1);
        for (int i = 1; i <= L; ++i) {
            ++len;
            cal[len] = s[i] - '0';
            s[i] = '\0';
        }
        /* separator after every string except the last one */
        if (T != 0) {
            ++len;
            cal[len] = 10;
        }
    }

    int last = newnode(0, 0, 0, -1); /* state 0: the initial state */
    for (int i = 1; i <= len; ++i)
        last = insert(cal[i], last);

    topsort();
    printf("%lld\n", solve());
    return 0;
}

后缀自动机五·重复旋律8

题目链接:HihoCoder - 1465

题意:求若干串T的循环同构串在另一长串S中各自做为子串出现的次数,如果出现多次则重复计算

思路:我们先对S构建后缀自动机,对于T串(记n=|T|),我们将它复制一份接在后面,然后从T[1]开始一个字符一个字符的喂入后缀自动机,维护两个变量u和l,其中l是最大的使得T[i-l+1,…,i]能读入sam之后不会出现无路可走的情况的数,u是T[i-l+1,…,i]所属的sam节点,当读入T[i+1]时,我们从u沿着slink走,走到第一个节点v使得v.trans[T[i+1]-'a'] = x,x!=null,那么新的l=|longest(v)|+1,新的u=x,但是需要考虑的问题就是如果此时l>=n,T[i+1-l+1,…,i+1]会在u中,但T[i+1-n+1,…,i+1]不一定在u中,那么T[i+1-n+1,…,i+1]就会是T[i+1-l+1,…,i+1]的一个后缀,显然T[i+1-n+1,…,i+1]的endpos集合的大小(即出现的次数)>=T[i+1-l+1,…,i+1]的endpos集合的大小=|u.endpos|,所以我们此时仍然需要从u沿着slink走,直到v是slink-path(u)上最后的那个满足v.longest>=n的点为止,那么此时T[i+1-n+1,…,i+1]就一定在v中,|v.endpos|就是出现的次数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#include <iostream>
#include <algorithm>
#include <cstring>
#include <cstdio>

using namespace std;

/* Capacity of s and head; t, vis, sam and edge are sized 2N. */
const int N = 200010;
/* Number of transition slots per state; characters are mapped with c - 'a'. */
const int M = 26;

/* One state of the suffix automaton, extended with an occurrence counter. */
struct node {
    int trans[M]; /* trans[c]: state reached on symbol c, or -1 when there is no such transition */
    int slink;    /* link followed when a transition is missing; -1 on the initial state */
    int sst;      /* length of the shortest substring that belongs to this state */
    int lst;      /* length of the longest substring that belongs to this state */
    int edpos;    /* starts equal to the flag argument of newnode(); dfs() adds the totals of the states linked below */
    bool flag;    /* value passed to newnode(): true for states made for a new symbol, false for split copies */
};

/* One entry of the linked adjacency lists built by add_edge(). */
struct Edge {
    int to;  /* node this entry points at */
    int nex; /* index of the next entry in the same list; 0 ends the list */
};

/* s: text the automaton is built from; t: current pattern followed by a copy of its first lt-1 characters. Both are filled from index 1. */
char s[N], t[N << 1];
/*
 * n    - number of states allocated so far (next free index in sam)
 * len  - number of characters currently stored in t
 * lt   - length of the pattern as read, before it is extended
 * T    - number of patterns still to process
 * head - head[u] is the first adjacency entry of node u, 0 if it has none
 * cnt  - number of adjacency entries in use; ids start at 1
 * vis  - per-state marker used by solve() so each state is counted once per pattern
 * NOTE(review): head has N entries while sam can hold 2N states; safe only
 * while fewer than N states are created - confirm against the problem limits.
 */
int n, len, lt, T, head[N], cnt, vis[N << 1];
/* Pool of automaton states, indexed by state id. */
node sam[N << 1];
/* Storage for adjacency entries; build() adds two per non-initial state. */
Edge edge[N << 1];

/* Prepend a directed entry u -> v to u's adjacency list. */
void add_edge(int u, int v)
{
    const int id = ++cnt; /* ids start at 1; 0 is the list terminator */
    edge[id].nex = head[u];
    edge[id].to = v;
    head[u] = id;
}

/*
 * Turn the slink pointers into an undirected tree: every state other than
 * state 0 is connected to its slink target in both directions.
 */
void build()
{
    for (int v = 1; v < n; ++v) {
        const int parent = sam[v].slink;
        if (parent == -1)
            continue;
        add_edge(v, parent);
        add_edge(parent, v);
    }
}

void dfs(int u, int fa)
{
for (int i = head[u]; 0 != i; i = edge[i].nex) {
int v = edge[i].to;
if (v == fa) continue;
dfs(v, u);
sam[u].edpos += sam[v].edpos;
}
}

/*
 * Claim the next free slot of the state pool and initialise it.
 *
 * sst, lst - shortest / longest substring length stored in the state
 * trans    - transition table to copy, or null to start with every slot at -1
 * slink    - link target stored in the state
 * flag     - stored in the state and also used as its initial edpos
 *
 * Returns the index of the new state.
 */
int newnode(int sst, int lst, int *trans, int slink, int flag)
{
    const int id = n++;

    if (!trans)
        memset(sam[id].trans, -1, sizeof(sam[id].trans));
    else
        memcpy(sam[id].trans, trans, sizeof(sam[id].trans));
    sam[id].edpos = flag;
    sam[id].flag = flag;
    sam[id].slink = slink;
    sam[id].lst = lst;
    sam[id].sst = sst;
    return id;
}

/*
 * Extend the automaton by one symbol.
 *
 * c - symbol index (used directly as an index into trans)
 * u - state that represented the whole string before this call
 *
 * The state created for the new symbol gets flag 1; a split copy gets flag 0.
 * Returns the state that represents the whole string after appending c.
 */
int insert(int c, int u)
{
    const int cur = newnode(-1, sam[u].lst + 1, 0, -1, 1);
    int p = u;

    /* Walk the link chain: every state without a c-transition now leads to cur. */
    for (; p != -1 && sam[p].trans[c] == -1; p = sam[p].slink)
        sam[p].trans[c] = cur;

    /* Fell off the chain: c was never seen, so cur hangs directly off state 0. */
    if (p == -1) {
        sam[cur].slink = 0;
        sam[cur].sst = 1;
        return cur;
    }

    const int q = sam[p].trans[c];

    /* q's longest string is exactly one symbol longer than p's: q can be linked as is. */
    if (sam[q].lst == sam[p].lst + 1) {
        sam[cur].slink = q;
        sam[cur].sst = sam[q].lst + 1;
        return cur;
    }

    /* Otherwise split q: the copy keeps the strings of length up to sam[p].lst + 1. */
    const int clone = newnode(-1, sam[p].lst + 1, sam[q].trans, sam[q].slink, 0);
    const int cut = sam[clone].lst + 1;
    sam[q].slink = clone;
    sam[q].sst = cut;
    sam[cur].slink = clone;
    sam[cur].sst = cut;

    /* Redirect the c-transitions that pointed at q along the rest of the chain. */
    for (; p != -1 && sam[p].trans[c] == q; p = sam[p].slink)
        sam[p].trans[c] = clone;

    sam[clone].sst = sam[sam[clone].slink].lst + 1;
    return cur;
}

int solve()
{
memset(vis, 0, sizeof(vis));
int res = 0, u = 0, l = 0;
for (int i = 1; i <= len; i++) {
int c = t[i] - 'a';
while (u && -1 == sam[u].trans[c]) {
u = sam[u].slink;
l = sam[u].lst;
}
if (-1 != sam[u].trans[c]) {
u = sam[u].trans[c];
l++;
}
else u = l = 0;
if (l > lt) {
while (sam[sam[u].slink].lst >= lt) {
u = sam[u].slink;
l = sam[u].lst;
}
}
if (l >= lt && 0 == vis[u]) {
vis[u] = 1;
res += sam[u].edpos;
}
}
return res;
}

int main()
{
scanf("%s", s + 1);
int u = newnode(0, 0, 0, -1, 1), ls = strlen(s + 1);
for (int i = 1; i <= ls; i++)
u = insert(s[i] - 'a', u);
build();
dfs(0, -1);
scanf("%d", &T);
while (T--) {
scanf("%s", t + 1);
lt = strlen(t + 1);
len = lt;
for (int i = 1; i < lt; i++) t[++len] = t[i];
printf("%d\n", solve());
for (int i = 1; i <= len; i++) t[i] = '\0';
}
return 0;
}

String and Times

题目链接:计蒜客 A2018

题意:给你一个串,求该串出现[l,r]次的子串个数

思路:对串构建后缀自动机,求出每个状态的|endpos|,如果对于节点u,|u.endpos|$\in$[l,r],那么这个状态节点对答案的贡献就是u.longest-u.shortest+1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#include <iostream>
#include <algorithm>
#include <cstring>
#include <cstdio>
#include <cmath>

using namespace std;

/* 64-bit integer used for occurrence counts, bounds and the answer. */
typedef long long ll;

/* Capacity of s and head; sam holds 2N states and edge 2N entries. */
const int N = 200010;
/* Number of transition slots per state; main maps characters with s[i] - 'A'. */
const int M = 26;

/* One state of the suffix automaton, extended with an occurrence counter. */
struct node
{
    int trans[M]; /* trans[c]: state reached on symbol c, or -1 when there is no such transition */
    int slink;    /* link followed when a transition is missing; -1 on the initial state */
    int sst;      /* length of the shortest substring that belongs to this state */
    int lst;      /* length of the longest substring that belongs to this state */
    int flag;     /* value passed to newnode(): 1 for states made by insert() for a new symbol, 0 for split copies */
    ll edpos;     /* starts equal to flag; dfs() adds the totals of the states linked below */
};

/* One entry of the linked adjacency lists built by add_edge(). */
struct Edge {
    int to;  /* node this entry points at */
    int nex; /* index of the next entry in the same list; 0 ends the list */
};

/*
 * n    - number of states allocated so far (next free index in sam)
 * cnt  - number of adjacency entries in use; ids start at 1
 * head - head[u] is the first adjacency entry of node u, 0 if it has none
 * NOTE(review): T is not referenced anywhere in the visible code.
 * NOTE(review): head has N entries while sam can hold 2N states; safe only
 * while fewer than N states are created - confirm against the problem limits.
 */
int n, T, cnt, head[N];
/* Inclusive bounds on the occurrence count, read per test case. */
ll l, r;
/* Input string; main reads it starting at s[1]. */
char s[N];
/* Pool of automaton states, indexed by state id. */
node sam[N << 1];
/* Storage for adjacency entries; build() adds two per non-initial state. */
Edge edge[2 * N];

/* Prepend a directed entry u -> v to u's adjacency list. */
void add_edge(int u, int v)
{
    const int id = ++cnt; /* ids start at 1; 0 is the list terminator */
    edge[id].nex = head[u];
    edge[id].to = v;
    head[u] = id;
}

/*
 * Turn the slink pointers into an undirected tree: every state other than
 * state 0 is connected to its slink target in both directions.
 */
void build()
{
    for (int v = 1; v < n; ++v) {
        const int parent = sam[v].slink;
        if (parent == -1)
            continue;
        add_edge(v, parent);
        add_edge(parent, v);
    }
}

void dfs(int u, int fa)
{
for (int i = head[u]; 0 != i; i = edge[i].nex) {
int v = edge[i].to;
if (v == fa) continue;
dfs(v, u);
sam[u].edpos += sam[v].edpos;
}
}

/*
 * Claim the next free slot of the state pool and initialise it.
 *
 * sst, lst - shortest / longest substring length stored in the state
 * trans    - transition table to copy, or null to start with every slot at -1
 * slink    - link target stored in the state
 * flag     - stored in the state and also used as its initial edpos
 *
 * Every field of the slot is overwritten. Returns the index of the new state.
 */
int newnode(int sst, int lst, int *trans, int slink, int flag)
{
    const int id = n++;

    if (!trans)
        memset(sam[id].trans, -1, sizeof(sam[id].trans));
    else
        memcpy(sam[id].trans, trans, sizeof(sam[id].trans));
    sam[id].edpos = flag;
    sam[id].flag = flag;
    sam[id].slink = slink;
    sam[id].lst = lst;
    sam[id].sst = sst;
    return id;
}

/*
 * Extend the automaton by one symbol.
 *
 * c - symbol index (used directly as an index into trans)
 * u - state that represented the whole string before this call
 *
 * The state created for the new symbol gets flag 1; a split copy gets flag 0.
 * Returns the state that represents the whole string after appending c.
 */
int insert(int c, int u)
{
    const int cur = newnode(-1, sam[u].lst + 1, 0, -1, 1);
    int p = u;

    /* Walk the link chain: every state without a c-transition now leads to cur. */
    for (; p != -1 && sam[p].trans[c] == -1; p = sam[p].slink)
        sam[p].trans[c] = cur;

    /* Fell off the chain: c was never seen, so cur hangs directly off state 0. */
    if (p == -1) {
        sam[cur].slink = 0;
        sam[cur].sst = 1;
        return cur;
    }

    const int q = sam[p].trans[c];

    /* q's longest string is exactly one symbol longer than p's: q can be linked as is. */
    if (sam[q].lst == sam[p].lst + 1) {
        sam[cur].slink = q;
        sam[cur].sst = sam[q].lst + 1;
        return cur;
    }

    /* Otherwise split q: the copy keeps the strings of length up to sam[p].lst + 1. */
    const int clone = newnode(-1, sam[p].lst + 1, sam[q].trans, sam[q].slink, 0);
    const int cut = sam[clone].lst + 1;
    sam[q].slink = clone;
    sam[q].sst = cut;
    sam[cur].slink = clone;
    sam[cur].sst = cut;

    /* Redirect the c-transitions that pointed at q along the rest of the chain. */
    for (; p != -1 && sam[p].trans[c] == q; p = sam[p].slink)
        sam[p].trans[c] = clone;

    sam[clone].sst = sam[sam[clone].slink].lst + 1;
    return cur;
}

/*
 * For each test case "string l r", print the number of distinct substrings
 * whose occurrence count lies in [l, r]: every state whose edpos is in range
 * contributes lst - sst + 1.
 *
 * The loop runs only while all three fields were read, so a truncated or
 * malformed trailing record ends the program instead of being processed with
 * stale values.
 */
int main()
{
    while (scanf("%s%lld%lld", s + 1, &l, &r) == 3) {
        int u = newnode(0, 0, 0, -1, 1), ls = strlen(s + 1);
        for (int i = 1; i <= ls; i++)
            u = insert(s[i] - 'A', u);
        build();
        dfs(0, -1);

        ll res = 0;
        for (int i = 1; i < n; i++)
            if (sam[i].edpos >= l && sam[i].edpos <= r)
                res += sam[i].lst - sam[i].sst + 1;
        printf("%lld\n", res);

        /*
         * Reset for the next case. Only the adjacency heads and the two
         * counters carry state across cases: newnode() rewrites every field
         * of each state it hands out, add_edge() rewrites every field of each
         * entry, and scanf() terminates s itself, so the large sam pool and
         * the input buffer do not need to be cleared.
         */
        memset(head, 0, sizeof(head));
        cnt = n = 0;
    }
    return 0;
}