2021 IC M (suffix automata)

Posted by vapokerpro on Mon, 22 Nov 2021 14:08:02 +0100

Given a string, for each of its prefixes find the lexicographically largest substring of that prefix and output the position of its leftmost occurrence.
Idea: seeing "substring" together with "lexicographic order", it is easy to think of problem J from Guilin. The natural approach is to reverse the string, build its suffix automaton, and then build the parent tree. Each edge of the parent tree is labelled with the extra characters a child has compared with its father, and if the children are ordered by these labels, the DFS order is the lexicographic order. See a blog post about the Guilin problem for details. How do we find the answers afterwards? Consider the contribution of each substring: a substring can only be the lexicographically largest substring of prefixes whose right endpoint is at or to the right of its own right endpoint (which sounds almost trivially true). When we traverse the parent tree in DFS order, the substrings become lexicographically larger and larger, so the contribution of the current substring overrides that of all earlier ones; that is, every prefix ending at or to the right of the current substring's right endpoint is covered by the current substring. You can maintain this with a segment tree, or keep a tag array that records the visiting time for each position and resolve the coverage in one final pass. As for what to record: the lexicographically largest substring of a prefix must be a suffix of that prefix, so we only need to store the left endpoint. Note that, in order to cover as many prefixes as possible, each substring should be represented by its earliest occurrence. Because the automaton is built on the reversed string, this means keeping the largest position for each state while constructing the suffix automaton, which corresponds to the smallest position in the original string.
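A rant about my own suffocating mistakes: I forgot to read the input; I reversed the string by swapping positions for every i from 1 to n, so everything was swapped back and the string was not reversed at all; and tim starts from 0 while the initial best value in the final pass is also set to 0, so watch out for the tim = 0 case.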

#include <bits/stdc++.h>
// Debug helper: prints the expression text and its value to stderr.
#define de(x) cerr << "  debug  " << #x << "  ==  " << x << endl;
// Shorthand for `long long`.
#define ll long long

using namespace std;

// Capacity constant: the input buffer and the per-position array hold maxn
// entries; the per-state arrays hold 2 * maxn entries.
const int maxn = 1e6 + 7;
// One state of the suffix automaton; all states live in the global pool `st`.
struct state
{
    int len;     // length of the longest string belonging to this state
    int link;    // suffix link; -1 for the initial state
    int nxt[26]; // transitions for letters 'a'..'z'; 0 means "no transition"
    int id;      // not read or written by the code visible here
    int pos;     // end position set by sam_extend; getSiz raises it to the
                 // maximum over the states that link to this one
} st[maxn * 2];
int sz;   // number of automaton states allocated so far
int last; // state created by the latest sam_extend call (0 right after sam_init)

/*
 * Resets the automaton to a single initial state (index 0) with no
 * transitions, length 0 and suffix link -1.
 */
void sam_init()
{
    sz = 1;
    last = 0;
    st[0].link = -1;
    st[0].len = 0;
    std::fill(std::begin(st[0].nxt), std::end(st[0].nxt), 0);
}
int cur; // index of the state created by the current sam_extend call

/*
 * Appends one lowercase letter to the automaton.
 *
 * c  - letter to append ('a'..'z').
 * id - position stored in the new state's `pos` field.
 *
 * Clones are taken from the zero-initialised pool, so their `pos` stays 0
 * until getSiz propagates positions along the suffix links.
 */
void sam_extend(char c, int id)
{
    const int ch = c - 'a';

    cur = sz++;
    st[cur].pos = id;
    st[cur].len = st[last].len + 1;

    // Walk the suffix links from `last`, adding the missing transition on ch.
    int p;
    for (p = last; p != -1 && st[p].nxt[ch] == 0; p = st[p].link)
    {
        st[p].nxt[ch] = cur;
    }

    // Stays 0 (the initial state) when the walk ran past the root.
    int suffix_link = 0;
    if (p != -1)
    {
        const int q = st[p].nxt[ch];
        if (st[q].len == st[p].len + 1)
        {
            // q already represents exactly the strings we need.
            suffix_link = q;
        }
        else
        {
            // Split q: the clone keeps q's transitions and link but has the
            // shorter length.
            const int clone = sz++;
            st[clone].link = st[q].link;
            st[clone].len = st[p].len + 1;
            std::copy(std::begin(st[q].nxt), std::end(st[q].nxt), std::begin(st[clone].nxt));

            // Redirect every transition on ch that still points at q.
            for (; p != -1 && st[p].nxt[ch] == q; p = st[p].link)
            {
                st[p].nxt[ch] = clone;
            }

            st[q].link = clone;
            suffix_link = clone;
        }
    }

    st[cur].link = suffix_link;
    last = cur;
}
int len[maxn*2]; // counting-sort buckets indexed by state length
int a[maxn*2];   // a[1..sz-1]: non-root states in order of non-increasing len
int n;           // length of the input string (set in main)

/*
 * Orders the non-root states by non-increasing `len` with a counting sort and
 * then pushes every state's `pos` up to its suffix-link target, keeping the
 * maximum. The buckets in `len` are not cleared beforehand, so this relies on
 * them being zero on entry.
 */
void getSiz()
{
    // Histogram: how many non-root states have each length.
    for (int u = 1; u < sz; ++u)
    {
        ++len[st[u].len];
    }

    // Suffix sums: len[k] becomes the number of states with length >= k.
    for (int k = n; k > 0; --k)
    {
        len[k] += len[k + 1];
    }

    // Each state takes the current last slot of its length bucket.
    for (int u = 1; u < sz; ++u)
    {
        const int slot = len[st[u].len];
        a[slot] = u;
        --len[st[u].len];
    }

    // Longer states come first, so a state's pos is final before it is
    // pushed to its suffix-link target.
    for (int order = 1; order < sz; ++order)
    {
        const int child = a[order];
        const int parent = st[child].link;
        if (st[parent].pos < st[child].pos)
        {
            st[parent].pos = st[child].pos;
        }
    }
}
// Input buffer, used 1-indexed; main reverses it in place before building the automaton.
char s[maxn];
// Child entry in a parent-tree adjacency list.
struct node
{
    int str; // sort key: first letter of the edge label, as 0..25
    int x;   // index of the child state

    node(int ss, int xx) : str(ss), x(xx) {}
};
vector<node> v[maxn*2]; // v[p]: children of state p in the parent tree

// Strict ordering of child entries by their edge-label key `str`.
bool cmp(node a, node b)
{
    return b.str > a.str;
}
pair<int, int> P[maxn]; // P[r] = (visit time, left endpoint) of the latest-visited state recorded for position r
int tim;                // pre-order visit counter, starts at 0

/*
 * Pre-order walk of the parent tree rooted at x, visiting children in the
 * order they are stored in v[].
 *
 * For every visited state other than the root (index 0) it computes, in
 * original-string coordinates, the left endpoint l = n - pos + 1 and the
 * right endpoint r = l + len(link) of the state's shortest string, and
 * stores (visit time, l) in P[r]. A later visit to the same r overwrites an
 * earlier one.
 *
 * The walk uses an explicit stack instead of recursion: the parent tree can
 * be as deep as the string is long (for example a string made of one repeated
 * letter), and a recursion that deep can overflow the call stack.
 */
void dfs(int x)
{
    std::vector<int> pending(1, x);
    while (!pending.empty())
    {
        const int u = pending.back();
        pending.pop_back();

        if (u != 0)
        {
            const int l = n - st[u].pos + 1;
            const int r = l + st[st[u].link].len;
            P[r] = std::make_pair(tim++, l);
        }

        // Push children in reverse so they are popped, and therefore
        // visited, in their stored order.
        for (auto it = v[u].rbegin(); it != v[u].rend(); ++it)
        {
            pending.push_back(it->x);
        }
    }
}
/*
 * Reads one string, builds the suffix automaton of its reverse, sorts the
 * parent tree's children by the first letter of their edge labels, runs the
 * pre-order walk, and then prints one line "l i" for every prefix length i.
 */
int main()
{
    // Bounded read: s + 1 has room for sizeof(s) - 2 characters plus the
    // terminating '\0', so the width below can never overrun the buffer.
    static_assert(sizeof(s) >= 1000005 + 2, "scanf width must fit in s");
    if (scanf("%1000005s", s + 1) != 1)
    {
        s[1] = '\0'; // nothing was read: treat the input as an empty string
    }
    n = static_cast<int>(strlen(s + 1));

    // The automaton is built on the reversed string.
    std::reverse(s + 1, s + n + 1);

    sam_init();
    for (int i = 1; i <= n; i++)
    {
        sam_extend(s[i], i);
    }
    getSiz();

    // Build the parent tree. The key of a child is the letter of the
    // reversed string found at pos - len(link).
    for (int i = 1; i < sz; i++)
    {
        const int parent = st[i].link;
        const int key = s[st[i].pos - st[parent].len] - 'a';
        v[parent].push_back(node(key, i));
    }
    for (int i = 0; i < sz; i++)
    {
        std::sort(v[i].begin(), v[i].end(), cmp);
    }

    dfs(0);

    // Sweep the positions left to right, keeping the entry with the largest
    // visit time seen so far; its left endpoint is printed for prefix i.
    int ans = 1;
    int tt = 0;
    for (int i = 1; i <= n; i++)
    {
        if (P[i].first > tt)
        {
            tt = P[i].first;
            ans = P[i].second;
        }
        printf("%d %d\n", ans, i);
    }
    #ifdef iyua
        system("pause");
    #endif 
    return 0;
}

Topics: acm