[2019 HDU Multi-University Training Contest, Round 5, Problem 1006 = HDU 6629] string matching

Posted by WindChill on Wed, 09 Oct 2019 14:56:09 +0200

Problem:
Every proper suffix of a string is compared with the original string, character by character from the start, to find their longest common prefix. The task is to count the total number of character comparisons performed.

Solution: This is a bare-bones extended KMP problem. I spent two hours trying to derive it with plain KMP and got nowhere. Finally my teammate asked me what other string-comparison algorithms I knew; I suddenly thought of extended KMP, looked it up on Baidu, and it passed straight away. (Thanks to my teammates for not killing me.) I heard that some people solved it with Manacher's algorithm.

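PS: Start by counting extend[i] + 1 comparisons for every suffix: extend[i] successful comparisons plus the one failed comparison that ends the match. This is why, for a string of length N, the total number of comparisons is at least on the order of N. The exception is a suffix whose match reaches the last character of the string (extend[i] + i == N): there is no following character to compare and fail on, so that suffix does not get the + 1. In other words, the + 1 only accounts for the final mismatching comparison of each suffix.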
For how the extend array is computed and used, search Baidu for "extended KMP" and study it there.

#include<cstdio>
#include<string>
#include<cstring>
#include<algorithm>
#include<iostream>
using namespace std;
// Capacity shared by every buffer below (maximum string length plus slack).
const int maxn = 1000007;
// Alias of maxn used to size the match-length tables.
const int MAX = maxn;

// NUL-terminated input string of the current test case.
char str[maxn];

// knext[i]: match length of the pattern against its own suffix starting at i
// (filled by getknext).
int knext[MAX];
// extend[i]: match length of the text suffix starting at i against the pattern
// (filled by EXKMP).
int extend[MAX];

// Preprocessing: fills the global knext[] for the NUL-terminated string str.
// For every 0 <= i < len, knext[i] becomes the length of the longest common
// prefix of str and the suffix of str starting at i (knext[0] is the full
// length). knext[1] is always written, so it is 0 when len <= 1.
void getknext(char str[])
{
	int i = 0, j, po, len = strlen(str);
	knext[0] = len; // The whole string matches itself.

	// Compute knext[1] by direct comparison. The length test comes first so
	// that nothing past the terminating NUL is read for an empty string.
	while (i + 1 < len && str[i] == str[i + 1]) i++;
	knext[1] = i;

	po = 1; // Most recent position whose value came from explicit matching.
	for (i = 2; i < len; i++)
	{
		if (knext[i - po] + i < knext[po] + po)
		{
			// Case 1: the value copied from the mirrored position i - po ends
			// strictly inside the window matched from po, so it is final.
			knext[i] = knext[i - po];
		}
		else
		{
			// Case 2: the mirrored value reaches the edge of the window (or i
			// lies beyond it), so keep comparing from the edge onwards.
			j = knext[po] + po - i;
			if (j < 0) j = 0; // i is past the window: match from the beginning.
			while (i + j < len && str[j] == str[j + i]) j++;
			knext[i] = j;
			po = i; // The window now starts at i.
		}
	}
}

// Fills the global extend[] for text s1 against pattern s2: for every
// 0 <= i < strlen(s1), extend[i] becomes the length of the longest common
// prefix of s2 and the suffix of s1 starting at i. extend[0] is written even
// when s1 is empty. Calls getknext(s2) first, so knext[] is overwritten too.
void EXKMP(char s1[], char s2[])
{
	const int textLen = strlen(s1);
	const int patLen = strlen(s2);
	getknext(s2); // Self-match table of the pattern.

	// extend[0] is found by plain comparison from the start of both strings.
	int head = 0;
	while (head < patLen && head < textLen && s1[head] == s2[head])
		++head;
	extend[0] = head;

	int anchor = 0; // Most recent position matched explicitly.
	for (int pos = 1; pos < textLen; ++pos)
	{
		// One past the last text index covered by the match found at anchor.
		const int reach = extend[anchor] + anchor;

		if (knext[pos - anchor] + pos < reach)
		{
			// The pattern's own table gives a value that ends strictly inside
			// the covered window, so it can be copied as is.
			extend[pos] = knext[pos - anchor];
			continue;
		}

		// Otherwise resume comparing at the window edge, or from scratch when
		// pos already lies beyond it.
		int matched = reach - pos;
		if (matched < 0)
			matched = 0;
		while (pos + matched < textLen && matched < patLen && s1[pos + matched] == s2[matched])
			++matched;
		extend[pos] = matched;
		anchor = pos;
	}
}
// Reads the number of test cases, then one string per case. For each string
// it prints how many character comparisons are made when every proper suffix
// is matched against the whole string from the start: extend[i] successful
// comparisons per suffix, plus one failed comparison unless the match ran to
// the end of the string.
// NOTE(review): the input string is assumed to fit in str (fewer than maxn
// characters); scanf("%s") itself does not enforce this.
int main() {
	int t = 0; // Stays 0 (no cases) if the count cannot be read.
	std::cin >> t;
	while (t--) {
		// No per-case memset of knext/extend: EXKMP rewrites every entry in
		// [0, length) before it is read, and the loop below stays inside that
		// range. Clearing both full-size arrays for every case costs time
		// proportional to maxn regardless of the actual input length.
		if (scanf("%s", str) != 1)
			break; // Input ended early; do not reprocess the previous string.
		EXKMP(str, str);
		long long sum = 0;
		int re = strlen(str);
		// Proper suffixes start at 1..re-1. Index re is never filled by EXKMP
		// and would contribute nothing anyway.
		for (int i = 1; i < re; i++) {
			sum += extend[i];
			// A match that stops before the end of the string was ended by
			// one extra, failed comparison.
			if ((extend[i] + i) != re)
				sum++;
		}
		std::cout << sum << '\n';
	}
	return 0;
}