「UR #20」机器蚤分组(“Ur #20” machine flea group)

题目

点这里看题目。

分析

定义 \(a\le b\) 当且仅当 \(a\) 为 \(b\) 的子串,题目就是要求 \(S[l:r]\) 的所有本质不同的子串和 \(\le\) 构成的偏序集的最小链覆盖中链的条数。

熟练地使用 Dilworth 定理,我们转而求最长反链的长度。注意到,字符串作为元素,自带长度的区分。根据我们的经验,按照这个度量去划分很有可能可以得到最长反链——因为,长度相等的子串构成的集合一定不存在偏序关系。所以,可以猜到:

结论 I:
最长反链的长度即为最大的同一长度的本质不同子串个数。

证明移步 UOJ 题解,这边不想管了。

结论 I:

最长反链的长度即为最大的同一长度的本质不同子串个数。

证明移步 UOJ 题解,这边不想管了。

此时已经可以导出一个 \(O(nq)\) 的做法,然而还需要接着优化。简单尝试,可以感知到硬来优化是行不通的——换言之,应当继续寻找性质。

难点就在于:下面的性质不容易猜出来(至少我没有明确的线索)。这里只能直接借助 UOJ 题解来理解:

结论 II:
最长反链长度 \(\ge k\) 当且仅当长度为 \(n-k+1\) 的子串全不相同。

证明:
充分性显然。对于必要性进行反证法,如果存在 \(S[p:p+n-k]=S[q:q+n-k]\),则可以直接翻译为 \(\forall 0\le d\le n-k,S[p+d]=S[q+d]\)。对于任意的 \(l\le n-k+1\),我们可以得到长度为 \(l\) 的本质不同的子串个数至多为 \((n-l+1)-(n-k+1-l+1)=k-1\),矛盾。

结论 II:

最长反链长度 \(\ge k\) 当且仅当长度为 \(n-k+1\) 的子串全不相同。

证明:

充分性显然。对于必要性进行反证法,如果存在 \(S[p:p+n-k]=S[q:q+n-k]\),则可以直接翻译为 \(\forall 0\le d\le n-k,S[p+d]=S[q+d]\)。对于任意的 \(l\le n-k+1\),我们可以得到长度为 \(l\) 的本质不同的子串个数至多为 \((n-l+1)-(n-k+1-l+1)=k-1\),矛盾。

那么,根据结论,对于一个确定的字符串 \(S\),不难得到答案即为:

回到子串的问题上来。对于原串建立后缀树,则对于某一个后缀 \(S[i:]\),当它和 \(S[j:]\) 的 LCA 确定时,真正有效的 \(j\) 只有两个:\(<i\) 的最大的 \(j\)、\(>i\) 的最小的 \(j\)。

但是这明显还不够,我们接着剔除可考虑的 \((i,j)\) 对。不难想到,如果 \(i_1\le i_2\le j_2\le j_1\),且 \(i_1\le i_2,j_1\le j_2\) 不同时成立,则 \((i_1,j_1)\) 也是没用的。这一点直接启发我们针对 LCA 而非 \(i\) 进行考察。当我们想要新增 LCA 的某个子树进行考虑的时候,我们可以进行启发式合并。则根据前面的论断,当我们想要加入一个结点的时候,我们只需要在已有集合里面找出两个下标最近的元素即可。这样只会产生 \(O(n\log n)\) 个有效的 \((i,j)\),启发式合并的复杂度仅有 \(O(n\log^2n)\)。

再求解原问题。由于受到 \(r\) 的限制,因此我们需要分别考虑 \(\operatorname{LCP}\) 完全包含于 \([l,r]\) 和部分包含于 \([l,r]\)(也就是伸出去了一部分)的情况。然而这两种情况都可以轻易地转化,变为“可扫描线”的问题,此处便不在赘述。

小结:

  • 最长反链取值范围的经验性论断,以及本题中给出的两个结论,都挺重要的。
  • 剔除无效 \((i,j)\) 对的部分:主动地去想到“剔除”的操作,然后再考察下标之间的关系,通过严格的偏序筛掉大量没有用的对。以及,启发式合并的思路也值得积累。
  • 小心最后“\(\operatorname{LCP}\) 是否完全包含”的讨论!

代码


#include <set>
#include <cstdio>
#include <vector>
#include <cstring>
#include <iostream>

#define rep( i, a, b ) for( int i = (a) ; i <= (b) ; i ++ )
#define per( i, a, b ) for( int i = (a) ; i >= (b) ; i -- )

const int INF = 1e9;
const int MAXN = 1e5 + 5, MAXV = 2e5 + 5;

template<typename _T>
void read( _T &x ) {
	x = 0; char s = getchar(); bool f = false;
	while( ! ( '0' <= s && s <= '9' ) ) { f = s == '-', s = getchar(); }
	while( '0' <= s && s <= '9' ) { x = ( x << 3 ) + ( x << 1 ) + ( s - '0' ), s = getchar(); }
	if( f ) x = -x;
}

template<typename _T>
void write( _T x ) {
	if( x < 0 ) putchar( '-' ), x = -x;
	if( 9 < x ) write( x / 10 );
	putchar( x % 10 + '0' );
}

template<typename _T>
_T Max( const _T &a, const _T &b ) {
	return a > b ? a : b;
}

template<typename _T>
_T Min( const _T &a, const _T &b ) {
	return a < b ? a : b;
}

typedef std :: pair<int, int> Change;

struct Edge {
	int to, nxt;
} Graph[MAXV << 1];

std :: vector<Change> uptL[MAXN], uptR[MAXN];

int tre[MAXN << 2];

std :: vector<int> qry[MAXN];
int qR[MAXN], ans[MAXN], bsc[MAXN];

std :: set<int> ed[MAXV];
int head[MAXV], stId[MAXV], cnt = 1;

int ch[MAXV][26], mx[MAXV], fa[MAXV];
int rt, lst, ntot;

char str[MAXN];

int N, Q;

inline void AddEdge( const int &from, const int &to ) {
	Graph[++ cnt].to = to, Graph[cnt].nxt = head[from];
	head[from] = cnt;
}

inline void Copy( const int &a, const int &b ) {
	fa[a] = fa[b], mx[a] = mx[b];
	memcpy( ch[a], ch[b], sizeof ch[b] );
}

inline void Expand( const char &c, const int &id ) {
	int x = c - 'a', p = lst, cur = ++ ntot;
	mx[cur] = mx[lst] + 1, ed[lst = cur].insert( id );
	while( p && ! ch[p][x] ) ch[p][x] = cur, p = fa[p];
	if( ! p ) { fa[cur] = rt; return ; }
	int q = ch[p][x];
	if( mx[q] == mx[p] + 1 ) { fa[cur] = q; return ; }
	int nq = ++ ntot; Copy( nq, q );
	mx[nq] = mx[p] + 1, fa[cur] = fa[q] = nq;
	while( p && ch[p][x] == q ) ch[p][x] = nq, p = fa[p];
}

inline void AddPair( const int &l, const int &r, const int &lcs ) {
	uptL[l - lcs + 1].emplace_back( r, lcs );
	uptR[l - lcs + 1].emplace_back( r, l );
}

inline void Merge( int &a, int &b, const int &lcs ) {
	if( ed[a].size() < ed[b].size() )
		std :: swap( a, b );
	for( const int& x : ed[b] ) {
		std :: set<int> :: iterator
			it = ed[a].upper_bound( x );
		if( it != ed[a].end() ) AddPair( x, *it, lcs );
		if( it != ed[a].begin() ) AddPair( * -- it, x, lcs );
	}
	for( const int& x : ed[b] ) ed[a].insert( x );
	ed[b].clear();
}

void DFS( const int &u, const int &fa ) {
	stId[u] = u;
	for( int i = head[u], v ; i ; i = Graph[i].nxt )
		if( ( v = Graph[i].to ) ^ fa )
			DFS( v, u ), Merge( stId[u], stId[v], mx[u] );
}

inline void Upt( const int &x ) {
	tre[x] = Max( tre[x << 1], tre[x << 1 | 1] );
}

void Build( const int &x, const int &l, const int &r ) {
	if( l > r ) return ;
	tre[x] = - INF;
	if( l == r ) return ;
	int mid = ( l + r ) >> 1;
	Build( x << 1, l, mid );
	Build( x << 1 | 1, mid + 1, r );
	Upt( x );
}

void Update( const int &x, const int &l, const int &r, const int &p, const int &nVal ) {
	if( l == r ) { tre[x] = Max( tre[x], nVal ); return ; }
	int mid = ( l + r ) >> 1;
	if( p <= mid ) Update( x << 1, l, mid, p, nVal );
	else Update( x << 1 | 1, mid + 1, r, p, nVal );
	Upt( x );
}

int Query( const int &x, const int &l, const int &r, const int &segL, const int &segR ) {
	if( segL <= l && r <= segR ) return tre[x];
	int mid = ( l + r ) >> 1, ret = - INF;
	if( segL <= mid ) ret = Max( ret, Query( x << 1, l, mid, segL, segR ) );
	if( mid  < segR ) ret = Max( ret, Query( x << 1 | 1, mid + 1, r, segL, segR ) );
	return ret;
}

int main() {
	rt = lst = ++ ntot;
	read( N ), read( Q );
	scanf( "%s", str + 1 );
	rep( i, 1, N ) Expand( str[i], i );
	rep( i, 2, ntot ) 
		AddEdge( fa[i], i ), 
		AddEdge( i, fa[i] );
	DFS( rt, 0 );
	rep( i, 1, Q ) {
		int l; read( l ), read( qR[i] );
		qry[l].push_back( i ), bsc[i] = qR[i] - l + 1;
	}
	Build( 1, 1, N );
	per( i, N, 1 ) {
		int n = uptL[i].size();
		rep( j, 0, n - 1 )
			Update( 1, 1, N, uptL[i][j].first, uptL[i][j].second );
		n = qry[i].size();
		rep( j, 0, n - 1 ) {
			int cur = qry[i][j];
			ans[cur] = Max( ans[cur], Query( 1, 1, N, i, qR[cur] ) );
		}
	}
	Build( 1, 1, N );
	rep( i, 1, N ) {
		int n = uptR[i].size();
		rep( j, 0, n - 1 )
			Update( 1, 1, N, uptR[i][j].first, uptR[i][j].second );
		n = qry[i].size();
		rep( j, 0, n - 1 ) {
			int cur = qry[i][j];
			ans[cur] = Max( ans[cur], Query( 1, 1, N, i, qR[cur] ) - i + 1 );
		}
	}
	rep( i, 1, Q ) write( bsc[i] - ans[i] ), putchar( '\n' );
	return 0;
}
————————

subject

Click here to see the topic.

analysis

Define \ (a \ Le B \) if and only if \ (a \) is a substring of \ (B \), the problem is to require all essentially different substrings of \ (s [l: R] \) and the minimum chain covering the number of chains in the poset composed of \ (\ Le \).

Skillfully using Dilworth theorem, we turn to find the length of the longest inverse chain. Note that as an element, a string has its own < strong > length < / strong >. According to our experience, it is possible to obtain the longest inverse chain by dividing according to this measure – because there must be no partial order relationship in the set composed of substrings with equal length < / strong >. So you can guess:

Conclusion I:
The length of the longest anti chain is the maximum number of essentially different substrings of the same length.
Prove that we don’t want to take care of the uoj problem.

Conclusion I:

The length of the longest anti chain is the maximum number of essentially different substrings of the same length.

Prove that we don’t want to take care of the uoj problem.

At this point, you can export a \ (O (NQ) \) method, but you still need to optimize it. With a simple attempt, we can feel that hard optimization will not work – in other words, we should continue to look for properties.

The difficulty is that the following properties are not easy to guess (at least I don’t have a clear clue). This can only be understood directly with the help of uoj problem solution:

Conclusion II:
The longest anti chain length \ (\ Ge K \) if and only if the substrings with length \ (n-k + 1 \) are all different.
prove:
The adequacy is obvious. If there is \ (s [P: P + n-k] = s [Q: Q + n-k] \), it can be directly translated into \ (\ forall 0 \ le d \ Le n-k, s [P + D] = s [Q + D] \). For any \ (L \ Le n-k + 1 \), we can get that the number of substrings with essentially different length \ (L \) is at most \ ((n-l + 1) – (n-k + 1-L + 1) = k-1 \), which is contradictory.

Conclusion II:

The longest anti chain length \ (\ Ge K \) if and only if the substrings with length \ (n-k + 1 \) are all different.

prove:

The adequacy is obvious. If there is \ (s [P: P + n-k] = s [Q: Q + n-k] \), it can be directly translated into \ (\ forall 0 \ le d \ Le n-k, s [P + D] = s [Q + D] \). For any \ (L \ Le n-k + 1 \), we can get that the number of substrings with essentially different length \ (L \) is at most \ ((n-l + 1) – (n-k + 1-L + 1) = k-1 \), which is contradictory.

Then, according to the conclusion, for a certain string \ (s \), it is not difficult to find the answer is:

Back to the string of questions. When establishing a suffix tree for the original string, for a suffix \ (s [I:] \), when it is determined with the LCA of \ (s [J:] \), there are only two truly valid \ (J \): \ (& lt; I \) maximum \ (J \) and \ (& gt; I \) minimum \ (J \).

But this is obviously not enough. We then eliminate the possible \ ((I, J) \) pairs. It is not hard to imagine that if \ (i_1 \ Le i_2 \ Le j_2 \ Le j_1 \) and \ (i_1 \ Le i_2, j_1 \ Le j_2 \) are not valid at the same time, \ ((i_1, j_1) \) is also useless < / strong >. This directly inspired us to investigate LCA rather than \ (I \). When we want to add a subtree of LCA for consideration, we can conduct heuristic merging. According to the previous conclusion, when we want to add a node, we only need to find the nearest element of the two subscripts in the existing set. This will only produce \ (O (n \ log n) \) valid \ ((I, J) \), and the complexity of heuristic merging is only \ (O (n \ log ^ 2n) \).

Then solve the original problem. Due to the limitation of \ (R \), we need to consider the case that \ (\ operatorname {LCP} \) is completely contained in \ ([l, R] \) and partially contained in \ ([l, R] \) (that is, it extends a part). However, these two situations can be easily transformed into the problem of “scanline”, which will not be repeated here.

Summary:

  • The empirical conclusion of the value range of the longest anti chain and the two conclusions given in this topic are very important.
  • Eliminate the invalid \ ((I, J) \) pairs: actively think of the “elimination” operation, then investigate the relationship between subscripts, and screen out a large number of useless pairs through strict partial order. And the idea of heuristic merging is also worth accumulating.
  • Be careful of the discussion of “whether the \ (\ operatorname {LCP} \) is completely included” at the end!

code


#include <set>
#include <cstdio>
#include <vector>
#include <cstring>
#include <iostream>

#define rep( i, a, b ) for( int i = (a) ; i <= (b) ; i ++ )
#define per( i, a, b ) for( int i = (a) ; i >= (b) ; i -- )

const int INF = 1e9;
const int MAXN = 1e5 + 5, MAXV = 2e5 + 5;

template<typename _T>
void read( _T &x ) {
	x = 0; char s = getchar(); bool f = false;
	while( ! ( '0' <= s && s <= '9' ) ) { f = s == '-', s = getchar(); }
	while( '0' <= s && s <= '9' ) { x = ( x << 3 ) + ( x << 1 ) + ( s - '0' ), s = getchar(); }
	if( f ) x = -x;
}

template<typename _T>
void write( _T x ) {
	if( x < 0 ) putchar( '-' ), x = -x;
	if( 9 < x ) write( x / 10 );
	putchar( x % 10 + '0' );
}

template<typename _T>
_T Max( const _T &a, const _T &b ) {
	return a > b ? a : b;
}

template<typename _T>
_T Min( const _T &a, const _T &b ) {
	return a < b ? a : b;
}

typedef std :: pair<int, int> Change;

struct Edge {
	int to, nxt;
} Graph[MAXV << 1];

std :: vector<Change> uptL[MAXN], uptR[MAXN];

int tre[MAXN << 2];

std :: vector<int> qry[MAXN];
int qR[MAXN], ans[MAXN], bsc[MAXN];

std :: set<int> ed[MAXV];
int head[MAXV], stId[MAXV], cnt = 1;

int ch[MAXV][26], mx[MAXV], fa[MAXV];
int rt, lst, ntot;

char str[MAXN];

int N, Q;

inline void AddEdge( const int &from, const int &to ) {
	Graph[++ cnt].to = to, Graph[cnt].nxt = head[from];
	head[from] = cnt;
}

inline void Copy( const int &a, const int &b ) {
	fa[a] = fa[b], mx[a] = mx[b];
	memcpy( ch[a], ch[b], sizeof ch[b] );
}

inline void Expand( const char &c, const int &id ) {
	int x = c - 'a', p = lst, cur = ++ ntot;
	mx[cur] = mx[lst] + 1, ed[lst = cur].insert( id );
	while( p && ! ch[p][x] ) ch[p][x] = cur, p = fa[p];
	if( ! p ) { fa[cur] = rt; return ; }
	int q = ch[p][x];
	if( mx[q] == mx[p] + 1 ) { fa[cur] = q; return ; }
	int nq = ++ ntot; Copy( nq, q );
	mx[nq] = mx[p] + 1, fa[cur] = fa[q] = nq;
	while( p && ch[p][x] == q ) ch[p][x] = nq, p = fa[p];
}

inline void AddPair( const int &l, const int &r, const int &lcs ) {
	uptL[l - lcs + 1].emplace_back( r, lcs );
	uptR[l - lcs + 1].emplace_back( r, l );
}

inline void Merge( int &a, int &b, const int &lcs ) {
	if( ed[a].size() < ed[b].size() )
		std :: swap( a, b );
	for( const int& x : ed[b] ) {
		std :: set<int> :: iterator
			it = ed[a].upper_bound( x );
		if( it != ed[a].end() ) AddPair( x, *it, lcs );
		if( it != ed[a].begin() ) AddPair( * -- it, x, lcs );
	}
	for( const int& x : ed[b] ) ed[a].insert( x );
	ed[b].clear();
}

void DFS( const int &u, const int &fa ) {
	stId[u] = u;
	for( int i = head[u], v ; i ; i = Graph[i].nxt )
		if( ( v = Graph[i].to ) ^ fa )
			DFS( v, u ), Merge( stId[u], stId[v], mx[u] );
}

inline void Upt( const int &x ) {
	tre[x] = Max( tre[x << 1], tre[x << 1 | 1] );
}

void Build( const int &x, const int &l, const int &r ) {
	if( l > r ) return ;
	tre[x] = - INF;
	if( l == r ) return ;
	int mid = ( l + r ) >> 1;
	Build( x << 1, l, mid );
	Build( x << 1 | 1, mid + 1, r );
	Upt( x );
}

void Update( const int &x, const int &l, const int &r, const int &p, const int &nVal ) {
	if( l == r ) { tre[x] = Max( tre[x], nVal ); return ; }
	int mid = ( l + r ) >> 1;
	if( p <= mid ) Update( x << 1, l, mid, p, nVal );
	else Update( x << 1 | 1, mid + 1, r, p, nVal );
	Upt( x );
}

int Query( const int &x, const int &l, const int &r, const int &segL, const int &segR ) {
	if( segL <= l && r <= segR ) return tre[x];
	int mid = ( l + r ) >> 1, ret = - INF;
	if( segL <= mid ) ret = Max( ret, Query( x << 1, l, mid, segL, segR ) );
	if( mid  < segR ) ret = Max( ret, Query( x << 1 | 1, mid + 1, r, segL, segR ) );
	return ret;
}

int main() {
	rt = lst = ++ ntot;
	read( N ), read( Q );
	scanf( "%s", str + 1 );
	rep( i, 1, N ) Expand( str[i], i );
	rep( i, 2, ntot ) 
		AddEdge( fa[i], i ), 
		AddEdge( i, fa[i] );
	DFS( rt, 0 );
	rep( i, 1, Q ) {
		int l; read( l ), read( qR[i] );
		qry[l].push_back( i ), bsc[i] = qR[i] - l + 1;
	}
	Build( 1, 1, N );
	per( i, N, 1 ) {
		int n = uptL[i].size();
		rep( j, 0, n - 1 )
			Update( 1, 1, N, uptL[i][j].first, uptL[i][j].second );
		n = qry[i].size();
		rep( j, 0, n - 1 ) {
			int cur = qry[i][j];
			ans[cur] = Max( ans[cur], Query( 1, 1, N, i, qR[cur] ) );
		}
	}
	Build( 1, 1, N );
	rep( i, 1, N ) {
		int n = uptR[i].size();
		rep( j, 0, n - 1 )
			Update( 1, 1, N, uptR[i][j].first, uptR[i][j].second );
		n = qry[i].size();
		rep( j, 0, n - 1 ) {
			int cur = qry[i][j];
			ans[cur] = Max( ans[cur], Query( 1, 1, N, i, qR[cur] ) - i + 1 );
		}
	}
	rep( i, 1, Q ) write( bsc[i] - ans[i] ), putchar( '\n' );
	return 0;
}