3008. 找出数组中的美丽下标 II

摘要
Title: 3008. 找出数组中的美丽下标 II
Categories: 字符串哈希、二分、kmp

Powered by:NEFU AB-IN

Link

3008. 找出数组中的美丽下标 II

题意

给你一个下标从 0 开始的字符串 s 、字符串 a 、字符串 b 和一个整数 k 。

如果下标 i 满足以下条件,则认为它是一个 美丽下标 :

0 <= i <= s.length - a.length
s[i…(i + a.length - 1)] == a
存在下标 j 使得:
0 <= j <= s.length - b.length
s[j…(j + b.length - 1)] == b
|j - i| <= k
以数组形式按 从小到大排序 返回美丽下标。

思路

  1. 字符串哈希 + 二分
    求模式串的字符串哈希,然后根据两个子串的哈希,求出对应的下标数组,根据数组判断合法性

  2. kmp + 二分 / 滑动窗口
    https://www.zhihu.com/question/21923021/answer/37475572
    用 KMP 求出 a 在 s 中的所有出现位置,记作 posA。
    用 KMP 求出 b 在 s 中的所有出现位置,记作 posB。
    遍历 posA 中的下标 i,在 posB 中二分查找离 i 最近的 j。如果 ∣i−j∣≤k,则把 i 加入答案。

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# 3.8.19 import
import random
from bisect import bisect_left
from collections import Counter, defaultdict, deque
from datetime import datetime, timedelta
from functools import lru_cache, reduce
from heapq import heapify, heappop, heappush, nlargest, nsmallest
from itertools import combinations, compress, permutations, starmap, tee
from math import ceil, comb, fabs, floor, gcd, hypot, log, perm, sqrt
from string import ascii_lowercase, ascii_uppercase
from sys import exit, setrecursionlimit, stdin
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union

# Constants
TYPE = TypeVar('TYPE')
N = int(2e5 + 10)
M = int(20)
INF = int(1e12)
OFFSET = int(100)
MOD = int(1e9 + 7)

# Set recursion limit
setrecursionlimit(int(2e9))


class Arr:
array = staticmethod(lambda x=0, size=N: [x() if callable(x) else x for _ in range(size)])
array2d = staticmethod(lambda x=0, rows=N, cols=M: [Arr.array(x, cols) for _ in range(rows)])
graph = staticmethod(lambda size=N: [[] for _ in range(size)])


class Math:
max = staticmethod(lambda a, b: a if a > b else b)
min = staticmethod(lambda a, b: a if a < b else b)


class Std:
class StringHash:
"""A class for efficiently computing hash values of substrings within a string."""

def __init__(self, s: str, mod: int = 1_070_777_777):
"""Initialize the StringHash object with the string, base, and mod."""
self._mod = mod
self._base = random.randint(8 * 10 ** 8, 9 * 10 ** 8)
self._s = s
self._n = len(s)
self._pow_base_ = [1] + Arr.array(0, self._n) # pow_base[i] = BASE ^ i
self._pre_hash_ = Arr.array(0, self._n + 1) # pre_hash[i] = hash(s[:i])
self._compute_hash()

def _compute_hash(self):
"""Compute the prefix hash values and power of base values for the string."""
for i, b in enumerate(self._s):
self._pow_base_[i + 1] = self._pow_base_[i] * self._base % self._mod
self._pre_hash_[i + 1] = (self._pre_hash_[i] * self._base + ord(b)) % self._mod

def get_hash(self, l: int, r: int) -> int:
"""Get the hash value of the substring s[l:r] """
return (self._pre_hash_[r + 1] - self._pre_hash_[l] * self._pow_base_[r - l + 1] % self._mod + self._mod) % self._mod

def compute_hash(self, word: str) -> int:
"""Compute the hash value of a given word using the object's base and mod."""
h = 0
for b in word:
h = (h * self._base + ord(b)) % self._mod
return h

# ————————————————————— Division line ——————————————————————


class Solution:
def beautifulIndices(self, s: str, a: str, b: str, k: int) -> List[int]:
s_hash = Std.StringHash(s)
a_id_, b_id_ = [], []
a_hash, b_hash = s_hash.compute_hash(a), s_hash.compute_hash(b)

for i in range(len(s)):
if i + len(a) <= len(s) and a_hash == s_hash.get_hash(i, i + len(a) - 1):
a_id_.append(i)
if i + len(b) <= len(s) and b_hash == s_hash.get_hash(i, i + len(b) - 1):
b_id_.append(i)

ans = []
if not (a_id_ and b_id_):
return []

for i in a_id_:
ans_l, ans_r = 0, 0
l, r = 0, len(b_id_) - 1
while l < r:
mid = l + r >> 1
if b_id_[mid] >= i - k:
r = mid
else:
l = mid + 1
ans_l = r if b_id_[r] >= i - k else INF

l, r = 0, len(b_id_) - 1
while l < r:
mid = l + r + 1 >> 1
if b_id_[mid] <= i + k:
l = mid
else:
r = mid - 1
ans_r = r if b_id_[r] <= i + k else -INF

if ans_r >= ans_l:
ans.append(i)
return ans


# print(Solution().beautifulIndices("isawsquirrelnearmysquirrelhouseohmy", "my", "squirrel", 15))
print(Solution().beautifulIndices("vatevavakz", "va", "lbda", 1))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# 3.8.19 import
import random
from bisect import bisect_left
from collections import Counter, defaultdict, deque
from datetime import datetime, timedelta
from functools import lru_cache, reduce
from heapq import heapify, heappop, heappush, nlargest, nsmallest
from itertools import combinations, compress, permutations, starmap, tee
from math import ceil, comb, fabs, floor, gcd, hypot, log, perm, sqrt
from string import ascii_lowercase, ascii_uppercase
from sys import exit, setrecursionlimit, stdin
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union

# Constants
TYPE = TypeVar('TYPE')
N = int(2e5 + 10)
M = int(20)
INF = int(1e12)
OFFSET = int(100)
MOD = int(1e9 + 7)

# Set recursion limit
setrecursionlimit(int(2e9))


class Arr:
array = staticmethod(lambda x=0, size=N: [x() if callable(x) else x for _ in range(size)])
array2d = staticmethod(lambda x=0, rows=N, cols=M: [Arr.array(x, cols) for _ in range(rows)])
graph = staticmethod(lambda size=N: [[] for _ in range(size)])


class Math:
max = staticmethod(lambda a, b: a if a > b else b)
min = staticmethod(lambda a, b: a if a < b else b)


class Std:
class KMP:
def __init__(self, t: str):
"""Initializes the KMP object with a text string."""
self.t = t

def _calc_max_match_lengths(self, p: str) -> List[int]:
"""Constructs the maximum match lengths table for the pattern.

Args:
p (str): The pattern string for which the table is constructed.

Returns:
List[int]: The list of maximum match lengths for the pattern string.
"""
mml_, max_len = Arr.array(0, len(p)), 0 # Initialize max match lengths array and max length
for i in range(1, len(p)):
while max_len > 0 and p[max_len] != p[i]: # Backtrack to find the longest prefix which is also a suffix
max_len = mml_[max_len - 1]
if p[max_len] == p[i]: # If characters match, extend the length of the prefix
max_len += 1
mml_[i] = max_len # Store the max length at this position
return mml_

def search(self, p: str) -> List[int]:
"""Searches for all occurrences of the pattern in the text.

Args:
p (str): The pattern string to search for within the text.

Returns:
List[int]: A list of starting indices where the pattern is found in the text.
"""
mml_ = self._calc_max_match_lengths(p) # Compute max match lengths for the pattern
pos_ = [] # List to store the starting indices of matches
cnt = 0 # Number of characters currently matched
for i in range(len(self.t)):
while cnt > 0 and p[cnt] != self.t[i]: # If there's a mismatch, backtrack using max match lengths table
cnt = mml_[cnt - 1]
if p[cnt] == self.t[i]: # If characters match, advance the match length
cnt += 1
if cnt == len(p): # If a full match is found, record the start position and backtrack
pos_.append(i - len(p) + 1)
cnt = mml_[cnt - 1]

return pos_

# ————————————————————— Division line ——————————————————————


class Solution:
def beautifulIndices(self, s: str, a: str, b: str, k: int) -> List[int]:
kmp = Std.KMP(s)
a_id_ = kmp.search(a)
b_id_ = kmp.search(b)

ans = []
if not (a_id_ and b_id_):
return []

for i in a_id_:
ans_l, ans_r = 0, 0
l, r = 0, len(b_id_) - 1
while l < r:
mid = l + r >> 1
if b_id_[mid] >= i - k:
r = mid
else:
l = mid + 1
ans_l = r if b_id_[r] >= i - k else INF

l, r = 0, len(b_id_) - 1
while l < r:
mid = l + r + 1 >> 1
if b_id_[mid] <= i + k:
l = mid
else:
r = mid - 1
ans_r = r if b_id_[r] <= i + k else -INF

if ans_r >= ans_l:
ans.append(i)
return ans

使用搜索:谷歌必应百度