摘要
Title: 3008. 找出数组中的美丽下标 II
Categories: 字符串哈希、二分、kmp

3008. 找出数组中的美丽下标 II

题意

给你一个下标从 0 开始的字符串 s 、字符串 a 、字符串 b 和一个整数 k 。

如果下标 i 满足以下条件，则认为它是一个美丽下标：

0 <= i <= s.length - a.length
s[i…(i + a.length - 1)] == a
存在下标 j 使得：
0 <= j <= s.length - b.length
s[j…(j + b.length - 1)] == b
|j - i| <= k
以数组形式按从小到大排序返回美丽下标。

思路

字符串哈希 + 二分
求模式串的字符串哈希，然后根据两个子串的哈希，求出对应的下标数组，根据数组判断合法性
kmp + 二分 / 滑动窗口
https://www.zhihu.com/question/21923021/answer/37475572
用 KMP 求出 a 在 s 中的所有出现位置，记作 posA。
用 KMP 求出 b 在 s 中的所有出现位置，记作 posB。
遍历 posA 中的下标 i，在 posB 中二分查找离 i 最近的 j。如果 ∣i−j∣≤k，则把 i 加入答案。

代码

# 3.8.19 import
import random
from bisect import bisect_left
from collections import Counter, defaultdict, deque
from datetime import datetime, timedelta
from functools import lru_cache, reduce
from heapq import heapify, heappop, heappush, nlargest, nsmallest
from itertools import combinations, compress, permutations, starmap, tee
from math import ceil, comb, fabs, floor, gcd, hypot, log, perm, sqrt
from string import ascii_lowercase, ascii_uppercase
from sys import exit, setrecursionlimit, stdin
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union

# Constants
TYPE = TypeVar('TYPE')
N = int(2e5 + 10)
M = int(20)
INF = int(1e12)
OFFSET = int(100)
MOD = int(1e9 + 7)

# Set recursion limit
setrecursionlimit(int(2e9))


class Arr:
    array = staticmethod(lambda x=0, size=N: [x() if callable(x) else x for _ in range(size)])
    array2d = staticmethod(lambda x=0, rows=N, cols=M: [Arr.array(x, cols) for _ in range(rows)])
    graph = staticmethod(lambda size=N: [[] for _ in range(size)])


class Math:
    max = staticmethod(lambda a, b: a if a > b else b)
    min = staticmethod(lambda a, b: a if a < b else b)


class Std:
    class StringHash:
        """A class for efficiently computing hash values of substrings within a string."""

        def __init__(self, s: str, mod: int = 1_070_777_777):
            """Initialize the StringHash object with the string, base, and mod."""
            self._mod = mod
            self._base = random.randint(8 * 10 ** 8, 9 * 10 ** 8)
            self._s = s
            self._n = len(s)
            self._pow_base_ = [1] + Arr.array(0, self._n)  # pow_base[i] = BASE ^ i
            self._pre_hash_ = Arr.array(0, self._n + 1)  # pre_hash[i] = hash(s[:i])
            self._compute_hash()

        def _compute_hash(self):
            """Compute the prefix hash values and power of base values for the string."""
            for i, b in enumerate(self._s):
                self._pow_base_[i + 1] = self._pow_base_[i] * self._base % self._mod
                self._pre_hash_[i + 1] = (self._pre_hash_[i] * self._base + ord(b)) % self._mod

        def get_hash(self, l: int, r: int) -> int:
            """Get the hash value of the substring s[l:r] """
            return (self._pre_hash_[r + 1] - self._pre_hash_[l] * self._pow_base_[r - l + 1] % self._mod + self._mod) % self._mod

        def compute_hash(self, word: str) -> int:
            """Compute the hash value of a given word using the object's base and mod."""
            h = 0
            for b in word:
                h = (h * self._base + ord(b)) % self._mod
            return h

# ————————————————————— Division line ——————————————————————


class Solution:
    def beautifulIndices(self, s: str, a: str, b: str, k: int) -> List[int]:
        s_hash = Std.StringHash(s)
        a_id_, b_id_ = [], []
        a_hash, b_hash = s_hash.compute_hash(a), s_hash.compute_hash(b)

        for i in range(len(s)):
            if i + len(a) <= len(s) and a_hash == s_hash.get_hash(i, i + len(a) - 1):
                a_id_.append(i)
            if i + len(b) <= len(s) and b_hash == s_hash.get_hash(i, i + len(b) - 1):
                b_id_.append(i)

        ans = []
        if not (a_id_ and b_id_):
            return []

        for i in a_id_:
            ans_l, ans_r = 0, 0
            l, r = 0, len(b_id_) - 1
            while l < r:
                mid = l + r >> 1
                if b_id_[mid] >= i - k:
                    r = mid
                else:
                    l = mid + 1
            ans_l = r if b_id_[r] >= i - k else INF

            l, r = 0, len(b_id_) - 1
            while l < r:
                mid = l + r + 1 >> 1
                if b_id_[mid] <= i + k:
                    l = mid
                else:
                    r = mid - 1
            ans_r = r if b_id_[r] <= i + k else -INF

            if ans_r >= ans_l:
                ans.append(i)
        return ans


# print(Solution().beautifulIndices("isawsquirrelnearmysquirrelhouseohmy", "my", "squirrel", 15))
print(Solution().beautifulIndices("vatevavakz", "va", "lbda", 1))

# 3.8.19 import
import random
from bisect import bisect_left
from collections import Counter, defaultdict, deque
from datetime import datetime, timedelta
from functools import lru_cache, reduce
from heapq import heapify, heappop, heappush, nlargest, nsmallest
from itertools import combinations, compress, permutations, starmap, tee
from math import ceil, comb, fabs, floor, gcd, hypot, log, perm, sqrt
from string import ascii_lowercase, ascii_uppercase
from sys import exit, setrecursionlimit, stdin
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union

# Constants
TYPE = TypeVar('TYPE')
N = int(2e5 + 10)
M = int(20)
INF = int(1e12)
OFFSET = int(100)
MOD = int(1e9 + 7)

# Set recursion limit
setrecursionlimit(int(2e9))


class Arr:
    array = staticmethod(lambda x=0, size=N: [x() if callable(x) else x for _ in range(size)])
    array2d = staticmethod(lambda x=0, rows=N, cols=M: [Arr.array(x, cols) for _ in range(rows)])
    graph = staticmethod(lambda size=N: [[] for _ in range(size)])


class Math:
    max = staticmethod(lambda a, b: a if a > b else b)
    min = staticmethod(lambda a, b: a if a < b else b)


class Std:
    class KMP:
        def __init__(self, t: str):
            """Initializes the KMP object with a text string."""
            self.t = t

        def _calc_max_match_lengths(self, p: str) -> List[int]:
            """Constructs the maximum match lengths table for the pattern.

            Args:
                p (str): The pattern string for which the table is constructed.

            Returns:
                List[int]: The list of maximum match lengths for the pattern string.
            """
            mml_, max_len = Arr.array(0, len(p)), 0  # Initialize max match lengths array and max length
            for i in range(1, len(p)):
                while max_len > 0 and p[max_len] != p[i]:  # Backtrack to find the longest prefix which is also a suffix
                    max_len = mml_[max_len - 1]
                if p[max_len] == p[i]:  # If characters match, extend the length of the prefix
                    max_len += 1
                mml_[i] = max_len  # Store the max length at this position
            return mml_

        def search(self, p: str) -> List[int]:
            """Searches for all occurrences of the pattern in the text.

            Args:
                p (str): The pattern string to search for within the text.

            Returns:
                List[int]: A list of starting indices where the pattern is found in the text.
            """
            mml_ = self._calc_max_match_lengths(p)  # Compute max match lengths for the pattern
            pos_ = []  # List to store the starting indices of matches
            cnt = 0  # Number of characters currently matched
            for i in range(len(self.t)):
                while cnt > 0 and p[cnt] != self.t[i]:  # If there's a mismatch, backtrack using max match lengths table
                    cnt = mml_[cnt - 1]
                if p[cnt] == self.t[i]:  # If characters match, advance the match length
                    cnt += 1
                if cnt == len(p):  # If a full match is found, record the start position and backtrack
                    pos_.append(i - len(p) + 1)
                    cnt = mml_[cnt - 1]

            return pos_

# ————————————————————— Division line ——————————————————————


class Solution:
    def beautifulIndices(self, s: str, a: str, b: str, k: int) -> List[int]:
        kmp = Std.KMP(s)
        a_id_ = kmp.search(a)
        b_id_ = kmp.search(b)

        ans = []
        if not (a_id_ and b_id_):
            return []

        for i in a_id_:
            ans_l, ans_r = 0, 0
            l, r = 0, len(b_id_) - 1
            while l < r:
                mid = l + r >> 1
                if b_id_[mid] >= i - k:
                    r = mid
                else:
                    l = mid + 1
            ans_l = r if b_id_[r] >= i - k else INF

            l, r = 0, len(b_id_) - 1
            while l < r:
                mid = l + r + 1 >> 1
                if b_id_[mid] <= i + k:
                    l = mid
                else:
                    r = mid - 1
            ans_r = r if b_id_[r] <= i + k else -INF

            if ans_r >= ans_l:
                ans.append(i)
        return ans