# 哈希滚动字符串匹配算法理解与模除运算法则
string = "It is a test, but not just a test."
sub_string = "test"
string_len = len(string)
sub_len = len(sub_string)
# 暴力匹配
for i in range(string_len):
if string[i:i + sub_len] == sub_string:
# 打印匹配起始索引
print(f"{sub_string} is found at index {i}.")
# 再原始一点
for i in range(len(string)):
match = True
for j in range(sub_len):
if string[i + j] != sub_string[j]:
match = False
break
if match:
print(f"{sub_string} is found at index {i}.")
# 哈希滚动 —— Rabin Karp 算法
# 散列空间大小
base = 256
# 模运算除数
modulus = 101
# 用于哈希滚动时每次去除首字符的哈希成分,
# 使其余部分不受影响,避免多次重复计算
h = 1
for i in range(sub_len-1):
h = (h*base) % modulus
string_hash = 0
sub_hash = 0
for i in range(sub_len):
# 从字符串首部计算与子字符串等长字符串的哈希值
string_hash = (base * string_hash + ord(string[i])) % modulus
# 计算匹配子字符串哈希值
sub_hash = (base*sub_hash+ord(sub_string[i])) % modulus
for i in range(string_len - sub_len):
if (string_hash == sub_hash):
print(f"{sub_string} is found at index {i}.")
tail_hash = base*(string_hash-ord(string[i])*h) # 去除首字符的哈希成分,基本原理是模除运算法则
string_hash = (tail_hash+ord(string[i+sub_len])) % modulus