Rabin Karp Algorithm
Definition​
- Definition
- Explanation
- Guidance
- Tips
The Rabin-Karp Algorithm is a string-searching algorithm that efficiently finds the occurrence of a pattern within a text. It's particularly useful when the pattern searching involves multiple occurrences in the text
Hashing the pattern and each substring of the text, then comparing these hashes to find matches. To avoid recomputing hashes for each substring, it uses a rolling hash function. This function efficiently updates the hash value by subtracting the contribution of the outgoing character and adding the contribution of the incoming character. When a match is found, it verifies the match by comparing the actual characters. By utilizing hashing, it significantly reduces the number of comparisons needed, leading to faster search times
- Compute the hash value of the pattern
- Slide the pattern over the text, computing the hash of each substring using the rolling hash function
- Compare the hash values of the pattern and the current substring
- If the hash values match, compare the actual characters to confirm the match
- If a match is confirmed, record the position/index of the match
- Continue sliding the pattern until the end of the text is reached
- use a rolling hash function to efficiently update hash values
- employ a hash function that minimizes collisions for better performance
- choose a base and a prime number carefully to avoid hash collisions
- utilize modular arithmetic to prevent overflow when dealing with large hash values
Practice​
- Practice
- Solution
rabin_karp(text, pattern):
n = length(text)
m = length(pattern)
prime = a large prime number
base = a base value for the hash function
pattern_hash = hash(pattern) # Compute hash of the pattern
text_hash = hash(text[0:m]) # Compute hash of the initial substring of text
for i from 0 to n-m:
if pattern_hash == text_hash:
if pattern == text[i:i+m]:
print "Pattern found at index", i
if i < n-m:
text_hash = (text_hash - ord(text[i]) * base**m) * base + ord(text[i+m])
# Rolling hash function
hash(s):
hash_value = 0
for character in s:
hash_value = (hash_value * base + ord(character)) % prime
return hash_value
package main
const (
prime = 101
)
func rabinKarp(text, pattern string) []int {
var result []int
n, m := len(text), len(pattern)
if n < m {
return result
}
h := 1
for i := 0; i < m-1; i++ {
h = (h * 256) % prime
}
patternHash := 0
windowHash := 0
for i := 0; i < m; i++ {
patternHash = (256*patternHash + int(pattern[i])) % prime
windowHash = (256*windowHash + int(text[i])) % prime
}
for i := 0; i <= n-m; i++ {
if patternHash == windowHash {
match := true
for j := 0; j < m; j++ {
if text[i+j] != pattern[j] {
match = false
break
}
}
if match {
result = append(result, i)
}
}
if i < n-m {
windowHash = (256*(windowHash-int(text[i])*h) + int(text[i+m])) % prime
if windowHash < 0 {
windowHash += prime
}
}
}
return result
}
import java.util.ArrayList;
import java.util.List;
public class RabinKarp {
private static final int prime = 101;
public static List<Integer> rabinKarp(String text, String pattern) {
List<Integer> indices = new ArrayList<>();
int n = text.length();
int m = pattern.length();
if (n < m) {
return indices;
}
int h = 1;
for (int i = 0; i < m - 1; i++) {
h = (h * 256) % prime;
}
int patternHash = 0;
int windowHash = 0;
for (int i = 0; i < m; i++) {
patternHash = (256 * patternHash + pattern.charAt(i)) % prime;
windowHash = (256 * windowHash + text.charAt(i)) % prime;
}
for (int i = 0; i <= n - m; i++) {
if (patternHash == windowHash) {
boolean match = true;
for (int j = 0; j < m; j++) {
if (text.charAt(i + j) != pattern.charAt(j)) {
match = false;
break;
}
}
if (match) {
indices.add(i);
}
}
if (i < n - m) {
windowHash = (256 * (windowHash - text.charAt(i) * h) + text.charAt(i + m)) % prime;
if (windowHash < 0) {
windowHash += prime;
}
}
}
return indices;
}
}
function rabinKarp(text, pattern) {
const prime = 101;
const result = [];
const n = text.length;
const m = pattern.length;
if (n < m) {
return result;
}
let h = 1;
for (let i = 0; i < m - 1; i++) {
h = (h * 256) % prime;
}
let patternHash = 0;
let windowHash = 0;
for (let i = 0; i < m; i++) {
patternHash = (256 * patternHash + pattern.charCodeAt(i)) % prime;
windowHash = (256 * windowHash + text.charCodeAt(i)) % prime;
}
for (let i = 0; i <= n - m; i++) {
if (patternHash === windowHash) {
let match = true;
for (let j = 0; j < m; j++) {
if (text[i + j] !== pattern[j]) {
match = false;
break;
}
}
if (match) {
result.push(i);
}
}
if (i < n - m) {
windowHash =
(256 * (windowHash - text.charCodeAt(i) * h) + text.charCodeAt(i + m)) %
prime;
if (windowHash < 0) {
windowHash += prime;
}
}
}
return result;
}
fun rabinKarp(text: String, pattern: String): List<Int> {
val prime = 101
val result = mutableListOf<Int>()
val n = text.length
val m = pattern.length
if (n < m) {
return result
}
var h = 1
repeat(m - 1) {
h = h * 256 % prime
}
var patternHash = 0
var windowHash = 0
for (i in 0 until m) {
patternHash = (256 * patternHash + pattern[i].toInt()) % prime
windowHash = (256 * windowHash + text[i].toInt()) % prime
}
for (i in 0..n - m) {
if (patternHash == windowHash) {
var match = true
for (j in 0 until m) {
if (text[i + j] != pattern[j]) {
match = false
break
}
}
if (match) {
result.add(i)
}
}
if (i < n - m) {
windowHash = (256 * (windowHash - text[i].toInt() * h) + text[i + m].toInt()) % prime
if (windowHash < 0) {
windowHash += prime
}
}
}
return result
}
def rabin_karp(text, pattern):
prime = 101
result = []
n, m = len(text), len(pattern)
if n < m:
return result
h = 1
for _ in range(m - 1):
h = (h * 256) % prime
pattern_hash = 0
window_hash = 0
for i in range(m):
pattern_hash = (256 * pattern_hash + ord(pattern[i])) % prime
window_hash = (256 * window_hash + ord(text[i])) % prime
for i in range(n - m + 1):
if pattern_hash == window_hash:
match = True
for j in range(m):
if text[i + j] != pattern[j]:
match = False
break
if match:
result.append(i)
if i < n - m:
window_hash = (256 * (window_hash - ord(text[i]) * h) + ord(text[i + m])) % prime
return result
fn rabin_karp(text: &str, pattern: &str) -> Vec<usize> {
const PRIME: usize = 101;
let mut result = Vec::new();
let n = text.len();
let m = pattern.len();
if n < m {
return result;
}
let mut h = 1;
for _ in 0..m - 1 {
h = (h * 256) % PRIME;
}
let mut pattern_hash = 0;
let mut window_hash = 0;
for i in 0..m {
pattern_hash = (256 * pattern_hash + pattern.as_bytes()[i] as usize) % PRIME;
window_hash = (256 * window_hash + text.as_bytes()[i] as usize) % PRIME;
}
for i in 0..=n - m {
if pattern_hash == window_hash {
let mut match_found = true;
for j in 0..m {
if text.as_bytes()[i + j] != pattern.as_bytes()[j] {
match_found = false;
break;
}
}
if match_found {
result.push(i);
}
}
if i < n - m {
window_hash =
(256 * (window_hash as isize - text.as_bytes()[i] as isize * h as isize) as usize
+ text.as_bytes()[i + m] as usize)
% PRIME;
if window_hash < 0 {
window_hash += PRIME;
}
}
}
result
}
function rabinKarp(text: string, pattern: string): number[] {
const prime = 101;
const result: number[] = [];
const n = text.length;
const m = pattern.length;
if (n < m) {
return result;
}
let h = 1;
for (let i = 0; i < m - 1; i++) {
h = (h * 256) % prime;
}
let patternHash = 0;
let windowHash = 0;
for (let i = 0; i < m; i++) {
patternHash = (256 * patternHash + pattern.charCodeAt(i)) % prime;
windowHash = (256 * windowHash + text.charCodeAt(i)) % prime;
}
for (let i = 0; i <= n - m; i++) {
if (patternHash === windowHash) {
let match = true;
for (let j = 0; j < m; j++) {
if (text[i + j] !== pattern[j]) {
match = false;
break;
}
}
if (match) {
result.push(i);
}
}
if (i < n - m) {
windowHash =
(256 * (windowHash - text.charCodeAt(i) * h) + text.charCodeAt(i + m)) %
prime;
if (windowHash < 0) {
windowHash += prime;
}
}
}
return result;
}