Pattern Matching

Master various string pattern matching algorithms for efficient text processing and analysis.

Naive String Matching

Basic Naive Algorithm

// Naive string matching algorithm
vector<int> naiveSearch(string text, string pattern) {
    vector<int> result;
    int n = text.length();
    int m = pattern.length();
    
    for (int i = 0; i <= n - m; i++) {
        int j;
        for (j = 0; j < m; j++) {
            if (text[i + j] != pattern[j]) {
                break;
            }
        }
        if (j == m) {
            result.push_back(i);
        }
    }
    
    return result;
}

Time Complexity

Time: O((n - m + 1) × m) in worst case
Space: O(1)
Best case: O(n) when pattern not found
Worst case: O(n × m) when all characters match except last

KMP Algorithm

KMP with Failure Function

// Build failure function for KMP
vector<int> buildFailureFunction(string pattern) {
    int m = pattern.length();
    vector<int> failure(m, 0);
    
    int j = 0;
    for (int i = 1; i < m; i++) {
        while (j > 0 && pattern[i] != pattern[j]) {
            j = failure[j - 1];
        }
        if (pattern[i] == pattern[j]) {
            j++;
        }
        failure[i] = j;
    }
    
    return failure;
}

// KMP string matching
vector<int> kmpSearch(string text, string pattern) {
    vector<int> result;
    int n = text.length();
    int m = pattern.length();
    
    vector<int> failure = buildFailureFunction(pattern);
    
    int j = 0;
    for (int i = 0; i < n; i++) {
        while (j > 0 && text[i] != pattern[j]) {
            j = failure[j - 1];
        }
        if (text[i] == pattern[j]) {
            j++;
        }
        if (j == m) {
            result.push_back(i - m + 1);
            j = failure[j - 1];
        }
    }
    
    return result;
}

KMP Advantages

Time complexity: O(n + m)
Space complexity: O(m)
No backtracking: Efficient for large texts
Pattern preprocessing: Failure function computed once

Rabin-Karp Algorithm

Rolling Hash Implementation

// Rabin-Karp with rolling hash
vector<int> rabinKarpSearch(string text, string pattern) {
    vector<int> result;
    int n = text.length();
    int m = pattern.length();
    
    if (m > n) return result;
    
    const int base = 256;
    const int mod = 1000000007;
    
    // Calculate hash of pattern
    long long patternHash = 0;
    long long textHash = 0;
    long long h = 1;
    
    for (int i = 0; i < m - 1; i++) {
        h = (h * base) % mod;
    }
    
    for (int i = 0; i < m; i++) {
        patternHash = (base * patternHash + pattern[i]) % mod;
        textHash = (base * textHash + text[i]) % mod;
    }
    
    // Slide the pattern over text
    for (int i = 0; i <= n - m; i++) {
        if (patternHash == textHash) {
            // Verify match character by character
            bool match = true;
            for (int j = 0; j < m; j++) {
                if (text[i + j] != pattern[j]) {
                    match = false;
                    break;
                }
            }
            if (match) {
                result.push_back(i);
            }
        }
        
        // Calculate hash for next window
        if (i < n - m) {
            textHash = (base * (textHash - text[i] * h) + text[i + m]) % mod;
            if (textHash < 0) {
                textHash += mod;
            }
        }
    }
    
    return result;
}

Rolling Hash Benefits

Average case: O(n + m)
Worst case: O(n × m) due to hash collisions
Space: O(1)
Multiple patterns: Can search for multiple patterns simultaneously

Z-Algorithm

Z-Array Construction

// Build Z-array for string
vector<int> buildZArray(string s) {
    int n = s.length();
    vector<int> z(n, 0);
    
    int l = 0, r = 0;
    for (int i = 1; i < n; i++) {
        if (i <= r) {
            z[i] = min(r - i + 1, z[i - l]);
        }
        
        while (i + z[i] < n && s[z[i]] == s[i + z[i]]) {
            z[i]++;
        }
        
        if (i + z[i] - 1 > r) {
            l = i;
            r = i + z[i] - 1;
        }
    }
    
    return z;
}

// Z-algorithm for pattern matching
vector<int> zAlgorithmSearch(string text, string pattern) {
    string combined = pattern + "$" + text;
    vector<int> z = buildZArray(combined);
    vector<int> result;
    
    int m = pattern.length();
    for (int i = m + 1; i < z.size(); i++) {
        if (z[i] == m) {
            result.push_back(i - m - 1);
        }
    }
    
    return result;
}

Z-Algorithm Properties

Time complexity: O(n + m)
Space complexity: O(n + m)
No preprocessing: Pattern not preprocessed separately
Linear time: Guaranteed O(n + m) performance

Suffix Arrays

Suffix Array Construction

// Build suffix array using sorting
vector<int> buildSuffixArray(string s) {
    int n = s.length();
    vector<int> suffixArray(n);
    vector<int> rank(n);
    
    // Initialize with single characters
    for (int i = 0; i < n; i++) {
        suffixArray[i] = i;
        rank[i] = s[i];
    }
    
    // Sort suffixes by first 2^k characters
    for (int k = 1; k < n; k *= 2) {
        // Sort by rank
        sort(suffixArray.begin(), suffixArray.end(), [&](int a, int b) {
            if (rank[a] != rank[b]) {
                return rank[a] < rank[b];
            }
            int rankA = (a + k < n) ? rank[a + k] : -1;
            int rankB = (b + k < n) ? rank[b + k] : -1;
            return rankA < rankB;
        });
        
        // Update ranks
        vector<int> newRank(n);
        newRank[suffixArray[0]] = 0;
        for (int i = 1; i < n; i++) {
            if (rank[suffixArray[i]] == rank[suffixArray[i - 1]] &&
                rank[suffixArray[i] + k] == rank[suffixArray[i - 1] + k]) {
                newRank[suffixArray[i]] = newRank[suffixArray[i - 1]];
            } else {
                newRank[suffixArray[i]] = i;
            }
        }
        rank = newRank;
    }
    
    return suffixArray;
}

Longest Common Prefix (LCP)

// Build LCP array
vector<int> buildLCPArray(string s, vector<int>& suffixArray) {
    int n = s.length();
    vector<int> lcp(n, 0);
    vector<int> rank(n);
    
    // Build rank array
    for (int i = 0; i < n; i++) {
        rank[suffixArray[i]] = i;
    }
    
    int k = 0;
    for (int i = 0; i < n; i++) {
        if (rank[i] == n - 1) {
            k = 0;
            continue;
        }
        
        int j = suffixArray[rank[i] + 1];
        while (i + k < n && j + k < n && s[i + k] == s[j + k]) {
            k++;
        }
        
        lcp[rank[i]] = k;
        if (k > 0) k--;
    }
    
    return lcp;
}

Pattern Search in Suffix Array

// Search pattern in suffix array
vector<int> searchInSuffixArray(string text, string pattern, vector<int>& suffixArray) {
    vector<int> result;
    int n = text.length();
    int m = pattern.length();
    
    // Binary search for pattern
    int left = 0, right = n - 1;
    while (left <= right) {
        int mid = (left + right) / 2;
        int cmp = text.compare(suffixArray[mid], m, pattern);
        
        if (cmp == 0) {
            // Found match, find all occurrences
            int start = mid;
            while (start > 0 && text.compare(suffixArray[start - 1], m, pattern) == 0) {
                start--;
            }
            
            int end = mid;
            while (end < n - 1 && text.compare(suffixArray[end + 1], m, pattern) == 0) {
                end++;
            }
            
            for (int i = start; i <= end; i++) {
                result.push_back(suffixArray[i]);
            }
            break;
        } else if (cmp < 0) {
            left = mid + 1;
        } else {
            right = mid - 1;
        }
    }
    
    return result;
}

Performance Comparison

Algorithm	Time Complexity	Space Complexity	Best Use Case
Naive	O((n-m+1)×m)	O(1)	Simple cases, small patterns
KMP	O(n + m)	O(m)	General purpose, no backtracking
Rabin-Karp	O(n + m) avg	O(1)	Multiple patterns, rolling hash
Z-Algorithm	O(n + m)	O(n + m)	Pattern matching, string analysis
Suffix Array	O(n log n)	O(n)	Multiple queries, string analysis

Common Patterns

Single pattern search: KMP or Z-algorithm
Multiple pattern search: Rabin-Karp or Aho-Corasick
String analysis: Suffix arrays and LCP
Rolling hash: Rabin-Karp for multiple patterns
No backtracking: KMP for large texts

Applications

Text editors: Find and replace operations
Search engines: Pattern matching in documents
Bioinformatics: DNA sequence analysis
Data compression: String compression algorithms
Network security: Intrusion detection systems

Naive String Matching​

Basic Naive Algorithm​

Time Complexity​

KMP Algorithm​

KMP with Failure Function​

KMP Advantages​

Rabin-Karp Algorithm​

Rolling Hash Implementation​

Rolling Hash Benefits​

Z-Algorithm​

Z-Array Construction​

Z-Algorithm Properties​

Suffix Arrays​

Suffix Array Construction​

Longest Common Prefix (LCP)​

Pattern Search in Suffix Array​

Performance Comparison​

Common Patterns​

Applications​

Naive String Matching

Basic Naive Algorithm

Time Complexity

KMP Algorithm

KMP with Failure Function

KMP Advantages

Rabin-Karp Algorithm

Rolling Hash Implementation

Rolling Hash Benefits

Z-Algorithm

Z-Array Construction

Z-Algorithm Properties

Suffix Arrays

Suffix Array Construction

Longest Common Prefix (LCP)

Pattern Search in Suffix Array

Performance Comparison

Common Patterns

Applications