Advanced String Problems
Explore advanced string processing algorithms for complex text analysis and manipulation.
Longest Common Substring
Dynamic Programming Approach
// Find longest common substring using DP
string longestCommonSubstring(string s1, string s2) {
int m = s1.length();
int n = s2.length();
vector<vector<int>> dp(m + 1, vector<int>(n + 1, 0));
int maxLength = 0;
int endIndex = 0;
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (s1[i - 1] == s2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1] + 1;
if (dp[i][j] > maxLength) {
maxLength = dp[i][j];
endIndex = i - 1;
}
} else {
dp[i][j] = 0;
}
}
}
return s1.substr(endIndex - maxLength + 1, maxLength);
}
Space-Optimized Approach
// Space-optimized longest common substring
string longestCommonSubstringOptimized(string s1, string s2) {
int m = s1.length();
int n = s2.length();
vector<int> prev(n + 1, 0);
vector<int> curr(n + 1, 0);
int maxLength = 0;
int endIndex = 0;
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (s1[i - 1] == s2[j - 1]) {
curr[j] = prev[j - 1] + 1;
if (curr[j] > maxLength) {
maxLength = curr[j];
endIndex = i - 1;
}
} else {
curr[j] = 0;
}
}
prev = curr;
fill(curr.begin(), curr.end(), 0);
}
return s1.substr(endIndex - maxLength + 1, maxLength);
}
Longest Palindromic Substring
Expand Around Centers
// Find longest palindromic substring
string longestPalindrome(string s) {
if (s.empty()) return "";
int start = 0, maxLength = 1;
for (int i = 0; i < s.length(); i++) {
// Check for odd length palindromes
int len1 = expandAroundCenter(s, i, i);
// Check for even length palindromes
int len2 = expandAroundCenter(s, i, i + 1);
int len = max(len1, len2);
if (len > maxLength) {
maxLength = len;
start = i - (len - 1) / 2;
}
}
return s.substr(start, maxLength);
}
int expandAroundCenter(string s, int left, int right) {
while (left >= 0 && right < s.length() && s[left] == s[right]) {
left--;
right++;
}
return right - left - 1;
}
Manacher's Algorithm
// Manacher's algorithm for longest palindromic substring
string manacherAlgorithm(string s) {
if (s.empty()) return "";
// Transform string to handle even length palindromes
string transformed = "#";
for (char c : s) {
transformed += c;
transformed += "#";
}
int n = transformed.length();
vector<int> radius(n, 0);
int center = 0, right = 0;
int maxRadius = 0, maxCenter = 0;
for (int i = 0; i < n; i++) {
if (i < right) {
radius[i] = min(right - i, radius[2 * center - i]);
}
// Try to expand palindrome centered at i
while (i + radius[i] + 1 < n && i - radius[i] - 1 >= 0 &&
transformed[i + radius[i] + 1] == transformed[i - radius[i] - 1]) {
radius[i]++;
}
// Update center and right if necessary
if (i + radius[i] > right) {
center = i;
right = i + radius[i];
}
// Update maximum radius
if (radius[i] > maxRadius) {
maxRadius = radius[i];
maxCenter = i;
}
}
// Extract the longest palindrome
string result = "";
for (int i = maxCenter - maxRadius; i <= maxCenter + maxRadius; i++) {
if (transformed[i] != '#') {
result += transformed[i];
}
}
return result;
}
String Compression
Run-Length Encoding
// Compress string using run-length encoding
string compressString(string s) {
if (s.empty()) return s;
string compressed = "";
int count = 1;
for (int i = 1; i < s.length(); i++) {
if (s[i] == s[i - 1]) {
count++;
} else {
compressed += s[i - 1] + to_string(count);
count = 1;
}
}
compressed += s.back() + to_string(count);
return compressed.length() < s.length() ? compressed : s;
}
LZW Compression
// LZW compression algorithm
vector<int> lzwCompress(string s) {
unordered_map<string, int> dictionary;
vector<int> result;
// Initialize dictionary with single characters
for (int i = 0; i < 256; i++) {
dictionary[string(1, char(i))] = i;
}
string current = "";
int nextCode = 256;
for (char c : s) {
string next = current + c;
if (dictionary.find(next) != dictionary.end()) {
current = next;
} else {
result.push_back(dictionary[current]);
dictionary[next] = nextCode++;
current = string(1, c);
}
}
if (!current.empty()) {
result.push_back(dictionary[current]);
}
return result;
}
Edit Distance
Levenshtein Distance
// Calculate edit distance between two strings
int editDistance(string s1, string s2) {
int m = s1.length();
int n = s2.length();
vector<vector<int>> dp(m + 1, vector<int>(n + 1, 0));
// Initialize base cases
for (int i = 0; i <= m; i++) {
dp[i][0] = i;
}
for (int j = 0; j <= n; j++) {
dp[0][j] = j;
}
// Fill the DP table
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (s1[i - 1] == s2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1];
} else {
dp[i][j] = 1 + min({
dp[i - 1][j], // Delete
dp[i][j - 1], // Insert
dp[i - 1][j - 1] // Replace
});
}
}
}
return dp[m][n];
}
Space-Optimized Edit Distance
// Space-optimized edit distance
int editDistanceOptimized(string s1, string s2) {
int m = s1.length();
int n = s2.length();
vector<int> prev(n + 1, 0);
vector<int> curr(n + 1, 0);
// Initialize base cases
for (int j = 0; j <= n; j++) {
prev[j] = j;
}
for (int i = 1; i <= m; i++) {
curr[0] = i;
for (int j = 1; j <= n; j++) {
if (s1[i - 1] == s2[j - 1]) {
curr[j] = prev[j - 1];
} else {
curr[j] = 1 + min({
prev[j], // Delete
curr[j - 1], // Insert
prev[j - 1] // Replace
});
}
}
prev = curr;
}
return prev[n];
}
Longest Common Subsequence
// Find longest common subsequence
string longestCommonSubsequence(string s1, string s2) {
int m = s1.length();
int n = s2.length();
vector<vector<int>> dp(m + 1, vector<int>(n + 1, 0));
// Fill DP table
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (s1[i - 1] == s2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1] + 1;
} else {
dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]);
}
}
}
// Reconstruct LCS
string lcs = "";
int i = m, j = n;
while (i > 0 && j > 0) {
if (s1[i - 1] == s2[j - 1]) {
lcs = s1[i - 1] + lcs;
i--;
j--;
} else if (dp[i - 1][j] > dp[i][j - 1]) {
i--;
} else {
j--;
}
}
return lcs;
}
String Transformation
Minimum Operations to Make Strings Equal
// Minimum operations to make two strings equal
int minOperations(string s1, string s2) {
int m = s1.length();
int n = s2.length();
if (m != n) return -1; // Cannot make equal if lengths differ
vector<int> count(256, 0);
// Count characters in both strings
for (int i = 0; i < m; i++) {
count[s1[i]]++;
count[s2[i]]--;
}
// Check if all counts are zero
for (int i = 0; i < 256; i++) {
if (count[i] != 0) return -1;
}
// Find minimum operations using two pointers
int i = m - 1, j = n - 1;
int operations = 0;
while (i >= 0 && j >= 0) {
if (s1[i] == s2[j]) {
i--;
j--;
} else {
i--;
operations++;
}
}
return operations;
}
String Rotation
// Check if one string is rotation of another
bool isRotation(string s1, string s2) {
if (s1.length() != s2.length()) return false;
string combined = s1 + s1;
return combined.find(s2) != string::npos;
}
Anagram Detection
// Check if two strings are anagrams
bool areAnagrams(string s1, string s2) {
if (s1.length() != s2.length()) return false;
vector<int> count(256, 0);
for (int i = 0; i < s1.length(); i++) {
count[s1[i]]++;
count[s2[i]]--;
}
for (int i = 0; i < 256; i++) {
if (count[i] != 0) return false;
}
return true;
}
Performance Analysis
Time Complexity
- Longest Common Substring: O(m × n)
- Longest Palindromic Substring: O(n²) or O(n) with Manacher's
- String Compression: O(n)
- Edit Distance: O(m × n)
- String Transformation: O(n)
Space Complexity
- Most algorithms: O(m × n) for DP, O(1) for optimized versions
- Manacher's Algorithm: O(n)
- LZW Compression: O(n) for dictionary
Common Patterns
- Dynamic programming for string comparison
- Two pointers for palindromic problems
- Hash maps for character counting
- Sliding window for substring problems
- Greedy algorithms for transformation problems
Applications
- Text processing: String analysis and manipulation
- Bioinformatics: DNA sequence comparison
- Data compression: String compression algorithms
- Spell checkers: Edit distance for suggestions
- Plagiarism detection: String similarity analysis