Skip to main content

Advanced String Problems

Explore advanced string processing algorithms for complex text analysis and manipulation.

Longest Common Substring

Dynamic Programming Approach

// Find longest common substring using DP
string longestCommonSubstring(string s1, string s2) {
int m = s1.length();
int n = s2.length();

vector<vector<int>> dp(m + 1, vector<int>(n + 1, 0));
int maxLength = 0;
int endIndex = 0;

for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (s1[i - 1] == s2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1] + 1;
if (dp[i][j] > maxLength) {
maxLength = dp[i][j];
endIndex = i - 1;
}
} else {
dp[i][j] = 0;
}
}
}

return s1.substr(endIndex - maxLength + 1, maxLength);
}

Space-Optimized Approach

// Space-optimized longest common substring
string longestCommonSubstringOptimized(string s1, string s2) {
int m = s1.length();
int n = s2.length();

vector<int> prev(n + 1, 0);
vector<int> curr(n + 1, 0);

int maxLength = 0;
int endIndex = 0;

for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (s1[i - 1] == s2[j - 1]) {
curr[j] = prev[j - 1] + 1;
if (curr[j] > maxLength) {
maxLength = curr[j];
endIndex = i - 1;
}
} else {
curr[j] = 0;
}
}
prev = curr;
fill(curr.begin(), curr.end(), 0);
}

return s1.substr(endIndex - maxLength + 1, maxLength);
}

Longest Palindromic Substring

Expand Around Centers

// Find longest palindromic substring
string longestPalindrome(string s) {
if (s.empty()) return "";

int start = 0, maxLength = 1;

for (int i = 0; i < s.length(); i++) {
// Check for odd length palindromes
int len1 = expandAroundCenter(s, i, i);
// Check for even length palindromes
int len2 = expandAroundCenter(s, i, i + 1);

int len = max(len1, len2);
if (len > maxLength) {
maxLength = len;
start = i - (len - 1) / 2;
}
}

return s.substr(start, maxLength);
}

int expandAroundCenter(string s, int left, int right) {
while (left >= 0 && right < s.length() && s[left] == s[right]) {
left--;
right++;
}
return right - left - 1;
}

Manacher's Algorithm

// Manacher's algorithm for longest palindromic substring
string manacherAlgorithm(string s) {
if (s.empty()) return "";

// Transform string to handle even length palindromes
string transformed = "#";
for (char c : s) {
transformed += c;
transformed += "#";
}

int n = transformed.length();
vector<int> radius(n, 0);
int center = 0, right = 0;
int maxRadius = 0, maxCenter = 0;

for (int i = 0; i < n; i++) {
if (i < right) {
radius[i] = min(right - i, radius[2 * center - i]);
}

// Try to expand palindrome centered at i
while (i + radius[i] + 1 < n && i - radius[i] - 1 >= 0 &&
transformed[i + radius[i] + 1] == transformed[i - radius[i] - 1]) {
radius[i]++;
}

// Update center and right if necessary
if (i + radius[i] > right) {
center = i;
right = i + radius[i];
}

// Update maximum radius
if (radius[i] > maxRadius) {
maxRadius = radius[i];
maxCenter = i;
}
}

// Extract the longest palindrome
string result = "";
for (int i = maxCenter - maxRadius; i <= maxCenter + maxRadius; i++) {
if (transformed[i] != '#') {
result += transformed[i];
}
}

return result;
}

String Compression

Run-Length Encoding

// Compress string using run-length encoding
string compressString(string s) {
if (s.empty()) return s;

string compressed = "";
int count = 1;

for (int i = 1; i < s.length(); i++) {
if (s[i] == s[i - 1]) {
count++;
} else {
compressed += s[i - 1] + to_string(count);
count = 1;
}
}

compressed += s.back() + to_string(count);

return compressed.length() < s.length() ? compressed : s;
}

LZW Compression

// LZW compression algorithm
vector<int> lzwCompress(string s) {
unordered_map<string, int> dictionary;
vector<int> result;

// Initialize dictionary with single characters
for (int i = 0; i < 256; i++) {
dictionary[string(1, char(i))] = i;
}

string current = "";
int nextCode = 256;

for (char c : s) {
string next = current + c;

if (dictionary.find(next) != dictionary.end()) {
current = next;
} else {
result.push_back(dictionary[current]);
dictionary[next] = nextCode++;
current = string(1, c);
}
}

if (!current.empty()) {
result.push_back(dictionary[current]);
}

return result;
}

Edit Distance

Levenshtein Distance

// Calculate edit distance between two strings
int editDistance(string s1, string s2) {
int m = s1.length();
int n = s2.length();

vector<vector<int>> dp(m + 1, vector<int>(n + 1, 0));

// Initialize base cases
for (int i = 0; i <= m; i++) {
dp[i][0] = i;
}
for (int j = 0; j <= n; j++) {
dp[0][j] = j;
}

// Fill the DP table
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (s1[i - 1] == s2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1];
} else {
dp[i][j] = 1 + min({
dp[i - 1][j], // Delete
dp[i][j - 1], // Insert
dp[i - 1][j - 1] // Replace
});
}
}
}

return dp[m][n];
}

Space-Optimized Edit Distance

// Space-optimized edit distance
int editDistanceOptimized(string s1, string s2) {
int m = s1.length();
int n = s2.length();

vector<int> prev(n + 1, 0);
vector<int> curr(n + 1, 0);

// Initialize base cases
for (int j = 0; j <= n; j++) {
prev[j] = j;
}

for (int i = 1; i <= m; i++) {
curr[0] = i;
for (int j = 1; j <= n; j++) {
if (s1[i - 1] == s2[j - 1]) {
curr[j] = prev[j - 1];
} else {
curr[j] = 1 + min({
prev[j], // Delete
curr[j - 1], // Insert
prev[j - 1] // Replace
});
}
}
prev = curr;
}

return prev[n];
}

Longest Common Subsequence

// Find longest common subsequence
string longestCommonSubsequence(string s1, string s2) {
int m = s1.length();
int n = s2.length();

vector<vector<int>> dp(m + 1, vector<int>(n + 1, 0));

// Fill DP table
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (s1[i - 1] == s2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1] + 1;
} else {
dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]);
}
}
}

// Reconstruct LCS
string lcs = "";
int i = m, j = n;
while (i > 0 && j > 0) {
if (s1[i - 1] == s2[j - 1]) {
lcs = s1[i - 1] + lcs;
i--;
j--;
} else if (dp[i - 1][j] > dp[i][j - 1]) {
i--;
} else {
j--;
}
}

return lcs;
}

String Transformation

Minimum Operations to Make Strings Equal

// Minimum operations to make two strings equal
int minOperations(string s1, string s2) {
int m = s1.length();
int n = s2.length();

if (m != n) return -1; // Cannot make equal if lengths differ

vector<int> count(256, 0);

// Count characters in both strings
for (int i = 0; i < m; i++) {
count[s1[i]]++;
count[s2[i]]--;
}

// Check if all counts are zero
for (int i = 0; i < 256; i++) {
if (count[i] != 0) return -1;
}

// Find minimum operations using two pointers
int i = m - 1, j = n - 1;
int operations = 0;

while (i >= 0 && j >= 0) {
if (s1[i] == s2[j]) {
i--;
j--;
} else {
i--;
operations++;
}
}

return operations;
}

String Rotation

// Check if one string is rotation of another
bool isRotation(string s1, string s2) {
if (s1.length() != s2.length()) return false;

string combined = s1 + s1;
return combined.find(s2) != string::npos;
}

Anagram Detection

// Check if two strings are anagrams
bool areAnagrams(string s1, string s2) {
if (s1.length() != s2.length()) return false;

vector<int> count(256, 0);

for (int i = 0; i < s1.length(); i++) {
count[s1[i]]++;
count[s2[i]]--;
}

for (int i = 0; i < 256; i++) {
if (count[i] != 0) return false;
}

return true;
}

Performance Analysis

Time Complexity

  • Longest Common Substring: O(m × n)
  • Longest Palindromic Substring: O(n²) or O(n) with Manacher's
  • String Compression: O(n)
  • Edit Distance: O(m × n)
  • String Transformation: O(n)

Space Complexity

  • Most algorithms: O(m × n) for DP, O(1) for optimized versions
  • Manacher's Algorithm: O(n)
  • LZW Compression: O(n) for dictionary

Common Patterns

  1. Dynamic programming for string comparison
  2. Two pointers for palindromic problems
  3. Hash maps for character counting
  4. Sliding window for substring problems
  5. Greedy algorithms for transformation problems

Applications

  • Text processing: String analysis and manipulation
  • Bioinformatics: DNA sequence comparison
  • Data compression: String compression algorithms
  • Spell checkers: Edit distance for suggestions
  • Plagiarism detection: String similarity analysis