Disjoint Set Union (Union-Find)
Master the Disjoint Set Union data structure for efficient union and find operations on disjoint sets.
DSU Fundamentals
What is Disjoint Set Union?
Disjoint Set Union (DSU) is a data structure that efficiently supports union and find operations on disjoint sets. It's also known as Union-Find and is used for problems involving connected components, cycle detection, and dynamic connectivity.
Basic Structure
class DisjointSetUnion {
private:
vector<int> parent;
vector<int> rank;
int n;
public:
DisjointSetUnion(int size) {
n = size;
parent.resize(n);
rank.resize(n, 0);
// Initialize each element as its own parent
for (int i = 0; i < n; i++) {
parent[i] = i;
}
}
// Find root of element x
int find(int x) {
if (parent[x] != x) {
parent[x] = find(parent[x]); // Path compression
}
return parent[x];
}
// Union two sets
void unionSets(int x, int y) {
int rootX = find(x);
int rootY = find(y);
if (rootX == rootY) return; // Already in same set
// Union by rank
if (rank[rootX] < rank[rootY]) {
parent[rootX] = rootY;
} else if (rank[rootX] > rank[rootY]) {
parent[rootY] = rootX;
} else {
parent[rootY] = rootX;
rank[rootX]++;
}
}
// Check if two elements are in same set
bool connected(int x, int y) {
return find(x) == find(y);
}
// Get number of connected components
int getComponentCount() {
int count = 0;
for (int i = 0; i < n; i++) {
if (parent[i] == i) {
count++;
}
}
return count;
}
};
Union & Find Operations
Basic Find Operation
// Find root without path compression
int findBasic(int x) {
while (parent[x] != x) {
x = parent[x];
}
return x;
}
// Find root with path compression
int find(int x) {
if (parent[x] != x) {
parent[x] = find(parent[x]); // Path compression
}
return parent[x];
}
Basic Union Operation
// Union without optimization
void unionBasic(int x, int y) {
int rootX = find(x);
int rootY = find(y);
if (rootX != rootY) {
parent[rootX] = rootY;
}
}
// Union with rank optimization
void unionSets(int x, int y) {
int rootX = find(x);
int rootY = find(y);
if (rootX == rootY) return;
if (rank[rootX] < rank[rootY]) {
parent[rootX] = rootY;
} else if (rank[rootX] > rank[rootY]) {
parent[rootY] = rootX;
} else {
parent[rootY] = rootX;
rank[rootX]++;
}
}
Path Compression
Path Compression Implementation
// Path compression during find
int findWithPathCompression(int x) {
if (parent[x] != x) {
parent[x] = findWithPathCompression(parent[x]);
}
return parent[x];
}
// Iterative path compression
int findIterative(int x) {
int root = x;
while (parent[root] != root) {
root = parent[root];
}
// Path compression
while (parent[x] != root) {
int next = parent[x];
parent[x] = root;
x = next;
}
return root;
}
Benefits of Path Compression
- Reduces tree height: Flattens the tree structure
- Improves future queries: Makes subsequent finds faster
- Amortized O(α(n)): Where α is the inverse Ackermann function
- Nearly constant time: For practical purposes
Union by Rank
Rank-based Union
// Union by rank
void unionByRank(int x, int y) {
int rootX = find(x);
int rootY = find(y);
if (rootX == rootY) return;
if (rank[rootX] < rank[rootY]) {
parent[rootX] = rootY;
} else if (rank[rootX] > rank[rootY]) {
parent[rootY] = rootX;
} else {
parent[rootY] = rootX;
rank[rootX]++;
}
}
Union by Size
// Union by size
void unionBySize(int x, int y) {
int rootX = find(x);
int rootY = find(y);
if (rootX == rootY) return;
if (size[rootX] < size[rootY]) {
parent[rootX] = rootY;
size[rootY] += size[rootX];
} else {
parent[rootY] = rootX;
size[rootX] += size[rootY];
}
}
Connected Components
Count Connected Components
// Count number of connected components
int countConnectedComponents(vector<vector<int>>& edges, int n) {
DisjointSetUnion dsu(n);
for (auto& edge : edges) {
dsu.unionSets(edge[0], edge[1]);
}
return dsu.getComponentCount();
}
Find Connected Components
// Get all connected components
vector<vector<int>> getConnectedComponents(vector<vector<int>>& edges, int n) {
DisjointSetUnion dsu(n);
for (auto& edge : edges) {
dsu.unionSets(edge[0], edge[1]);
}
// Group nodes by their root
unordered_map<int, vector<int>> components;
for (int i = 0; i < n; i++) {
int root = dsu.find(i);
components[root].push_back(i);
}
vector<vector<int>> result;
for (auto& [root, nodes] : components) {
result.push_back(nodes);
}
return result;
}
Cycle Detection
Detect Cycle in Undirected Graph
// Detect cycle in undirected graph
bool hasCycle(vector<vector<int>>& edges, int n) {
DisjointSetUnion dsu(n);
for (auto& edge : edges) {
int u = edge[0];
int v = edge[1];
if (dsu.connected(u, v)) {
return true; // Cycle detected
}
dsu.unionSets(u, v);
}
return false;
}
Detect Cycle in Directed Graph
// Detect cycle in directed graph using DSU
bool hasCycleDirected(vector<vector<int>>& edges, int n) {
DisjointSetUnion dsu(n);
for (auto& edge : edges) {
int u = edge[0];
int v = edge[1];
if (dsu.connected(u, v)) {
return true; // Cycle detected
}
dsu.unionSets(u, v);
}
return false;
}
Advanced Applications
Minimum Spanning Tree (Kruskal's Algorithm)
// Kruskal's algorithm using DSU
int kruskalMST(vector<vector<int>>& edges, int n) {
// Sort edges by weight
sort(edges.begin(), edges.end(), [](const vector<int>& a, const vector<int>& b) {
return a[2] < b[2]; // Assuming weight is at index 2
});
DisjointSetUnion dsu(n);
int mstWeight = 0;
int edgesUsed = 0;
for (auto& edge : edges) {
int u = edge[0];
int v = edge[1];
int weight = edge[2];
if (!dsu.connected(u, v)) {
dsu.unionSets(u, v);
mstWeight += weight;
edgesUsed++;
if (edgesUsed == n - 1) {
break; // MST complete
}
}
}
return mstWeight;
}
Number of Islands
// Count number of islands using DSU
int numIslands(vector<vector<char>>& grid) {
if (grid.empty()) return 0;
int m = grid.size();
int n = grid[0].size();
DisjointSetUnion dsu(m * n);
int islands = 0;
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
if (grid[i][j] == '1') {
islands++;
// Check adjacent cells
int directions[4][2] = {{0, 1}, {1, 0}, {0, -1}, {-1, 0}};
for (auto& dir : directions) {
int ni = i + dir[0];
int nj = j + dir[1];
if (ni >= 0 && ni < m && nj >= 0 && nj < n && grid[ni][nj] == '1') {
int current = i * n + j;
int neighbor = ni * n + nj;
if (!dsu.connected(current, neighbor)) {
dsu.unionSets(current, neighbor);
islands--;
}
}
}
}
}
}
return islands;
}
Redundant Connection
// Find redundant connection in graph
vector<int> findRedundantConnection(vector<vector<int>>& edges) {
int n = edges.size();
DisjointSetUnion dsu(n + 1);
for (auto& edge : edges) {
int u = edge[0];
int v = edge[1];
if (dsu.connected(u, v)) {
return edge; // Redundant edge found
}
dsu.unionSets(u, v);
}
return {}; // No redundant edge
}
Performance Analysis
Time Complexity
- Find: O(α(n)) with path compression
- Union: O(α(n)) with union by rank
- Connected: O(α(n))
- α(n): Inverse Ackermann function (nearly constant)
Space Complexity
- O(n): For parent and rank arrays
- O(n): Additional space for size array if needed
Amortized Analysis
- Without optimizations: O(log n) per operation
- With path compression: O(α(n)) per operation
- With union by rank: O(α(n)) per operation
- α(n) < 5: For all practical values of n
Common Patterns
- Connected components in graphs
- Cycle detection in undirected graphs
- Minimum spanning tree algorithms
- Dynamic connectivity problems
- Union-find in graph algorithms
Applications
- Graph algorithms: Connected components, MST
- Network connectivity: Check if nodes are connected
- Image processing: Connected component labeling
- Social networks: Friend groups and communities
- Competitive programming: Efficient union-find operations
When to Use DSU
- Dynamic connectivity problems
- Cycle detection in graphs
- Connected components analysis
- Minimum spanning tree algorithms
- Union-find operations are frequent