Skip to main content

Disjoint Set Union (Union-Find)

Master the Disjoint Set Union data structure for efficient union and find operations on disjoint sets.

DSU Fundamentals

What is Disjoint Set Union?

Disjoint Set Union (DSU) is a data structure that efficiently supports union and find operations on disjoint sets. It's also known as Union-Find and is used for problems involving connected components, cycle detection, and dynamic connectivity.

Basic Structure

class DisjointSetUnion {
private:
vector<int> parent;
vector<int> rank;
int n;

public:
DisjointSetUnion(int size) {
n = size;
parent.resize(n);
rank.resize(n, 0);

// Initialize each element as its own parent
for (int i = 0; i < n; i++) {
parent[i] = i;
}
}

// Find root of element x
int find(int x) {
if (parent[x] != x) {
parent[x] = find(parent[x]); // Path compression
}
return parent[x];
}

// Union two sets
void unionSets(int x, int y) {
int rootX = find(x);
int rootY = find(y);

if (rootX == rootY) return; // Already in same set

// Union by rank
if (rank[rootX] < rank[rootY]) {
parent[rootX] = rootY;
} else if (rank[rootX] > rank[rootY]) {
parent[rootY] = rootX;
} else {
parent[rootY] = rootX;
rank[rootX]++;
}
}

// Check if two elements are in same set
bool connected(int x, int y) {
return find(x) == find(y);
}

// Get number of connected components
int getComponentCount() {
int count = 0;
for (int i = 0; i < n; i++) {
if (parent[i] == i) {
count++;
}
}
return count;
}
};

Union & Find Operations

Basic Find Operation

// Find root without path compression
int findBasic(int x) {
while (parent[x] != x) {
x = parent[x];
}
return x;
}

// Find root with path compression
int find(int x) {
if (parent[x] != x) {
parent[x] = find(parent[x]); // Path compression
}
return parent[x];
}

Basic Union Operation

// Union without optimization
void unionBasic(int x, int y) {
int rootX = find(x);
int rootY = find(y);

if (rootX != rootY) {
parent[rootX] = rootY;
}
}

// Union with rank optimization
void unionSets(int x, int y) {
int rootX = find(x);
int rootY = find(y);

if (rootX == rootY) return;

if (rank[rootX] < rank[rootY]) {
parent[rootX] = rootY;
} else if (rank[rootX] > rank[rootY]) {
parent[rootY] = rootX;
} else {
parent[rootY] = rootX;
rank[rootX]++;
}
}

Path Compression

Path Compression Implementation

// Path compression during find
int findWithPathCompression(int x) {
if (parent[x] != x) {
parent[x] = findWithPathCompression(parent[x]);
}
return parent[x];
}

// Iterative path compression
int findIterative(int x) {
int root = x;
while (parent[root] != root) {
root = parent[root];
}

// Path compression
while (parent[x] != root) {
int next = parent[x];
parent[x] = root;
x = next;
}

return root;
}

Benefits of Path Compression

  • Reduces tree height: Flattens the tree structure
  • Improves future queries: Makes subsequent finds faster
  • Amortized O(α(n)): Where α is the inverse Ackermann function
  • Nearly constant time: For practical purposes

Union by Rank

Rank-based Union

// Union by rank
void unionByRank(int x, int y) {
int rootX = find(x);
int rootY = find(y);

if (rootX == rootY) return;

if (rank[rootX] < rank[rootY]) {
parent[rootX] = rootY;
} else if (rank[rootX] > rank[rootY]) {
parent[rootY] = rootX;
} else {
parent[rootY] = rootX;
rank[rootX]++;
}
}

Union by Size

// Union by size
void unionBySize(int x, int y) {
int rootX = find(x);
int rootY = find(y);

if (rootX == rootY) return;

if (size[rootX] < size[rootY]) {
parent[rootX] = rootY;
size[rootY] += size[rootX];
} else {
parent[rootY] = rootX;
size[rootX] += size[rootY];
}
}

Connected Components

Count Connected Components

// Count number of connected components
int countConnectedComponents(vector<vector<int>>& edges, int n) {
DisjointSetUnion dsu(n);

for (auto& edge : edges) {
dsu.unionSets(edge[0], edge[1]);
}

return dsu.getComponentCount();
}

Find Connected Components

// Get all connected components
vector<vector<int>> getConnectedComponents(vector<vector<int>>& edges, int n) {
DisjointSetUnion dsu(n);

for (auto& edge : edges) {
dsu.unionSets(edge[0], edge[1]);
}

// Group nodes by their root
unordered_map<int, vector<int>> components;
for (int i = 0; i < n; i++) {
int root = dsu.find(i);
components[root].push_back(i);
}

vector<vector<int>> result;
for (auto& [root, nodes] : components) {
result.push_back(nodes);
}

return result;
}

Cycle Detection

Detect Cycle in Undirected Graph

// Detect cycle in undirected graph
bool hasCycle(vector<vector<int>>& edges, int n) {
DisjointSetUnion dsu(n);

for (auto& edge : edges) {
int u = edge[0];
int v = edge[1];

if (dsu.connected(u, v)) {
return true; // Cycle detected
}

dsu.unionSets(u, v);
}

return false;
}

Detect Cycle in Directed Graph

// Detect cycle in directed graph using DSU
bool hasCycleDirected(vector<vector<int>>& edges, int n) {
DisjointSetUnion dsu(n);

for (auto& edge : edges) {
int u = edge[0];
int v = edge[1];

if (dsu.connected(u, v)) {
return true; // Cycle detected
}

dsu.unionSets(u, v);
}

return false;
}

Advanced Applications

Minimum Spanning Tree (Kruskal's Algorithm)

// Kruskal's algorithm using DSU
int kruskalMST(vector<vector<int>>& edges, int n) {
// Sort edges by weight
sort(edges.begin(), edges.end(), [](const vector<int>& a, const vector<int>& b) {
return a[2] < b[2]; // Assuming weight is at index 2
});

DisjointSetUnion dsu(n);
int mstWeight = 0;
int edgesUsed = 0;

for (auto& edge : edges) {
int u = edge[0];
int v = edge[1];
int weight = edge[2];

if (!dsu.connected(u, v)) {
dsu.unionSets(u, v);
mstWeight += weight;
edgesUsed++;

if (edgesUsed == n - 1) {
break; // MST complete
}
}
}

return mstWeight;
}

Number of Islands

// Count number of islands using DSU
int numIslands(vector<vector<char>>& grid) {
if (grid.empty()) return 0;

int m = grid.size();
int n = grid[0].size();
DisjointSetUnion dsu(m * n);

int islands = 0;

for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
if (grid[i][j] == '1') {
islands++;

// Check adjacent cells
int directions[4][2] = {{0, 1}, {1, 0}, {0, -1}, {-1, 0}};

for (auto& dir : directions) {
int ni = i + dir[0];
int nj = j + dir[1];

if (ni >= 0 && ni < m && nj >= 0 && nj < n && grid[ni][nj] == '1') {
int current = i * n + j;
int neighbor = ni * n + nj;

if (!dsu.connected(current, neighbor)) {
dsu.unionSets(current, neighbor);
islands--;
}
}
}
}
}
}

return islands;
}

Redundant Connection

// Find redundant connection in graph
vector<int> findRedundantConnection(vector<vector<int>>& edges) {
int n = edges.size();
DisjointSetUnion dsu(n + 1);

for (auto& edge : edges) {
int u = edge[0];
int v = edge[1];

if (dsu.connected(u, v)) {
return edge; // Redundant edge found
}

dsu.unionSets(u, v);
}

return {}; // No redundant edge
}

Performance Analysis

Time Complexity

  • Find: O(α(n)) with path compression
  • Union: O(α(n)) with union by rank
  • Connected: O(α(n))
  • α(n): Inverse Ackermann function (nearly constant)

Space Complexity

  • O(n): For parent and rank arrays
  • O(n): Additional space for size array if needed

Amortized Analysis

  • Without optimizations: O(log n) per operation
  • With path compression: O(α(n)) per operation
  • With union by rank: O(α(n)) per operation
  • α(n) < 5: For all practical values of n

Common Patterns

  1. Connected components in graphs
  2. Cycle detection in undirected graphs
  3. Minimum spanning tree algorithms
  4. Dynamic connectivity problems
  5. Union-find in graph algorithms

Applications

  • Graph algorithms: Connected components, MST
  • Network connectivity: Check if nodes are connected
  • Image processing: Connected component labeling
  • Social networks: Friend groups and communities
  • Competitive programming: Efficient union-find operations

When to Use DSU

  • Dynamic connectivity problems
  • Cycle detection in graphs
  • Connected components analysis
  • Minimum spanning tree algorithms
  • Union-find operations are frequent