Shortest Common Supersequence (SCS)
Definition​
- Definition
- Explanation
- Guidance
- Tips
The Shortest Common Supersequence (SCS) algorithm finds the shortest supersequence that contains all the given sequences as subsequences. It's a dynamic programming-based approach, commonly used in bioinformatics, data compression, and version control systems. This algorithm efficiently computes the length of the SCS and can also reconstruct the SCS itself
It leverages dynamic programming to find the shortest common supersequence of two given sequences. It involves constructing a table where each cell represents the length of the SCS for subsequences of the original sequences. The algorithm fills this table iteratively by considering three possibilities: extending the supersequence by adding characters from each sequence individually, or if the characters match, extending the supersequence by adding that character once. The final cell of the table holds the length of the SCS. By tracing back through the table, the SCS itself can be reconstructed
- Initialize a 2D table of size
[m+1][n+1]
where m and n are the lengths of the two sequences - Fill the first row and first column of the table with values representing the lengths of empty subsequences
- Iterate through each cell of the table starting from
[1,1]
to[m,n]
- For each cell
[i,j]
, if the characters at positionsi-1
andj-1
in the sequences match, set the value of the current cell to one plus the value of the cell diagonally left-upwards - If the characters don't match, set the value of the current cell to the maximum of the cell directly above or directly leftwards
- The value in the bottom-right cell of the table represents the length of the SCS
- Reconstruct the SCS by tracing back through the table, starting from the bottom-right cell, following the rules of movement (diagonal if characters match, otherwise up or left)
- Concatenate the characters encountered during the tracing to form the SCS
- utilize memoization or tabulation to optimize the algorithm's time complexity
- ensure to handle cases where one sequence is a subsequence of the other efficiently
- it's crucial to understand the recurrence relation and base cases for dynamic programming implementation
- consider implementing a solution that reconstructs the actual SCS sequence if required
Practice​
- Practice
- Solution
shortest_common_supersequence(sequence1, sequence2):
m = length(sequence1)
n = length(sequence2)
table = create_table(m+1, n+1)
// Step 1: Initialize the table
for i from 0 to m:
table[i][0] = i
for j from 0 to n:
table[0][j] = j
// Step 2: Fill the table
for i from 1 to m:
for j from 1 to n:
if sequence1[i-1] == sequence2[j-1]:
table[i][j] = 1 + table[i-1][j-1]
else:
table[i][j] = max(table[i-1][j], table[i][j-1])
// Step 3: Reconstruct the SCS
scs = ""
i = m
j = n
while i > 0 and j > 0:
if sequence1[i-1] == sequence2[j-1]:
scs = sequence1[i-1] + scs
i = i - 1
j = j - 1
else:
if table[i-1][j] > table[i][j-1]:
scs = sequence1[i-1] + scs
i = i - 1
else:
scs = sequence2[j-1] + scs
j = j - 1
// Step 4: Add remaining characters
while i > 0:
scs = sequence1[i-1] + scs
i = i - 1
while j > 0:
scs = sequence2[j-1] + scs
j = j - 1
return scs
package main
func min(a, b int) int {
if a < b {
return a
}
return b
}
func shortestCommonSupersequence(str1, str2 string) string {
m, n := len(str1), len(str2)
dp := make([][]int, m+1)
for i := range dp {
dp[i] = make([]int, n+1)
}
for i := 0; i <= m; i++ {
for j := 0; j <= n; j++ {
if i == 0 {
dp[i][j] = j
} else if j == 0 {
dp[i][j] = i
} else if str1[i-1] == str2[j-1] {
dp[i][j] = dp[i-1][j-1] + 1
} else {
dp[i][j] = 1 + min(dp[i-1][j], dp[i][j-1])
}
}
}
// Reconstruct the shortest common supersequence
i, j := m, n
var result string
for i > 0 && j > 0 {
if str1[i-1] == str2[j-1] {
result = string(str1[i-1]) + result
i--
j--
} else if dp[i-1][j] < dp[i][j-1] {
result = string(str1[i-1]) + result
i--
} else {
result = string(str2[j-1]) + result
j--
}
}
for i > 0 {
result = string(str1[i-1]) + result
i--
}
for j > 0 {
result = string(str2[j-1]) + result
j--
}
return result
}
public class ShortestCommonSupersequence {
public static String shortestCommonSupersequence(String str1, String str2) {
int m = str1.length();
int n = str2.length();
int[][] dp = new int[m + 1][n + 1];
for (int i = 0; i <= m; i++) {
for (int j = 0; j <= n; j++) {
if (i == 0) {
dp[i][j] = j;
} else if (j == 0) {
dp[i][j] = i;
} else if (str1.charAt(i - 1) == str2.charAt(j - 1)) {
dp[i][j] = dp[i - 1][j - 1] + 1;
} else {
dp[i][j] = 1 + Math.min(dp[i - 1][j], dp[i][j - 1]);
}
}
}
StringBuilder sb = new StringBuilder();
int i = m, j = n;
while (i > 0 && j > 0) {
if (str1.charAt(i - 1) == str2.charAt(j - 1)) {
sb.insert(0, str1.charAt(i - 1));
i--;
j--;
} else if (dp[i - 1][j] < dp[i][j - 1]) {
sb.insert(0, str1.charAt(i - 1));
i--;
} else {
sb.insert(0, str2.charAt(j - 1));
j--;
}
}
while (i > 0) {
sb.insert(0, str1.charAt(i - 1));
i--;
}
while (j > 0) {
sb.insert(0, str2.charAt(j - 1));
j--;
}
return sb.toString();
}
}
function shortestCommonSupersequence(str1, str2) {
const m = str1.length;
const n = str2.length;
const dp = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0));
for (let i = 0; i <= m; i++) {
for (let j = 0; j <= n; j++) {
if (i === 0) {
dp[i][j] = j;
} else if (j === 0) {
dp[i][j] = i;
} else if (str1[i - 1] === str2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1] + 1;
} else {
dp[i][j] = 1 + Math.min(dp[i - 1][j], dp[i][j - 1]);
}
}
}
let i = m,
j = n;
let result = "";
while (i > 0 && j > 0) {
if (str1[i - 1] === str2[j - 1]) {
result = str1[i - 1] + result;
i--;
j--;
} else if (dp[i - 1][j] < dp[i][j - 1]) {
result = str1[i - 1] + result;
i--;
} else {
result = str2[j - 1] + result;
j--;
}
}
while (i > 0) {
result = str1[i - 1] + result;
i--;
}
while (j > 0) {
result = str2[j - 1] + result;
j--;
}
return result;
}
fun shortestCommonSupersequence(str1: String, str2: String): String {
val m = str1.length
val n = str2.length
val dp = Array(m + 1) { IntArray(n + 1) }
for (i in 0..m) {
for (j in 0..n) {
when {
i == 0 -> dp[i][j] = j
j == 0 -> dp[i][j] = i
str1[i - 1] == str2[j - 1] -> dp[i][j] = dp[i - 1][j - 1] + 1
else -> dp[i][j] = 1 + minOf(dp[i - 1][j], dp[i][j - 1])
}
}
}
var i = m
var j = n
var result = ""
while (i > 0 && j > 0) {
when {
str1[i - 1] == str2[j - 1] -> {
result = str1[i - 1] + result
i--
j--
}
dp[i - 1][j] < dp[i][j - 1] -> {
result = str1[i - 1] + result
i--
}
else -> {
result = str2[j - 1] + result
j--
}
}
}
while (i > 0) {
result = str1[i - 1] + result
i--
}
while (j > 0) {
result = str2[j - 1] + result
j--
}
return result
}
def shortest_common_supersequence(str1, str2):
m, n = len(str1), len(str2)
dp = [[0] * (n + 1) for _ in range(m + 1)]
for i in range(m + 1):
for j in range(n + 1):
if i == 0:
dp[i][j] = j
elif j == 0:
dp[i][j] = i
elif str1[i - 1] == str2[j - 1]:
dp[i][j] = dp[i - 1][j - 1] + 1
else:
dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1])
i, j = m, n
result = ''
while i > 0 and j > 0:
if str1[i - 1] == str2[j - 1]:
result = str1[i - 1] + result
i -= 1
j -= 1
elif dp[i - 1][j] < dp[i][j - 1]:
result = str1[i - 1] + result
i -= 1
else:
result = str2[j - 1] + result
j -= 1
while i > 0:
result = str1[i - 1] + result
i -= 1
while j > 0:
result = str2[j - 1] + result
j -= 1
return result
fn shortest_common_supersequence(str1: &str, str2: &str) -> String {
let m = str1.len();
let n = str2.len();
let mut dp = vec![vec![0; n + 1]; m + 1];
for i in 0..=m {
for j in 0..=n {
if i == 0 {
dp[i][j] = j;
} else if j == 0 {
dp[i][j] = i;
} else if str1.chars().nth(i - 1).unwrap() == str2.chars().nth(j - 1).unwrap() {
dp[i][j] = dp[i - 1][j - 1] + 1;
} else {
dp[i][j] = 1 + dp[i - 1][j].min(dp[i][j - 1]);
}
}
}
let (mut i, mut j) = (m, n);
let mut result = String::new();
while i > 0 && j > 0 {
if str1.chars().nth(i - 1).unwrap() == str2.chars().nth(j - 1).unwrap() {
result.insert(0, str1.chars().nth(i - 1).unwrap());
i -= 1;
j -= 1;
} else if dp[i - 1][j] < dp[i][j - 1] {
result.insert(0, str1.chars().nth(i - 1).unwrap());
i -= 1;
} else {
result.insert(0, str2.chars().nth(j - 1).unwrap());
j -= 1;
}
}
while i > 0 {
result.insert(0, str1.chars().nth(i - 1).unwrap());
i -= 1;
}
while j > 0 {
result.insert(0, str2.chars().nth(j - 1).unwrap());
j -= 1;
}
result
}
function shortestCommonSupersequence(str1: string, str2: string): string {
const m = str1.length;
const n = str2.length;
const dp: number[][] = Array.from({ length: m + 1 }, () =>
Array(n + 1).fill(0),
);
for (let i = 0; i <= m; i++) {
for (let j = 0; j <= n; j++) {
if (i === 0) {
dp[i][j] = j;
} else if (j === 0) {
dp[i][j] = i;
} else if (str1[i - 1] === str2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1] + 1;
} else {
dp[i][j] = 1 + Math.min(dp[i - 1][j], dp[i][j - 1]);
}
}
}
let i = m,
j = n;
let result = "";
while (i > 0 && j > 0) {
if (str1[i - 1] === str2[j - 1]) {
result = str1[i - 1] + result;
i--;
j--;
} else if (dp[i - 1][j] < dp[i][j - 1]) {
result = str1[i - 1] + result;
i--;
} else {
result = str2[j - 1] + result;
j--;
}
}
while (i > 0) {
result = str1[i - 1] + result;
i--;
}
while (j > 0) {
result = str2[j - 1] + result;
j--;
}
return result;
}