Longest Common Subsequence (LCS): Mastering Two-Sequence DP Patterns
Longest Common Subsequence (LCS): Mastering Two-Sequence DP Patterns
The Longest Common Subsequence (LCS) problem is fundamental to sequence comparison and forms the backbone of many real-world applications including diff tools, version control systems, and bioinformatics sequence alignment.
Table of Contents
- LCS Problem Definition
- Basic 2D DP Solution
- Space Optimization Techniques
- LCS Variations and Extensions
- Advanced Multi-Sequence LCS
- Real-World Applications
- Problem Recognition
LCS Problem Definition {#lcs-definition}
Given two sequences, find the length of the longest subsequence that appears in both sequences in the same relative order.
Key Concepts
Example Walkthrough
// Demonstrate LCS with step-by-step trace
func demonstrateLCS() {
text1 := "ABCDGH"
text2 := "AEDFHR"
fmt.Printf("Text1: %s\n", text1)
fmt.Printf("Text2: %s\n", text2)
// Expected LCS: "ADH" (length 3)
length := longestCommonSubsequence(text1, text2)
fmt.Printf("LCS Length: %d\n", length)
sequence := getLCS(text1, text2)
fmt.Printf("LCS Sequence: %s\n", sequence)
// Show all possible LCS of this length
allLCS := getAllLCS(text1, text2)
fmt.Printf("All LCS: %v\n", allLCS)
}
Basic 2D DP Solution {#basic-lcs-solution}
Classic LCS Algorithm
// Classic LCS between two strings - O(mn) time and space
func longestCommonSubsequence(text1, text2 string) int {
m, n := len(text1), len(text2)
// dp[i][j] = LCS length for text1[0:i] and text2[0:j]
dp := make([][]int, m+1)
for i := range dp {
dp[i] = make([]int, n+1)
}
// Fill the DP table
for i := 1; i <= m; i++ {
for j := 1; j <= n; j++ {
if text1[i-1] == text2[j-1] {
// Characters match: extend the LCS
dp[i][j] = dp[i-1][j-1] + 1
} else {
// No match: take the better of two options
dp[i][j] = max(dp[i-1][j], dp[i][j-1])
}
}
}
return dp[m][n]
}
func max(a, b int) int {
if a > b {
return a
}
return b
}
LCS with DP Table Visualization
// Visualize the DP table construction
func visualizeLCSDP(text1, text2 string) {
m, n := len(text1), len(text2)
dp := make([][]int, m+1)
for i := range dp {
dp[i] = make([]int, n+1)
}
fmt.Printf("LCS DP Table Construction:\n")
fmt.Printf("Text1: %s\n", text1)
fmt.Printf("Text2: %s\n", text2)
fmt.Println()
// Print header
fmt.Printf(" ε ")
for j := 0; j < n; j++ {
fmt.Printf(" %c ", text2[j])
}
fmt.Println()
// Fill and print table row by row
for i := 0; i <= m; i++ {
if i == 0 {
fmt.Printf("ε ")
} else {
fmt.Printf("%c ", text1[i-1])
}
for j := 0; j <= n; j++ {
if i == 0 || j == 0 {
dp[i][j] = 0
} else if text1[i-1] == text2[j-1] {
dp[i][j] = dp[i-1][j-1] + 1
} else {
dp[i][j] = max(dp[i-1][j], dp[i][j-1])
}
fmt.Printf(" %d ", dp[i][j])
}
fmt.Println()
}
}
Reconstructing the LCS String
// Get the actual LCS string, not just the length
func getLCS(text1, text2 string) string {
m, n := len(text1), len(text2)
dp := make([][]int, m+1)
for i := range dp {
dp[i] = make([]int, n+1)
}
// Fill DP table
for i := 1; i <= m; i++ {
for j := 1; j <= n; j++ {
if text1[i-1] == text2[j-1] {
dp[i][j] = dp[i-1][j-1] + 1
} else {
dp[i][j] = max(dp[i-1][j], dp[i][j-1])
}
}
}
// Backtrack to build LCS
lcs := make([]byte, dp[m][n])
i, j, k := m, n, dp[m][n]-1
for i > 0 && j > 0 {
if text1[i-1] == text2[j-1] {
// This character is part of LCS
lcs[k] = text1[i-1]
k--
i--
j--
} else if dp[i-1][j] > dp[i][j-1] {
// Move up (text1 contributes more)
i--
} else {
// Move left (text2 contributes more)
j--
}
}
return string(lcs)
}
Finding All Possible LCS
// Find all possible LCS strings (there can be multiple)
func getAllLCS(text1, text2 string) []string {
m, n := len(text1), len(text2)
dp := make([][]int, m+1)
for i := range dp {
dp[i] = make([]int, n+1)
}
// Fill DP table
for i := 1; i <= m; i++ {
for j := 1; j <= n; j++ {
if text1[i-1] == text2[j-1] {
dp[i][j] = dp[i-1][j-1] + 1
} else {
dp[i][j] = max(dp[i-1][j], dp[i][j-1])
}
}
}
var allLCS []string
backtrackAllLCS(text1, text2, dp, m, n, "", &allLCS)
return removeDuplicates(allLCS)
}
func backtrackAllLCS(text1, text2 string, dp [][]int, i, j int, current string, result *[]string) {
if i == 0 || j == 0 {
// Reverse the string since we built it backwards
*result = append(*result, reverse(current))
return
}
if text1[i-1] == text2[j-1] {
// This character must be in the LCS
backtrackAllLCS(text1, text2, dp, i-1, j-1, current+string(text1[i-1]), result)
} else {
// Explore both directions where LCS length is maximum
if dp[i-1][j] == dp[i][j] {
backtrackAllLCS(text1, text2, dp, i-1, j, current, result)
}
if dp[i][j-1] == dp[i][j] {
backtrackAllLCS(text1, text2, dp, i, j-1, current, result)
}
}
}
func reverse(s string) string {
runes := []rune(s)
for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 {
runes[i], runes[j] = runes[j], runes[i]
}
return string(runes)
}
func removeDuplicates(strings []string) []string {
seen := make(map[string]bool)
var result []string
for _, s := range strings {
if !seen[s] {
seen[s] = true
result = append(result, s)
}
}
return result
}
Space Optimization Techniques {#space-optimization}
Rolling Array Optimization
// Space-optimized LCS using only two rows - O(min(m,n)) space
func longestCommonSubsequenceOptimized(text1, text2 string) int {
m, n := len(text1), len(text2)
// Ensure text1 is the shorter string for better space complexity
if m > n {
return longestCommonSubsequenceOptimized(text2, text1)
}
// Use only two rows instead of full m×n table
prev := make([]int, m+1)
curr := make([]int, m+1)
for j := 1; j <= n; j++ {
for i := 1; i <= m; i++ {
if text1[i-1] == text2[j-1] {
curr[i] = prev[i-1] + 1
} else {
curr[i] = max(prev[i], curr[i-1])
}
}
prev, curr = curr, prev
}
return prev[m]
}
Space-Optimized with Path Reconstruction
// Hirschberg's algorithm for space-efficient LCS with path reconstruction
func lcsHirschberg(text1, text2 string) string {
m, n := len(text1), len(text2)
if m == 0 || n == 0 {
return ""
}
if m == 1 {
// Base case: single character in text1
if strings.Contains(text2, text1) {
return text1
}
return ""
}
// Divide: find the middle row
mid := m / 2
// Compute LCS lengths from left and right
left := lcsLengthForward(text1[:mid], text2)
right := lcsLengthBackward(text1[mid:], text2)
// Find the optimal split point
maxSum := -1
splitJ := 0
for j := 0; j <= n; j++ {
if left[j]+right[n-j] > maxSum {
maxSum = left[j] + right[n-j]
splitJ = j
}
}
// Conquer: recursively solve subproblems
leftLCS := lcsHirschberg(text1[:mid], text2[:splitJ])
rightLCS := lcsHirschberg(text1[mid:], text2[splitJ:])
return leftLCS + rightLCS
}
func lcsLengthForward(text1, text2 string) []int {
m, n := len(text1), len(text2)
dp := make([]int, n+1)
for i := 1; i <= m; i++ {
prev := 0
for j := 1; j <= n; j++ {
temp := dp[j]
if text1[i-1] == text2[j-1] {
dp[j] = prev + 1
} else {
dp[j] = max(dp[j], dp[j-1])
}
prev = temp
}
}
return dp
}
func lcsLengthBackward(text1, text2 string) []int {
m, n := len(text1), len(text2)
dp := make([]int, n+1)
for i := m - 1; i >= 0; i-- {
prev := 0
for j := n - 1; j >= 0; j-- {
temp := dp[j]
if text1[i] == text2[j] {
dp[j] = prev + 1
} else {
dp[j] = max(dp[j], dp[j+1])
}
prev = temp
}
}
return dp
}
LCS Variations and Extensions {#lcs-variations}
Longest Common Substring (Contiguous)
// Longest Common Substring - must be contiguous
func longestCommonSubstring(text1, text2 string) string {
m, n := len(text1), len(text2)
// dp[i][j] = length of common substring ending at i-1, j-1
dp := make([][]int, m+1)
for i := range dp {
dp[i] = make([]int, n+1)
}
maxLength := 0
endingI := 0
for i := 1; i <= m; i++ {
for j := 1; j <= n; j++ {
if text1[i-1] == text2[j-1] {
dp[i][j] = dp[i-1][j-1] + 1
if dp[i][j] > maxLength {
maxLength = dp[i][j]
endingI = i
}
} else {
dp[i][j] = 0 // Reset for contiguous requirement
}
}
}
if maxLength == 0 {
return ""
}
return text1[endingI-maxLength : endingI]
}
Shortest Common Supersequence
// Find the shortest string that contains both input strings as subsequences
func shortestCommonSupersequence(str1, str2 string) string {
m, n := len(str1), len(str2)
// First find LCS using DP
dp := make([][]int, m+1)
for i := range dp {
dp[i] = make([]int, n+1)
}
for i := 1; i <= m; i++ {
for j := 1; j <= n; j++ {
if str1[i-1] == str2[j-1] {
dp[i][j] = dp[i-1][j-1] + 1
} else {
dp[i][j] = max(dp[i-1][j], dp[i][j-1])
}
}
}
// Build supersequence by backtracking
var result []byte
i, j := m, n
for i > 0 && j > 0 {
if str1[i-1] == str2[j-1] {
// Common character: add once
result = append([]byte{str1[i-1]}, result...)
i--
j--
} else if dp[i-1][j] > dp[i][j-1] {
// str1[i-1] not in LCS: add from str1
result = append([]byte{str1[i-1]}, result...)
i--
} else {
// str2[j-1] not in LCS: add from str2
result = append([]byte{str2[j-1]}, result...)
j--
}
}
// Add remaining characters
for i > 0 {
result = append([]byte{str1[i-1]}, result...)
i--
}
for j > 0 {
result = append([]byte{str2[j-1]}, result...)
j--
}
return string(result)
}
LCS with Character Costs
// LCS where different character matches have different values
func lcsWithCosts(text1, text2 string, matchCosts map[byte]int) int {
m, n := len(text1), len(text2)
// dp[i][j] = maximum cost LCS for text1[0:i] and text2[0:j]
dp := make([][]int, m+1)
for i := range dp {
dp[i] = make([]int, n+1)
}
for i := 1; i <= m; i++ {
for j := 1; j <= n; j++ {
if text1[i-1] == text2[j-1] {
// Characters match: add the cost of this character
cost := matchCosts[text1[i-1]]
dp[i][j] = dp[i-1][j-1] + cost
} else {
// No match: take the better option
dp[i][j] = max(dp[i-1][j], dp[i][j-1])
}
}
}
return dp[m][n]
}
Advanced Multi-Sequence LCS {#multi-sequence-lcs}
LCS of Three Strings
// LCS of three strings using 3D DP
func longestCommonSubsequence3(text1, text2, text3 string) int {
m, n, p := len(text1), len(text2), len(text3)
// 3D DP: dp[i][j][k] = LCS for text1[0:i], text2[0:j], text3[0:k]
dp := make([][][]int, m+1)
for i := range dp {
dp[i] = make([][]int, n+1)
for j := range dp[i] {
dp[i][j] = make([]int, p+1)
}
}
for i := 1; i <= m; i++ {
for j := 1; j <= n; j++ {
for k := 1; k <= p; k++ {
if text1[i-1] == text2[j-1] && text2[j-1] == text3[k-1] {
// All three characters match
dp[i][j][k] = dp[i-1][j-1][k-1] + 1
} else {
// Take the maximum from the seven possible subproblems
dp[i][j][k] = max3(
dp[i-1][j][k], // Skip from text1
dp[i][j-1][k], // Skip from text2
dp[i][j][k-1], // Skip from text3
)
}
}
}
}
return dp[m][n][p]
}
func max3(a, b, c int) int {
return max(max(a, b), c)
}
LCS of Multiple Sequences (General Solution)
// LCS of multiple sequences using recursive approach with memoization
type MultiLCSMemo struct {
memo map[string]int
sequences []string
}
func NewMultiLCSMemo(sequences []string) *MultiLCSMemo {
return &MultiLCSMemo{
memo: make(map[string]int),
sequences: sequences,
}
}
func (mlcs *MultiLCSMemo) LCS() int {
indices := make([]int, len(mlcs.sequences))
return mlcs.lcsRecursive(indices)
}
func (mlcs *MultiLCSMemo) lcsRecursive(indices []int) int {
// Create key for memoization
key := fmt.Sprintf("%v", indices)
if result, exists := mlcs.memo[key]; exists {
return result
}
// Check if any sequence is exhausted
for i, idx := range indices {
if idx >= len(mlcs.sequences[i]) {
mlcs.memo[key] = 0
return 0
}
}
// Check if all current characters are the same
char := mlcs.sequences[0][indices[0]]
allSame := true
for i := 1; i < len(mlcs.sequences); i++ {
if mlcs.sequences[i][indices[i]] != char {
allSame = false
break
}
}
if allSame {
// All characters match: include this character and advance all
newIndices := make([]int, len(indices))
for i := range indices {
newIndices[i] = indices[i] + 1
}
result := 1 + mlcs.lcsRecursive(newIndices)
mlcs.memo[key] = result
return result
}
// Characters don't match: try skipping each sequence
maxResult := 0
for i := range indices {
newIndices := make([]int, len(indices))
copy(newIndices, indices)
newIndices[i]++
result := mlcs.lcsRecursive(newIndices)
maxResult = max(maxResult, result)
}
mlcs.memo[key] = maxResult
return maxResult
}
Real-World Applications {#real-world-applications}
Text Diff Implementation
// Text diff system using LCS
type TextDiff struct {
lines1 []string
lines2 []string
}
type DiffLine struct {
Type string // "add", "delete", "equal"
Content string
LineNum1 int
LineNum2 int
}
func NewTextDiff(text1, text2 string) *TextDiff {
return &TextDiff{
lines1: strings.Split(text1, "\n"),
lines2: strings.Split(text2, "\n"),
}
}
func (td *TextDiff) ComputeDiff() []DiffLine {
m, n := len(td.lines1), len(td.lines2)
// Compute LCS table for line comparison
dp := make([][]int, m+1)
for i := range dp {
dp[i] = make([]int, n+1)
}
for i := 1; i <= m; i++ {
for j := 1; j <= n; j++ {
if td.lines1[i-1] == td.lines2[j-1] {
dp[i][j] = dp[i-1][j-1] + 1
} else {
dp[i][j] = max(dp[i-1][j], dp[i][j-1])
}
}
}
// Backtrack to generate diff
var diff []DiffLine
i, j := m, n
for i > 0 || j > 0 {
if i > 0 && j > 0 && td.lines1[i-1] == td.lines2[j-1] {
// Lines are equal
diff = append([]DiffLine{{
Type: "equal",
Content: td.lines1[i-1],
LineNum1: i,
LineNum2: j,
}}, diff...)
i--
j--
} else if i > 0 && (j == 0 || dp[i][j] == dp[i-1][j]) {
// Line deleted from first file
diff = append([]DiffLine{{
Type: "delete",
Content: td.lines1[i-1],
LineNum1: i,
LineNum2: 0,
}}, diff...)
i--
} else {
// Line added in second file
diff = append([]DiffLine{{
Type: "add",
Content: td.lines2[j-1],
LineNum1: 0,
LineNum2: j,
}}, diff...)
j--
}
}
return diff
}
func (td *TextDiff) GenerateUnifiedDiff() string {
diff := td.ComputeDiff()
var result strings.Builder
lineNum1, lineNum2 := 1, 1
for _, line := range diff {
switch line.Type {
case "add":
result.WriteString(fmt.Sprintf("+%d: %s\n", lineNum2, line.Content))
lineNum2++
case "delete":
result.WriteString(fmt.Sprintf("-%d: %s\n", lineNum1, line.Content))
lineNum1++
case "equal":
result.WriteString(fmt.Sprintf(" %d: %s\n", lineNum1, line.Content))
lineNum1++
lineNum2++
}
}
return result.String()
}
DNA Sequence Alignment
// Bioinformatics sequence alignment using LCS
type DNAAligner struct {
matchScore int
mismatchScore int
}
func NewDNAAligner(match, mismatch int) *DNAAligner {
return &DNAAligner{
matchScore: match,
mismatchScore: mismatch,
}
}
func (da *DNAAligner) Align(seq1, seq2 string) (int, string, string) {
m, n := len(seq1), len(seq2)
// DP table for alignment scores
dp := make([][]int, m+1)
for i := range dp {
dp[i] = make([]int, n+1)
}
// Fill DP table
for i := 1; i <= m; i++ {
for j := 1; j <= n; j++ {
if seq1[i-1] == seq2[j-1] {
// Nucleotides match
dp[i][j] = dp[i-1][j-1] + da.matchScore
} else {
// No match: either align with mismatch or gap
align := dp[i-1][j-1] + da.mismatchScore
gap1 := dp[i-1][j] // Gap in seq2
gap2 := dp[i][j-1] // Gap in seq1
dp[i][j] = max(align, max(gap1, gap2))
}
}
}
// Backtrack to get alignment
var aligned1, aligned2 strings.Builder
i, j := m, n
for i > 0 || j > 0 {
if i > 0 && j > 0 {
align := dp[i-1][j-1]
if seq1[i-1] == seq2[j-1] {
align += da.matchScore
} else {
align += da.mismatchScore
}
if dp[i][j] == align {
aligned1.WriteByte(seq1[i-1])
aligned2.WriteByte(seq2[j-1])
i--
j--
continue
}
}
if i > 0 && dp[i][j] == dp[i-1][j] {
// Gap in seq2
aligned1.WriteByte(seq1[i-1])
aligned2.WriteByte('-')
i--
} else {
// Gap in seq1
aligned1.WriteByte('-')
aligned2.WriteByte(seq2[j-1])
j--
}
}
// Reverse strings (built backwards)
return dp[m][n], reverseString(aligned1.String()), reverseString(aligned2.String())
}
func reverseString(s string) string {
runes := []rune(s)
for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 {
runes[i], runes[j] = runes[j], runes[i]
}
return string(runes)
}
Problem Recognition {#problem-recognition}
LCS Pattern Indicators
type LCSPatternMatcher struct {
patterns map[string][]string
}
func NewLCSPatternMatcher() *LCSPatternMatcher {
return &LCSPatternMatcher{
patterns: map[string][]string{
"basic_lcs": {
"common subsequence", "two strings", "longest common",
"sequence comparison", "alignment",
},
"diff_tools": {
"difference", "diff", "compare files", "version control",
"changes", "patches",
},
"bioinformatics": {
"DNA", "sequence alignment", "genome", "protein",
"BLAST", "nucleotide", "amino acid",
},
"text_processing": {
"similar documents", "plagiarism", "text similarity",
"document comparison", "content analysis",
},
},
}
}
func (lpm *LCSPatternMatcher) IdentifyApplication(description string) string {
desc := strings.ToLower(description)
maxScore := 0
bestMatch := "general_lcs"
for pattern, keywords := range lpm.patterns {
score := 0
for _, keyword := range keywords {
if strings.Contains(desc, keyword) {
score++
}
}
if score > maxScore {
maxScore = score
bestMatch = pattern
}
}
return bestMatch
}
LCS vs Other Sequence Problems
| Problem Type | Input | Output | Key Insight |
|---|---|---|---|
| LCS | Two sequences | Common subsequence | Character match extends LCS |
| LIS | One sequence | Increasing subsequence | Ordering constraint |
| Edit Distance | Two sequences | Minimum operations | Transformation cost |
| Substring | Two sequences | Common substring | Contiguity required |
Conclusion
Longest Common Subsequence is a fundamental pattern in sequence analysis with wide-ranging applications. Understanding LCS provides the foundation for many advanced string algorithms and real-world text processing tasks.
LCS Mastery Framework:
🔍 Core Algorithm
- 2D DP approach - Build table bottom-up
- Space optimization - Use rolling arrays for O(n) space
- Path reconstruction - Backtrack to find actual sequences
🚀 Advanced Techniques
- Multi-sequence LCS - Extend to 3+ sequences
- Hirschberg's algorithm - Space-efficient with path reconstruction
- Weighted LCS - Different match values
💡 Real-World Impact
- Diff tools - Git, SVN, file comparison utilities
- Bioinformatics - DNA/protein sequence alignment
- Text analysis - Plagiarism detection, similarity scoring
Key Takeaways:
- Foundation for many algorithms - Understanding LCS helps with edit distance, diff algorithms
- Space-time tradeoffs - Multiple optimization strategies available
- Practical applications - Powers many real-world text processing tools
- Extensible pattern - Easily adapted for domain-specific needs
🚀 Next Steps: With LCS mastered, you're ready to explore Edit Distance patterns that quantify the cost of transforming one sequence into another!
Next in series: Edit Distance Patterns | Previous: LIS Patterns