Home > Blockchain >  Swift: concatenate two arrays without duplicating shared suffix/prefix
Swift: concatenate two arrays without duplicating shared suffix/prefix

Time:04-16

I have a situation where I need the join two arrays which often have an overlapping subarray.

So if I have:

let input1 = ["A", "B", "F", "E"]

and

let input2 = ["F", "E", "A", "G"]

the result should be:

let result = ["A", "B", "F", "E", "A", "G"]

So it's a bit like a "union" in the sense that the output does not duplicate the shared subarray/intersection and contains both arrays in their original state (just overlapping/intersecting). The canonical way to do something similar is with Sets, but that would remove the second "A".

CodePudding user response:

Just for fun you can use starts with predicate while iterating your first sequence from the end as follow:

let first: [String] = ["A", "B", "F", "E"]
let second: [String] = ["F", "E", "A", "G"]

var pos = first.endIndex
while pos > first.startIndex,
      second.starts(with: first[pos...], by: { $0 != $1}),
      !second.isEmpty {
    first.formIndex(before: &pos)
}

let result = first[..<pos]   second // ["A", "B", "F", "E", "A", "G"] 

This will result in a SubSequence, in this case an array slice. If you need an array just explicitly set the resulting type:

let result: [String] = first[..<pos]   second

Based on OP comments if you need to match the subsequence by pairs just offset every two elements:

let first = "ABFF"
let second = "FFAG"

var pos = first.endIndex
while pos > first.startIndex,
      second.starts(with: first[pos...], by: { $0 != $1 }),
      !second.isEmpty {
    first.formIndex(&pos, offsetBy: -2)
}

let result: String = first[..<pos]   second  // "ABFFAG"

If you need the string elements separated by spaces:

var first = "A B C D E F G D E"
var second = "D E F C B A"

first.removeAll(where: \.isWhitespace)
second.removeAll(where: \.isWhitespace)

var pos = first.endIndex
while pos > first.startIndex,
      second.starts(with: first[pos...], by: { $0 != $1 }),
      !second.isEmpty {
    first.formIndex(&pos, offsetBy: -2)
}

let result = (first[..<pos]   second)
                 .map(String.init)
                 .joined(separator: " ")
result  // "A B C D E F G D E F C B A"

CodePudding user response:

My previous solution didn't handled duplicates correctly so to fix this I added this function to Array to count the number of trailing duplicates

extension Array where Element: Equatable {
    func trailingDuplicates() -> Int {
        guard let last = self.last else { return 0 }
        let reversed = self.reversed().dropFirst()
        for (offset, element) in reversed.enumerated() {
            if last != element { return offset }
        }
        return 0
    }
}

And then I adjusted the result of calling lastIndex(of:) with the result of this new function in my old solution

func concatenateWithoutDuplicates<Value: Equatable>(_ first: [Value], with second: [Value]) -> [Value] {
    guard var firstIndex = first.lastIndex(of: second[0]) else {
        return first   second
    }

    firstIndex -= first.trailingDuplicates()
    let secondIndex = second.index(second.endIndex, offsetBy: -first[firstIndex..<first.endIndex].count)
    if first[firstIndex..<first.endIndex] == second[second.startIndex..<secondIndex] {
        return first   second[secondIndex..<second.endIndex]
    } else {
        return first   second
    }
}

Running some test examples from the question and comments

let testData = [
    (["A", "B", "F", "E"], ["F", "E", "A", "G"]),
    (["A", "B", "F", "F"], ["F", "F", "A", "G"]),
    (["A", "A", "A", "B"], ["A", "B", "B", "B"])
]

for datum in testData {
    print("\(datum.0), \(datum.1) -> \(concatenateWithoutDuplicates(datum.0, with: datum.1))")
}

["A", "B", "F", "E"], ["F", "E", "A", "G"] -> ["A", "B", "F", "E", "A", "G"]
["A", "B", "F", "F"], ["F", "F", "A", "G"] -> ["A", "B", "F", "F", "A", "G"]
["A", "A", "A", "B"], ["A", "B", "B", "B"] -> ["A", "A", "A", "B", "B", "B"]


Here is a brute force solution where I start by finding the last position in the first array of the first element of the second array and then directly compare the end of the first array with the start of the second array using the found index to create sub arrays.

let first = ["A", "B", "F", "E"]
let second = ["F", "E", "A", "G"]

let result: [String]
if let firstIndex = first.lastIndex(of: second[0]) {
    let secondIndex = second.index(second.endIndex, offsetBy: -first[firstIndex..<first.endIndex].count)

    if first[firstIndex..<first.endIndex] == second[second.startIndex..<secondIndex] {
        result = first   second[secondIndex..<second.endIndex]
    } else {
        result = first   second
    }
} else {
    result = first   second
}

Note that I haven't tested for any edge cases here, just the sample given in the question with some simple variants.

CodePudding user response:

This seems to work for what I need. The one thing I don't like is the check on (tail.count > second.count), which feels like a hack...

var first: [String] = ["at", "by", "chicken", "dog", "eat", "for", "good", "dog", "eat"]
var second: [String] = ["good", "dog", "eat", "feed", "cats", "bonk", "atrophe"]

var head: ArraySlice<String> = []
var tail: ArraySlice<String> = []
for i in stride(from: first.count-1, through: 0, by: -1) {
    tail = first[i...]
    if (tail.count > second.count) || second.prefix(tail.count) != tail {
        tail = []
    } else {
        head = first.prefix(i)
    }
}

let result = head   second
print(result) // ["at", "by", "chicken", "dog", "eat", "for", "good", "dog", "eat", "feed", "cats", "bonk", "atrophe"]

I also made this an extension... might be simpler in context (e.g., could handle token ids instead of just tokens):

extension Array where Element : Equatable {
    /**
     Extend the existing array with an array without repeating overlapping elements.
     */
    mutating func extend(withArray array: [Element]) {
        var head: [Element] = []
        var tail: [Element] = []
        for i in stride(from: self.count-1, through: 0, by: -1) {
            tail = Array(self[i...])
            if (tail.count > array.count) || Array(array[0 ..< tail.count]) != tail {
                tail = []
            } else {
                head = Array(self[..<i])
            }
        }
        if head.count > 0 {
            self = head   array
        } else {
            self  = array
        }
    }
}
  • Related