145 lines
5.8 KiB
Swift
145 lines
5.8 KiB
Swift
|
|
import Foundation
|
||
|
|
import Testing
|
||
|
|
@testable import IMessageSync
|
||
|
|
|
||
|
|
/// Exercises `iMessageReader.extractTextFromAttributedBody(_:)` against hand-built byte
/// fixtures: degenerate input, the "NSString"-marker path with each length-prefix
/// encoding, the printable-run fallback, and buffers that carry more than one marker.
@Suite("AttributedBody Text Extraction")
struct AttributedBodyExtractionTests {

    // MARK: - Fixtures

    /// The shared reader whose extraction heuristic is under test.
    private var reader: iMessageReader { iMessageReader.shared }

    /// UTF-8 bytes of the literal "NSString" marker
    /// (78, 83, 83, 116, 114, 105, 110, 103).
    private static let marker = Array("NSString".utf8)

    /// The five bytes every fixture places between the marker and the length prefix.
    /// The tests treat them as class-metadata padding that the heuristic skips.
    private static let padding: [UInt8] = [0x01, 0x95, 0x84, 0x01, 0x2B]

    /// How a fixture encodes the payload length that follows the padding.
    private enum LengthPrefix {
        /// A single byte holding the length itself (payload must be <= 255 bytes).
        case oneByte
        /// Tag byte `0x81` followed by a 2-byte little-endian length.
        case twoByte
        /// Tag byte `0x82` followed by a 4-byte little-endian length.
        case fourByte
    }

    /// Returns the `width` low-order bytes of `value`, least-significant byte first.
    private static func littleEndianBytes(_ value: Int, width: Int) -> [UInt8] {
        (0..<width).map { UInt8((value >> (8 * $0)) & 0xFF) }
    }

    /// Builds one well-formed record in the layout the fixtures rely on:
    ///
    ///     [marker: 8 bytes][padding: 5 bytes][length prefix][UTF-8 payload]
    ///
    /// - Parameters:
    ///   - text: The payload; its UTF-8 byte count is what the prefix declares.
    ///   - lengthPrefix: Which length encoding to emit. `.oneByte` traps if the
    ///     payload is longer than 255 bytes, because the count must fit in a `UInt8`.
    /// - Returns: The record's raw bytes.
    private static func record(text: String, lengthPrefix: LengthPrefix) -> [UInt8] {
        let payload = Array(text.utf8)
        var bytes = marker + padding
        switch lengthPrefix {
        case .oneByte:
            bytes.append(UInt8(payload.count))
        case .twoByte:
            bytes.append(0x81)
            bytes += littleEndianBytes(payload.count, width: 2)
        case .fourByte:
            bytes.append(0x82)
            bytes += littleEndianBytes(payload.count, width: 4)
        }
        bytes += payload
        return bytes
    }

    // MARK: - Degenerate input

    @Test("returns nil for empty data")
    func emptyData() {
        #expect(reader.extractTextFromAttributedBody(Data()) == nil)
    }

    @Test("returns nil for random garbage that has no recoverable text")
    func garbageBytes() {
        // Short random bytes — too short to satisfy the >3 byte run check
        // and missing any NSString marker.
        let garbage = Data([0x00, 0x01, 0x02, 0xFF, 0xFE])
        #expect(reader.extractTextFromAttributedBody(garbage) == nil)
    }

    @Test("does not throw on malformed NSString marker (truncated)")
    func truncatedAfterMarker() {
        // The marker is followed by only 3 bytes: fewer than the 5 padding bytes
        // the marker path skips, so the buffer ends before any length byte.
        var bytes = Self.marker
        bytes += [0x00, 0x01, 0x02]

        // The call must not crash, and the expected result is nil: the marker path
        // has nothing to read, and the longest-run fallback should find nothing
        // either, since the only printable run is "NSString" itself and the
        // heuristic skips runs starting with "NS".
        let result = reader.extractTextFromAttributedBody(Data(bytes))
        #expect(result == nil)
    }

    // MARK: - NSString marker path

    @Test("extracts ASCII text after NSString marker with short-form length")
    func positiveFixtureShortLength() {
        // Layout the heuristic expects:
        //   [NSString marker: 8 bytes]
        //   [5 bytes of NSString class metadata padding — skipped]
        //   [1 length byte = N]
        //   [N text bytes (UTF-8)]
        let text = "hello world"
        let data = Data(Self.record(text: text, lengthPrefix: .oneByte))
        #expect(reader.extractTextFromAttributedBody(data) == text)
    }

    @Test("extracts text via long-form length prefix (0x81)")
    func positiveFixtureLongLength() {
        // 0x81 length-byte triggers the 2-byte little-endian length path.
        let text = String(repeating: "x", count: 200)
        let data = Data(Self.record(text: text, lengthPrefix: .twoByte))
        #expect(reader.extractTextFromAttributedBody(data) == text)
    }

    @Test("extracts text via 0x82 (4-byte) length prefix for >64KiB payload")
    func positiveFixtureFourByteLength() {
        // 70_000 bytes does not fit in a 2-byte length, so the fixture uses the
        // 0x82 tag followed by a 4-byte little-endian length.
        let text = String(repeating: "x", count: 70_000)
        let data = Data(Self.record(text: text, lengthPrefix: .fourByte))
        #expect(reader.extractTextFromAttributedBody(data) == text)
    }

    // MARK: - Fallback heuristic

    @Test("falls back to longest printable run when no NSString marker")
    func longestRunFallback() {
        // No "NSString" marker; just a printable run surrounded by control bytes.
        // The heuristic skips runs starting with "NS" or "__" or containing
        // "attributedString".
        var bytes: [UInt8] = [0x00, 0x01, 0x02]
        bytes += Array("greetings everyone".utf8)
        bytes += [0x00, 0x00]
        #expect(reader.extractTextFromAttributedBody(Data(bytes)) == "greetings everyone")
    }

    // MARK: - Multiple markers

    @Test("multiple NSString markers: longest valid extraction wins")
    func multipleMarkersLongestWins() {
        let longText = "the real conversation text"

        // A short record ("hi"), two separator bytes, then the long record.
        var combined = Self.record(text: "hi", lengthPrefix: .oneByte)
        combined += [0x00, 0x00]
        combined += Self.record(text: longText, lengthPrefix: .oneByte)

        #expect(reader.extractTextFromAttributedBody(Data(combined)) == longText)
    }

    @Test("truncated first marker does not block recovery from a valid second marker")
    func recoverPastTruncatedMarker() {
        // First record declares length=200, but only 26 bytes follow its length
        // byte (its own 5-byte "short" payload plus the entire second record),
        // so the declared length overruns the buffer.
        var combined = Self.marker + Self.padding
        combined.append(200)
        combined += Array("short".utf8)

        // Second record is well-formed.
        combined += Self.record(text: "recover", lengthPrefix: .oneByte)

        #expect(reader.extractTextFromAttributedBody(Data(combined)) == "recover")
    }
}
|