import Foundation
import Testing
@testable import IMessageSync

/// Exercises `iMessageReader.extractTextFromAttributedBody(_:)` against
/// hand-built byte fixtures: empty/garbage input, truncated records, the three
/// length-prefix forms used by these fixtures, the no-marker fallback, and
/// inputs containing more than one "NSString" marker.
@Suite("AttributedBody Text Extraction")
struct AttributedBodyExtractionTests {
    private var reader: iMessageReader { iMessageReader.shared }

    // MARK: - Fixture helpers

    /// ASCII bytes of "NSString" (78, 83, 83, 116, 114, 105, 110, 103).
    private static let marker: [UInt8] = Array("NSString".utf8)

    /// Five bytes placed between the marker and the length prefix. The
    /// fixtures treat them as class-metadata padding that the extractor skips.
    private static let padding: [UInt8] = [0x01, 0x95, 0x84, 0x01, 0x2B]

    /// Builds one record in the layout these tests feed to the extractor:
    ///
    ///     [marker: 8 bytes][padding: 5 bytes][length prefix][payload]
    ///
    /// - Parameters:
    ///   - lengthPrefix: Raw length bytes. Passed verbatim so a test can
    ///     declare a length that disagrees with `payload.count`.
    ///   - payload: Bytes that follow the length prefix.
    /// - Returns: The concatenated record bytes.
    private static func record(lengthPrefix: [UInt8], payload: [UInt8]) -> [UInt8] {
        marker + padding + lengthPrefix + payload
    }

    /// Short form: a single byte holding the length.
    /// `count` must fit in a `UInt8`; larger values trap.
    private static func shortLength(_ count: Int) -> [UInt8] {
        [UInt8(count)]
    }

    /// Long form: a tag byte followed by `byteCount` little-endian length bytes.
    /// The tests use tag 0x81 with 2 bytes and tag 0x82 with 4 bytes.
    private static func taggedLength(_ count: Int, tag: UInt8, byteCount: Int) -> [UInt8] {
        [tag] + (0..<byteCount).map { UInt8((count >> (8 * $0)) & 0xFF) }
    }

    // MARK: - Inputs with no recoverable text

    @Test("returns nil for empty data")
    func emptyData() {
        #expect(reader.extractTextFromAttributedBody(Data()) == nil)
    }

    @Test("returns nil for random garbage that has no recoverable text")
    func garbageBytes() {
        // Too short to satisfy the >3 byte run check, and there is no
        // NSString marker anywhere in the input.
        let input = Data([0x00, 0x01, 0x02, 0xFF, 0xFE])

        #expect(reader.extractTextFromAttributedBody(input) == nil)
    }

    @Test("does not throw on malformed NSString marker (truncated)")
    func truncatedAfterMarker() {
        // The marker is present, but only 3 bytes follow it — fewer than the
        // 5 padding bytes plus length byte a complete record needs. The call
        // must not crash, and this test pins the result to exactly nil.
        // NOTE(review): presumably the fallback also yields nothing because
        // the only printable run is "NSString" itself, which starts with
        // "NS" — confirm against the extractor.
        let input = Data(Self.marker + [0x00, 0x01, 0x02])

        let extracted = reader.extractTextFromAttributedBody(input)

        #expect(extracted == nil)
    }

    // MARK: - Single well-formed record

    @Test("extracts ASCII text after NSString marker with short-form length")
    func positiveFixtureShortLength() {
        // Layout the heuristic expects:
        //   [NSString marker: 8 bytes]
        //   [5 bytes of NSString class metadata padding — skipped]
        //   [1 length byte = N]
        //   [N text bytes (UTF-8)]
        let text = "hello world"
        let payload = Array(text.utf8)
        let input = Data(Self.record(lengthPrefix: Self.shortLength(payload.count), payload: payload))

        #expect(reader.extractTextFromAttributedBody(input) == text)
    }

    @Test("extracts text via long-form length prefix (0x81)")
    func positiveFixtureLongLength() {
        // A 0x81 length byte triggers the 2-byte little-endian length path.
        let text = String(repeating: "x", count: 200)
        let payload = Array(text.utf8)
        let prefix = Self.taggedLength(payload.count, tag: 0x81, byteCount: 2)
        let input = Data(Self.record(lengthPrefix: prefix, payload: payload))

        #expect(reader.extractTextFromAttributedBody(input) == text)
    }

    @Test("falls back to longest printable run when no NSString marker")
    func longestRunFallback() {
        // No "NSString" marker; just a printable run surrounded by control
        // bytes. The heuristic skips runs starting with "NS" or "__" or
        // containing "attributedString", none of which applies here.
        let expected = "greetings everyone"
        let input = Data([0x00, 0x01, 0x02] + Array(expected.utf8) + [0x00, 0x00])

        #expect(reader.extractTextFromAttributedBody(input) == "greetings everyone")
    }

    @Test("extracts text via 0x82 (4-byte) length prefix for >64KiB payload")
    func positiveFixtureFourByteLength() {
        // 70_000 bytes does not fit in a 2-byte length, so the fixture uses
        // the 0x82 tag with a 4-byte little-endian length.
        let text = String(repeating: "x", count: 70_000)
        let payload = Array(text.utf8)
        let prefix = Self.taggedLength(payload.count, tag: 0x82, byteCount: 4)
        let input = Data(Self.record(lengthPrefix: prefix, payload: payload))

        #expect(reader.extractTextFromAttributedBody(input) == text)
    }

    // MARK: - Multiple markers

    @Test("multiple NSString markers: longest valid extraction wins")
    func multipleMarkersLongestWins() {
        let shortText = "hi"
        let longText = "the real conversation text"

        let shortRecord = Self.record(
            lengthPrefix: Self.shortLength(shortText.utf8.count),
            payload: Array(shortText.utf8)
        )
        let longRecord = Self.record(
            lengthPrefix: Self.shortLength(longText.utf8.count),
            payload: Array(longText.utf8)
        )

        // Short record first, two NUL separator bytes, then the long record.
        let input = Data(shortRecord + [0x00, 0x00] + longRecord)

        #expect(reader.extractTextFromAttributedBody(input) == longText)
    }

    @Test("truncated first marker does not block recovery from a valid second marker")
    func recoverPastTruncatedMarker() {
        // First record declares length 200 but is followed by only the
        // 5 bytes of "short" before the next record begins; the whole input
        // is far shorter than 200 bytes past that length byte.
        let brokenRecord = Self.record(lengthPrefix: [200], payload: Array("short".utf8))

        // Second record is well-formed: declared length matches the payload.
        let recoverable = "recover"
        let goodRecord = Self.record(
            lengthPrefix: Self.shortLength(recoverable.utf8.count),
            payload: Array(recoverable.utf8)
        )

        let input = Data(brokenRecord + goodRecord)

        #expect(reader.extractTextFromAttributedBody(input) == "recover")
    }
}