macsync/@packages/imessage/Tests/IMessageSyncTests/AttributedBodyExtractionTests.swift

145 lines
5.8 KiB
Swift
Raw Normal View History

import Foundation
import Testing
@testable import IMessageSync
@Suite("AttributedBody Text Extraction")
struct AttributedBodyExtractionTests {
    // MARK: - Fixture helpers

    /// Extractor under test, fetched from the shared instance on every access.
    private var reader: iMessageReader {
        iMessageReader.shared
    }

    /// UTF-8 bytes of the "NSString" class-name marker that opens each text record.
    private static let marker: [UInt8] = Array("NSString".utf8)

    /// Five filler bytes the fixtures place between the marker and the length prefix.
    private static let classMetadata: [UInt8] = [0x01, 0x95, 0x84, 0x01, 0x2B]

    /// Builds one text record: marker, metadata filler, length prefix, then payload.
    ///
    /// - Parameters:
    ///   - lengthPrefix: Raw bytes of the length field, written exactly as given.
    ///   - payload: Bytes that follow the length field.
    /// - Returns: The concatenated record bytes.
    private static func record(lengthPrefix: [UInt8], payload: [UInt8]) -> [UInt8] {
        marker + classMetadata + lengthPrefix + payload
    }

    /// Splits `value` into its `count` lowest-order bytes, least significant first.
    private static func littleEndianBytes(of value: Int, count: Int) -> [UInt8] {
        (0..<count).map { UInt8((value >> (8 * $0)) & 0xFF) }
    }

    // MARK: - Inputs with no recoverable text

    @Test("returns nil for empty data")
    func emptyData() {
        let extracted = reader.extractTextFromAttributedBody(Data())
        #expect(extracted == nil)
    }

    @Test("returns nil for random garbage that has no recoverable text")
    func garbageBytes() {
        // Five bytes with no "NSString" marker and no printable characters at all.
        let noise = Data([0x00, 0x01, 0x02, 0xFF, 0xFE])
        let extracted = reader.extractTextFromAttributedBody(noise)
        #expect(extracted == nil)
    }

    @Test("does not throw on malformed NSString marker (truncated)")
    func truncatedAfterMarker() {
        // Only three bytes follow the marker, fewer than the five filler bytes
        // plus length byte that the well-formed fixtures in this suite place
        // there. The call must finish without crashing and is expected to
        // produce nil.
        let tail: [UInt8] = [0x00, 0x01, 0x02]
        let input = Data(Self.marker + tail)
        let extracted = reader.extractTextFromAttributedBody(input)
        #expect(extracted == nil)
    }

    // MARK: - Well-formed single records

    @Test("extracts ASCII text after NSString marker with short-form length")
    func positiveFixtureShortLength() {
        // Record layout used by this fixture:
        //   marker (8 bytes) | filler (5 bytes) | length N (1 byte) | N UTF-8 bytes
        let text = "hello world"
        let payload = Array(text.utf8)
        let bytes = Self.record(lengthPrefix: [UInt8(payload.count)], payload: payload)
        #expect(reader.extractTextFromAttributedBody(Data(bytes)) == text)
    }

    @Test("extracts text via long-form length prefix (0x81)")
    func positiveFixtureLongLength() {
        // The length field is 0x81 followed by the count as two little-endian bytes.
        let text = String(repeating: "x", count: 200)
        let payload = Array(text.utf8)
        let prefix: [UInt8] = [0x81] + Self.littleEndianBytes(of: payload.count, count: 2)
        let bytes = Self.record(lengthPrefix: prefix, payload: payload)
        #expect(reader.extractTextFromAttributedBody(Data(bytes)) == text)
    }

    @Test("falls back to longest printable run when no NSString marker")
    func longestRunFallback() {
        // No marker anywhere: a printable phrase framed by control bytes.
        // NOTE(review): the fallback reportedly ignores runs that begin with
        // "NS" or "__" or that contain "attributedString"; this phrase avoids
        // all three. Confirm against the extractor.
        let phrase = "greetings everyone"
        let leading: [UInt8] = [0x00, 0x01, 0x02]
        let trailing: [UInt8] = [0x00, 0x00]
        let bytes: [UInt8] = leading + Array(phrase.utf8) + trailing
        #expect(reader.extractTextFromAttributedBody(Data(bytes)) == phrase)
    }

    @Test("extracts text via 0x82 (4-byte) length prefix for >64KiB payload")
    func positiveFixtureFourByteLength() {
        // The length field is 0x82 followed by the count as four little-endian bytes.
        let len = 70_000
        let text = String(repeating: "x", count: len)
        let payload = Array(text.utf8)
        let prefix: [UInt8] = [0x82] + Self.littleEndianBytes(of: len, count: 4)
        let bytes = Self.record(lengthPrefix: prefix, payload: payload)
        #expect(reader.extractTextFromAttributedBody(Data(bytes)) == text)
    }

    // MARK: - Multiple records

    @Test("multiple NSString markers: longest valid extraction wins")
    func multipleMarkersLongestWins() {
        // A two-character record, two zero bytes, then a longer record.
        let shortRecord = Self.record(lengthPrefix: [2], payload: Array("hi".utf8))

        let longText = "the real conversation text"
        let longPayload = Array(longText.utf8)
        let longRecord = Self.record(
            lengthPrefix: [UInt8(longPayload.count)],
            payload: longPayload
        )

        let separator: [UInt8] = [0x00, 0x00]
        let combined: [UInt8] = shortRecord + separator + longRecord
        #expect(reader.extractTextFromAttributedBody(Data(combined)) == longText)
    }

    @Test("truncated first marker does not block recovery from a valid second marker")
    func recoverPastTruncatedMarker() {
        // The first record declares 200 payload bytes but carries only five.
        let broken = Self.record(lengthPrefix: [200], payload: Array("short".utf8))
        // The second record's declared length matches its payload.
        let good = Self.record(lengthPrefix: [7], payload: Array("recover".utf8))

        let combined: [UInt8] = broken + good
        #expect(reader.extractTextFromAttributedBody(Data(combined)) == "recover")
    }
}