macsync/@packages/imessage/Sources/IMessageSync/Reader.swift

543 lines
21 KiB
Swift

import Contacts
import Foundation
import GRDB
import LilithLogging
import MacSyncShared
// File-private logger used throughout this file; obtained from AppLogger under the name "IMessage.Reader".
private let log = AppLogger.logger(for: "IMessage.Reader")
/// One chat returned by `iMessageReader.getConversations()`.
struct iMessageConversation {
    /// The chat's `guid` column.
    let id: String
    /// The chat's own display name when it has one; otherwise a contact name
    /// or participant handle chosen by the reader ("Unknown" as last resort).
    let displayName: String
    /// `true` when the chat row has a non-null `group_id`.
    let isGroup: Bool
    /// Handle ids of the chat's participants.
    let participantIds: [String]
}
/// Address-book details for one person, as cached by `iMessageReader.loadContacts()`.
struct ContactInfo {
    /// First normalized phone number, else first lowercased email, else the
    /// Contacts framework identifier.
    let identifier: String
    /// Given and family name joined by a space (empty parts omitted).
    let displayName: String
    /// First normalized phone number of the contact, if any.
    let phoneNumber: String?
    /// First lowercased email address of the contact, if any.
    let email: String?
    /// Birthday resolved through the current calendar, if the contact has one.
    let birthday: Date?
}
/// One attachment row joined to a message.
struct iMessageAttachment {
    /// The attachment's `filename` column (may start with `~/`).
    let filename: String?
    /// The attachment's `mime_type` column.
    let mimeType: String?
    /// The attachment's `transfer_name` column.
    let transferName: String?
    /// The attachment's `total_bytes` column, or 0 when that column is null.
    let size: Int
    /// Base64 of the file contents when the file could be read; otherwise `nil`.
    let data: String?
}
/// One message as assembled by `iMessageReader.getMessages(conversationId:since:)`.
struct iMessage {
    // MARK: Identity

    /// The message's `guid` column.
    let guid: String
    /// `guid` of the chat the message was read from.
    let conversationId: String
    /// Handle id of the sender; `nil` when the message has no joined handle row.
    let senderId: String?
    /// `true` when the `is_from_me` column equals 1.
    let isFromMe: Bool

    // MARK: Timestamps

    /// Send time, converted from the `date` column.
    let date: Date
    /// Converted from the `date_delivered` column.
    let dateDelivered: Date?
    /// Converted from the `date_read` column.
    let dateRead: Date?

    // MARK: Content

    /// The `text` column when non-empty, otherwise text recovered from `attributedBody`.
    let text: String?
    /// Base64 of the raw `attributedBody` blob, when present.
    let attributedBody: String?
    /// The `associated_message_type` column.
    let associatedMessageType: Int?
    /// The `associated_message_guid` column.
    let associatedMessageGuid: String?
    /// `true` when the `is_audio_message` column equals 1.
    let isAudioMessage: Bool
    /// The `expressive_send_style_id` column.
    let expressiveSendStyleId: String?
    /// The `reply_to_guid` column.
    let replyToGuid: String?
    /// The `thread_originator_guid` column.
    let threadOriginatorGuid: String?
    /// The `group_title` column.
    let groupTitle: String?
    /// The `balloon_bundle_id` column.
    let balloonBundleId: String?
    /// The message's `service` column.
    let service: String?

    // MARK: Sender details

    /// Same value as `senderId`.
    let senderIdentifier: String?
    /// Display name of the matching cached contact, if one was found.
    let senderDisplayName: String?
    /// Phone number of the matching cached contact, if one was found.
    let senderPhoneNumber: String?
    /// Email of the matching cached contact, if one was found.
    let senderEmail: String?

    // MARK: Attachments

    /// All attachments joined to this message.
    let attachments: [iMessageAttachment]
    /// Number of entries in `attachments`.
    let attachmentsCount: Int
    /// Sum of the `size` of every attachment.
    let attachmentsTotalSize: Int
    /// Distinct, sorted MIME types found among the attachments.
    let attachmentsFiletypes: [String]
}
class iMessageReader: MessageReaderProtocol {
/// Shared reader instance. The initializer is private, so this is the only instance created by this type.
static let shared = iMessageReader()
/// GRDB queue for the Messages database; `nil` until `connect()` succeeds.
private var dbQueue: DatabaseQueue?
/// Absolute path of `Library/Messages/chat.db` inside the current user's home directory.
private let chatDbPath: String
/// Contacts store used by `loadContacts()`.
private let contactStore = CNContactStore()
/// Contact lookup table filled by `loadContacts()`. Each contact is stored under every one of its
/// normalized phone numbers, every lowercased email address, and its Contacts identifier.
var contactCache: [String: ContactInfo] = [:]
/// Resolves the location of the Messages database under the current user's home directory.
/// No database connection is opened here; call `connect()` for that.
private init() {
    chatDbPath = FileManager.default.homeDirectoryForCurrentUser
        .appendingPathComponent("Library/Messages/chat.db")
        .path
}
/// Fills `contactCache` from the user's address book.
///
/// Requests Contacts access when the authorization status is still undetermined. Contacts with
/// neither a given nor a family name are skipped. Each remaining contact is stored under all of
/// its normalized phone numbers, all of its lowercased emails, and its Contacts identifier.
///
/// - Returns: `true` when the contacts were enumerated; `false` when access is not granted or
///   any step throws (the error is logged as a warning).
func loadContacts() async -> Bool {
    do {
        switch CNContactStore.authorizationStatus(for: .contacts) {
        case .authorized:
            break
        case .notDetermined:
            // First run: ask the user and stop if they decline.
            guard try await contactStore.requestAccess(for: .contacts) else { return false }
        default:
            return false
        }

        let keysToFetch: [CNKeyDescriptor] = [
            CNContactIdentifierKey as CNKeyDescriptor,
            CNContactGivenNameKey as CNKeyDescriptor,
            CNContactFamilyNameKey as CNKeyDescriptor,
            CNContactPhoneNumbersKey as CNKeyDescriptor,
            CNContactEmailAddressesKey as CNKeyDescriptor,
            CNContactBirthdayKey as CNKeyDescriptor
        ]
        let fetchRequest = CNContactFetchRequest(keysToFetch: keysToFetch)

        try contactStore.enumerateContacts(with: fetchRequest) { contact, _ in
            let nameParts = [contact.givenName, contact.familyName].filter { !$0.isEmpty }
            if nameParts.isEmpty { return }

            let phones = contact.phoneNumbers.map { PhoneUtils.normalize($0.value.stringValue) }
            let emails = contact.emailAddresses.map { ($0.value as String).lowercased() }
            let birthdayDate: Date? = contact.birthday.flatMap { Calendar.current.date(from: $0) }

            let completeContact = ContactInfo(
                identifier: phones.first ?? emails.first ?? contact.identifier,
                displayName: nameParts.joined(separator: " "),
                phoneNumber: phones.first,
                email: emails.first,
                birthday: birthdayDate
            )

            // Same insertion order as before: phones, then emails, then the Contacts identifier.
            for key in phones + emails + [contact.identifier] {
                self.contactCache[key] = completeContact
            }
        }

        log.info("Loaded \(self.contactCache.count) contact entries")
        return true
    } catch {
        log.warning("Failed to load contacts: \(error)")
        return false
    }
}
/// Finds the cached contact for a handle or contact identifier.
///
/// Keys are tried in this order:
/// 1. the identifier exactly as given (the cache stores each contact under its raw Contacts
///    identifier, which a lowercased or phone-normalized key would not match),
/// 2. the lowercased identifier (emails are cached lowercased),
/// 3. the phone-normalized identifier (phone numbers are cached normalized).
///
/// - Parameter identifier: A phone number, email address, or Contacts identifier.
/// - Returns: The matching `ContactInfo`, or `nil` when no key matches.
func lookupContact(identifier: String) -> ContactInfo? {
    if let exact = contactCache[identifier] { return exact }
    if let byEmail = contactCache[identifier.lowercased()] { return byEmail }
    return contactCache[PhoneUtils.normalize(identifier)]
}
/// Returns the cached contacts with duplicates removed.
///
/// The cache holds the same contact under several keys; entries are considered duplicates when
/// their `identifier` is equal, and the first one encountered is kept.
func getAllContacts() -> [ContactInfo] {
    var emitted = Set<String>()
    var unique: [ContactInfo] = []
    for contact in contactCache.values where emitted.insert(contact.identifier).inserted {
        unique.append(contact)
    }
    return unique
}
/// Exposes the current database queue, or `nil` when `connect()` has not succeeded yet.
func getDatabaseQueue() -> DatabaseQueue? {
    return dbQueue
}
/// Opens the Messages database in read-only mode and stores the queue for later queries.
///
/// - Throws: `iMessageError.databaseNotFound` when no file exists at the expected path, or
///   whatever error opening the database queue raises.
func connect() throws {
    if !FileManager.default.fileExists(atPath: chatDbPath) {
        throw iMessageError.databaseNotFound
    }
    var readOnlyConfig = Configuration()
    readOnlyConfig.readonly = true
    dbQueue = try DatabaseQueue(path: chatDbPath, configuration: readOnlyConfig)
}
/// Lists every chat with its participants, most recently active first.
///
/// The display name is the chat's own `display_name` when set. For a nameless chat with exactly
/// one participant, the cached contact name is used when available. Otherwise the first
/// participant handle is used, or "Unknown" when there are no participants.
///
/// - Throws: `iMessageError.notConnected` when `connect()` has not succeeded, or any database error.
func getConversations() throws -> [iMessageConversation] {
    guard let db = dbQueue else { throw iMessageError.notConnected }
    let sql = """
        SELECT
        c.guid,
        c.display_name,
        c.group_id,
        GROUP_CONCAT(DISTINCT h.id) as participant_ids,
        MAX(m.date) as last_message_date
        FROM chat c
        LEFT JOIN chat_handle_join chj ON c.ROWID = chj.chat_id
        LEFT JOIN handle h ON chj.handle_id = h.ROWID
        LEFT JOIN chat_message_join cmj ON c.ROWID = cmj.chat_id
        LEFT JOIN message m ON cmj.message_id = m.ROWID
        GROUP BY c.ROWID
        ORDER BY last_message_date DESC NULLS LAST
        """
    return try db.read { db in
        try Row.fetchAll(db, sql: sql).map { row -> iMessageConversation in
            // participant_ids is a comma-joined list produced by GROUP_CONCAT.
            let participantStr = row["participant_ids"] as? String ?? ""
            let participants = participantStr.split(separator: ",").map { String($0) }

            var displayName: String = row["display_name"] ?? ""
            if displayName.isEmpty,
               participants.count == 1,
               let contact = self.lookupContact(identifier: participants[0]) {
                displayName = contact.displayName
            }
            if displayName.isEmpty {
                displayName = participants.first ?? "Unknown"
            }

            return iMessageConversation(
                id: row["guid"],
                displayName: displayName,
                isGroup: row["group_id"] != nil,
                participantIds: participants
            )
        }
    }
}
/// Loads the messages of one conversation, newest first, with attachments and sender contact
/// details resolved.
///
/// The query returns one row per (message, attachment) pair; rows are folded back into one
/// `iMessage` per message guid while keeping the query's ordering. Attachment files with a
/// positive recorded size are read from disk and embedded as base64.
///
/// - Parameters:
///   - conversationId: The chat `guid` whose messages are wanted.
///   - since: When given, only messages whose `date` is at or after this instant are returned.
/// - Returns: Messages ordered by `m.date DESC, m.ROWID DESC`.
/// - Throws: `iMessageError.notConnected` when `connect()` has not succeeded, or any database error.
func getMessages(conversationId: String, since: Date? = nil) throws -> [iMessage] {
    guard let db = dbQueue else { throw iMessageError.notConnected }
    return try db.read { db in
        // Timestamps are converted from nanoseconds since 2001-01-01 to Unix seconds
        // (978307200 s between the two epochs). NULLIF(..., 0) makes a stored 0 in
        // date_delivered / date_read come back as NULL, so the optional fields are nil
        // instead of decoding as 2001-01-01.
        var sql = """
            SELECT
            m.guid,
            m.ROWID as message_rowid,
            c.guid as conversation_id,
            h.id as sender_id,
            m.is_from_me,
            m.text,
            m.attributedBody,
            m.associated_message_type,
            m.associated_message_guid,
            m.is_audio_message,
            m.expressive_send_style_id,
            m.reply_to_guid,
            m.thread_originator_guid,
            m.group_title,
            m.balloon_bundle_id,
            m.service as message_service,
            a.filename as attachment_filename,
            a.mime_type as attachment_mime_type,
            a.transfer_name as attachment_transfer_name,
            a.total_bytes as attachment_size,
            CAST(m.date AS REAL) / 1000000000.0 + 978307200 as date_unix,
            CAST(NULLIF(m.date_delivered, 0) AS REAL) / 1000000000.0 + 978307200 as date_delivered_unix,
            CAST(NULLIF(m.date_read, 0) AS REAL) / 1000000000.0 + 978307200 as date_read_unix
            FROM message m
            JOIN chat_message_join cmj ON m.ROWID = cmj.message_id
            JOIN chat c ON cmj.chat_id = c.ROWID
            LEFT JOIN handle h ON m.handle_id = h.ROWID
            LEFT JOIN message_attachment_join maj ON m.ROWID = maj.message_id
            LEFT JOIN attachment a ON maj.attachment_id = a.ROWID
            WHERE c.guid = ?
            """
        var arguments: [DatabaseValueConvertible] = [conversationId]
        if let since = since {
            // Convert the cutoff into the same nanoseconds-since-2001 unit as m.date.
            let unixTime = Int64((since.timeIntervalSince1970 - 978307200) * 1_000_000_000)
            sql += " AND m.date >= ?"
            arguments.append(unixTime)
        }
        sql += " ORDER BY m.date DESC, m.ROWID DESC"
        let rows = try Row.fetchAll(db, sql: sql, arguments: StatementArguments(arguments))

        // Fold the per-attachment rows into one entry per message, remembering first-seen order.
        var messageMap: [String: (row: Row, attachments: [iMessageAttachment])] = [:]
        var messageOrder: [String] = []
        for row in rows {
            let guid: String = row["guid"]
            var attachment: iMessageAttachment?
            if let attachmentFilename: String = row["attachment_filename"] {
                let attachmentSize: Int = row["attachment_size"] ?? 0
                var attachmentData: String?
                if attachmentSize > 0 {
                    attachmentData = self.readAttachmentFile(filename: attachmentFilename)
                }
                attachment = iMessageAttachment(
                    filename: attachmentFilename,
                    mimeType: row["attachment_mime_type"],
                    transferName: row["attachment_transfer_name"],
                    size: attachmentSize,
                    data: attachmentData
                )
            }
            if var existing = messageMap[guid] {
                if let attachment = attachment {
                    existing.attachments.append(attachment)
                    messageMap[guid] = existing
                }
            } else {
                messageOrder.append(guid)
                messageMap[guid] = (row: row, attachments: attachment.map { [$0] } ?? [])
            }
        }

        return messageOrder.compactMap { guid -> iMessage? in
            guard let entry = messageMap[guid] else { return nil }
            let row = entry.row
            let attachments = entry.attachments
            let dateUnix: TimeInterval = row["date_unix"]
            let deliveredUnix: TimeInterval? = row["date_delivered_unix"]
            let readUnix: TimeInterval? = row["date_read_unix"]

            var attributedBodyBase64: String?
            var extractedText: String?
            if let attributedBodyData: Data = row["attributedBody"] {
                attributedBodyBase64 = attributedBodyData.base64EncodedString()
                // Try NSKeyedUnarchiver first: when the blob is a real archived
                // NSAttributedString it gives the cleanest text. Otherwise fall back to the
                // heuristic typedstream scan (`extractTextFromAttributedBody`).
                if let decoded = try? NSKeyedUnarchiver.unarchivedObject(
                    ofClass: NSAttributedString.self, from: attributedBodyData
                ) {
                    let s = decoded.string.trimmingCharacters(in: .whitespacesAndNewlines)
                    if !s.isEmpty { extractedText = s }
                }
                // extractedText is only ever set to a non-empty string above, so a nil check
                // is enough to decide whether the fallback is needed.
                if extractedText == nil {
                    extractedText = self.extractTextFromAttributedBody(attributedBodyData)
                }
            }

            // Prefer the `text` column; fall back to text recovered from attributedBody
            // when the column is null or empty.
            let rawText: String? = row["text"]
            let effectiveText: String? = {
                if let raw = rawText, !raw.isEmpty { return raw }
                if let extracted = extractedText, !extracted.isEmpty { return extracted }
                return rawText
            }()

            let senderId: String? = row["sender_id"]
            let senderContact = senderId.flatMap { self.lookupContact(identifier: $0) }

            let attachmentsCount = attachments.count
            let attachmentsTotalSize = attachments.reduce(0) { $0 + $1.size }
            let attachmentsFiletypes = Array(Set(attachments.compactMap { $0.mimeType })).sorted()

            return iMessage(
                guid: row["guid"],
                conversationId: row["conversation_id"],
                senderId: senderId,
                isFromMe: row["is_from_me"] == 1,
                date: Date(timeIntervalSince1970: dateUnix),
                dateDelivered: deliveredUnix.map { Date(timeIntervalSince1970: $0) },
                dateRead: readUnix.map { Date(timeIntervalSince1970: $0) },
                text: effectiveText,
                attributedBody: attributedBodyBase64,
                associatedMessageType: row["associated_message_type"],
                associatedMessageGuid: row["associated_message_guid"],
                isAudioMessage: row["is_audio_message"] == 1,
                expressiveSendStyleId: row["expressive_send_style_id"],
                replyToGuid: row["reply_to_guid"],
                threadOriginatorGuid: row["thread_originator_guid"],
                groupTitle: row["group_title"],
                balloonBundleId: row["balloon_bundle_id"],
                service: row["message_service"],
                senderIdentifier: senderId,
                senderDisplayName: senderContact?.displayName,
                senderPhoneNumber: senderContact?.phoneNumber,
                senderEmail: senderContact?.email,
                attachments: attachments,
                attachmentsCount: attachmentsCount,
                attachmentsTotalSize: attachmentsTotalSize,
                attachmentsFiletypes: attachmentsFiletypes
            )
        }
    }
}
/// Reads an attachment from disk and returns its contents as base64.
///
/// - Parameter filename: Path as stored for the attachment; a leading `~/` is replaced with
///   the current user's home directory.
/// - Returns: The base64-encoded file contents, or `nil` when the file is missing or cannot be
///   read (both cases are logged).
private func readAttachmentFile(filename: String) -> String? {
    var expandedPath = filename
    if filename.hasPrefix("~/") {
        // Drop only the "~" so the remaining "/..." attaches directly to the home path.
        expandedPath = FileManager.default.homeDirectoryForCurrentUser.path + filename.dropFirst(1)
    }

    if !FileManager.default.fileExists(atPath: expandedPath) {
        log.info("Attachment not found: \(expandedPath)")
        return nil
    }

    do {
        let fileURL = URL(fileURLWithPath: expandedPath)
        return try Data(contentsOf: fileURL).base64EncodedString()
    } catch {
        log.warning("Failed to read attachment: \(error)")
        return nil
    }
}
/// Heuristic text extractor for the iMessage `attributedBody` typedstream blob.
/// Mirror of `src/server/src/shared/typedstream/decode.ts`: both sides must agree on which
/// messages yield non-null text.
///
/// Strategy:
///   1. Scan every "NSString" class-definition marker; at each one, read a length-prefixed
///      UTF-8 run after the 5-byte padding. Keep the longest valid extraction (this copes
///      with repeated markers from back-references / appended attribute dicts).
///   2. Fallback: the longest printable byte run that contains at least one letter and is not
///      a known typedstream class-name marker.
///
/// Length prefixes:
///   - `0x00..0x7F`: that byte is the length
///   - `0x81 LO HI`: 16-bit little-endian
///   - `0x82 B0..B3`: 32-bit little-endian
///
/// - Parameter data: Raw `attributedBody` bytes.
/// - Returns: The recovered text, or `nil` when the blob is empty or nothing usable is found.
func extractTextFromAttributedBody(_ data: Data) -> String? {
    if data.isEmpty { return nil }
    let rawBytes = [UInt8](data)
    return bestNSStringRun(in: rawBytes) ?? longestPrintableRun(in: rawBytes)
}
/// UTF-8 bytes of the class name "NSString", searched for inside the blob.
private static let nsStringMarker: [UInt8] = Array("NSString".utf8)
/// Number of bytes skipped between the end of the marker and the length prefix.
private static let nsStringPadding: Int = 5
/// Class names and header words that must never be returned as message text.
private static let typedstreamMarkerWords: Set<String> = [
    "streamtyped",
    "NSObject", "NSValue", "NSNumber", "NSData", "NSDate",
    "NSString", "NSMutableString",
    "NSAttributedString", "NSMutableAttributedString",
    "NSArray", "NSMutableArray",
    "NSDictionary", "NSMutableDictionary"
]
/// Visits every occurrence of the "NSString" marker and returns the longest string that can be
/// decoded right after one of them, or `nil` when none decodes.
/// On equal length the earliest candidate wins.
private func bestNSStringRun(in bytes: [UInt8]) -> String? {
    let marker = Self.nsStringMarker
    let lastStart = bytes.count - marker.count
    var winner: String?
    var cursor = 0
    while cursor <= lastStart,
          let hit = findSubsequence(in: bytes, subsequence: marker, from: cursor) {
        // Resume one byte past this hit so overlapping or repeated markers are all visited.
        cursor = hit + 1
        guard let text = readNSStringRun(in: bytes, markerIndex: hit) else { continue }
        if text.count > (winner?.count ?? 0) {
            winner = text
        }
    }
    return winner
}
/// Decodes the length-prefixed UTF-8 string that follows an "NSString" marker.
///
/// - Parameters:
///   - bytes: The whole blob.
///   - markerIndex: Index of the first byte of the marker.
/// - Returns: The trimmed string, or `nil` when the prefix is missing or zero, the payload runs
///   past the end of the blob, the bytes are not valid UTF-8, or only whitespace remains.
private func readNSStringRun(in bytes: [UInt8], markerIndex: Int) -> String? {
    let prefixOffset = markerIndex + Self.nsStringMarker.count + Self.nsStringPadding
    guard prefixOffset < bytes.count,
          let header = readVarLength(in: bytes, at: prefixOffset),
          header.length > 0 else { return nil }

    let payloadEnd = header.nextOffset + header.length
    if payloadEnd > bytes.count { return nil }

    let payload = Data(bytes[header.nextOffset..<payloadEnd])
    guard let decoded = String(data: payload, encoding: .utf8) else { return nil }
    let trimmed = decoded.trimmingCharacters(in: .whitespacesAndNewlines)
    return trimmed.isEmpty ? nil : trimmed
}
/// A decoded length prefix: the payload length and the offset of the first payload byte.
private struct VarLength { let length: Int; let nextOffset: Int }

/// Decodes the variable-width length prefix found at `offset`.
///
/// Encodings:
///   - `0x00...0x7F`: the byte itself is the length
///   - `0x81 LO HI`: 16-bit little-endian length
///   - `0x82 B0 B1 B2 B3`: 32-bit little-endian length
///
/// Any other tag, including `0x80`, is rejected so that the accepted range matches the
/// documented `0x00..0x7F` literal range.
///
/// - Parameters:
///   - bytes: The whole blob.
///   - offset: Index of the prefix's first byte.
/// - Returns: The decoded prefix, or `nil` when `offset` is outside the blob, the tag is not
///   recognised, or the blob ends before the prefix is complete.
private func readVarLength(in bytes: [UInt8], at offset: Int) -> VarLength? {
    // Also rejects negative offsets, which would otherwise trap on the subscript below.
    guard bytes.indices.contains(offset) else { return nil }
    let tag = bytes[offset]
    switch tag {
    case 0x00...0x7F:
        return VarLength(length: Int(tag), nextOffset: offset + 1)
    case 0x81:
        guard offset + 3 <= bytes.count else { return nil }
        let length = Int(bytes[offset + 1]) | (Int(bytes[offset + 2]) << 8)
        return VarLength(length: length, nextOffset: offset + 3)
    case 0x82:
        guard offset + 5 <= bytes.count else { return nil }
        let b0 = UInt32(bytes[offset + 1])
        let b1 = UInt32(bytes[offset + 2])
        let b2 = UInt32(bytes[offset + 3])
        let b3 = UInt32(bytes[offset + 4])
        let length = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
        return VarLength(length: Int(length), nextOffset: offset + 5)
    default:
        return nil
    }
}
/// Fallback extractor: returns the longest run of printable bytes that looks like message text.
///
/// A run consists of bytes in `0x20...0x7E`, bytes `>= 0x80`, LF, or CR. A run is considered
/// only when it is longer than 3 bytes, decodes as UTF-8, and contains at least one letter.
/// After trimming, it is rejected when it is a known typedstream marker word, starts with
/// "NS" or "__", or contains "attributedString". On equal length the earliest run wins.
private func longestPrintableRun(in bytes: [UInt8]) -> String? {
    func belongsToRun(_ byte: UInt8) -> Bool {
        switch byte {
        case 0x20...0x7E, 0x80...0xFF, 0x0A, 0x0D:
            return true
        default:
            return false
        }
    }

    var longest: String?
    // Splitting on non-run bytes yields exactly the maximal runs; empty pieces are dropped,
    // which is harmless because runs of 3 bytes or fewer are ignored anyway.
    for run in bytes.split(whereSeparator: { !belongsToRun($0) }) where run.count > 3 {
        guard let str = String(data: Data(run), encoding: .utf8),
              str.rangeOfCharacter(from: .letters) != nil else { continue }
        let trimmed = str.trimmingCharacters(in: .whitespacesAndNewlines)

        let looksLikeMarker = Self.typedstreamMarkerWords.contains(trimmed)
            || trimmed.hasPrefix("NS")
            || trimmed.hasPrefix("__")
            || trimmed.contains("attributedString")
        if trimmed.count > (longest?.count ?? 0) && !looksLikeMarker {
            longest = trimmed
        }
    }
    return longest
}
/// Finds the first index at or after `from` where `subsequence` occurs in `array`.
///
/// - Parameters:
///   - array: Bytes to search.
///   - subsequence: Bytes to look for.
///   - from: Index to start searching at; negative values are treated as 0.
/// - Returns: The index of the first match, or `nil` when there is none (including when
///   `subsequence` is longer than `array` or `from` leaves too few bytes for a match).
func findSubsequence(in array: [UInt8], subsequence: [UInt8], from: Int = 0) -> Int? {
    let width = subsequence.count
    let lastStart = array.count - width
    let firstStart = max(0, from)
    // A negative lastStart means the needle is longer than the haystack.
    if lastStart < 0 || firstStart > lastStart { return nil }
    for candidate in firstStart...lastStart
    where array[candidate..<(candidate + width)].elementsEqual(subsequence) {
        return candidate
    }
    return nil
}
}
/// Errors raised by `iMessageReader`.
enum iMessageError: LocalizedError {
    /// No file exists at the expected Messages database path.
    case databaseNotFound
    /// A query was attempted before `connect()` succeeded.
    case notConnected

    /// Human-readable explanation for each case.
    var errorDescription: String? {
        let message: String
        switch self {
        case .notConnected:
            message = "Not connected to iMessage database"
        case .databaseNotFound:
            message = "iMessage database not found. Grant Full Disk Access in System Preferences."
        }
        return message
    }
}