// macsync/@packages/imail/Sources/IMailSync/MIMEParser.swift

import Foundation

// MARK: - Output Types

/// The result of parsing one MIME message.
///
/// The shape is meant to match what `mailparser`'s `simpleParser` yields on the
/// server side; the fields correspond to the columns written by
/// `icloud-sync.service.ts` → `processMessage()`:
/// `messageId`, `subject`, `from`, `to`, `cc`, `text`, `html`, `date`, `inReplyTo`, `references`.
public struct ParsedMIMEMessage: Sendable {
    /// `Message-ID` header value without the surrounding angle brackets, if present.
    public let messageId: String?
    /// `Subject` header after RFC 2047 decoding, if present.
    public let subject: String?
    /// Mailbox parsed from the `From` header, if one could be parsed.
    public let from: MIMEAddress?
    /// Mailboxes parsed from the `To` header (empty when the header is absent).
    public let to: [MIMEAddress]
    /// Mailboxes parsed from the `Cc` header (empty when the header is absent).
    public let cc: [MIMEAddress]
    /// Mailboxes parsed from the `Reply-To` header (empty when the header is absent).
    public let replyTo: [MIMEAddress]
    /// `In-Reply-To` header value without the surrounding angle brackets, if present.
    public let inReplyTo: String?
    /// Message ids listed in the `References` header, brackets stripped.
    public let references: [String]
    /// Parsed `Date` header, or `nil` when missing or unparseable.
    public let date: Date?
    /// Decoded `text/plain` body, if any.
    public let text: String?
    /// Decoded `text/html` body, if any.
    public let html: String?
    /// `true` when at least one part was classified as an attachment.
    public let hasAttachments: Bool

    /// Member-by-member initializer (same labels and order as the stored properties).
    init(
        messageId: String?,
        subject: String?,
        from: MIMEAddress?,
        to: [MIMEAddress],
        cc: [MIMEAddress],
        replyTo: [MIMEAddress],
        inReplyTo: String?,
        references: [String],
        date: Date?,
        text: String?,
        html: String?,
        hasAttachments: Bool
    ) {
        self.messageId = messageId
        self.subject = subject
        self.from = from
        self.to = to
        self.cc = cc
        self.replyTo = replyTo
        self.inReplyTo = inReplyTo
        self.references = references
        self.date = date
        self.text = text
        self.html = html
        self.hasAttachments = hasAttachments
    }
}

/// A single mailbox taken from an address header.
public struct MIMEAddress: Sendable {
    /// The address itself, e.g. `user@example.com`.
    public let address: String
    /// The display name that accompanied the address, or `nil` when there was none.
    public let name: String?

    /// Member-by-member initializer (same labels and order as the stored properties).
    init(address: String, name: String?) {
        self.address = address
        self.name = name
    }
}

// MARK: - Parser

/// A minimal MIME parser that produces `ParsedMIMEMessage` from raw RFC 5322 source.
///
/// Covers the fields needed to match `simpleParser` output shape used in
/// `icloud-sync.service.ts`. Attachments are only detected (`hasAttachments`),
/// never decoded — attachment handling is deferred to server-side processing.
///
/// Implementation notes:
/// - Both CRLF and bare-LF line endings are accepted; a CRLF pair counts as ONE line break.
/// - Headers are RFC 2047 decoded (Q-encoding and B-encoding). Whitespace between
///   adjacent encoded-words is dropped, and adjacent words with the same charset are
///   decoded as one byte stream.
/// - Bodies are decoded from base64 / quoted-printable to bytes and then to text using
///   the declared `charset` when it is recognised, falling back to UTF-8, then Latin-1.
/// - Multipart bodies are walked recursively (bounded by `maxMultipartDepth`); the first
///   `text/plain` and the first `text/html` part found, in document order, win.
public struct MIMEParser {

    /// Upper bound on multipart nesting, so a hostile message cannot recurse without limit.
    private static let maxMultipartDepth = 8

    // MARK: - Public API

    /// Parse a raw RFC 5322 message into `ParsedMIMEMessage`.
    ///
    /// - Parameter source: The complete message (headers, blank line, body).
    /// - Returns: The parsed fields. Missing headers yield `nil` / empty collections;
    ///   this function never fails.
    public static func parse(_ source: String) -> ParsedMIMEMessage {
        let (headerBlock, bodyBlock) = splitHeadersAndBody(source)
        let headers = parseHeaders(headerBlock)

        let contentType = headers["content-type"] ?? "text/plain"
        let bodies = extractBodies(from: bodyBlock, contentType: contentType, headers: headers)

        return ParsedMIMEMessage(
            messageId: headers["message-id"].map { cleanAngleBrackets($0) },
            subject: headers["subject"].map { decodeRFC2047($0) },
            // `From` may legally carry a list; take the first mailbox instead of
            // treating the whole list as a single address.
            from: headers["from"].flatMap { parseAddressList($0).first },
            to: headers["to"].map { parseAddressList($0) } ?? [],
            cc: headers["cc"].map { parseAddressList($0) } ?? [],
            replyTo: headers["reply-to"].map { parseAddressList($0) } ?? [],
            inReplyTo: headers["in-reply-to"].map { cleanAngleBrackets($0) },
            references: parseReferences(headers["references"]),
            date: headers["date"].flatMap { parseDate($0) },
            text: bodies.text,
            html: bodies.html,
            hasAttachments: bodies.hasAttachments
        )
    }

    // MARK: - Line Handling

    /// Split `text` into lines. `Character.isNewline` is true for the single
    /// grapheme "\r\n", so CRLF yields one break rather than an extra empty line.
    /// Empty lines are preserved.
    private static func lines(of text: String) -> [Substring] {
        text.split(omittingEmptySubsequences: false, whereSeparator: \.isNewline)
    }

    // MARK: - Header Parsing

    /// Split at the first blank line (`\r\n\r\n` or `\n\n`, whichever occurs first).
    /// When there is no blank line the whole input is treated as headers.
    private static func splitHeadersAndBody(_ source: String) -> (headers: String, body: String) {
        let candidates = [source.range(of: "\r\n\r\n"), source.range(of: "\n\n")].compactMap { $0 }
        guard let separator = candidates.min(by: { $0.lowerBound < $1.lowerBound }) else {
            return (source, "")
        }
        return (String(source[..<separator.lowerBound]),
                String(source[separator.upperBound...]))
    }

    /// Like `splitHeadersAndBody`, but for a multipart body part: a part that starts
    /// with a blank line has no headers at all and everything after it is body.
    private static func splitPartHeadersAndBody(_ part: String) -> (headers: String, body: String) {
        if let first = part.first, first.isNewline {
            return ("", String(part.dropFirst()))
        }
        return splitHeadersAndBody(part)
    }

    /// Parse RFC 5322 headers into a lowercased-key dictionary.
    ///
    /// Handles folded headers (continuation lines starting with space or tab).
    /// Headers with an empty value are omitted; when a header repeats, the last
    /// occurrence wins. Lines without a colon are ignored.
    private static func parseHeaders(_ block: String) -> [String: String] {
        var headers: [String: String] = [:]
        var currentKey: String?
        var currentValue = ""

        func commitCurrent() {
            if let key = currentKey, !currentValue.isEmpty {
                headers[key] = currentValue.trimmingCharacters(in: .whitespaces)
            }
        }

        for line in lines(of: block) where !line.isEmpty {
            let leading = line.unicodeScalars.first?.value ?? 0
            if leading == 0x20 || leading == 0x09 {
                // Folded continuation; meaningless before the first header line.
                guard currentKey != nil else { continue }
                currentValue += " " + line.trimmingCharacters(in: .whitespaces)
            } else if let colon = line.firstIndex(of: ":") {
                commitCurrent()
                currentKey = String(line[..<colon]).lowercased().trimmingCharacters(in: .whitespaces)
                currentValue = String(line[line.index(after: colon)...]).trimmingCharacters(in: .whitespaces)
            }
        }
        commitCurrent()
        return headers
    }

    /// Return the value of parameter `name` (e.g. `boundary`, `charset`) from a
    /// structured header value such as `text/plain; charset="utf-8"`.
    /// Surrounding double quotes are removed. Returns `nil` when the parameter is absent.
    private static func headerParameter(_ name: String, in headerValue: String) -> String? {
        let prefix = name.lowercased() + "="
        for component in headerValue.components(separatedBy: ";") {
            let trimmed = component.trimmingCharacters(in: .whitespaces)
            guard trimmed.lowercased().hasPrefix(prefix) else { continue }
            var value = String(trimmed.dropFirst(prefix.count)).trimmingCharacters(in: .whitespaces)
            if value.count >= 2, value.hasPrefix("\""), value.hasSuffix("\"") {
                value = String(value.dropFirst().dropLast())
            }
            return value
        }
        return nil
    }

    // MARK: - Body Extraction

    /// Extract the primary text and HTML bodies from an entity.
    ///
    /// - Parameters:
    ///   - body: The entity body (everything after its header block).
    ///   - contentType: The entity's raw `Content-Type` value (original casing, since
    ///     multipart boundaries are case-sensitive).
    ///   - headers: The entity's parsed headers (lowercased keys).
    ///   - depth: Current multipart nesting level; callers outside the recursion omit it.
    /// - Returns: The first `text/plain` body, the first `text/html` body, and whether
    ///   any part looked like an attachment.
    private static func extractBodies(
        from body: String,
        contentType: String,
        headers: [String: String],
        depth: Int = 0
    ) -> (text: String?, html: String?, hasAttachments: Bool) {
        let ctLower = contentType.lowercased()
        let transferEncoding = headers["content-transfer-encoding"]
        let charset = headerParameter("charset", in: contentType)

        if ctLower.hasPrefix("text/plain") {
            return (decodeBody(body, transferEncoding: transferEncoding, charset: charset), nil, false)
        }
        if ctLower.hasPrefix("text/html") {
            return (nil, decodeBody(body, transferEncoding: transferEncoding, charset: charset), false)
        }

        guard ctLower.hasPrefix("multipart/") else {
            // Unknown content-type, return raw body as text
            return (body.isEmpty ? nil : body, nil, false)
        }

        guard let boundary = headerParameter("boundary", in: contentType) else {
            return (nil, nil, false)
        }

        var textPart: String?
        var htmlPart: String?
        var hasAttachments = false

        for part in splitMultipart(body, boundary: boundary) {
            let (rawPartHeaders, partBody) = splitPartHeadersAndBody(part)
            let partHeaders = parseHeaders(rawPartHeaders)
            let partContentType = partHeaders["content-type"] ?? "text/plain"
            let partCT = partContentType.lowercased()
            let partCTE = partHeaders["content-transfer-encoding"]
            let partCharset = headerParameter("charset", in: partContentType)
            let disposition = (partHeaders["content-disposition"] ?? "").lowercased()

            if disposition.contains("attachment") {
                hasAttachments = true
                continue
            }

            if partCT.hasPrefix("text/plain"), textPart == nil {
                textPart = decodeBody(partBody, transferEncoding: partCTE, charset: partCharset)
            } else if partCT.hasPrefix("text/html"), htmlPart == nil {
                htmlPart = decodeBody(partBody, transferEncoding: partCTE, charset: partCharset)
            } else if partCT.hasPrefix("multipart/") {
                // e.g. multipart/alternative inside multipart/mixed. Keep everything the
                // nested container found: text, HTML and the attachment flag.
                guard depth < maxMultipartDepth else { continue }
                let inner = extractBodies(from: partBody, contentType: partContentType,
                                          headers: partHeaders, depth: depth + 1)
                textPart = textPart ?? inner.text
                htmlPart = htmlPart ?? inner.html
                hasAttachments = hasAttachments || inner.hasAttachments
            } else if !partCT.hasPrefix("text/") && !partCT.isEmpty {
                hasAttachments = true
            }
        }

        return (textPart, htmlPart, hasAttachments)
    }

    /// Split a multipart body into its parts (headers + body, lines joined with `\n`,
    /// each line followed by `\n`).
    ///
    /// Text before the first delimiter (the preamble) and after the closing delimiter
    /// (the epilogue) is discarded. If the closing delimiter is missing — e.g. a
    /// truncated message — the part being collected is still returned.
    private static func splitMultipart(_ body: String, boundary: String) -> [String] {
        let delimiter = "--\(boundary)"
        let endDelimiter = "--\(boundary)--"
        var parts: [String] = []
        var currentPart = ""
        var inPart = false

        func closeCurrentPart() {
            if inPart && !currentPart.isEmpty { parts.append(currentPart) }
            currentPart = ""
        }

        for line in lines(of: body) {
            let marker = line.trimmingCharacters(in: .whitespaces)
            if marker == endDelimiter {
                closeCurrentPart()
                inPart = false
                break
            }
            if marker == delimiter {
                closeCurrentPart()
                inPart = true
                continue
            }
            if inPart {
                currentPart.append(contentsOf: line)
                currentPart.append("\n")
            }
        }
        // No-op after a proper closing delimiter; salvages the tail of a truncated message.
        closeCurrentPart()
        return parts
    }

    /// Undo the `Content-Transfer-Encoding` of a text body and decode it to a string.
    ///
    /// - Returns: `nil` for an empty body; the raw body when the encoding is not
    ///   base64 / quoted-printable or when base64 decoding fails.
    private static func decodeBody(_ body: String, transferEncoding: String?, charset: String?) -> String? {
        guard !body.isEmpty else { return nil }
        let encoding = (transferEncoding ?? "").lowercased().trimmingCharacters(in: .whitespaces)
        switch encoding {
        case "base64":
            let stripped = lines(of: body).joined()
            guard let data = Data(base64Encoded: stripped, options: .ignoreUnknownCharacters) else {
                return body // Return raw if we can't decode
            }
            return decodeText(data, charset: charset)
        case "quoted-printable":
            return decodeText(Data(quotedPrintableBytes(body)), charset: charset)
        default:
            return body
        }
    }

    // MARK: - Charset Handling

    /// Map a MIME charset label to a `String.Encoding`. Returns `nil` for labels
    /// this parser does not recognise (callers then fall back to UTF-8 / Latin-1).
    private static func stringEncoding(forCharset charset: String?) -> String.Encoding? {
        guard let charset else { return nil }
        // RFC 2231 permits a language suffix ("utf-8*en"); only the charset name matters.
        let label = String(charset.lowercased().split(separator: "*").first ?? "")
            .trimmingCharacters(in: CharacterSet(charactersIn: "\"' \t"))
        switch label {
        case "utf-8", "utf8": return .utf8
        case "us-ascii", "ascii": return .ascii
        case "iso-8859-1", "latin1": return .isoLatin1
        case "iso-8859-2", "latin2": return .isoLatin2
        case "windows-1250", "cp1250": return .windowsCP1250
        case "windows-1251", "cp1251": return .windowsCP1251
        case "windows-1252", "cp1252": return .windowsCP1252
        case "windows-1253", "cp1253": return .windowsCP1253
        case "windows-1254", "cp1254": return .windowsCP1254
        case "iso-2022-jp": return .iso2022JP
        case "euc-jp": return .japaneseEUC
        case "shift_jis", "shift-jis", "sjis": return .shiftJIS
        default: return nil
        }
    }

    /// Decode bytes to text: declared charset first, then UTF-8, then Latin-1.
    /// The final lossy UTF-8 step only exists so the function is total.
    private static func decodeText(_ data: Data, charset: String?) -> String {
        if let declared = stringEncoding(forCharset: charset),
           let decoded = String(data: data, encoding: declared) {
            return decoded
        }
        return String(data: data, encoding: .utf8)
            ?? String(data: data, encoding: .isoLatin1)
            ?? String(decoding: data, as: UTF8.self)
    }

    // MARK: - Quoted-Printable

    /// Value of an ASCII hexadecimal digit, or `nil` for any other byte.
    private static func hexValue(_ byte: UInt8) -> UInt8? {
        switch byte {
        case 0x30...0x39: return byte - 0x30        // 0-9
        case 0x41...0x46: return byte - 0x41 + 10   // A-F
        case 0x61...0x66: return byte - 0x61 + 10   // a-f
        default: return nil
        }
    }

    /// Decode quoted-printable text to raw bytes (RFC 2045 §6.7).
    ///
    /// `=XX` (two hex digits) becomes one byte, `=` directly before a line break is a
    /// soft break and is removed together with the break, and a malformed `=` is kept
    /// literally. Returning bytes rather than characters lets multi-byte sequences
    /// such as `=C3=A9` be decoded with the proper charset afterwards.
    ///
    /// - Parameter underscoreAsSpace: Enables the RFC 2047 "Q" variant, where a
    ///   literal `_` stands for a space.
    private static func quotedPrintableBytes(_ input: String, underscoreAsSpace: Bool = false) -> [UInt8] {
        let bytes = Array(input.utf8)
        var output: [UInt8] = []
        output.reserveCapacity(bytes.count)

        var i = 0
        while i < bytes.count {
            let byte = bytes[i]
            if byte == 0x3D { // "="
                if i + 1 < bytes.count, bytes[i + 1] == 0x0A { // "=\n"
                    i += 2
                    continue
                }
                if i + 2 < bytes.count, bytes[i + 1] == 0x0D, bytes[i + 2] == 0x0A { // "=\r\n"
                    i += 3
                    continue
                }
                if i + 2 < bytes.count,
                   let high = hexValue(bytes[i + 1]),
                   let low = hexValue(bytes[i + 2]) {
                    output.append((high << 4) | low)
                    i += 3
                    continue
                }
            } else if underscoreAsSpace, byte == 0x5F { // "_"
                output.append(0x20)
                i += 1
                continue
            }
            output.append(byte)
            i += 1
        }
        return output
    }

    // MARK: - Address Parsing

    /// Parse a comma-separated address list. Commas inside double quotes or inside
    /// `<...>` do not separate entries; `\` escapes the next character inside quotes.
    private static func parseAddressList(_ value: String) -> [MIMEAddress] {
        var addresses: [MIMEAddress] = []
        var current = ""
        var inAngle = false
        var inQuote = false
        var escaped = false

        func commitCurrent() {
            if let address = parseSingleAddress(current.trimmingCharacters(in: .whitespaces)) {
                addresses.append(address)
            }
            current = ""
        }

        for ch in value {
            if escaped {
                escaped = false
            } else if ch == "\\" && inQuote {
                escaped = true
            } else if ch == "\"" {
                inQuote.toggle()
            } else if !inQuote {
                if ch == "<" {
                    inAngle = true
                } else if ch == ">" {
                    inAngle = false
                } else if ch == "," && !inAngle {
                    commitCurrent()
                    continue
                }
            }
            current.append(ch)
        }
        commitCurrent()
        return addresses
    }

    /// Locate the `<` ... `>` pair that delimits the address, skipping anything inside
    /// double quotes. If quote tracking finds nothing (e.g. an unbalanced quote), fall
    /// back to the first `<` and the first `>` after it.
    private static func angleAddressBounds(in raw: String) -> (lt: String.Index, gt: String.Index)? {
        var inQuote = false
        var escaped = false
        var openIndex: String.Index?

        for idx in raw.indices {
            let ch = raw[idx]
            if escaped {
                escaped = false
            } else if inQuote && ch == "\\" {
                escaped = true
            } else if ch == "\"" {
                inQuote.toggle()
            } else if !inQuote {
                if ch == "<" {
                    if openIndex == nil { openIndex = idx }
                } else if ch == ">", let lt = openIndex {
                    return (lt, idx)
                }
            }
        }

        guard let lt = raw.firstIndex(of: "<"),
              let gt = raw[lt...].firstIndex(of: ">") else { return nil }
        return (lt, gt)
    }

    /// Turn the text in front of `<addr>` into a display name: strip one pair of
    /// surrounding quotes (resolving `\` escapes), then RFC 2047 decode.
    /// Returns `nil` when nothing is left.
    private static func displayName(from rawName: Substring) -> String? {
        var name = rawName.trimmingCharacters(in: .whitespaces)
        if name.count >= 2, name.hasPrefix("\""), name.hasSuffix("\"") {
            var unescaped = ""
            var escaped = false
            for ch in name.dropFirst().dropLast() {
                if !escaped && ch == "\\" {
                    escaped = true
                    continue
                }
                escaped = false
                unescaped.append(ch)
            }
            name = unescaped
        } else {
            name = name.trimmingCharacters(in: CharacterSet(charactersIn: "\""))
        }
        let decoded = decodeRFC2047(name).trimmingCharacters(in: .whitespaces)
        return decoded.isEmpty ? nil : decoded
    }

    /// Parse one mailbox: `Name <addr>`, `"Name" <addr>`, `<addr>` or a bare `addr`.
    ///
    /// The angle brackets are located on the RAW header text and only the display
    /// name is RFC 2047 decoded afterwards. Decoding first would let an encoded
    /// display name inject its own `<...>` and replace the real address.
    private static func parseSingleAddress(_ raw: String) -> MIMEAddress? {
        if let bounds = angleAddressBounds(in: raw) {
            let address = raw[raw.index(after: bounds.lt)..<bounds.gt]
                .trimmingCharacters(in: .whitespaces)
            guard !address.isEmpty else { return nil }
            return MIMEAddress(address: address, name: displayName(from: raw[..<bounds.lt]))
        }
        let clean = decodeRFC2047(raw).trimmingCharacters(in: .whitespaces)
        guard !clean.isEmpty else { return nil }
        return MIMEAddress(address: clean, name: nil)
    }

    // MARK: - RFC 2047 Decode

    /// Bytes carried by one encoded-word payload, or `nil` if it cannot be decoded.
    private static func encodedWordBytes(encoding: String, payload: String) -> [UInt8]? {
        switch encoding.uppercased() {
        case "B":
            // Some senders omit the trailing "=" padding that `Data(base64Encoded:)` requires.
            let padding = String(repeating: "=", count: (4 - payload.utf8.count % 4) % 4)
            return Data(base64Encoded: payload + padding).map { Array($0) }
        case "Q":
            return quotedPrintableBytes(payload, underscoreAsSpace: true)
        default:
            return nil
        }
    }

    /// Decode RFC 2047 encoded words: =?charset?encoding?text?=
    ///
    /// - Text outside encoded-words is copied through unchanged.
    /// - Whitespace that only separates two encoded-words is dropped (RFC 2047 §6.2).
    /// - Adjacent encoded-words with the same charset are decoded as one byte stream,
    ///   so a multi-byte character split across two words still decodes.
    /// - An encoded-word whose payload cannot be decoded is left in place verbatim.
    static func decodeRFC2047(_ input: String) -> String {
        // Pattern: =?<charset>?<B|Q>?<encoded>?=
        let pattern = #"=\?([^?]+)\?([BQbq])\?([^?]*)\?="#
        guard input.contains("=?"),
              let regex = try? NSRegularExpression(pattern: pattern, options: []) else {
            return input
        }

        // The output is rebuilt left to right from String.Index ranges, so no
        // UTF-16 / Character offset bookkeeping is needed.
        var result = ""
        var cursor = input.startIndex
        var pendingBytes = Data()
        var pendingCharset: String? // non-nil exactly while a run of encoded-words is open

        func flushPending() {
            guard let charset = pendingCharset else { return }
            result += decodeText(pendingBytes, charset: charset)
            pendingBytes.removeAll()
            pendingCharset = nil
        }

        for match in regex.matches(in: input, range: NSRange(input.startIndex..., in: input)) {
            guard match.numberOfRanges == 4,
                  let wordRange = Range(match.range(at: 0), in: input),
                  let charsetRange = Range(match.range(at: 1), in: input),
                  let encodingRange = Range(match.range(at: 2), in: input),
                  let payloadRange = Range(match.range(at: 3), in: input),
                  let bytes = encodedWordBytes(encoding: String(input[encodingRange]),
                                               payload: String(input[payloadRange]))
            else { continue } // undecodable: stays in the text copied with the next gap

            let charset = String(input[charsetRange]).lowercased()
            let gap = input[cursor..<wordRange.lowerBound]
            let followsEncodedWord = pendingCharset != nil
                && gap.allSatisfy { $0 == " " || $0 == "\t" }

            if followsEncodedWord && pendingCharset == charset {
                pendingBytes.append(contentsOf: bytes)
            } else {
                flushPending()
                if !followsEncodedWord { result.append(contentsOf: gap) }
                pendingBytes.append(contentsOf: bytes)
                pendingCharset = charset
            }
            cursor = wordRange.upperBound
        }

        flushPending()
        result.append(contentsOf: input[cursor...])
        return result
    }

    // MARK: - Misc Helpers

    /// Trim whitespace and remove one leading `<` and one trailing `>` if present.
    private static func cleanAngleBrackets(_ s: String) -> String {
        var result = s.trimmingCharacters(in: .whitespaces)
        if result.hasPrefix("<") { result = String(result.dropFirst()) }
        if result.hasSuffix(">") { result = String(result.dropLast()) }
        return result.trimmingCharacters(in: .whitespaces)
    }

    /// Split a `References` header on whitespace into bracket-less message ids.
    private static func parseReferences(_ raw: String?) -> [String] {
        guard let raw else { return [] }
        return raw.components(separatedBy: .whitespaces)
            .map { cleanAngleBrackets($0) }
            .filter { !$0.isEmpty }
    }

    /// Parse an RFC 5322 `Date` header.
    ///
    /// A trailing parenthesised comment such as `(PDT)` is removed and runs of
    /// spaces/tabs are collapsed before matching. Accepted shapes: optional weekday,
    /// optional seconds, numeric (`+0200`) or named (`GMT`) zone.
    /// Returns `nil` when no format matches.
    private static func parseDate(_ raw: String) -> Date? {
        var candidate = raw
        if let commentStart = candidate.firstIndex(of: "(") {
            candidate = String(candidate[..<commentStart])
        }
        candidate = candidate
            .split(whereSeparator: { $0 == " " || $0 == "\t" })
            .joined(separator: " ")
        guard !candidate.isEmpty else { return nil }

        let formats = [
            "EEE, dd MMM yyyy HH:mm:ss Z",
            "dd MMM yyyy HH:mm:ss Z",
            "EEE, dd MMM yyyy HH:mm:ss z",
            "dd MMM yyyy HH:mm:ss z",
            "EEE, dd MMM yyyy HH:mm Z",
            "dd MMM yyyy HH:mm Z",
        ]
        let formatter = DateFormatter()
        // Fixed locale so month and weekday names are always parsed as English.
        formatter.locale = Locale(identifier: "en_US_POSIX")
        for format in formats {
            formatter.dateFormat = format
            if let date = formatter.date(from: candidate) { return date }
        }
        return nil
    }
}