383 lines
16 KiB
Swift
383 lines
16 KiB
Swift
import Foundation
|
|
|
|
// MARK: - Output Types
|
|
|
|
/// The result of parsing a raw MIME message.
///
/// The shape matches what `mailparser`'s `simpleParser` produces on the server
/// side. The properties mirror the columns used by `processMessage()` in
/// `icloud-sync.service.ts`: `messageId`, `subject`, `from`, `to`, `cc`, `text`,
/// `html`, `date`, `inReplyTo` and `references`. `replyTo` and `hasAttachments`
/// are carried in addition to that list.
public struct ParsedMIMEMessage: Sendable {
    /// `Message-ID` header value without the surrounding angle brackets.
    public let messageId: String?

    /// `Subject` header with RFC 2047 encoded words decoded.
    public let subject: String?

    /// Mailbox parsed from the `From` header.
    public let from: MIMEAddress?

    /// Mailboxes parsed from the `To` header; empty when the header is absent.
    public let to: [MIMEAddress]

    /// Mailboxes parsed from the `Cc` header; empty when the header is absent.
    public let cc: [MIMEAddress]

    /// Mailboxes parsed from the `Reply-To` header; empty when the header is absent.
    public let replyTo: [MIMEAddress]

    /// `In-Reply-To` header value without the surrounding angle brackets.
    public let inReplyTo: String?

    /// Message identifiers listed in the `References` header, brackets removed.
    public let references: [String]

    /// Parsed `Date` header, or `nil` when it is missing or unparseable.
    public let date: Date?

    /// Plain-text body, if one was found.
    public let text: String?

    /// HTML body, if one was found.
    public let html: String?

    /// `true` when a part marked as an attachment, or a non-text part, was seen.
    public let hasAttachments: Bool
}
|
|
|
|
/// A single mailbox: an e-mail address plus an optional display name.
public struct MIMEAddress: Sendable {
    /// The e-mail address itself.
    public let address: String

    /// Display name attached to the address, or `nil` when there is none.
    public let name: String?
}
|
|
|
|
// MARK: - Parser
|
|
|
|
/// A minimal MIME parser that produces `ParsedMIMEMessage` from raw RFC 5322 source.
|
|
///
|
|
/// Covers the fields needed to match `simpleParser` output shape used in
|
|
/// `icloud-sync.service.ts`. Does NOT decode nested MIME parts beyond the primary
|
|
/// text/html bodies — attachment handling is deferred to server-side processing.
|
|
///
|
|
/// Implementation notes:
|
|
/// - Headers are RFC 2047 decoded (Q-encoding and B-encoding).
|
|
/// - Multipart/alternative bodies are walked to extract text/plain and text/html.
|
|
/// - Only flat multipart is supported — deeply nested multipart (e.g. multipart/related
|
|
/// wrapping multipart/alternative) extracts the first matching body part.
|
|
public struct MIMEParser {
|
|
|
|
// MARK: - Public API
|
|
|
|
/// Parse a raw RFC 5322 message into `ParsedMIMEMessage`.
///
/// - Parameter source: The complete message source (headers, blank line, body).
/// - Returns: The parsed message. Header-derived fields are `nil` or empty when
///   the corresponding header is absent.
public static func parse(_ source: String) -> ParsedMIMEMessage {
    let sections = splitHeadersAndBody(source)
    let headers = parseHeaders(sections.headers)

    // A message without a Content-Type header is treated as plain text.
    let bodies = extractBodies(
        from: sections.body,
        contentType: headers["content-type"] ?? "text/plain",
        headers: headers,
        fullSource: source
    )

    let messageId = headers["message-id"].map(cleanAngleBrackets)
    let subject = headers["subject"].map(decodeRFC2047)
    let sender = headers["from"].flatMap(parseSingleAddress)
    let toList = headers["to"].map(parseAddressList) ?? []
    let ccList = headers["cc"].map(parseAddressList) ?? []
    let replyToList = headers["reply-to"].map(parseAddressList) ?? []
    let inReplyTo = headers["in-reply-to"].map(cleanAngleBrackets)
    let sentAt = headers["date"].flatMap(parseDate)

    return ParsedMIMEMessage(
        messageId: messageId,
        subject: subject,
        from: sender,
        to: toList,
        cc: ccList,
        replyTo: replyToList,
        inReplyTo: inReplyTo,
        references: parseReferences(headers["references"]),
        date: sentAt,
        text: bodies.text,
        html: bodies.html,
        hasAttachments: bodies.hasAttachments
    )
}
|
|
|
|
// MARK: - Header Parsing
|
|
|
|
/// Split a message (or a MIME part) into its header block and its body.
///
/// Headers end at the first blank line, which may be written as `\r\n\r\n` or
/// `\n\n`. Whichever separator occurs *first* wins, so an LF-separated header
/// block followed by a body that happens to contain `\r\n\r\n` is still split
/// at the end of the headers rather than somewhere inside the body.
///
/// - Parameter source: Raw message or part text.
/// - Returns: The header block and the body, both without the separating blank
///   line. When no blank line exists, everything is returned as headers and the
///   body is empty.
private static func splitHeadersAndBody(_ source: String) -> (headers: String, body: String) {
    let crlfBreak = source.range(of: "\r\n\r\n")
    let lfBreak = source.range(of: "\n\n")

    let separator: Range<String.Index>?
    switch (crlfBreak, lfBreak) {
    case let (crlf?, lf?):
        separator = crlf.lowerBound < lf.lowerBound ? crlf : lf
    case let (crlf?, nil):
        separator = crlf
    case let (nil, lf?):
        separator = lf
    case (nil, nil):
        separator = nil
    }

    guard let headerEnd = separator else {
        return (source, "")
    }
    return (String(source[..<headerEnd.lowerBound]),
            String(source[headerEnd.upperBound...]))
}
|
|
|
|
/// Turn a raw header block into a dictionary keyed by lowercased header name.
///
/// Lines that begin with a space or tab are treated as folded continuations and
/// are joined to the previous value with a single space. Lines that contain no
/// colon are ignored, headers whose value ends up empty are dropped, and a
/// repeated header name overwrites the earlier value.
private static func parseHeaders(_ block: String) -> [String: String] {
    var collected: [String: String] = [:]
    var pendingName: String?
    var pendingValue = ""

    // Store the header gathered so far, unless there is none or it has no value.
    func commitPending() {
        guard let name = pendingName, !pendingValue.isEmpty else { return }
        collected[name] = pendingValue.trimmingCharacters(in: .whitespaces)
    }

    for rawLine in block.components(separatedBy: .newlines) where !rawLine.isEmpty {
        let leading = rawLine.unicodeScalars.first?.value ?? 0
        let isContinuation = leading == 0x20 || leading == 0x09

        if isContinuation {
            pendingValue += " " + rawLine.trimmingCharacters(in: .whitespaces)
            continue
        }
        guard let colon = rawLine.firstIndex(of: ":") else { continue }

        // A new header starts here, so the previous one is complete.
        commitPending()
        pendingName = String(rawLine[..<colon])
            .lowercased()
            .trimmingCharacters(in: .whitespaces)
        pendingValue = String(rawLine[rawLine.index(after: colon)...])
            .trimmingCharacters(in: .whitespaces)
    }

    commitPending()
    return collected
}
|
|
|
|
// MARK: - Body Extraction
|
|
|
|
/// Extract the plain-text and HTML bodies from a message or multipart entity.
///
/// - Single-part `text/plain` and `text/html` bodies are decoded according to
///   the `content-transfer-encoding` entry in `headers`.
/// - Any other non-multipart content type returns the raw body as text.
/// - `multipart/*` bodies are split on their boundary. The first `text/plain`
///   and the first `text/html` part win. Parts with an attachment disposition,
///   or with a non-text content type, set `hasAttachments`.
/// - Nested multipart parts are processed recursively. Their text, HTML and
///   attachment flag are all merged into the result, so the common
///   `multipart/mixed` wrapping `multipart/alternative` layout keeps its HTML.
///
/// - Parameters:
///   - body: The entity body (everything after the header block).
///   - contentType: The entity's `Content-Type` value in its original case,
///     because boundary strings are case-sensitive.
///   - headers: Lowercased-key headers of the entity.
///   - fullSource: Currently unused; kept so the signature stays stable.
/// - Returns: The decoded text and HTML bodies (either may be `nil`) and
///   whether any attachment-like part was seen.
private static func extractBodies(
    from body: String,
    contentType: String,
    headers: [String: String],
    fullSource: String
) -> (text: String?, html: String?, hasAttachments: Bool) {
    let ctLower = contentType.lowercased()
    let transferEncoding = headers["content-transfer-encoding"]

    if ctLower.hasPrefix("text/plain") {
        return (decodeBody(body, transferEncoding: transferEncoding), nil, false)
    }
    if ctLower.hasPrefix("text/html") {
        return (nil, decodeBody(body, transferEncoding: transferEncoding), false)
    }

    guard ctLower.hasPrefix("multipart/") else {
        // Unknown content-type, return raw body as text
        return (body.isEmpty ? nil : body, nil, false)
    }

    guard let boundary = extractBoundary(contentType) else {
        return (nil, nil, false)
    }

    var textPart: String?
    var htmlPart: String?
    var hasAttachments = false

    for part in splitMultipart(body, boundary: boundary) {
        let (partHeaders, partBody) = splitHeadersAndBody(part)
        let partParsed = parseHeaders(partHeaders)
        let partCT = (partParsed["content-type"] ?? "text/plain").lowercased()
        let partCTE = partParsed["content-transfer-encoding"]
        let disposition = (partParsed["content-disposition"] ?? "").lowercased()

        if disposition.contains("attachment") {
            hasAttachments = true
            continue
        }

        if partCT.hasPrefix("text/plain"), textPart == nil {
            textPart = decodeBody(partBody, transferEncoding: partCTE)
        } else if partCT.hasPrefix("text/html"), htmlPart == nil {
            htmlPart = decodeBody(partBody, transferEncoding: partCTE)
        } else if partCT.hasPrefix("multipart/") {
            // Recurse into the nested container and merge everything it found.
            // Pass the original-case Content-Type so the boundary is intact.
            let nested = extractBodies(
                from: partBody,
                contentType: partParsed["content-type"] ?? partCT,
                headers: partParsed,
                fullSource: partBody
            )
            if textPart == nil { textPart = nested.text }
            if htmlPart == nil { htmlPart = nested.html }
            if nested.hasAttachments { hasAttachments = true }
        } else if !partCT.hasPrefix("text/") && !partCT.isEmpty {
            hasAttachments = true
        }
    }

    return (textPart, htmlPart, hasAttachments)
}
|
|
|
|
/// Pull the `boundary` parameter out of a `Content-Type` header value.
///
/// Accepts both `boundary="..."` and `boundary=...`; the parameter name is
/// matched case-insensitively and a surrounding pair of double quotes is
/// removed.
///
/// - Returns: The boundary string, or `nil` when no `boundary=` parameter exists.
private static func extractBoundary(_ contentType: String) -> String? {
    let parameterName = "boundary="

    let parameters = contentType
        .components(separatedBy: ";")
        .map { $0.trimmingCharacters(in: .whitespaces) }

    guard let match = parameters.first(where: { $0.lowercased().hasPrefix(parameterName) }) else {
        return nil
    }

    let value = String(match.dropFirst(parameterName.count))
        .trimmingCharacters(in: .whitespaces)
    let isQuoted = value.hasPrefix("\"") && value.hasSuffix("\"")
    return isQuoted ? String(value.dropFirst().dropLast()) : value
}
|
|
|
|
/// Split a multipart body into its raw parts (each with its own headers and body).
///
/// Line endings are normalized to `\n` before splitting. Splitting on the
/// `.newlines` character set treats the `\r` and the `\n` of a CRLF as two
/// separators, which inserts an empty line after every real line; the part's
/// header block then appears to end after its first header.
///
/// Text before the first delimiter (preamble) and after the closing delimiter
/// (epilogue) is ignored. If the closing delimiter is missing, for example in a
/// truncated message, the part collected so far is still returned.
///
/// - Parameters:
///   - body: The multipart entity body.
///   - boundary: The boundary string without the leading `--`.
/// - Returns: The non-empty parts in order, each line terminated by `\n`.
private static func splitMultipart(_ body: String, boundary: String) -> [String] {
    let delimiter = "--\(boundary)"
    let endDelimiter = "--\(boundary)--"

    let normalized = body
        .replacingOccurrences(of: "\r\n", with: "\n")
        .replacingOccurrences(of: "\r", with: "\n")
    var lines = normalized.components(separatedBy: "\n")
    // A trailing line break yields one empty final component; it is not a line.
    if lines.last == "" { lines.removeLast() }

    var parts: [String] = []
    var currentPart = ""
    var inPart = false

    for line in lines {
        // Delimiter lines may carry trailing transport padding.
        let marker = line.trimmingCharacters(in: .whitespaces)
        if marker == endDelimiter {
            if inPart && !currentPart.isEmpty { parts.append(currentPart) }
            currentPart = ""
            inPart = false
            break
        }
        if marker == delimiter {
            if inPart && !currentPart.isEmpty { parts.append(currentPart) }
            currentPart = ""
            inPart = true
            continue
        }
        if inPart { currentPart += line + "\n" }
    }

    // No closing delimiter was seen: keep the unterminated final part.
    if inPart && !currentPart.isEmpty { parts.append(currentPart) }
    return parts
}
|
|
|
|
/// Decode a body according to its `Content-Transfer-Encoding`.
///
/// - Parameters:
///   - body: The raw body text.
///   - transferEncoding: The header value; compared case-insensitively after
///     trimming whitespace. `nil` is treated like an unrecognized encoding.
/// - Returns: `nil` for an empty body. For `base64`, the decoded bytes read as
///   UTF-8 and then as ISO Latin-1, or the raw body if neither works. For
///   `quoted-printable`, the decoded text. Otherwise the body unchanged.
private static func decodeBody(_ body: String, transferEncoding: String?) -> String? {
    if body.isEmpty { return nil }

    let mechanism = (transferEncoding ?? "")
        .lowercased()
        .trimmingCharacters(in: .whitespaces)

    switch mechanism {
    case "base64":
        // Base64 payloads are line-wrapped; glue the lines back together first.
        let payload = body.components(separatedBy: .newlines).joined()
        guard let bytes = Data(base64Encoded: payload, options: .ignoreUnknownCharacters) else {
            return body // Return raw if we can't decode
        }
        return String(data: bytes, encoding: .utf8)
            ?? String(data: bytes, encoding: .isoLatin1)
            ?? body
    case "quoted-printable":
        return decodeQuotedPrintable(body)
    default:
        return body
    }
}
|
|
|
|
// MARK: - Quoted-Printable
|
|
|
|
/// Decode quoted-printable text (RFC 2045 §6.7).
///
/// Soft line breaks (`=` immediately followed by a line break) are removed, and
/// each `=XY` escape, where `X` and `Y` are hex digits, becomes the byte `0xXY`.
/// The resulting byte sequence is interpreted as UTF-8, falling back to ISO
/// Latin-1 when it is not valid UTF-8, so multi-byte characters such as
/// `=C3=A9` decode to a single character.
///
/// An `=` that is not followed by two hex digits is copied through unchanged.
///
/// - Parameter input: The quoted-printable encoded text.
/// - Returns: The decoded text.
private static func decodeQuotedPrintable(_ input: String) -> String {
    // Map an ASCII hex digit to its numeric value.
    func hexValue(_ byte: UInt8) -> UInt8? {
        switch byte {
        case 0x30...0x39: return byte - 0x30          // "0"..."9"
        case 0x41...0x46: return byte - 0x41 + 10     // "A"..."F"
        case 0x61...0x66: return byte - 0x61 + 10     // "a"..."f"
        default: return nil
        }
    }

    // Join soft-line-breaks (= at end of line) before processing
    let joined = input
        .replacingOccurrences(of: "=\r\n", with: "")
        .replacingOccurrences(of: "=\n", with: "")

    let source = Array(joined.utf8)
    var decoded: [UInt8] = []
    decoded.reserveCapacity(source.count)

    var index = 0
    while index < source.count {
        let byte = source[index]
        if byte == 0x3D, index + 2 < source.count,   // "=" followed by two more bytes
           let high = hexValue(source[index + 1]),
           let low = hexValue(source[index + 2]) {
            decoded.append((high << 4) | low)
            index += 3
        } else {
            decoded.append(byte)
            index += 1
        }
    }

    return String(bytes: decoded, encoding: .utf8)
        ?? String(bytes: decoded, encoding: .isoLatin1)
        ?? joined
}
|
|
|
|
// MARK: - Address Parsing
|
|
|
|
/// Parse a comma-separated address header (`To`, `Cc`, `Reply-To`).
///
/// Commas only act as separators when they are outside angle brackets and
/// outside double quotes, so `"Doe, Jane" <jane@example.com>` stays one entry.
/// Entries that `parseSingleAddress` cannot parse are skipped.
private static func parseAddressList(_ value: String) -> [MIMEAddress] {
    var parsed: [MIMEAddress] = []
    var buffer = ""
    var insideAngle = false
    var insideQuote = false

    // Parse whatever has been buffered as one address and start a new entry.
    func flush() {
        let candidate = buffer.trimmingCharacters(in: .whitespaces)
        if let address = parseSingleAddress(candidate) {
            parsed.append(address)
        }
        buffer = ""
    }

    for character in value {
        switch character {
        case "," where !insideAngle && !insideQuote:
            flush()
            continue
        case "<":
            insideAngle = true
        case ">":
            insideAngle = false
        case "\"":
            insideQuote.toggle()
        default:
            break
        }
        buffer.append(character)
    }

    flush()
    return parsed
}
|
|
|
|
/// Parse one mailbox of the form `Display Name <addr@host>` or `addr@host`.
///
/// The angle-bracket structure is located in the *raw* value first, and only
/// the display name is RFC 2047 decoded afterwards. Decoding before locating
/// the brackets would let an encoded display name that contains `<` or `>`
/// replace the real address. The last `<` and the first `>` after it are used,
/// so a display name containing `<` does not confuse the split.
///
/// - Parameter raw: One address entry from a header value.
/// - Returns: The mailbox, or `nil` when the entry is empty or the angle
///   brackets enclose nothing. Without a bracket pair the whole decoded,
///   trimmed value is used as the address.
private static func parseSingleAddress(_ raw: String) -> MIMEAddress? {
    if let open = raw.range(of: "<", options: .backwards),
       let close = raw.range(of: ">", range: open.upperBound..<raw.endIndex) {
        let address = String(raw[open.upperBound..<close.lowerBound])
            .trimmingCharacters(in: .whitespaces)
        guard !address.isEmpty else { return nil }

        let name = decodeRFC2047(String(raw[..<open.lowerBound]))
            .trimmingCharacters(in: .whitespaces)
            .trimmingCharacters(in: CharacterSet(charactersIn: "\""))
        return MIMEAddress(address: address, name: name.isEmpty ? nil : name)
    }

    let bare = decodeRFC2047(raw).trimmingCharacters(in: .whitespaces)
    guard !bare.isEmpty else { return nil }
    return MIMEAddress(address: bare, name: nil)
}
|
|
|
|
// MARK: - RFC 2047 Decode
|
|
|
|
/// Decode RFC 2047 encoded words: =?charset?encoding?text?=
///
/// The output is assembled left to right from the literal text between
/// encoded words and the decoded words themselves, so no range arithmetic on a
/// partially rewritten string is needed. This keeps the result correct when
/// decoded text contains characters outside the Basic Multilingual Plane.
///
/// - Both `B` (base64) and `Q` encodings are decoded to bytes and then
///   interpreted with the declared charset. `utf-8` and `iso-8859-1` are
///   recognized and anything else is tried as UTF-8. If that interpretation
///   fails, ISO Latin-1 is used as a fallback.
/// - Whitespace that separates two adjacent encoded words is dropped, as
///   RFC 2047 §6.2 requires.
/// - Encoded words that cannot be decoded (for example invalid base64) are left
///   in the output unchanged.
///
/// - Parameter input: An unfolded header value.
/// - Returns: The value with all decodable encoded words replaced.
static func decodeRFC2047(_ input: String) -> String {
    // Pattern: =?<charset>?<B|Q>?<encoded>?=
    let pattern = #"=\?([^?]+)\?([BQbq])\?([^?]*)\?="#
    guard let regex = try? NSRegularExpression(pattern: pattern, options: []) else {
        return input
    }
    let matches = regex.matches(in: input, range: NSRange(input.startIndex..., in: input))
    if matches.isEmpty { return input }

    // Map an ASCII hex digit to its numeric value.
    func hexValue(_ byte: UInt8) -> UInt8? {
        switch byte {
        case 0x30...0x39: return byte - 0x30          // "0"..."9"
        case 0x41...0x46: return byte - 0x41 + 10     // "A"..."F"
        case 0x61...0x66: return byte - 0x61 + 10     // "a"..."f"
        default: return nil
        }
    }

    // Decode the "Q" encoding to raw bytes: "_" is a space, "=XY" is a byte.
    func decodeQ(_ text: Substring) -> Data {
        let source = Array(text.utf8)
        var bytes = Data(capacity: source.count)
        var index = 0
        while index < source.count {
            let byte = source[index]
            if byte == 0x5F {                           // "_"
                bytes.append(0x20)
                index += 1
            } else if byte == 0x3D, index + 2 < source.count,   // "="
                      let high = hexValue(source[index + 1]),
                      let low = hexValue(source[index + 2]) {
                bytes.append((high << 4) | low)
                index += 3
            } else {
                bytes.append(byte)
                index += 1
            }
        }
        return bytes
    }

    var result = ""
    var cursor = input.startIndex
    var previousWordDecoded = false

    for match in matches {
        guard match.numberOfRanges == 4,
              let fullRange = Range(match.range(at: 0), in: input),
              let charsetRange = Range(match.range(at: 1), in: input),
              let encodingRange = Range(match.range(at: 2), in: input),
              let textRange = Range(match.range(at: 3), in: input) else { continue }

        let charset = input[charsetRange].lowercased()
        let declaredEncoding: String.Encoding = charset.contains("utf-8") ? .utf8 :
            charset.contains("iso-8859-1") ? .isoLatin1 : .utf8

        let payload = input[textRange]
        let payloadBytes: Data? = input[encodingRange].uppercased() == "B"
            ? Data(base64Encoded: String(payload))
            : decodeQ(payload)

        // An undecodable word is skipped without moving the cursor, so it is
        // copied verbatim as part of the next literal segment or the tail.
        guard let bytes = payloadBytes,
              let decoded = String(data: bytes, encoding: declaredEncoding)
                ?? String(data: bytes, encoding: .isoLatin1) else { continue }

        let gap = input[cursor..<fullRange.lowerBound]
        let separatesEncodedWords = previousWordDecoded
            && gap.allSatisfy { $0 == " " || $0 == "\t" }
        if !separatesEncodedWords {
            result.append(contentsOf: gap)
        }
        result.append(decoded)
        cursor = fullRange.upperBound
        previousWordDecoded = true
    }

    result.append(contentsOf: input[cursor...])
    return result
}
|
|
|
|
// MARK: - Misc Helpers
|
|
|
|
/// Strip surrounding whitespace and one enclosing `<` / `>` pair from a value
/// such as a message identifier.
///
/// Each bracket is removed independently: a leading `<` is dropped even when
/// there is no trailing `>`, and vice versa.
private static func cleanAngleBrackets(_ s: String) -> String {
    var core = Substring(s.trimmingCharacters(in: .whitespaces))
    if core.hasPrefix("<") {
        core = core.dropFirst()
    }
    if core.hasSuffix(">") {
        core = core.dropLast()
    }
    return core.trimmingCharacters(in: .whitespaces)
}
|
|
|
|
/// Split a `References` header value into individual message identifiers.
///
/// The value is split on whitespace; each token has its angle brackets removed
/// and empty tokens are discarded.
///
/// - Parameter raw: The header value, or `nil` when the header is absent.
/// - Returns: The identifiers in header order; empty when `raw` is `nil`.
private static func parseReferences(_ raw: String?) -> [String] {
    guard let raw else { return [] }

    var identifiers: [String] = []
    for token in raw.components(separatedBy: .whitespaces) {
        let identifier = cleanAngleBrackets(token)
        if !identifier.isEmpty {
            identifiers.append(identifier)
        }
    }
    return identifiers
}
|
|
|
|
/// Parse an RFC 5322 `Date` header value.
///
/// Parenthesized comments such as the trailing `(UTC)` or `(PDT)` that many
/// mail systems append are removed before parsing; left in place they make
/// every format fail. Accepted layouts are the day-of-week and bare forms, with
/// a numeric (`+0000`) or named (`GMT`) zone, and with or without seconds.
///
/// - Parameter raw: The header value.
/// - Returns: The parsed date, or `nil` when no supported layout matches.
private static func parseDate(_ raw: String) -> Date? {
    let cleaned = raw
        .replacingOccurrences(of: #"\s*\([^()]*\)"#, with: "", options: .regularExpression)
        .trimmingCharacters(in: .whitespacesAndNewlines)
    guard !cleaned.isEmpty else { return nil }

    let formats = [
        "EEE, dd MMM yyyy HH:mm:ss Z",
        "dd MMM yyyy HH:mm:ss Z",
        "EEE, dd MMM yyyy HH:mm:ss z",
        "dd MMM yyyy HH:mm:ss z",
        "EEE, dd MMM yyyy HH:mm Z",
        "dd MMM yyyy HH:mm Z",
    ]

    let formatter = DateFormatter()
    // A fixed POSIX locale keeps month and weekday names independent of the
    // user's locale settings.
    formatter.locale = Locale(identifier: "en_US_POSIX")
    for format in formats {
        formatter.dateFormat = format
        if let date = formatter.date(from: cleaned) { return date }
    }
    return nil
}
|
|
}
|