Skip to content
This repository was archived by the owner on Mar 7, 2026. It is now read-only.

Commit 689a672

Browse files
authored
Refactor HTML parsing in certChecker.swift
Refactor HTML parsing logic to improve readability and maintainability. Simplify extraction of key information and enhance handling of line breaks and whitespace.
1 parent e7d49c2 commit 689a672

1 file changed

Lines changed: 131 additions & 80 deletions

File tree

Sources/prostore/certificates/certChecker.swift

Lines changed: 131 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,6 @@ class CertChecker {
181181
}
182182
}
183183

184-
// MARK: - HTML Parsing
185184
// MARK: - HTML Parsing
186185
static func parseHTML(html: String) -> [String: Any] {
187186
log("=== Starting parseHTML() ===")
@@ -208,105 +207,156 @@ static func parseHTML(html: String) -> [String: Any] {
208207
var divContent = String(html[range])
209208
log("Alert div content length: \(divContent.count) characters")
210209

211-
// Clean up the content
210+
// Clean up the content but preserve line breaks
211+
// First replace <br/> with newlines
212212
divContent = divContent.replacingOccurrences(of: "<br/>", with: "\n", options: .caseInsensitive)
213213
divContent = divContent.replacingOccurrences(of: "<br />", with: "\n", options: .caseInsensitive)
214214
divContent = divContent.replacingOccurrences(of: "&emsp;", with: " ", options: .caseInsensitive)
215215

216-
// Remove HTML tags
216+
// Now remove HTML tags
217217
let tagRegex = try? NSRegularExpression(pattern: "<[^>]+>", options: [])
218218
divContent = tagRegex?.stringByReplacingMatches(in: divContent, range: NSRange(0..<divContent.utf16.count), withTemplate: "") ?? divContent
219219

220220
// Remove emojis and special characters but keep text
221-
let emojiRegex = try? NSRegularExpression(pattern: "[\\🟢🔴]", options: [])
222-
divContent = emojiRegex?.stringByReplacingMatches(in: divContent, range: NSRange(0..<divContent.utf16.count), withTemplate: "") ?? divContent
221+
divContent = divContent.replacingOccurrences(of: "🟢", with: "")
222+
divContent = divContent.replacingOccurrences(of: "🔴", with: "")
223223

224-
// Clean up multiple spaces and newlines
225-
divContent = divContent.replacingOccurrences(of: "\\s+", with: " ", options: .regularExpression)
226-
divContent = divContent.replacingOccurrences(of: "\\n\\s*\\n", with: "\n", options: .regularExpression)
224+
// Split into lines and clean each line
225+
let rawLines = divContent.components(separatedBy: .newlines)
226+
var lines: [String] = []
227227

228-
log("Cleaned alert div content: \(divContent)")
229-
230-
var lines = divContent.components(separatedBy: .newlines)
231-
.map { $0.trimmingCharacters(in: .whitespaces) }
232-
.filter { !$0.isEmpty }
228+
for line in rawLines {
229+
let cleanedLine = line.trimmingCharacters(in: .whitespaces)
230+
if !cleanedLine.isEmpty {
231+
lines.append(cleanedLine)
232+
}
233+
}
233234

234235
log("Found \(lines.count) lines after cleaning")
235-
236-
// SIMPLIFIED PARSING APPROACH
237-
// Instead of complex line-by-line parsing, extract key information directly
238-
239-
// Extract certificate status
240-
if let certStatusRange = divContent.range(of: "Certificate Status:") {
241-
let remaining = divContent[certStatusRange.upperBound...]
242-
if let endOfLine = remaining.firstIndex(of: "\n") {
243-
let status = String(remaining[..<endOfLine]).trimmingCharacters(in: .whitespaces)
244-
var cert = data["certificate"] as! [String: String]
245-
cert["status"] = status
246-
data["certificate"] = cert
247-
log("Certificate Status: \(status)")
236+
log("Lines: \(lines)")
237+
238+
// Create a dictionary to store all parsed data
239+
var parsedDict: [String: String] = [:]
240+
241+
// Parse each line
242+
for line in lines {
243+
// Handle different colon types
244+
let separators = [": ", "："]
245+
for separator in separators {
246+
if line.contains(separator) {
247+
let parts = line.components(separatedBy: separator)
248+
if parts.count >= 2 {
249+
let key = parts[0].trimmingCharacters(in: .whitespaces)
250+
let value = parts[1...].joined(separator: separator).trimmingCharacters(in: .whitespaces)
251+
parsedDict[key] = value
252+
break
253+
}
254+
}
248255
}
249256
}
250257

251-
// Extract certificate matching status
252-
if let matchStatusRange = divContent.range(of: "Certificate Matching Status:") {
253-
let remaining = divContent[matchStatusRange.upperBound...]
254-
if let endOfLine = remaining.firstIndex(of: "\n") {
255-
let status = String(remaining[..<endOfLine]).trimmingCharacters(in: .whitespaces)
256-
data["certificate_matching_status"] = status
257-
log("Certificate Matching Status: \(status)")
258-
}
258+
log("Parsed dictionary: \(parsedDict)")
259+
260+
// Extract certificate info
261+
var cert = data["certificate"] as! [String: String]
262+
if let certName = parsedDict["CertName"] {
263+
cert["name"] = certName
264+
}
265+
if let effDate = parsedDict["Effective Date"] {
266+
cert["effective"] = effDate
267+
}
268+
if let expDate = parsedDict["Expiration Date"] {
269+
cert["expiration"] = expDate
270+
}
271+
if let issuer = parsedDict["Issuer"] {
272+
cert["issuer"] = issuer
273+
}
274+
if let country = parsedDict["Country"] {
275+
cert["country"] = country
276+
}
277+
if let org = parsedDict["Organization"] {
278+
cert["organization"] = org
279+
}
280+
if let numHex = parsedDict["Certificate Number (Hex)"] {
281+
cert["number_hex"] = numHex
259282
}
283+
if let numDec = parsedDict["Certificate Number (Decimal)"] {
284+
cert["number_decimal"] = numDec
285+
}
286+
if let certStatus = parsedDict["Certificate Status"] {
287+
cert["status"] = certStatus
288+
}
289+
data["certificate"] = cert
260290

261-
// Extract effective and expiration dates
262-
if let effDateRange = divContent.range(of: "Effective Date:") {
263-
let remaining = divContent[effDateRange.upperBound...]
264-
if let endOfLine = remaining.firstIndex(of: "\n") {
265-
let date = String(remaining[..<endOfLine]).trimmingCharacters(in: .whitespaces)
266-
var cert = data["certificate"] as! [String: String]
267-
cert["effective"] = date
268-
data["certificate"] = cert
291+
// Extract mobileprovision info
292+
var mp = data["mobileprovision"] as! [String: String]
293+
if let mpName = parsedDict["MP Name"] {
294+
mp["name"] = mpName
295+
}
296+
if let appId = parsedDict["App ID"] {
297+
mp["app_id"] = appId
298+
}
299+
if let identifier = parsedDict["Identifier"] {
300+
mp["identifier"] = identifier
301+
}
302+
if let platform = parsedDict["Platform"] {
303+
mp["platform"] = platform
304+
}
305+
// Look for mobileprovision dates (they might be duplicates from certificate section)
306+
for (key, value) in parsedDict {
307+
let lk = key.lowercased()
308+
if lk.contains("effective date") && !lk.contains("cert") {
309+
mp["effective"] = value
310+
}
311+
if lk.contains("expiration date") && !lk.contains("cert") {
312+
mp["expiration"] = value
269313
}
270314
}
271-
272-
if let expDateRange = divContent.range(of: "Expiration Date:") {
273-
let remaining = divContent[expDateRange.upperBound...]
274-
if let endOfLine = remaining.firstIndex(of: "\n") {
275-
let date = String(remaining[..<endOfLine]).trimmingCharacters(in: .whitespaces)
276-
var cert = data["certificate"] as! [String: String]
277-
cert["expiration"] = date
278-
data["certificate"] = cert
315+
data["mobileprovision"] = mp
316+
317+
// Extract binding certificate info
318+
// Look for "Certificate 1" and related info
319+
var bc1 = data["binding_certificate_1"] as! [String: String]
320+
for (key, value) in parsedDict {
321+
if key.contains("Certificate 1") || key.contains("Certificate Status") {
322+
bc1["status"] = value
323+
} else if key.contains("Certificate Number (Hex)") && (bc1["number_hex"] == nil) {
324+
bc1["number_hex"] = value
325+
} else if key.contains("Certificate Number (Decimal)") && (bc1["number_decimal"] == nil) {
326+
bc1["number_decimal"] = value
279327
}
280328
}
329+
data["binding_certificate_1"] = bc1
281330

282-
// Extract certificate name
283-
if let nameRange = divContent.range(of: "CertName:") {
284-
let remaining = divContent[nameRange.upperBound...]
285-
if let endOfLine = remaining.firstIndex(of: "\n") {
286-
let name = String(remaining[..<endOfLine]).trimmingCharacters(in: .whitespaces)
287-
var cert = data["certificate"] as! [String: String]
288-
cert["name"] = name
289-
data["certificate"] = cert
290-
}
331+
// Extract certificate matching status
332+
if let matchStatus = parsedDict["Certificate Matching Status"] {
333+
data["certificate_matching_status"] = matchStatus
291334
}
292335

293-
// Extract MP name
294-
if let mpNameRange = divContent.range(of: "MP Name:") {
295-
let remaining = divContent[mpNameRange.upperBound...]
296-
if let endOfLine = remaining.firstIndex(of: "\n") {
297-
let name = String(remaining[..<endOfLine]).trimmingCharacters(in: .whitespaces)
298-
var mp = data["mobileprovision"] as! [String: String]
299-
mp["name"] = name
300-
data["mobileprovision"] = mp
336+
// Extract permissions
337+
var perms = data["permissions"] as! [String: String]
338+
let permKeys = [
339+
"Apple Push Notification Service",
340+
"HealthKit",
341+
"VPN",
342+
"Communication Notifications",
343+
"Time-sensitive Notifications"
344+
]
345+
346+
for key in permKeys {
347+
if let value = parsedDict[key] {
348+
perms[key] = value
301349
}
302350
}
351+
data["permissions"] = perms
303352

304-
// Check for key phrases to determine overall status
353+
// Determine overall status
305354
let overallStatus: String
306-
if divContent.contains("🟢Good") || divContent.contains("🟢Match") {
355+
if let certStatus = cert["status"], certStatus.lowercased().contains("good") {
356+
overallStatus = "Valid"
357+
} else if let matchStatus = data["certificate_matching_status"] as? String,
358+
matchStatus.lowercased().contains("match") {
307359
overallStatus = "Valid"
308-
} else if divContent.contains("🔴") {
309-
overallStatus = "Invalid"
310360
} else {
311361
overallStatus = "Unknown"
312362
}
@@ -328,21 +378,22 @@ static func parseHTML(html: String) -> [String: Any] {
328378
log("Attempting fallback parsing...")
329379

330380
// Check for key status indicators in the raw HTML
331-
if html.contains("🟢Good") {
332-
data["overall_status"] = "Valid"
333-
log("Fallback: Found 🟢Good indicator")
334-
} else if html.contains("🔴Mismatch") || html.contains("🔴No Permission") {
335-
data["overall_status"] = "Invalid"
381+
var overallStatus = "Unknown"
382+
if html.contains("🟢Good") && html.contains("🟢Match") {
383+
overallStatus = "Valid"
384+
log("Fallback: Found 🟢Good and 🟢Match indicators")
385+
} else if html.contains("🔴") {
386+
overallStatus = "Invalid"
336387
log("Fallback: Found 🔴 indicator")
337-
} else {
338-
data["overall_status"] = "Unknown"
339-
log("Fallback: No clear status indicators found")
340388
}
341389

390+
data["overall_status"] = overallStatus
391+
log("Fallback Overall Status: \(overallStatus)")
392+
342393
log("=== parseHTML() Completed with Fallback ===")
343394

344395
return ["error": "Could not fully parse certificate info",
345-
"overall_status": data["overall_status"] as? String ?? "Unknown",
396+
"overall_status": overallStatus,
346397
"raw_html_preview": String(html.prefix(1000))]
347398
}
348399

0 commit comments

Comments
 (0)