-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathandroid_app_extractor.py
More file actions
406 lines (324 loc) · 16.2 KB
/
android_app_extractor.py
File metadata and controls
406 lines (324 loc) · 16.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
#!/usr/bin/env python3
"""
Android App Extractor - Extract all installed apps from Android device via ADB
Follows Simplicity First (SF) and Readability Priority (RP) principles.
Generic implementation without hardcoded app mappings for maximum reusability.
"""
import html
import json
import re
import subprocess
import time
from typing import List, Tuple, Set, Dict, Optional

import requests
class AndroidAppExtractor:
    """Extract and process Android app information via ADB.

    The extractor lists installed packages on the first connected device,
    separates third-party apps from system packages by package-name prefix,
    derives a human-friendly name for each package (optionally by looking the
    package up on the Google Play Store web page) and writes the result to a
    Markdown report.
    """

    # Leading package segments that are top-level domains, not developer names.
    _DOMAIN_PREFIXES = ('com', 'org', 'net', 'io', 'app')
    # Trailing package segments too generic to be used as an app name.
    _GENERIC_NAME_PARTS = ('android', 'mobile', 'app', 'main')
    # "<title>App Name - Apps on Google Play</title>"; the lazy group lets the
    # app name itself contain hyphens (e.g. "Wi-Fi Analyzer").
    _TITLE_PATTERN = re.compile(
        r'<title>([^<]+?)\s*-\s*Apps on Google Play</title>')
    # Looser fallback: the first JSON-style "name": "..." pair in the page.
    _JSON_NAME_PATTERN = re.compile(r'"name"\s*:\s*"([^"]+)"')

    def __init__(self, use_play_store_api: bool = True):
        """Initialize the extractor.

        Args:
            use_play_store_api: When True, friendly names are looked up on the
                Play Store web page before falling back to name heuristics.
        """
        # Serial of the selected device; set by check_adb_connection().
        self.device_id: Optional[str] = None
        self.system_packages = self._load_system_packages()
        self.use_play_store_api = use_play_store_api
        # package name -> app name, or None for a cached failed lookup.
        self.play_store_cache: Dict[str, Optional[str]] = {}

    def _load_system_packages(self) -> Set[str]:
        """Return the package-name prefixes that are treated as system apps."""
        return {
            # Core Android system packages
            'android', 'com.android.', 'com.google.android.',
            'com.samsung.android.', 'com.sec.', 'com.qualcomm.', 'com.qti.',
            'com.mediatek.',
            # Carrier and OEM packages (common prefixes)
            'com.google.', 'com.samsung.', 'com.huawei.',
            'com.xiaomi.', 'com.oppo.', 'com.vivo.', 'com.oneplus.',
            'com.lg.', 'com.htc.', 'com.sony.', 'com.motorola.',
        }

    def _adb_command(self, *args: str) -> List[str]:
        """Build an adb argv, targeting the selected device when one is set."""
        if self.device_id:
            return ['adb', '-s', self.device_id, *args]
        return ['adb', *args]

    def _is_system_package(self, package: str) -> bool:
        """Return True if *package* starts with any known system prefix."""
        return package.startswith(tuple(self.system_packages))

    def check_adb_connection(self) -> bool:
        """Check that ADB is available and select the first connected device.

        Returns:
            True when a device in the ``device`` state was found (its serial
            is stored in ``self.device_id``), False otherwise. Problems are
            reported on stdout rather than raised.
        """
        try:
            result = subprocess.run(
                ['adb', 'devices'], capture_output=True, text=True)
        except FileNotFoundError:
            print("Error: ADB not found. Please install Android SDK Platform Tools")
            return False

        if result.returncode != 0:
            print("Error: ADB not found or not working")
            return False

        # The first line is the "List of devices attached" header.
        lines = result.stdout.strip().split('\n')[1:]
        devices = [line.split('\t')[0] for line in lines if '\tdevice' in line]
        if not devices:
            print("Error: No Android devices connected")
            return False

        self.device_id = devices[0]
        print(f"Connected to device: {self.device_id}")
        return True

    def get_all_packages(self) -> List[str]:
        """Return the sorted names of all packages installed on the device.

        Returns an empty list (after printing the reason) if the adb command
        cannot be executed or exits with a non-zero status.
        """
        try:
            result = subprocess.run(
                self._adb_command('shell', 'pm', 'list', 'packages'),
                capture_output=True, text=True)
        except Exception as e:
            # Best effort: report and return nothing rather than crash.
            print(f"Error executing ADB command: {e}")
            return []

        if result.returncode != 0:
            print(f"Error getting packages: {result.stderr}")
            return []

        # Each relevant line looks like "package:com.example.app"; strip only
        # the leading marker so the name itself is never altered.
        prefix = 'package:'
        return sorted(
            line[len(prefix):].strip()
            for line in result.stdout.strip().splitlines()
            if line.startswith(prefix)
        )

    def get_launcher_apps_info(self) -> Dict[str, str]:
        """Map each launcher-visible package to its reported ``sourceDir``.

        Runs ``cmd package query-activities`` for MAIN/LAUNCHER intents and
        pairs every ``packageName=`` line with the next non-empty
        ``sourceDir=`` line. Returns an empty dict on any failure.
        """
        try:
            result = subprocess.run(
                self._adb_command(
                    'shell', 'cmd', 'package', 'query-activities',
                    '-a', 'android.intent.action.MAIN',
                    '-c', 'android.intent.category.LAUNCHER'),
                capture_output=True, text=True)
        except Exception as e:
            print(f"Error getting launcher apps: {e}")
            return {}

        if result.returncode != 0:
            return {}

        launcher_apps: Dict[str, str] = {}
        current_package = None
        for raw_line in result.stdout.split('\n'):
            line = raw_line.strip()
            if line.startswith('packageName='):
                current_package = line.split('=', 1)[1]
            elif line.startswith('sourceDir=') and current_package:
                source_dir = line.split('=', 1)[1]
                if source_dir:
                    launcher_apps[current_package] = source_dir
                    # Wait for the next packageName before recording again.
                    current_package = None
        return launcher_apps

    def get_app_label_from_apk(self, package_name: str, apk_path: str) -> str:
        """Try to read the app label from an APK with on-device ``aapt``.

        Args:
            package_name: Package identifier, used for the fallback name.
            apk_path: Path of the APK on the device.

        Returns:
            The ``application-label`` reported by ``aapt dump badging`` when
            it is non-empty and differs from the package name; otherwise the
            name derived by extract_app_name_from_package().
        """
        try:
            result = subprocess.run(
                self._adb_command('shell', 'aapt', 'dump', 'badging', apk_path),
                capture_output=True, text=True)
        except Exception:
            # aapt lookup is strictly best effort; use the heuristic instead.
            return self.extract_app_name_from_package(package_name)

        if result.returncode == 0:
            for line in result.stdout.split('\n'):
                if line.startswith('application-label:'):
                    # Line format: application-label:'App Name'
                    label = line.split(':', 1)[1].strip().strip("'\"")
                    if label and label != package_name:
                        return label

        return self.extract_app_name_from_package(package_name)

    @classmethod
    def _parse_play_store_name(cls, content: str) -> Optional[str]:
        """Extract the app name from Play Store page HTML, or return None."""
        title_match = cls._TITLE_PATTERN.search(content)
        if title_match:
            # Titles are HTML text, so "&amp;" and friends must be decoded.
            app_name = html.unescape(title_match.group(1)).strip()
            if app_name:
                return app_name

        name_match = cls._JSON_NAME_PATTERN.search(content)
        if name_match:
            return name_match.group(1).strip()
        return None

    def get_app_name_from_play_store(self, package_name: str) -> Optional[str]:
        """Look up an app's display name on the Google Play Store web page.

        Results, including failed lookups, are cached in
        ``self.play_store_cache`` so each package is requested at most once.

        Returns:
            The app name, or None when lookups are disabled, the page could
            not be fetched, or no name could be found in it.
        """
        if not self.use_play_store_api:
            return None

        if package_name in self.play_store_cache:
            return self.play_store_cache[package_name]

        app_name = None
        try:
            response = requests.get(
                "https://play.google.com/store/apps/details",
                params={'id': package_name},
                headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                },
                timeout=10,
            )
            if response.status_code == 200:
                app_name = self._parse_play_store_name(response.text)
        except Exception as e:
            # A failed lookup must never abort the whole extraction run.
            print(f"Error fetching from Play Store for {package_name}: {e}")

        # Negative results are cached too, to avoid repeated requests.
        self.play_store_cache[package_name] = app_name
        return app_name

    def get_third_party_packages(self, all_packages: List[str]) -> List[str]:
        """Return the packages that match no known system prefix.

        The input order is preserved.
        """
        return [
            package for package in all_packages
            if not self._is_system_package(package)
        ]

    def extract_app_name_from_package(self, package_name: str) -> str:
        """Derive a user-friendly app name from a package identifier.

        Uses generic structural rules only (no per-app mappings): a leading
        domain segment such as ``com`` is dropped, the first remaining
        segment is taken as the developer and the last as the app name. A
        generic last segment (``android``, ``app``, ...) is skipped in favour
        of the one before it.

        Examples:
            ``com.discord`` -> ``Discord``
            ``com.spotify.music`` -> ``Music``
            ``com.facebook.katana.android`` -> ``Katana (Facebook)``
        """
        parts = package_name.split('.')

        # Remove common domain prefixes
        if len(parts) >= 2 and parts[0] in self._DOMAIN_PREFIXES:
            parts = parts[1:]

        if len(parts) == 1:
            # Single part after domain (like com.discord -> discord)
            name = parts[0].capitalize()
        else:
            company = parts[0]
            app_name = parts[-1]
            # Skip a generic trailing segment when something better precedes it.
            if app_name.lower() in self._GENERIC_NAME_PARTS and len(parts) > 2:
                app_name = parts[-2]

            company_clean = company.capitalize()
            words = re.sub(r'[^a-zA-Z0-9]', ' ', app_name).split()
            app_name_clean = ' '.join(word.capitalize() for word in words)

            if app_name.lower() in self._GENERIC_NAME_PARTS or not app_name_clean:
                # App name is generic or empty, use company name
                name = company_clean
            elif company.lower() == app_name.lower():
                # Company and app name are the same, just return one
                name = company_clean
            elif len(parts) == 2:
                # Simple two-part package, prefer the second part (app name)
                name = app_name_clean
            elif company_clean.lower() not in app_name_clean.lower():
                # Multi-part package: disambiguate with the company name
                name = f"{app_name_clean} ({company_clean})"
            else:
                name = app_name_clean

        # Degenerate identifiers (e.g. empty segments) can yield an empty name.
        return name or self._fallback_name(package_name)

    def _fallback_name(self, package_name: str) -> str:
        """Build a readable name from every alphanumeric run of 2+ characters.

        Returns *package_name* unchanged when no such run exists.
        """
        clean_name = re.sub(r'[^a-zA-Z0-9]', ' ', package_name)
        words = [word.capitalize() for word in clean_name.split() if len(word) > 1]
        return ' '.join(words) if words else package_name

    def get_enhanced_app_info(self, packages: List[str],
                              max_api_requests: int = 50) -> List[Tuple[str, str]]:
        """Resolve a friendly name for every package.

        Non-system packages are looked up on the Play Store first (when
        enabled); every package without a Play Store name falls back to
        extract_app_name_from_package().

        Args:
            packages: Package identifiers to resolve.
            max_api_requests: Upper bound on Play Store lookups per call, to
                avoid rate limiting. Later packages use the fallback name.

        Returns:
            ``(package, friendly_name)`` tuples in the order of *packages*.
        """
        print("Getting enhanced app information...")

        results = []
        api_requests_made = 0
        total = len(packages)

        for i, package in enumerate(packages):
            if i % 10 == 0:  # Progress indicator
                print(f"Processing {i+1}/{total} packages...")

            friendly_name = None
            if (self.use_play_store_api
                    and api_requests_made < max_api_requests
                    and not self._is_system_package(package)):
                friendly_name = self.get_app_name_from_play_store(package)
                api_requests_made += 1
                # Rate limiting - pause briefly after every fifth lookup
                if api_requests_made % 5 == 0:
                    time.sleep(1)

            if not friendly_name:
                friendly_name = self.extract_app_name_from_package(package)

            results.append((package, friendly_name))

        if api_requests_made > 0:
            print(f"Made {api_requests_made} Play Store API requests")
        return results

    @staticmethod
    def _escape_table_cell(text: str) -> str:
        """Escape pipe characters so *text* stays inside one Markdown cell."""
        return text.replace('|', '\\|')

    def save_results(self, packages: List[str], filename: str,
                     title: str) -> List[Tuple[str, str]]:
        """Write a Markdown report (table plus plain name list) for *packages*.

        Args:
            packages: Package identifiers to report on.
            filename: Path of the Markdown file to (over)write.
            title: Heading placed at the top of the report.

        Returns:
            ``(package, friendly_name)`` tuples sorted case-insensitively by
            friendly name, as written to the file.
        """
        results = self.get_enhanced_app_info(packages)
        results.sort(key=lambda item: item[1].lower())

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(f"# {title}\n\n")
            f.write(f"Total apps: {len(results)}\n\n")

            # Table format
            f.write("| App Name | Package Name |\n")
            f.write("|----------|-------------|\n")
            for package, friendly_name in results:
                f.write(f"| {self._escape_table_cell(friendly_name)} | {package} |\n")

            # Simple list
            f.write("\n## App Names List\n\n")
            for _package, friendly_name in results:
                f.write(f"{friendly_name}\n")

        print(f"Results saved to: {filename}")
        return results
def main():
    """Run the full extraction: list packages, then write both reports.

    Writes ``third_party_android_apps.md`` (names resolved with Play Store
    lookups) and ``all_android_apps.md`` (lookups disabled), then prints a
    summary and a sample of up to ten third-party app names. Returns early
    if no device is connected or no packages are found.
    """
    # Play Store lookups start enabled; they are switched off further down.
    extractor = AndroidAppExtractor(use_play_store_api=True)

    print("Android App Extractor with Google Play Store Integration")
    print("=" * 60)

    # Nothing can be done without a connected device.
    if not extractor.check_adb_connection():
        return

    print("\nExtracting all installed packages...")
    all_packages = extractor.get_all_packages()
    if not all_packages:
        print("No packages found or error occurred")
        return
    total_count = len(all_packages)
    print(f"Found {total_count} total packages")

    # Filter first so Play Store lookups are only spent on 3rd party apps.
    print("\nFiltering 3rd party apps...")
    third_party_packages = extractor.get_third_party_packages(all_packages)
    third_party_count = len(third_party_packages)
    print(f"Found {third_party_count} 3rd party apps")

    print("\nProcessing 3rd party apps with Google Play Store lookup...")
    third_party_results = extractor.save_results(
        third_party_packages,
        "third_party_android_apps.md",
        "3rd Party Android Apps (with Play Store names)",
    )

    # The full list includes system apps; skip lookups there to save time.
    print("\nProcessing all packages...")
    extractor.use_play_store_api = False
    extractor.save_results(
        all_packages,
        "all_android_apps.md",
        "All Installed Android Apps",
    )

    print("\nSummary:")
    print(f"- Total apps: {total_count}")
    print(f"- 3rd party apps: {third_party_count}")
    print(f"- System apps: {total_count - third_party_count}")

    # Show at most ten resolved names as a quick sanity check.
    sample_size = 10
    print("\nSample of 3rd party apps with Play Store names:")
    for _package, friendly_name in third_party_results[:sample_size]:
        print(f"  {friendly_name}")
    remaining = len(third_party_results) - sample_size
    if remaining > 0:
        print(f"  ... and {remaining} more")


if __name__ == "__main__":
    main()