-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhn.py
More file actions
123 lines (96 loc) · 3.82 KB
/
hn.py
File metadata and controls
123 lines (96 loc) · 3.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python3
import requests
from datetime import datetime, timedelta, timezone, date
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any
# Root logging configuration: only WARNING and above are emitted, so the
# logger.debug()/logger.info() calls in this module stay silent unless the
# level is lowered here.
logging.basicConfig(
level=logging.WARN, format="%(asctime)s [%(levelname)s] %(message)s"
)
logger = logging.getLogger(__name__)
# URL template for a single item; "{}" is filled with the item ID in get_item().
BASE_ITEM_URL = "https://hacker-news.firebaseio.com/v0/item/{}.json"
# Endpoint fetched by get_topstories_ids() to obtain the list of story IDs.
TOPSTORIES_URL = "https://hacker-news.firebaseio.com/v0/topstories.json"
# URL template for the link printed per story; "{}" is filled with the item ID
# in format_output().
HN_COMMENT_URL = "https://news.ycombinator.com/item?id={}"
# One Session object reused for every HTTP request made by this module,
# including the requests issued from the worker threads in
# get_topstories_items().
session = requests.Session()
def get_item(item_id: int, timeout: float = 10.0) -> dict[str, Any] | None:
    """Fetch a single Hacker News item by ID.

    Args:
        item_id: Numeric ID substituted into ``BASE_ITEM_URL``.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout unless one is passed, so without it a stalled
            connection would block the calling (worker) thread indefinitely.

    Returns:
        The decoded JSON body of the response (``None`` if the body is the
        JSON literal ``null``), or ``None`` when the request fails, the
        server answers with an error status, or the body is not valid JSON.
    """
    url = BASE_ITEM_URL.format(item_id)
    try:
        response = session.get(url, timeout=timeout)
        response.raise_for_status()
        payload = response.json()
    # ValueError is caught as well because older requests releases raise a
    # plain ValueError (not a RequestException) on an undecodable body.
    except (requests.RequestException, ValueError) as e:
        logger.warning(f"Failed to fetch item {item_id}: {e}")
        return None
    logger.debug(f"Fetched item {item_id}")
    return payload
def get_topstories_ids(timeout: float = 10.0) -> list[int]:
    """Fetch the list of story IDs from ``TOPSTORIES_URL``.

    Args:
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout unless one is passed, so without it a stalled
            connection would hang the whole program.

    Returns:
        The decoded list of IDs in the order the API returned them, or an
        empty list when the request fails, the body is not valid JSON, or the
        decoded payload is not a list.
    """
    try:
        logger.debug("Fetching topstories IDs")
        response = session.get(TOPSTORIES_URL, timeout=timeout)
        response.raise_for_status()
        ids = response.json()
    # ValueError is caught as well because older requests releases raise a
    # plain ValueError (not a RequestException) on an undecodable body.
    except (requests.RequestException, ValueError) as e:
        logger.error(f"Failed to fetch topstories: {e}")
        return []
    # Callers slice the result, so anything other than a list (for example a
    # JSON null decoded to None) must not be passed through.
    if not isinstance(ids, list):
        logger.error(f"Failed to fetch topstories: unexpected payload {type(ids).__name__}")
        return []
    return ids
def timestamp_to_utc_date(ts: int) -> date:
    """Return the UTC calendar date that contains Unix timestamp *ts*."""
    utc_moment = datetime.fromtimestamp(ts, tz=timezone.utc)
    return utc_moment.date()
def timestamp_to_local_date(ts: int) -> date:
    """Return the calendar date of Unix timestamp *ts* in the machine's local timezone."""
    # No tz argument: fromtimestamp() yields a naive datetime in local time.
    local_moment = datetime.fromtimestamp(ts)
    return local_moment.date()
def get_topstories_items(
    max_items: int = 100, max_workers: int = 10
) -> list[dict[str, Any]]:
    """Fetch the top stories concurrently and return the usable ones.

    Args:
        max_items: Upper bound on how many IDs from the top-stories list are
            fetched (the list is truncated to its first ``max_items`` IDs).
        max_workers: Size of the thread pool used for the item requests.

    Returns:
        The fetched items whose ``type`` is ``"story"`` and that carry a
        ``time`` field, in the same order as their IDs appeared in the
        top-stories list. Items that failed to download are skipped.
    """
    ids = get_topstories_ids()[:max_items]

    def fetch(id_: int) -> dict[str, Any] | None:
        # Keep only real stories that have a timestamp; later date grouping
        # indexes item["time"] directly.
        item = get_item(id_)
        return item if item and item.get("type") == "story" and "time" in item else None

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in input order, so the result list is
        # deterministic and keeps the feed ranking. Collecting with
        # as_completed would order items by whichever request finished first.
        items = [item for item in executor.map(fetch, ids) if item]

    logger.info(f"Fetched {len(items)} topstory items concurrently")
    return items
def group_by_day(
    items: list[dict[str, Any]], target_date: date, limit: int | None = 20
) -> list[dict[str, Any]]:
    """Return the highest-scoring items posted on *target_date* (local time).

    Args:
        items: Story dicts; each must have a ``time`` key holding a Unix
            timestamp. A missing ``score`` is treated as 0.
        target_date: Local calendar date to keep.
        limit: Maximum number of items to return. Defaults to 20; pass
            ``None`` to return every matching item. Expected to be
            non-negative.

    Returns:
        Matching items sorted by ``score`` in descending order, truncated to
        ``limit`` entries. Items with equal scores keep their input order.
    """
    filtered = [
        item for item in items if timestamp_to_local_date(item["time"]) == target_date
    ]
    logger.debug(f"Found {len(filtered)} stories for {target_date}")
    ranked = sorted(filtered, key=lambda x: x.get("score", 0), reverse=True)
    # Slicing with None as the stop value returns the whole list.
    return ranked[:limit]
def format_output(items: list[dict[str, Any]], show_title: bool = False) -> list[str]:
    """Build one printable line per item.

    Each line is the item's discussion URL (``HN_COMMENT_URL`` filled with
    the item's ``id``); when *show_title* is true the item's ``title`` is
    appended after a single space.
    """

    def render(story: dict[str, Any]) -> str:
        link = HN_COMMENT_URL.format(story["id"])
        if not show_title:
            return link
        return link + f" {story['title']}"

    return [render(story) for story in items]
def main(show_title: bool = True, num_days: int = 4) -> dict[int, list[dict[str, Any]]]:
    """Print the top stories for each of the last *num_days* local days.

    Args:
        show_title: When true, each printed link is followed by the story
            title.
        num_days: Number of days to report, starting with today and going
            backwards one day at a time.

    Returns:
        A dict mapping ``days_ago`` (0 for today, 1 for yesterday, ...) to the
        list of story dicts printed for that day.
    """
    # NOTE(review): max_items=5000 is presumably meant as "take everything the
    # feed returns" - confirm against the API's actual list size.
    all_items = get_topstories_items(max_items=5000)

    # Take "today" once, so every iteration is offset from the same date even
    # if the clock passes midnight while the loop is running.
    today = datetime.now().date()

    all_daily_top: dict[int, list[dict[str, Any]]] = {}
    for days_ago in range(num_days):
        target_date = today - timedelta(days=days_ago)
        daily_top = group_by_day(all_items, target_date)
        print(f"Top {len(daily_top)} topstories for {target_date}:")
        for link in format_output(daily_top, show_title=show_title):
            print(f"{link}")
        # Blank line between day sections, but not after the last one.
        if days_ago < num_days - 1:
            print()
        all_daily_top[days_ago] = daily_top
    return all_daily_top
if __name__ == "__main__":
    # argparse is only needed when the file is run as a script.
    import argparse

    parser = argparse.ArgumentParser(
        description="Show top Hacker News stories by local day."
    )
    # store_false makes show_title default to True; passing --no-title sets
    # it to False.
    parser.add_argument(
        "--no-title",
        dest="show_title",
        action="store_false",
        help="Do not print the story title after the link",
    )
    args = parser.parse_args()

    # The result is bound to a module-level name as well as being printed by
    # main() itself.
    all_daily_top = main(show_title=args.show_title)