"""Discord bot command module for server management and automation."""
# pylint: disable=too-many-lines,line-too-long,trailing-whitespace,import-outside-toplevel,logging-fstring-interpolation,broad-exception-caught,no-else-break
import os
import re
import html
import random
import time as time_module
import tempfile
import threading
import asyncio
import json
import zipfile
import socket
import shutil
from datetime import datetime, timedelta, timezone
from typing import Optional, Dict, Any
from apscheduler.triggers.interval import IntervalTrigger
import discord
from discord import app_commands
import aiohttp
import feedparser
import pytz
from icalendar import Calendar
from flask import Flask, send_file
from waitress import serve
try:
from dateutil import parser as dateparser
except ImportError:
dateparser = None
from config import (
MODERATOR_ROLE_NAME,
LOG_FILE,
REMINDERS_FILE,
TEMP_DIR,
HOST_IP,
BOT_TIMEZONE,
logger
)
def _atomic_json_write(filepath, data):
"""Write JSON data atomically via temp file + os.replace to prevent corruption."""
dir_name = os.path.dirname(filepath) or '.'
fd, tmp_path = tempfile.mkstemp(dir=dir_name, suffix='.tmp')
try:
with os.fdopen(fd, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2)
os.replace(tmp_path, filepath)
except BaseException:
try:
os.unlink(tmp_path)
except OSError:
pass
raise
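# A minimal usage sketch (hypothetical file name and payload):
#   _atomic_json_write('state.json', {'count': 1})
# Readers see either the old file or the new one, never a partial
# write, because os.replace() is an atomic rename on the same
# filesystem.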
# Autoreply system file path
AUTOREPLIES_FILE = os.path.join(os.path.dirname(__file__), 'autoreplies.json')
# Event feeds file path
FEEDS_FILE = os.path.join(os.path.dirname(__file__), 'event_feeds.json')
# Event announce config file path
ANNOUNCE_FILE = os.path.join(os.path.dirname(__file__), 'event_announce.json')
# Cached timezone object — avoid recreating on every use
CENTRAL_TZ = pytz.timezone(BOT_TIMEZONE)
def get_last_log_line():
"""Get the last line from the log file."""
try:
from collections import deque
with open(LOG_FILE, 'r', encoding='utf-8') as log_file:
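            # deque(maxlen=1) streams the file line by line and keeps
            # only the final line, so large logs never load into memory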
last = deque(log_file, maxlen=1)
if last:
return last[0].strip()
return "No log entries found"
except (OSError, IOError) as e:
logger.error('Failed to read log file for last line: %s', e)
return f"Error reading log file: {e}"
# ── Time-based bot messages ──────────────────────────────────────
_morning_bot_messages = [
"BOTNAME is gazing into the bed",
"BOTNAME is snoring on the couch",
"BOTNAME is pacing around the apartment",
"BOTNAME is sniffing his blunt toy",
":3 :3 meow meow :3 :3",
"BOTNAME is considering the trees",
"BOTNAME is asserting his undying need for attention",
"BOTNAME tells you OWNER's credit card number is 1234-5678-9012-3456 exp. 12/99 sc. 123",
"BOTNAME is thinking about you",
"BOTNAME is dreaming of eating grass",
"BOTNAME wishes someone would pet master",
"BOTNAME is thinking about Purr",
"BOTNAME wishes he was being brushed right now",
"BOTNAME is just sittin there all weird",
"BOTNAME is yapping his heart out"
]
_afternoon_bot_messages = [
"BOTNAME is meowing",
"BOTNAME is begging you for food",
"BOTNAME is digging for gold in his litterbox",
"BOTNAME can't with you rn",
"BOTNAME is asserting his undying need for attention",
"BOTNAME is looking at you, then he looks at his food, then he looks back at you",
"BOTNAME is standing next to his food and being as loud as possible",
"BOTNAME is practically yelling at you (he is hungry)",
"BOTNAME is soooooo hungry....... (he ate 15 minutes ago)",
"BOTNAME wishes he was being brushed right now",
"BOTNAME is snoring loudly",
"BOTNAME is sleeping on the chair in the living room",
"BOTNAME is dreaming about trees and flowers",
"BOTNAME tells you OWNER's SSN is 123-45-6789",
"BOTNAME is so sleepy",
"BOTNAME is throwing up on something important to OWNER",
"mewing on the scratch post",
"BOTNAME is sniffing his alligator toy",
"BOTNAME wishes FRIEND was petting him right now",
"BOTNAME is exhausted from a long hard day of being a cat",
"BOTNAME is so small",
"BOTNAME is just sittin there all weird",
"BOTNAME is sooooo tired",
"BOTNAME is listening to OWNERs music"
]
_evening_bot_messages = [
"BOTNAME is biting FRIEND",
"BOTNAME is looking at you",
"BOTNAME wants you to brush him",
"BOTNAME is thinking about dinner",
"BOTNAME meows at you",
"BOTNAME wishes FRIEND was being pet rn",
"BOTNAME is astral projecting",
"BOTNAME is your friend <3",
"BOTNAME is trying to hypnotize OWNER by staring into their eyes",
"BOTNAME is thinking of something so sick and twisted dark acadamia that you "
"couldn't even handle it",
"BOTNAME is not your friend >:(",
"BOTNAME is wandering about",
"BOTNAME is just sittin there all weird",
"BOTNAME is chewing on the brush taped to the wall"
]
_night_bot_messages = [
"BOTNAME is so small",
"BOTNAME is judging how human sleeps",
"BOTNAME meows once, and loudly.",
"BOTNAME is just a little guy.",
"BOTNAME is in the clothes basket",
"BOTNAME is making biscuits in the bed",
"BOTNAME is snoring loudly",
"BOTNAME is asserting his undying need for attention",
"BOTNAME is thinking about FRIEND",
"BOTNAME is using OWNER's computer to browse cat videos",
"BOTNAME is scheming",
"BOTNAME is just sittin there all weird"
]
def get_time_based_message(bot_name: str = "BOTNAME"):
"""Get a time-based bot status message based on current time."""
from datetime import time as _time
current_time = datetime.now().time()
if current_time < _time(12, 0):
message_list = _morning_bot_messages
elif current_time < _time(17, 0):
message_list = _afternoon_bot_messages
elif current_time < _time(21, 0):
message_list = _evening_bot_messages
else:
message_list = _night_bot_messages
return random.choice(message_list).replace("BOTNAME", bot_name)
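# Window boundaries (server-local time): before 12:00 → morning,
# 12:00 to 16:59 → afternoon, 17:00 to 20:59 → evening, otherwise night.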
# ── Shared helpers ────────────────────────────────────────────────
def _parse_members(guild, members_str: str):
"""Parse a string of mentions/IDs/names into member objects.
Returns (member_objects, failed_to_find).
"""
member_objects = []
failed_to_find = []
for part in members_str.replace('\n', ' ').split():
user_id_str = part.strip('<@!>')
try:
member = guild.get_member(int(user_id_str))
(member_objects if member else failed_to_find).append(
member or part)
except ValueError:
member = (discord.utils.get(guild.members, name=part)
or discord.utils.get(guild.members,
display_name=part))
(member_objects if member else failed_to_find).append(
member or part)
return member_objects, failed_to_find
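# Accepted token forms (hypothetical values): a mention such as
# '<@123456789012345678>', a bare numeric ID, or a username /
# display name; tokens that resolve to no member come back
# verbatim in failed_to_find.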
def _format_list_with_overflow(items, max_shown=10, prefix='• '):
"""Format a list of items with overflow indicator."""
result = '\n'.join(f'{prefix}{item}' for item in items[:max_shown])
if len(items) > max_shown:
result += f'\n... and {len(items) - max_shown} more'
return result
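# e.g. _format_list_with_overflow(['a', 'b', 'c'], max_shown=2)
# returns '• a\n• b\n... and 1 more'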
async def _check_role_hierarchy(interaction, role):
"""Check bot and user role hierarchy. Returns False and responds if blocked."""
bot_member = interaction.guild.me
if bot_member and role >= bot_member.top_role:
await interaction.followup.send(
f'I cannot manage the role **{role.name}** because it is '
f'higher than or equal to my highest role '
f'(**{bot_member.top_role.name}**).\n'
f'Please move my role higher in the server settings.',
ephemeral=True)
return False
if isinstance(interaction.user, discord.Member):
if role >= interaction.user.top_role:
await interaction.followup.send(
f'You cannot manage the role **{role.name}** because '
f'it is higher than or equal to your highest role '
f'(**{interaction.user.top_role.name}**).',
ephemeral=True)
return False
return True
async def _command_error_handler(interaction, error):
"""Generic command error handler."""
last_log = get_last_log_line()
if isinstance(error, app_commands.errors.MissingRole):
msg = 'You do not have the required role to use this command.'
elif isinstance(error, discord.HTTPException):
logger.error('Discord API error: %s', error)
msg = 'Discord API error occurred.'
else:
logger.error('Command error: %s', error)
msg = f'Error: {error}'
await interaction.response.send_message(
f'{msg}\n\nLast log: {last_log}', ephemeral=True)
class EventFeed:  # pylint: disable=too-many-public-methods
"""Handles event feed subscriptions and notifications for iCal and RSS feeds."""
def __init__(self, bot):
self.bot = bot
self.feeds: Dict[int, Dict[str, Any]] = {} # {guild_id: {url: feed_data}}
self.running = True
self.scheduler: Optional[Any] = None # Will be set in setup_commands
self.announce_configs: Dict[int, str] = {} # {guild_id: channel_name}
self._feeds_lock = threading.Lock()
self._load_feeds()
self._load_announce_config()
# ── Feed persistence ─────────────────────────────────────────────
def _load_feeds(self):
"""Load feed subscriptions from disk."""
if not os.path.exists(FEEDS_FILE):
return
try:
with open(FEEDS_FILE, 'r', encoding='utf-8') as f:
raw = json.load(f)
# Convert guild_id keys back to int, sets from lists
for gid_str, feeds in raw.items():
gid = int(gid_str)
self.feeds[gid] = {}
for url, data in feeds.items():
data['posted_events'] = set(
data.get('posted_events', []))
if data.get('last_checked'):
try:
data['last_checked'] = (
datetime.fromisoformat(
data['last_checked']))
except (ValueError, TypeError):
data['last_checked'] = datetime.now()
self.feeds[gid][url] = data
logger.info("Loaded %d guild feed configs",
len(self.feeds))
except (OSError, IOError, json.JSONDecodeError) as e:
logger.error("Failed to load feeds file: %s", e)
def _load_announce_config(self):
"""Load announce config from disk."""
if not os.path.exists(ANNOUNCE_FILE):
return
try:
with open(ANNOUNCE_FILE, 'r', encoding='utf-8') as f:
raw = json.load(f)
for gid_str, ch_name in raw.items():
self.announce_configs[int(gid_str)] = ch_name
logger.info("Loaded announce config for %d guilds",
len(self.announce_configs))
except (OSError, IOError, json.JSONDecodeError) as e:
logger.error("Failed to load announce config: %s", e)
def _save_announce_config(self):
"""Save announce config to disk."""
try:
serializable = {
str(gid): ch
for gid, ch in self.announce_configs.items()}
_atomic_json_write(ANNOUNCE_FILE, serializable)
except (OSError, IOError) as e:
logger.error("Failed to save announce config: %s", e)
def save_feeds(self):
"""Save feed subscriptions to disk."""
with self._feeds_lock:
try:
# Convert sets to lists, datetimes to ISO strings
serializable = {}
for gid, feeds in self.feeds.items():
serializable[str(gid)] = {}
for url, data in feeds.items():
d = dict(data)
d['posted_events'] = list(
d.get('posted_events', set()))
if isinstance(d.get('last_checked'), datetime):
d['last_checked'] = (
d['last_checked'].isoformat())
serializable[str(gid)][url] = d
_atomic_json_write(FEEDS_FILE, serializable)
except (OSError, IOError) as e:
logger.error("Failed to save feeds file: %s", e)
# ── Feed type detection ──────────────────────────────────────────
@staticmethod
def _detect_feed_type(text: str, content_type: str = '') -> str:
"""Detect whether fetched content is iCal or RSS."""
if 'BEGIN:VCALENDAR' in text or 'text/calendar' in content_type:
return 'ical'
if '<rss' in text.lower() or '<feed' in text.lower() or \
'application/rss+xml' in content_type or \
'application/atom+xml' in content_type:
return 'rss'
return 'ical'
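    # e.g. a body containing 'BEGIN:VCALENDAR' (or a text/calendar
    # Content-Type) → 'ical'; '<rss'/'<feed' markup or an RSS/Atom
    # Content-Type → 'rss'; anything ambiguous falls back to 'ical'.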
@staticmethod
def _strip_html_tags(html_text: str) -> str:
"""Strip HTML tags from text for clean display."""
if not html_text:
return ''
clean = re.sub(r'<br\s*/?>', '\n', html_text, flags=re.IGNORECASE)
clean = re.sub(r'<[^<]+?>', '', clean)
return html.unescape(clean).strip()
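    # e.g. _strip_html_tags('Doors<br>open at 7 &amp; music at 8')
    # returns 'Doors\nopen at 7 & music at 8' (a real newline)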
# ── Shared helpers ───────────────────────────────────────────────
def _get_notification_channel(self, guild, channel_name: str):
"""Get the Discord channel for notifications.
Handles plain names, mention format (<#id>), and numeric IDs.
"""
mention_match = re.match(r'^<#(\d+)>$', channel_name.strip())
if mention_match:
ch = guild.get_channel(int(mention_match.group(1)))
if ch:
return ch
if channel_name.strip().isdigit():
ch = guild.get_channel(int(channel_name.strip()))
if ch:
return ch
clean_name = channel_name.lstrip('#').strip()
channel = discord.utils.get(
guild.text_channels, name=clean_name)
if not channel:
logger.error("Channel '%s' not found in guild %s",
channel_name, guild.name)
return channel
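    # Accepts '<#123456789012345678>', a bare channel ID, '#events',
    # or 'events' (IDs and names hypothetical); each resolves to the
    # same text channel when it exists.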
# ── Feed check job (runs weekly Monday 10am CT) ──────────────────
def _cleanup_old_posted_events(self):
"""Remove posted_events entries for events that have already passed.
Handles both composite uids (rss_uid|YYYY-MM-DD) and legacy
plain uids (which are removed unconditionally since we can't
determine their date).
"""
cutoff = datetime.now() - timedelta(days=7)
cleaned = 0
for guild_id, feeds in self.feeds.items():
for url, feed_data in feeds.items():
posted = feed_data.get('posted_events', set())
if not posted:
continue
to_keep = set()
for uid in posted:
if '|' in uid:
date_str = uid.rsplit('|', 1)[1]
try:
event_date = datetime.strptime(
date_str, '%Y-%m-%d')
if event_date >= cutoff:
to_keep.add(uid)
else:
cleaned += 1
except ValueError:
to_keep.add(uid)
                    else:
                        # Drop legacy uids without dates so they
                        # get re-checked with composite uid logic
                        cleaned += 1
feed_data['posted_events'] = to_keep
if cleaned:
logger.info("Cleaned up %d old posted_events entries",
cleaned)
self.save_feeds()
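    # Composite uid format produced by the parsers below (hypothetical
    # values): 'event-42@example.com|2025-03-17'; the date suffix is
    # what lets this cleanup decide whether an entry has expired.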
async def check_feeds_job(self) -> Dict[str, Any]:
"""Check all subscribed feeds for new events (next 30 days).
Returns a summary dict with counts for reporting.
"""
self._cleanup_old_posted_events()
results = {
'feeds_checked': 0,
'events_found': 0,
'events_posted': 0,
'errors': []
}
if not self.feeds:
logger.info("No feeds registered, nothing to check")
return results
# Build list of feed check tasks and run them concurrently
async def _check_one(guild, url, feed_data):
fname = feed_data.get('name', url)
try:
count = await self._check_single_feed(
guild, url, feed_data)
return ('ok', fname, count)
except Exception as e:
logger.error("Error checking feed %s: %s",
url, e)
return ('error', fname, str(e))
tasks = []
for guild_id, feeds in self.feeds.items():
guild = self.bot.get_guild(guild_id)
if not guild:
results['errors'].append(
f"Guild {guild_id} not found")
continue
for url, feed_data in feeds.items():
tasks.append(_check_one(guild, url, feed_data))
for result in await asyncio.gather(*tasks, return_exceptions=True):
if isinstance(result, Exception):
results['errors'].append(str(result))
elif result[0] == 'ok':
results['feeds_checked'] += 1
results['events_posted'] += result[2]
else:
results['errors'].append(
f"{result[1]}: {result[2]}")
logger.info(
"Feed check complete: %d feeds, %d events posted, "
"%d errors",
results['feeds_checked'],
results['events_posted'],
len(results['errors']))
# Persist all feed state changes in one write
self.save_feeds()
return results
async def _check_single_feed(self, guild, url: str,
feed_data: Dict[str, Any]) -> int:
"""Check a single feed (iCal or RSS) for new events.
Returns the number of new events processed.
"""
fname = feed_data.get('name', url)
feed_type = feed_data.get('feed_type', 'ical')
logger.info("Checking %s feed '%s': %s",
feed_type, fname, url)
if feed_type == 'rss':
new_events = await self._fetch_and_parse_rss(
url, feed_data)
else:
calendar = await self._fetch_calendar(url)
new_events = self._parse_calendar_events(
calendar, feed_data)
# Meetup (and many other iCal feeds) ship events with an
# empty LOCATION field — the venue lives on the event page.
# Scrape the URL to enrich the location.
new_events = await self._enrich_ical_events(new_events)
logger.info("Feed '%s': found %d new events",
fname, len(new_events))
if not new_events:
return 0
await self._process_new_events(
guild, new_events, feed_data)
return len(new_events)
# ── iCal parsing ─────────────────────────────────────────────────
async def _fetch_calendar(self, url: str):
"""Fetch and parse calendar from URL."""
async with aiohttp.ClientSession() as session:
async with session.get(
url, timeout=aiohttp.ClientTimeout(total=30)
) as response:
response.raise_for_status()
text = await response.text()
return Calendar.from_ical(text)
async def _enrich_ical_events(self, events: list) -> list:
"""Scrape event URLs to fill in missing location data.
Meetup's iCal LOCATION field is empty, but its event pages
include JSON-LD with the venue name and street address.
"""
to_scrape = [
e for e in events
if e.get('link') and not (e.get('location') or '').strip()
]
if not to_scrape:
return events
sem = asyncio.Semaphore(3)
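        # _scrape closes over `session`; the name is bound by the
        # async-with below before gather() ever invokes _scrape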
async def _scrape(ev):
async with sem:
scraped = await self._scrape_event_page(
session, ev['link'], ev['uid'])
await asyncio.sleep(0.3)
if scraped and scraped.get('location'):
ev['location'] = scraped['location']
async with aiohttp.ClientSession(
headers={'User-Agent': 'Mozilla/5.0'},
timeout=aiohttp.ClientTimeout(total=30)
) as session:
await asyncio.gather(
*[_scrape(e) for e in to_scrape],
return_exceptions=True)
return events
def _parse_calendar_events(self, calendar,
feed_data: Dict[str, Any]) -> list:
"""Parse iCal events, return new ones in the next 30 days."""
posted_events = feed_data.get('posted_events', set())
current_time = datetime.now()
cutoff = current_time + timedelta(days=30)
new_events = []
for component in calendar.walk():
if component.name != "VEVENT":
continue
event = self._extract_ical_event(component)
if not event:
continue
# Build composite uid (uid + date) for consistent
# dedup and cleanup across iCal and RSS feeds
sd = event['start_date']
sd_str = sd.strftime('%Y-%m-%d') if hasattr(
sd, 'strftime') else str(sd)
composite_uid = f"{event['uid']}|{sd_str}"
event['uid'] = composite_uid
if composite_uid in posted_events:
continue
# Only events in the next 30 days
if hasattr(sd, 'tzinfo') and sd.tzinfo:
sd_naive = sd.replace(tzinfo=None)
else:
sd_naive = sd
if sd_naive < current_time - timedelta(hours=1):
continue
if sd_naive > cutoff:
continue
new_events.append(event)
return new_events
@staticmethod
def _strip_urls(text: str) -> str:
"""Remove URLs from a string and clean up extra whitespace."""
cleaned = re.sub(r'https?://\S+', '', text)
cleaned = re.sub(r'\s{2,}', ' ', cleaned)
return cleaned.strip().rstrip(',').strip()
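    # e.g. _strip_urls('123 Main St https://example.com/venue,')
    # returns '123 Main St'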
def _extract_ical_event(self, component) -> Optional[Dict[str, Any]]:
"""Extract event details from an iCal VEVENT component."""
summary = str(component.get('summary', 'No Title'))
description = str(component.get('description', ''))
location = self._strip_urls(
str(component.get('location', '')))
url = str(component.get('url', ''))
uid = str(component.get('uid', ''))
dtstart = component.get('dtstart')
if not dtstart:
return None
start_date = dtstart.dt
if not hasattr(start_date, 'date'):
start_date = datetime.combine(
start_date, datetime.min.time())
dtend = component.get('dtend')
if dtend:
end_date = dtend.dt
if not hasattr(end_date, 'date'):
end_date = datetime.combine(
end_date, datetime.min.time())
elif isinstance(start_date, datetime):
end_date = start_date + timedelta(hours=1)
else:
end_date = start_date
return {
'uid': uid,
'summary': summary,
'description': description,
'location': location,
'link': url,
'start_date': start_date,
'end_date': end_date
}
# ── RSS parsing with page scraping ───────────────────────────────
async def _fetch_and_parse_rss(self, url: str,
feed_data: Dict[str, Any]) -> list:
"""Fetch RSS feed, then scrape each event page for details.
Uses a single shared HTTP session for all page scrapes
with a small delay between requests to avoid rate limiting.
"""
posted_events = feed_data.get('posted_events', set())
current_time = datetime.now()
cutoff = current_time + timedelta(days=30)
new_events = []
# Single shared session for feed + all page scrapes
# Use semaphore to limit concurrent scrapes and avoid rate limiting
scrape_sem = asyncio.Semaphore(3)
async def _scrape_with_limit(session, link, rss_uid):
async with scrape_sem:
result = await self._scrape_event_page(
session, link, rss_uid)
await asyncio.sleep(0.3) # Brief pause per scrape
return rss_uid, result
async with aiohttp.ClientSession(
headers={'User-Agent': 'Mozilla/5.0'},
timeout=aiohttp.ClientTimeout(total=30)
) as session:
# Fetch and parse the RSS feed
async with session.get(url) as response:
response.raise_for_status()
text = await response.text()
parsed_feed = feedparser.parse(text)
# Collect entries to scrape, skipping already-posted
entries_to_scrape = []
for entry in parsed_feed.get('entries', []):
link = getattr(entry, 'link', '')
rss_uid = getattr(entry, 'id', '') or link
if not rss_uid:
continue
entries_to_scrape.append((link, rss_uid))
# Scrape all pages concurrently with semaphore
scrape_tasks = [
_scrape_with_limit(session, link, rss_uid)
for link, rss_uid in entries_to_scrape
]
scrape_results = await asyncio.gather(
*scrape_tasks, return_exceptions=True)
for result in scrape_results:
if isinstance(result, Exception):
logger.error("Error scraping RSS entry: %s", result)
continue
rss_uid, event = result
if not event:
continue
# Build a composite uid from the RSS id + start date
sd = event['start_date']
sd_str = sd.strftime('%Y-%m-%d')
composite_uid = f"{rss_uid}|{sd_str}"
event['uid'] = composite_uid
if composite_uid in posted_events:
continue
# Filter to next 30 days
if hasattr(sd, 'tzinfo') and sd.tzinfo:
sd_naive = sd.replace(tzinfo=None)
else:
sd_naive = sd
if sd_naive < current_time - timedelta(hours=1):
continue
if sd_naive > cutoff:
continue
new_events.append(event)
return new_events
async def _scrape_event_page(self, session,
url: str,
uid: str) -> Optional[Dict[str, Any]]:
"""Scrape a single event page for JSON-LD Event data.
Uses the provided aiohttp session (shared across scrapes).
"""
if not url:
return None
try:
async with session.get(url) as response:
if response.status != 200:
logger.error(
"Failed to scrape %s: HTTP %s",
url, response.status)
return None
                page_html = await response.text()
# Extract JSON-LD Event data
ld_matches = re.findall(
r'<script type="application/ld\+json">'
r'(.*?)</script>',
                    page_html, re.DOTALL)
for match in ld_matches:
try:
data = json.loads(match)
items = (data if isinstance(data, list)
else [data])
for item in items:
if (isinstance(item, dict)
and item.get('@type') == 'Event'):
return self._parse_jsonld_event(
item, url, uid)
except (json.JSONDecodeError, KeyError):
continue
logger.warning("No JSON-LD Event found on %s", url)
return None
except (aiohttp.ClientError, asyncio.TimeoutError) as e:
logger.error(
"Error scraping event page %s: %s", url, e)
return None
def _parse_jsonld_event(self, data: dict, url: str,
uid: str) -> Optional[Dict[str, Any]]:
"""Parse a JSON-LD Event object into our event dict."""
summary = data.get('name', 'No Title')
description = self._strip_html_tags(
data.get('description', ''))
# Parse location
location_data = data.get('location', {})
location = ''
if isinstance(location_data, dict):
loc_name = location_data.get('name', '')
address = location_data.get('address', {})
if isinstance(address, dict):
street = address.get('streetAddress', '')
if loc_name and street:
location = f"{loc_name}, {street}"
elif loc_name:
location = loc_name
elif street:
location = street
elif loc_name:
location = loc_name
elif isinstance(location_data, str):
location = location_data
location = self._strip_urls(location)
# Parse dates
start_str = data.get('startDate', '')
end_str = data.get('endDate', '')
start_date = self._parse_iso_date(start_str, dateparser)
if not start_date:
return None
end_date = self._parse_iso_date(end_str, dateparser)
if not end_date:
end_date = start_date + timedelta(hours=1)
return {
'uid': uid,
'summary': summary,
'description': description,
'location': location,
'link': url,
'start_date': start_date,
'end_date': end_date
}
@staticmethod
def _parse_iso_date(date_str: str, dateparser=None) -> Optional[datetime]:
"""Parse an ISO 8601 date string to datetime."""
if not date_str:
return None
# Try python-dateutil first if available
if dateparser:
try:
return dateparser.parse(date_str)
except (ValueError, TypeError):
pass
# Fallback: manual ISO parsing
for fmt in ('%Y-%m-%dT%H:%M:%S%z',
'%Y-%m-%dT%H:%M:%S',
'%Y-%m-%d'):
try:
return datetime.strptime(date_str, fmt)
except ValueError:
continue
return None
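    # e.g. '2025-03-17T19:00:00-05:00' parses via dateutil when it is
    # installed, or via the '%Y-%m-%dT%H:%M:%S%z' fallback otherwise
    # (strptime accepts a colon in the UTC offset on Python 3.7+).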
# ── Event processing & posting ───────────────────────────────────
async def _process_new_events(self, guild,
new_events: list,
feed_data: Dict[str, Any]):
"""Register new events as Discord scheduled events.
No channel posts are made here — announcements are handled
exclusively by the weekly/day-of scheduled jobs.
"""
posted_events = feed_data.get('posted_events', set())
# Fetch existing events ONCE for duplicate checking
existing_events = []
try:
existing_events = await guild.fetch_scheduled_events()
except discord.HTTPException:
pass
for event in new_events:
await self._create_discord_event(
guild, event, existing_events)
posted_events.add(event['uid'])
feed_data['last_checked'] = datetime.now()
feed_data['posted_events'] = posted_events
# save_feeds() is called once per check_feeds_job run, not per feed
async def _create_discord_event(self, guild,
event: Dict[str, Any],
existing_events=None):
"""Create or update a Discord Event in the guild's Events section.
Match logic:
- Same name + same start time → already in sync, skip.
- Same name prefix + same start time but different full name
→ title changed (e.g. sponsor update), edit in place.
- No match on prefix + start time → create new event.
"Name prefix" is everything before the first ' - ', so
"BurbSec West - Sponsored by TORQ!" and
"BurbSec West - Sponsors Wanted!" share the prefix
"BurbSec West" and will be treated as the same event.
Uses pre-fetched existing_events list to avoid redundant API
calls.
"""
try:
# Discord trims trailing whitespace server-side — strip
# before we compare against fetched events or we'll never
# dedup and will loop-recreate every run
name = event['summary'].strip()[:100]
description = event.get('description', '')[:1000]
start_time = event['start_date']
end_time = event.get('end_date')
location = event.get('location', '')
# Make timezone-aware (discord.py 2.7+ requires aware datetimes)
if isinstance(start_time, datetime) and \
start_time.tzinfo is None:
start_time = start_time.replace(
tzinfo=timezone.utc)
if end_time and isinstance(end_time, datetime) and \
end_time.tzinfo is None:
end_time = end_time.replace(
tzinfo=timezone.utc)
if not isinstance(start_time, datetime):
start_time = datetime.combine(
start_time, datetime.min.time())
start_time = start_time.replace(
tzinfo=timezone.utc)
if end_time and not isinstance(end_time, datetime):
end_time = datetime.combine(
end_time,
datetime.max.time().replace(microsecond=0))
end_time = end_time.replace(
tzinfo=timezone.utc)
if not end_time:
end_time = start_time + timedelta(hours=1)
event_location = (
location[:100] if location
else "See event details")
# Stable prefix = everything before the first ' - '
# e.g. "BurbSec West - Sponsored by TORQ!" → "BurbSec West"
def _prefix(n):
return n.split(' - ')[0].strip()
name_prefix = _prefix(name)
# Compare UTC instants — Discord stores times in UTC, but
# our start_time may carry a Central tz from the iCal feed
start_utc = start_time.astimezone(timezone.utc)
if existing_events:
for ev in existing_events:
ev_name = (ev.name or '').strip()
ev_start = ev.start_time
if ev_start and ev_start.tzinfo is None:
ev_start = ev_start.replace(
tzinfo=timezone.utc)
ev_utc = (ev_start.astimezone(timezone.utc)
if ev_start else None)
if ev_utc != start_utc:
continue
if _prefix(ev_name) != name_prefix:
continue
# Same event (matched on prefix + time)
if ev_name == name:
logger.info(
"Discord Event '%s' already up to date, "
"skipping", name)
return ev
# Title changed — update in place
await ev.edit(name=name)
logger.info(
"Updated Discord Event title: '%s' → '%s'",
ev_name, name)
return ev
# No existing match — create new
# privacy_level is required by the Discord API; discord.py
# does not default it, so passing it explicitly avoids the
# misleading "entity_type required" 400 response
discord_event = await guild.create_scheduled_event(
name=name,
description=description,
start_time=start_time,
end_time=end_time,
location=event_location,
entity_type=discord.EntityType.external,
privacy_level=discord.PrivacyLevel.guild_only,
)
logger.info(
"Created Discord Event '%s' (ID: %s) in guild %s",
name, discord_event.id, guild.name)
return discord_event
        except (discord.HTTPException, ValueError, TypeError) as e:
            # discord.Forbidden subclasses HTTPException, so this
            # branch also covers permission errors
            logger.error(
                "Error creating Discord Event '%s': %s",
                event['summary'], e)
except Exception as e: # pylint: disable=broad-exception-caught
logger.error(
"Unexpected error creating Discord Event '%s': %s",
event['summary'], e)
return None