-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathscraping.py
More file actions
77 lines (59 loc) · 3.35 KB
/
scraping.py
File metadata and controls
77 lines (59 loc) · 3.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import requests
from bs4 import BeautifulSoup
import discord
import os
# Gateway intents: start from the library defaults and additionally enable
# message_content, because on_message inspects message.content.
intents = discord.Intents.default()
intents.message_content = True
client = discord.Client(intents=intents)
CHANNEL_ID=1082254805591928872 # ID of the CP-DOUBTS channel in which doubt threads are created
TOKEN=os.getenv('TOKEN') # bot token, read from the TOKEN environment variable; never share or commit it
# Known limitation (author's note): the bot does not handle several requests
# at the same time well. getPageAndPreprocess performs a blocking HTTP request,
# so the asynchronous flow needs to be structured with that in mind.
def getPageAndPreprocess(URL):  # returns the name, tags and difficulty
    """Download a Codeforces problem page and extract its metadata.

    Args:
        URL: Address of the problem page. A link without a scheme (for
            example ``www.codeforces.com/...``) is fetched over https.

    Returns:
        A ``(ProblemName, problemTags, problemDifficulty)`` tuple.
        ``ProblemName`` and ``problemDifficulty`` are the string ``"IDK"``
        when the page does not contain them; ``problemTags`` is a list of
        tag names and may be empty.

    Raises:
        requests.RequestException: If the page cannot be downloaded, the
            request times out, or the server answers with an error status.
    """
    ProblemName = "IDK"        # kept when the page has no title element
    problemDifficulty = "IDK"  # kept when the page has no '*<rating>' tag
    problemTags = []

    # requests refuses URLs without a scheme, so add one for bare
    # "www.codeforces.com/..." links.
    if not URL.lower().startswith(("http://", "https://")):
        URL = "https://" + URL

    # A timeout keeps a stalled connection from blocking the caller forever.
    page = requests.get(URL, timeout=10)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "html.parser")

    title_div = soup.find("div", class_="title")
    if title_div is not None:
        title_text = title_div.get_text().strip()
        if title_text:
            # The title is presumably rendered as "<index>. <name>", e.g.
            # "A. Watermelon" or "C2. Hard Version"; drop the index whatever
            # its length and keep the whole text when there is no index.
            _index, separator, remainder = title_text.partition(". ")
            ProblemName = remainder.strip() if separator else title_text

    for tag_box in soup.find_all("span", class_="tag-box"):
        name = tag_box.get_text().strip()
        if not name:
            continue  # ignore empty tag boxes instead of failing on name[0]
        if name.startswith("*"):
            # A tag that starts with '*' carries the difficulty rating.
            problemDifficulty = name[1:]
        else:
            problemTags.append(name)

    return ProblemName, problemTags, problemDifficulty
@client.event
async def on_ready():
    """Event handler registered on the client: print the logged-in user."""
    print(f'Logged in as {client.user}')
async def post_doubt_on_channel(problem, tags, difficulty, doubt, url, userId):
    """Create a thread for a doubt in the channel identified by CHANNEL_ID.

    Args:
        problem: Problem name, used in the thread title.
        tags: Problem tags; their default string form is appended to the title.
        difficulty: Problem difficulty, used in the thread title.
        doubt: Text describing the issue the user is facing.
        url: Link to the problem.
        userId: Discord id of the asking user; mentioned in the post.
    """
    channel = client.get_channel(CHANNEL_ID)
    if channel is None:
        # get_channel only consults the local cache; ask the API on a miss
        # instead of failing with AttributeError on None.
        channel = await client.fetch_channel(CHANNEL_ID)

    # Discord rejects thread names longer than 100 characters, which a long
    # problem name plus its tag list can easily exceed.
    thread_name = f'[Codeforces][{difficulty}][{problem}]{tags}'[:100]
    post_body = f'{url} asked by :  <@{userId}> \n issue faced: {doubt}'
    await channel.create_thread(name=thread_name, content=post_body)
def isNotCodeforcesUrl(url):
    """Return True when *url* does not start with a codeforces.com prefix.

    The comparison ignores letter case because URL schemes and host names are
    case-insensitive (phone keyboards often produce "Https://...").

    Args:
        url: The candidate link as typed by the user.

    Returns:
        False for links beginning with http(s)://codeforces.com/,
        http(s)://www.codeforces.com/ or www.codeforces.com/; True otherwise.
    """
    codeforces_prefixes = (
        'https://codeforces.com/',
        'http://codeforces.com/',
        'https://www.codeforces.com/',
        'http://www.codeforces.com/',
        'www.codeforces.com/',
    )
    # str.startswith accepts a tuple, so one call covers every prefix.
    return not url.lower().startswith(codeforces_prefixes)
async def processContentAndPost(message, contents):  # contents: '<url> <issue>'
    """Validate a "<url> <doubt>" reply, scrape the problem and post the doubt.

    Args:
        message: The Discord message that started the request; replies are
            sent to its channel and its author is credited in the post.
        contents: Raw text of the user's reply: the problem URL, whitespace,
            then the description of the doubt.
    """
    import asyncio  # local import: only needed to reach the running event loop

    # Split on the first run of any whitespace so that "url\ndoubt" works as
    # well as "url doubt", and the doubt carries no leading separator.
    parts = contents.split(None, 1)
    if len(parts) < 2:
        await message.channel.send('Invalid Input - Please mention the url and then your doubt')
        return

    url, doubt = parts
    if isNotCodeforcesUrl(url):
        await message.channel.send('bruhhh the admin was busy so he could only do this for codeforces..... please wait for other platforms')
        return

    # The url is a codeforces link. The scraper does blocking network I/O, so
    # run it in the default executor to keep the event loop free for other
    # users' requests.
    loop = asyncio.get_running_loop()
    try:
        problemName, problemTags, problemDifficulty = await loop.run_in_executor(
            None, getPageAndPreprocess, url
        )
    except requests.RequestException:
        # Tell the user instead of failing without any visible response.
        await message.channel.send('Could not fetch the problem page - please check the url and try again')
        return

    await message.channel.send(f'you entered: {url} \nproblem name: {problemName} \nproblem tags: {problemTags} \nproblem difficulty: {problemDifficulty} \nyour doubt: {doubt}')
    await post_doubt_on_channel(problemName, problemTags, problemDifficulty, doubt, url, message.author.id)
@client.event
async def on_message(message):
    """Handle the ~postdoubt command.

    Prompts the author for "<url> <doubt>", waits for their next message in
    the same channel and hands it to processContentAndPost.

    Args:
        message: The incoming Discord message.
    """
    import asyncio  # local import: only needed for the wait_for timeout error

    reply_timeout_seconds = 300  # stop waiting for a user who never answers

    if message.author == client.user:  # ignore the bot's own messages
        return

    # Accept the command in any letter case, with or without the backticks
    # that Discord users put around inline code (the two spellings matched
    # previously, "`~postDoubt`" and "~postdoubt", are still accepted).
    command = message.content.lstrip('`').lower()
    if not command.startswith('~postdoubt'):
        return

    await message.channel.send('Enter the URL followed by the doubt you\'re having:')

    def check(m):
        # Only the same author answering in the same channel counts.
        return m.author == message.author and m.channel == message.channel

    try:
        recd = await client.wait_for('message', check=check, timeout=reply_timeout_seconds)
    except asyncio.TimeoutError:
        await message.channel.send('Timed out waiting for your doubt - please run the command again')
        return

    # The original command message (not the reply) is passed on; both come
    # from the same author and channel because of check().
    await processContentAndPost(message=message, contents=recd.content)
# Start the bot. os.getenv returns None when TOKEN is not set, so fail fast
# with a readable message instead of handing an unusable token to discord.py.
if not TOKEN:
    raise SystemExit('TOKEN environment variable is not set; cannot start the bot')
client.run(TOKEN)