Skip to content

Commit 4febfe9

Browse files
Prefer full match for user agent.
1 parent ca6c0d5 commit 4febfe9

2 files changed

Lines changed: 11 additions & 4 deletions

File tree

Lib/test/test_robotparser.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -260,22 +260,25 @@ class AnotherInvalidRequestRateTest(BaseRobotTest, unittest.TestCase):
260260

261261

262262
class UserAgentOrderingTest(BaseRobotTest, unittest.TestCase):
263-
# the order of User-agent should be correct. note
264-
# that this file is incorrect because "Googlebot" is a
265-
# substring of "Googlebot-Mobile"
263+
# the order of User-agent should not matter
266264
robots_txt = """\
267265
User-agent: Googlebot
268266
Disallow: /
267+
Allow: /folder1/
269268
270269
User-agent: Googlebot-Mobile
271270
Allow: /
271+
Disallow: /folder1/
272272
"""
273273
agent = 'Googlebot'
274274
bad = ['/something.jpg']
275+
good = ['/folder1/myfile.html']
275276

276277

277278
class UserAgentGoogleMobileTest(UserAgentOrderingTest):
278-
agent = 'Googlebot-Mobile'
279+
agent = 'Googlebot-mobile'
280+
bad = ['/folder1/myfile.html']
281+
good = ['/something.jpg']
279282

280283

281284
class LongestMatchTest(BaseRobotTest, unittest.TestCase):

Lib/urllib/robotparser.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@ def read(self):
7676
def _add_entry(self, entry):
7777
self.entries.append(entry)
7878
for agent in entry.useragents:
79+
agent = agent.lower()
7980
if agent not in self.groups:
8081
self.groups[agent] = entry
8182
else:
@@ -156,6 +157,9 @@ def parse(self, lines):
156157
self._add_entry(entry)
157158

158159
def _find_entry(self, useragent):
160+
entry = self.groups.get(useragent.lower())
161+
if entry is not None:
162+
return entry
159163
for entry in self.groups.values():
160164
if entry.applies_to(useragent):
161165
return entry

0 commit comments

Comments
 (0)