Skip to content

Commit 5567aaf

Browse files
Remove sorting for now.
1 parent c5fe9e7 commit 5567aaf

2 files changed

Lines changed: 3 additions & 18 deletions

File tree

Lib/test/test_robotparser.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -388,8 +388,8 @@ class IgnoreEmptyLinesTest(BaseRobotTest, unittest.TestCase):
388388
expected_output = """\
389389
User-agent: spambot
390390
User-agent: eggsbot
391-
Disallow: /another/path
392-
Disallow: /some/path\
391+
Disallow: /some/path
392+
Disallow: /another/path\
393393
"""
394394

395395

@@ -445,10 +445,10 @@ class WeirdPathTest(BaseRobotTest, unittest.TestCase):
445445
'/e$$', '/ex$y$', '/g']
446446
expected_output = """\
447447
User-agent: *
448+
Disallow: /a$
448449
Disallow: /c*
449450
Disallow: /d*z
450451
Disallow: /e*$
451-
Disallow: /a$
452452
Disallow: /g$\
453453
"""
454454

Lib/urllib/robotparser.py

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,6 @@ def _add_entry(self, entry):
8181
self.groups[agent] = entry
8282
else:
8383
self.groups[agent] = merge_entries(self.groups[agent], entry)
84-
sort_rulelines(self.groups[agent].rulelines)
8584

8685
def parse(self, lines):
8786
"""Parse the input lines from a robots.txt file.
@@ -306,9 +305,6 @@ def allowance(self, filename):
306305
"""Preconditions:
307306
- our agent applies to this entry
308307
- filename is URL encoded
309-
- rules are sorted:
310-
- wildcards before literal paths
311-
- literal paths from longest to shortest, "Allow" before "Disallow"
312308
"""
313309
best_match = -1
314310
allowance = True
@@ -320,9 +316,6 @@ def allowance(self, filename):
320316
allowance = line.allowance
321317
elif m == best_match and not allowance:
322318
allowance = line.allowance
323-
# Optimization. Requires rules to be sorted.
324-
if line.matcher is None and (m or len(line.path) + 1 < best_match):
325-
break
326319
return allowance
327320

328321

@@ -360,11 +353,3 @@ def merge_entries(e1, e2):
360353
entry.delay = e1.delay if e2.delay is None else e2.delay
361354
entry.req_rate = e1.req_rate if e2.req_rate is None else e2.req_rate
362355
return entry
363-
364-
def sort_rulelines(rulelines):
365-
def sortkey(line):
366-
if line.matcher is not None:
367-
return (True,)
368-
else:
369-
return (False, len(line.path), line.allowance)
370-
rulelines.sort(key=sortkey, reverse=True)

0 commit comments

Comments
 (0)