Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 16 additions & 28 deletions bot/code_review_bot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,41 +191,29 @@ def hash(self):
We make the assumption that the message does not contain the line number
If an error occurs reading the file content (locally or remotely), None is returned
"""
from code_review_bot.revisions import GithubRevision, PhabricatorRevision

assert self.revision is not None, "Missing revision"

local_repository = None
if isinstance(self.revision, PhabricatorRevision):
if settings.mercurial_cache_checkout:
local_repository = settings.mercurial_cache_checkout
elif isinstance(self.revision, GithubRevision):
assert (
settings.github_cache
), "Github cache repository is mandatory to analyse a github revision"
local_repository = settings.github_cache
else:
raise Exception(self.revision.__class__)
raise NotImplementedError

# Build the hash only if the file is not autogenerated.
# An autogenerated file resides in the build directory that it has the
# format `obj-x86_64-pc-linux-gnu`
file_content = None
if "/obj-" not in self.path:
if settings.mercurial_cache_checkout:
logger.debug("Using the local repository to build issue's hash")
try:
with (settings.mercurial_cache_checkout / self.path).open() as f:
file_content = f.read()
except (FileNotFoundError, IsADirectoryError):
logger.warning(
"Failed to find issue's related file", path=self.path
)
file_content = None
else:
try:
# Load all the lines affected by the issue
file_content = self.revision.load_file(self.path)
except ValueError:
# Build the hash with an empty content in case the path is erroneous
file_content = None
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
logger.warning(
"Failed to download a file with an issue", path=self.path
)

# We still build the hash with empty content
file_content = None
else:
# When encountering another HTTP error, raise the issue
raise
file_content = self.revision.get_file_content(self.path, local_repository)

if file_content is None:
self._hash = None
Expand Down
57 changes: 24 additions & 33 deletions bot/code_review_bot/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from code_review_bot import taskcluster
from code_review_bot.config import GetAppUserAgent, settings
from code_review_bot.revisions import PhabricatorRevision
from code_review_bot.tasks.lint import MozLintIssue

logger = structlog.get_logger(__name__)
Expand Down Expand Up @@ -46,38 +47,31 @@ def publish_revision(self, revision):
logger.warn("Skipping revision publication on backend")
return

# Check the repositories are urls
for url in (revision.base_repository, revision.head_repository):
assert isinstance(url, str), "Repository must be a string"
res = urllib.parse.urlparse(url)
assert res.scheme and res.netloc, f"Repository {url} is not an url"

# Check the Mercurial changesets are strings
for changeset in (
revision.base_changeset,
revision.head_changeset,
):
assert isinstance(changeset, str), "Mercurial changeset must be a string"

# Create revision on backend if it does not exists
data = {
"phabricator_id": revision.phabricator_id,
"phabricator_phid": revision.phabricator_phid,
"title": revision.title,
"bugzilla_id": revision.bugzilla_id,
"base_repository": revision.base_repository,
"head_repository": revision.head_repository,
"base_changeset": revision.base_changeset,
"head_changeset": revision.head_changeset,
}

# Try to create the revision, or retrieve it in case it exists with that Phabricator ID.
elif isinstance(revision, PhabricatorRevision):
# Check the repositories are urls
for url in (revision.base_repository, revision.head_repository):
assert isinstance(url, str), "Repository must be a string"
res = urllib.parse.urlparse(url)
assert res.scheme and res.netloc, f"Repository {url} is not an url"

# Check the Mercurial changesets are strings
for changeset in (
revision.base_changeset,
revision.head_changeset,
):
assert isinstance(
changeset, str
), "Mercurial changeset must be a string"

revision_data, diff_data = revision.serialize()

# Try to create the revision, or retrieve it in case it exists with that provider and ID.
# The backend always returns a revision, either a new one, or a pre-existing one
revision_url = "/v1/revision/"
auth = (self.username, self.password)
url_post = urllib.parse.urljoin(self.url, revision_url)
response = requests.post(
url_post, headers=GetAppUserAgent(), json=data, auth=auth
url_post, headers=GetAppUserAgent(), json=revision_data, auth=auth
)
if not response.ok:
logger.warn(f"Backend rejected the payload: {response.content}")
Expand All @@ -87,17 +81,14 @@ def publish_revision(self, revision):
revision.issues_url = backend_revision["issues_bulk_url"]
revision.id = backend_revision["id"]

# A revision may have no diff (e.g. Mozilla-central group tasks)
if not revision.diff_id:
# A revision may have no diff (e.g. Phabricator Mozilla-central group tasks)
if isinstance(revision, PhabricatorRevision) and not revision.diff_id:
return backend_revision

# Create diff attached to revision on backend
data = {
"id": revision.diff_id,
"phid": revision.diff_phid,
**diff_data,
"review_task_id": settings.taskcluster.task_id,
"mercurial_hash": revision.head_changeset,
"repository": revision.head_repository,
}
backend_diff = self.create(backend_revision["diffs_url"], data)

Expand Down
15 changes: 14 additions & 1 deletion bot/code_review_bot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
)
from code_review_bot.config import settings
from code_review_bot.report import get_reporters
from code_review_bot.revisions import PhabricatorRevision, Revision
from code_review_bot.revisions import GithubRevision, PhabricatorRevision, Revision
from code_review_bot.tools.libmozdata import setup as setup_libmozdata
from code_review_bot.tools.log import init_logger
from code_review_bot.workflow import Workflow
Expand Down Expand Up @@ -64,6 +64,13 @@ def parse_cli():
type=Path,
default=None,
)
parser.add_argument(
"--github-repository",
help="Optional path to an up-to-date github repository matching the analyzed revision.\n"
"This argument is required for Github revisions in order to compute issues' hashes based on file content.",
type=Path,
default=None,
)
parser.add_argument("--taskcluster-client-id", help="Taskcluster Client ID")
parser.add_argument("--taskcluster-access-token", help="Taskcluster Access token")
return parser.parse_args()
Expand Down Expand Up @@ -116,6 +123,7 @@ def main():
taskcluster.secrets["repositories"],
taskcluster.secrets["ssh_key"],
args.mercurial_repository,
args.github_repository,
)

# Setup statistics
Expand Down Expand Up @@ -205,6 +213,11 @@ def main():
)
return 1

if isinstance(revision, GithubRevision):
assert (
args.github_repository is not None
), "Github revision analysis requires the --github-repository argument to be set"

# Run workflow according to source
w = Workflow(
reporters,
Expand Down
10 changes: 10 additions & 0 deletions bot/code_review_bot/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def __init__(self):

# Cache to store whole repositories
self.mercurial_cache = None
self.github_cache = None

# SSH Key used to push on try
self.ssh_key = None
Expand All @@ -78,6 +79,7 @@ def setup(
repositories,
ssh_key=None,
mercurial_cache=None,
github_cache=None,
):
# Detect source from env
if "TRY_TASK_ID" in os.environ and "TRY_TASK_GROUP_ID" in os.environ:
Expand Down Expand Up @@ -148,6 +150,14 @@ def build_conf(nb, repo):
# Save ssh key when mercurial cache is enabled
self.ssh_key = ssh_key

# Store github cache path
if github_cache is not None:
self.github_cache = Path(github_cache)
assert (
self.github_cache.exists()
), f"Github cache does not exist {self.github_cache}"
logger.info("Using Github cache", path=self.github_cache)

def load_user_blacklist(self, usernames, phabricator_api):
"""
Load all black listed users from Phabricator API
Expand Down
6 changes: 4 additions & 2 deletions bot/code_review_bot/report/lando.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@ def publish(self, issues, revision, task_failures, links, reviewers):
Send an email to administrators
"""
if not isinstance(revision, PhabricatorRevision):
raise NotImplementedError(
"Only Phabricator revisions are supported for now"
logger.warning(
"Lando publication only works with Phabricator revisions. Skipping.",
revision=revision,
)
return

assert (
revision.phabricator_id and revision.phabricator_phid and revision.diff
Expand Down
3 changes: 2 additions & 1 deletion bot/code_review_bot/revisions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from code_review_bot.revisions.base import ImprovementPatch, Revision
from code_review_bot.revisions.github import GithubRevision
from code_review_bot.revisions.phabricator import PhabricatorRevision

# __all__ must contain the *names* of the public symbols as strings:
# `from code_review_bot.revisions import *` raises
# `TypeError: Item in __all__ must be str` when given the objects themselves.
__all__ = ["ImprovementPatch", "Revision", "PhabricatorRevision", "GithubRevision"]
39 changes: 34 additions & 5 deletions bot/code_review_bot/revisions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import random
from abc import ABC
from datetime import timedelta
from pathlib import Path

import rs_parsepatch
import structlog
Expand Down Expand Up @@ -165,6 +166,25 @@ def contains(self, issue):
lines = set(range(issue.line, issue.line + issue.nb_lines))
return not lines.isdisjoint(modified_lines)

def get_file_content(
    self, file_path: str, local_cache_repository: Path | None = None
):
    """
    Read a file's content either from a local cached checkout or through the
    revision's remote file loader.

    Returns the file content as a string, or None when the file cannot be
    read (missing file, path pointing at a directory, or an erroneous path
    on the remote lookup).
    """
    # No local checkout available: fall back to the remote loader
    if not local_cache_repository:
        try:
            return self.load_file(file_path)
        except ValueError:
            # The path is erroneous, consider as empty content
            return None

    logger.debug("Using the local repository to build issue's hash")
    try:
        with (local_cache_repository / file_path).open() as handle:
            return handle.read()
    except (FileNotFoundError, IsADirectoryError):
        logger.warning("Failed to find issue's related file", path=file_path)
        return None

@property
def has_clang_files(self):
"""
Expand Down Expand Up @@ -237,17 +257,26 @@ def as_dict(self):
"""
raise NotImplementedError

def serialize(self):
    """
    Build the pair of dicts (revision payload, diff payload) sent to the
    backend for this revision.

    Concrete revision classes must override this method.
    """
    raise NotImplementedError

@staticmethod
def from_try_task(try_task: dict, decision_task: dict, phabricator: PhabricatorAPI):
    """
    Build the concrete revision matching a remote task description,
    dispatching to Github when the code-review payload carries a github
    section, and to Phabricator otherwise.
    """
    # Imported locally to avoid a circular import with the revision modules
    from code_review_bot.revisions.github import GithubRevision
    from code_review_bot.revisions.phabricator import PhabricatorRevision

    # The task's code-review extra section describes the analyzed revision
    code_review = try_task["extra"]["code-review"]

    if "github" in code_review:
        return GithubRevision(**code_review["github"])
    return PhabricatorRevision.from_try_task(code_review, decision_task, phabricator)
Loading