diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2d01bc445..ca8922215 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,7 +33,7 @@ repos: rev: v2.2.4 hooks: - id: codespell - exclude_types: [json] + exclude_types: [json, pem] - repo: https://github.com/marco-c/taskcluster_yml_validator rev: v0.0.11 hooks: diff --git a/backend/code_review_backend/app/settings.py b/backend/code_review_backend/app/settings.py index 78e026789..7a25eb384 100644 --- a/backend/code_review_backend/app/settings.py +++ b/backend/code_review_backend/app/settings.py @@ -212,7 +212,7 @@ PHABRICATOR_HOST = "https://phabricator.services.mozilla.com" # Limit the automatic creation of reositories to allowed hosts -ALLOWED_REPOSITORY_HOSTS = ["hg.mozilla.org"] +ALLOWED_REPOSITORY_HOSTS = ["hg.mozilla.org", "github.com"] DYNO = env("DYNO") # Heroku settings override to run the web app through dyno diff --git a/backend/code_review_backend/issues/admin.py b/backend/code_review_backend/issues/admin.py index a29efb634..775098467 100644 --- a/backend/code_review_backend/issues/admin.py +++ b/backend/code_review_backend/issues/admin.py @@ -14,19 +14,30 @@ class RepositoryAdmin(admin.ModelAdmin): class DiffInline(admin.TabularInline): # Read only inline model = Diff - readonly_fields = ("id", "repository", "mercurial_hash", "phid", "review_task_id") + readonly_fields = ( + "id", + "repository", + "mercurial_hash", + "provider_id", + "review_task_id", + ) class RevisionAdmin(admin.ModelAdmin): list_display = ( "id", - "phabricator_id", + "provider", + "provider_id", "title", "bugzilla_id", "base_repository", "head_repository", ) - list_filter = ("base_repository", "head_repository") + list_filter = ( + "base_repository", + "head_repository", + "provider", + ) inlines = (DiffInline,) diff --git a/backend/code_review_backend/issues/api.py b/backend/code_review_backend/issues/api.py index e9fc5966f..671f61e36 100644 --- a/backend/code_review_backend/issues/api.py +++ b/backend/code_review_backend/issues/api.py @@ -6,6 +6,7 @@ from datetime import date, datetime, timedelta from django.conf import settings +from django.core.exceptions import BadRequest from django.db.models import BooleanField, Count, ExpressionWrapper, Prefetch, Q from django.db.models.functions import TruncDate from django.shortcuts import get_object_or_404 @@ -77,10 +78,15 @@ def create(self, request, *args, **kwargs): # When a revision already exists with that phabricator ID we return its data without creating a new one # This value is used by the bot to identify a revision and publish new Phabricator diffs. # The phabricator ID can be null (on mozilla-central) so we must always try to create a revision for that case - phabricator_id = request.data["phabricator_id"] - if phabricator_id is not None: + try: + provider = request.data["provider"] + provider_id = request.data["provider_id"] + except KeyError: + raise BadRequest("Invalid provider identification") + if provider_id is not None: if revision := Revision.objects.filter( - phabricator_id=phabricator_id + provider=provider, + provider_id=provider_id, ).first(): serializer = RevisionSerializer( instance=revision, context={"request": request} @@ -166,7 +172,7 @@ def get_queryset(self): if query is not None: search_query = ( Q(id__icontains=query) - | Q(revision__phabricator_id__icontains=query) + | Q(revision__provider_id__icontains=query) | Q(revision__bugzilla_id__icontains=query) | Q(revision__title__icontains=query) ) @@ -200,14 +206,14 @@ class IssueViewSet( def get_queryset(self): # Required to generate the OpenAPI documentation - if not self.kwargs.get("diff_id"): + if not self.kwargs.get("diff_provider_id"): return Issue.objects.none() - diff = get_object_or_404(Diff, id=self.kwargs["diff_id"]) + diff = get_object_or_404(Diff, provider_id=self.kwargs["diff_provider_id"]) return ( Issue.objects.filter(issue_links__diff=diff) .annotate(publishable=Q(issue_links__in_patch=True) & Q(level=LEVEL_ERROR)) .values( - "id", + "provider_id", "hash", "analyzer", "analyzer_check", @@ -497,7 +503,9 @@ def get_queryset(self): basename="revision-diffs", ) router.register(r"diff", DiffViewSet, basename="diffs") -router.register(r"diff/(?P\d+)/issues", IssueViewSet, basename="issues") +router.register( + r"diff/(?P[0-9a-zA-Z-]+)/issues", IssueViewSet, basename="issues" +) urls = router.urls + [ path( "revision//issues/", diff --git a/backend/code_review_backend/issues/compare.py b/backend/code_review_backend/issues/compare.py index ed753f703..eb0b56207 100644 --- a/backend/code_review_backend/issues/compare.py +++ b/backend/code_review_backend/issues/compare.py @@ -12,7 +12,8 @@ def detect_new_for_revision(diff: Diff, path: str, hash: str) -> bool: assert diff is not None, "Missing diff" return not IssueLink.objects.filter( revision_id=diff.revision_id, - diff_id__lt=diff.id, + # Ideally we would rely on the diff integer ID, but that information was lost adding the Github support + diff__created__lt=diff.created, issue__path=path, issue__hash=hash, ).exists() diff --git a/backend/code_review_backend/issues/management/commands/load_in_patch.py b/backend/code_review_backend/issues/management/commands/load_in_patch.py index dc9d571d0..5fc38307f 100644 --- a/backend/code_review_backend/issues/management/commands/load_in_patch.py +++ b/backend/code_review_backend/issues/management/commands/load_in_patch.py @@ -79,11 +79,11 @@ def process_diff(diff: Diff): detect_in_patch(issue_link, lines) for issue_link in diff.issue_links.all() ] logging.info( - f"Found {len([i for i in issue_links if i.in_patch])} issue link in patch for {diff.id}" + f"Found {len([i for i in issue_links if i.in_patch])} issue link in patch for {diff.provider_id}" ) IssueLink.objects.bulk_update(issue_links, ["in_patch"]) except Exception as e: - logging.info(f"Failure on diff {diff.id}: {e}") + logging.info(f"Failure on diff {diff.provider_id}: {e}") class Command(BaseCommand): diff --git a/backend/code_review_backend/issues/management/commands/load_issues.py b/backend/code_review_backend/issues/management/commands/load_issues.py index 76e2f7eaa..a7dd5b202 100644 --- a/backend/code_review_backend/issues/management/commands/load_issues.py +++ b/backend/code_review_backend/issues/management/commands/load_issues.py @@ -213,9 +213,9 @@ def build_revision_and_diff(self, data, task_id): return None, None revision, _ = head_repository.head_revisions.get_or_create( - phabricator_id=data["id"], + provider="phabricator", + provider_id=data["id"], defaults={ - "phabricator_phid": data["phid"], "title": data["title"], "bugzilla_id": int(data["bugzilla_id"]) if data["bugzilla_id"] @@ -224,10 +224,9 @@ def build_revision_and_diff(self, data, task_id): }, ) diff, _ = revision.diffs.get_or_create( - id=data["diff_id"], + provider_id=data["diff_phid"], defaults={ "repository": head_repository, - "phid": data["diff_phid"], "review_task_id": task_id, "mercurial_hash": data["mercurial_revision"], }, diff --git a/backend/code_review_backend/issues/migrations/0016_alter_revision_options_and_more.py b/backend/code_review_backend/issues/migrations/0016_alter_revision_options_and_more.py new file mode 100644 index 000000000..ba0230a1e --- /dev/null +++ b/backend/code_review_backend/issues/migrations/0016_alter_revision_options_and_more.py @@ -0,0 +1,50 @@ +# Generated by Django 5.1.6 on 2026-01-21 16:01 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("issues", "0015_remove_repository_phid_alter_repository_id"), + ] + + operations = [ + migrations.AlterModelOptions( + name="revision", + options={"ordering": ("provider", "provider_id", "id")}, + ), + migrations.RemoveConstraint( + model_name="revision", + name="revision_unique_phab_id", + ), + migrations.RenameField( + model_name="revision", + old_name="phabricator_id", + new_name="provider_id", + ), + migrations.AddField( + model_name="revision", + name="provider", + field=models.CharField( + choices=[("phabricator", "Phabricator"), ("github", "Github")], + default="phabricator", + max_length=20, + ), + ), + migrations.AddConstraint( + model_name="revision", + constraint=models.UniqueConstraint( + condition=models.Q(("provider_id__isnull", False)), + fields=("provider_id",), + name="revision_unique_phab_id", + ), + ), + migrations.RemoveConstraint( + model_name="revision", + name="revision_unique_phab_phabid", + ), + migrations.RemoveField( + model_name="revision", + name="phabricator_phid", + ), + ] diff --git a/backend/code_review_backend/issues/migrations/0017_rename_phid_diff_provider_id.py b/backend/code_review_backend/issues/migrations/0017_rename_phid_diff_provider_id.py new file mode 100644 index 000000000..7c0bbbce9 --- /dev/null +++ b/backend/code_review_backend/issues/migrations/0017_rename_phid_diff_provider_id.py @@ -0,0 +1,17 @@ +# Generated by Django 5.1.6 on 2026-01-27 16:13 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("issues", "0016_alter_revision_options_and_more"), + ] + + operations = [ + migrations.RenameField( + model_name="diff", + old_name="phid", + new_name="provider_id", + ), + ] diff --git a/backend/code_review_backend/issues/migrations/0018_rely_on_diff_provider_id.py b/backend/code_review_backend/issues/migrations/0018_rely_on_diff_provider_id.py new file mode 100644 index 000000000..dcd5f878f --- /dev/null +++ b/backend/code_review_backend/issues/migrations/0018_rely_on_diff_provider_id.py @@ -0,0 +1,67 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from django.db import migrations, models +from django.db.models import F + + +def _update_provider_id(apps, schema_editor): + """ + Update all diffs, setting the current ID as the provider ID. + This step is required no to loose the Phabricator integer ID, + which is the reference used by most developers. + The previous value (PHID-DIFF-xxxxxxxxxxxxxxxxxxxx) is dropped, + then the PK is replaced by a random UUID, allowing the support + of other providers without a known unique integer identifier. + """ + Diff = apps.get_model("issues", "Diff") + Diff.objects.update(provider_id=F("id")) + + +def _reset_postgres_sequense(apps, schema_editor): + """ + PostgreSQL needs to update the auto increment sequence value, otherwise + conflicts would happen when we reach the initial Diff ID. + + This command was initially generated with ./manage.py sqlsequencereset issues + """ + if schema_editor.connection.vendor != "postgresql": + # The SQLite backend automatically handles the auto increment update + return + schema_editor.execute( + """SELECT setval( + pg_get_serial_sequence('"issues_diff"','id'), + coalesce(max("id"), 1), + max("id") IS NOT null + ) FROM "issues_diff"; + """, + ) + + +class Migration(migrations.Migration): + dependencies = [ + ("issues", "0017_rename_phid_diff_provider_id"), + ] + + operations = [ + # Save the Phabricator integer ID in Diff.provider_id + migrations.RunPython( + _update_provider_id, + reverse_code=migrations.RunPython.noop, + ), + migrations.AlterField( + model_name="diff", + name="id", + field=models.AutoField(primary_key=True, serialize=False), + ), + migrations.AlterModelOptions( + name="diff", + options={"ordering": ("created",)}, + ), + # Restore the sequence with PostgreSQL backend + migrations.RunPython( + _reset_postgres_sequense, + reverse_code=migrations.RunPython.noop, + ), + ] diff --git a/backend/code_review_backend/issues/models.py b/backend/code_review_backend/issues/models.py index da457656b..098f431ef 100644 --- a/backend/code_review_backend/issues/models.py +++ b/backend/code_review_backend/issues/models.py @@ -13,6 +13,13 @@ LEVEL_ERROR = "error" ISSUE_LEVELS = ((LEVEL_WARNING, "Warning"), (LEVEL_ERROR, "Error")) +PROVIDER_PHABRICATOR = "phabricator" +PROVIDER_GITHUB = "github" +PROVIDERS = ( + (PROVIDER_PHABRICATOR, "Phabricator"), + (PROVIDER_GITHUB, "Github"), +) + class Repository(models.Model): id = models.AutoField(primary_key=True) @@ -33,11 +40,12 @@ def __str__(self): class Revision(models.Model): id = models.BigAutoField(primary_key=True) + # Phabricator references will be left empty when ingesting a decision task (e.g. from MC or autoland) - phabricator_id = models.PositiveIntegerField(unique=True, null=True, blank=True) - phabricator_phid = models.CharField( - max_length=40, unique=True, null=True, blank=True + provider = models.CharField( + max_length=20, choices=PROVIDERS, default=PROVIDER_PHABRICATOR ) + provider_id = models.PositiveIntegerField(unique=True, null=True, blank=True) created = models.DateTimeField(auto_now_add=True) updated = models.DateTimeField(auto_now=True) @@ -72,43 +80,45 @@ class Revision(models.Model): bugzilla_id = models.PositiveIntegerField(null=True) class Meta: - ordering = ("phabricator_id", "id") + ordering = ("provider", "provider_id", "id") indexes = (models.Index(fields=["head_repository", "head_changeset"]),) constraints = [ models.UniqueConstraint( - fields=["phabricator_id"], + fields=["provider_id"], name="revision_unique_phab_id", - condition=Q(phabricator_id__isnull=False), - ), - models.UniqueConstraint( - fields=["phabricator_phid"], - name="revision_unique_phab_phabid", - condition=Q(phabricator_phid__isnull=False), + condition=Q(provider_id__isnull=False), ), ] def __str__(self): - if self.phabricator_id is not None: - return f"D{self.phabricator_id} - {self.title}" + if self.provider == PROVIDER_PHABRICATOR and self.provider_id is not None: + return f"Phabricator D{self.provider_id} - {self.title}" return f"#{self.id} - {self.title}" @property - def phabricator_url(self): - if self.phabricator_id is None: + def url(self): + if self.provider_id is None: return - parser = urllib.parse.urlparse(settings.PHABRICATOR_HOST) - return f"{parser.scheme}://{parser.netloc}/D{self.phabricator_id}" + + if self.provider == PROVIDER_PHABRICATOR: + parser = urllib.parse.urlparse(settings.PHABRICATOR_HOST) + return f"{parser.scheme}://{parser.netloc}/D{self.provider_id}" + elif self.provider == PROVIDER_GITHUB: + return f"{self.base_repository.url}/issues/{self.provider_id}" + else: + raise NotImplementedError class Diff(models.Model): - """Reference of a specific code patch (diff) in Phabricator. + """Reference of a specific code patch (diff) in Phabricator or Github. A revision can be linked to multiple successive diffs, or none in case of a repository push. """ - # Phabricator's attributes - id = models.PositiveIntegerField(primary_key=True) - phid = models.CharField(max_length=40, unique=True) + id = models.AutoField(primary_key=True) + + # Provider ID is unique among all providers (integer for Phabricator, hash for github) + provider_id = models.CharField(max_length=40, unique=True) created = models.DateTimeField(auto_now_add=True) updated = models.DateTimeField(auto_now=True) @@ -126,10 +136,10 @@ class Diff(models.Model): ) def __str__(self): - return f"Diff {self.id}" + return f"Diff {self.provider_id}" class Meta: - ordering = ("id",) + ordering = ("created",) class IssueLink(models.Model): diff --git a/backend/code_review_backend/issues/serializers.py b/backend/code_review_backend/issues/serializers.py index 888fd52ff..7bb0c2324 100644 --- a/backend/code_review_backend/issues/serializers.py +++ b/backend/code_review_backend/issues/serializers.py @@ -35,7 +35,7 @@ class RepositoryGetOrCreateField(serializers.SlugRelatedField): ) default_error_messages = { **serializers.SlugRelatedField.default_error_messages, - "invalid_url": "Repository URL must match hg.mozilla.org.", + "invalid_url": "Repository base URL is not allowed.", } queryset = Repository.objects.all() @@ -52,9 +52,12 @@ def to_internal_value(self, url): except (TypeError, ValueError): self.fail("invalid") return + # Github repositories uses a path like /, so we need to replace the + # inner slash to have a slug usable within URLs (e.g. to list issues on a repo). + slug = parsed.path.lstrip("/").replace("/", "-") try: repo, _ = self.get_queryset().get_or_create( - url=url, defaults={"slug": parsed.path.lstrip("/")} + url=url, defaults={"slug": slug} ) return repo except (TypeError, ValueError): @@ -74,8 +77,8 @@ class RevisionSerializer(serializers.ModelSerializer): issues_bulk_url = serializers.HyperlinkedIdentityField( view_name="revision-issues-bulk", lookup_url_kwarg="revision_id" ) - phabricator_url = serializers.URLField(read_only=True) - phabricator_id = serializers.IntegerField( + url = serializers.URLField(read_only=True) + provider_id = serializers.IntegerField( required=False, allow_null=True, min_value=1, @@ -90,13 +93,13 @@ class Meta: "head_repository", "base_changeset", "head_changeset", - "phabricator_id", - "phabricator_phid", + "provider", + "provider_id", "title", "bugzilla_id", "diffs_url", "issues_bulk_url", - "phabricator_url", + "url", ) @@ -107,21 +110,21 @@ class RevisionLightSerializer(serializers.ModelSerializer): base_repository = RepositoryGetOrCreateField() head_repository = RepositoryGetOrCreateField() - phabricator_url = serializers.URLField(read_only=True) + url = serializers.URLField(read_only=True) class Meta: model = Revision fields = ( "id", - "phabricator_id", + "provider", + "provider_id", "base_repository", "head_repository", "base_changeset", "head_changeset", - "phabricator_id", "title", "bugzilla_id", - "phabricator_url", + "url", ) @@ -135,14 +138,15 @@ class DiffSerializer(serializers.ModelSerializer): queryset=Repository.objects.all(), slug_field="url" ) issues_url = serializers.HyperlinkedIdentityField( - view_name="issues-list", lookup_url_kwarg="diff_id" + view_name="issues-list", + lookup_url_kwarg="diff_provider_id", + lookup_field="provider_id", ) class Meta: model = Diff fields = ( - "id", - "phid", + "provider_id", "review_task_id", "repository", "mercurial_hash", @@ -163,7 +167,7 @@ class DiffLightSerializer(serializers.ModelSerializer): class Meta: model = Diff - fields = ("id", "repository", "revision") + fields = ("provider_id", "repository", "revision") class DiffFullSerializer(serializers.ModelSerializer): @@ -175,7 +179,9 @@ class DiffFullSerializer(serializers.ModelSerializer): revision = RevisionSerializer(read_only=True) repository = RepositorySerializer(read_only=True) issues_url = serializers.HyperlinkedIdentityField( - view_name="issues-list", lookup_url_kwarg="diff_id" + view_name="issues-list", + lookup_url_kwarg="diff_provider_id", + lookup_field="provider_id", ) nb_issues = serializers.IntegerField(read_only=True) nb_issues_publishable = serializers.IntegerField(read_only=True) @@ -185,9 +191,8 @@ class DiffFullSerializer(serializers.ModelSerializer): class Meta: model = Diff fields = ( - "id", + "provider_id", "revision", - "phid", "review_task_id", "repository", "mercurial_hash", @@ -264,7 +269,8 @@ class SingleIssueBulkSerializer(IssueSerializer): class IssueBulkSerializer(serializers.Serializer): - diff_id = serializers.PrimaryKeyRelatedField( + diff_provider_id = serializers.SlugRelatedField( + slug_field="provider_id", # Initialized depending on the revision used for the creation queryset=Diff.objects.none(), style={"base_template": "input.html"}, @@ -277,11 +283,11 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) if not self.context.get("revision"): return - self.fields["diff_id"].queryset = self.context["revision"].diffs.all() + self.fields["diff_provider_id"].queryset = self.context["revision"].diffs.all() @transaction.atomic def create(self, validated_data): - diff = validated_data.get("diff_id", None) + diff = validated_data.get("diff_provider_id", None) link_attrs = defaultdict(list) # Separate attributes that are specific to the IssueLink M2M for issue in validated_data["issues"]: @@ -340,7 +346,7 @@ def create(self, validated_data): output.append(output_link) return { - "diff_id": diff, + "diff_provider_id": diff, "issues": output, } diff --git a/backend/code_review_backend/issues/tests/commands/test_cleanup_issues.py b/backend/code_review_backend/issues/tests/commands/test_cleanup_issues.py index b6ba5fe40..135d92362 100644 --- a/backend/code_review_backend/issues/tests/commands/test_cleanup_issues.py +++ b/backend/code_review_backend/issues/tests/commands/test_cleanup_issues.py @@ -56,8 +56,8 @@ def setUp(self): [ Revision( id=i, - phabricator_id=i, - phabricator_phid=i, + provider="phabricator", + provider_id=i, title=f"Revision {i}", base_repository=repo, head_repository=repo, diff --git a/backend/code_review_backend/issues/tests/test_api.py b/backend/code_review_backend/issues/tests/test_api.py index 09d3e8ff4..0b99190a5 100644 --- a/backend/code_review_backend/issues/tests/test_api.py +++ b/backend/code_review_backend/issues/tests/test_api.py @@ -31,15 +31,14 @@ def setUp(self): ) # Create revision and diff self.revision = self.repo_try.head_revisions.create( - phabricator_id=456, - phabricator_phid="PHID-REV-XXX", + provider="phabricator", + provider_id=456, title="Bug XXX - Yet Another bug", bugzilla_id=78901, base_repository=self.repo, ) self.diff = self.revision.diffs.create( - id=1234, - phid="PHID-DIFF-xxx", + provider_id="PHID-DIFF-1234", review_task_id="deadbeef123", mercurial_hash="coffee12345", repository=self.repo_try, @@ -51,8 +50,8 @@ def test_create_revision(self): """ self.revision.delete() data = { - "phabricator_id": 123, - "phabricator_phid": "PHID-REV-xxx", + "provider": "phabricator", + "provider_id": 123, "title": "Bug XXX - Some bug", "bugzilla_id": 123456, "base_repository": "http://repo.test/myrepo", @@ -78,9 +77,9 @@ def test_create_revision(self): "bugzilla_id": 123456, "diffs_url": f"http://testserver/v1/revision/{revision_id}/diffs/", "issues_bulk_url": f"http://testserver/v1/revision/{revision_id}/issues/", - "phabricator_url": "https://phabricator.services.mozilla.com/D123", - "phabricator_id": 123, - "phabricator_phid": "PHID-REV-xxx", + "url": "https://phabricator.services.mozilla.com/D123", + "provider": "phabricator", + "provider_id": 123, "base_repository": "http://repo.test/myrepo", "head_repository": "http://repo.test/myrepo", "base_changeset": "123456789ABCDEF", @@ -88,7 +87,7 @@ def test_create_revision(self): "title": "Bug XXX - Some bug", } self.assertEqual(Revision.objects.count(), 1) - revision = Revision.objects.get(phabricator_id=123) + revision = Revision.objects.get(provider_id=123) self.assertEqual(revision.title, "Bug XXX - Some bug") self.assertEqual(revision.bugzilla_id, 123456) self.assertDictEqual(response.json(), expected_response) @@ -103,8 +102,8 @@ def test_create_revision(self): def test_create_revision_wrong_new_repo(self): self.revision.delete() data = { - "phabricator_id": 123, - "phabricator_phid": "PHID-REV-xxx", + "provider": "phabricator", + "provider_id": 123, "title": "Bug XXX - Some bug", "bugzilla_id": 123456, "base_repository": "http://notamozrepo.test/myrepo", @@ -120,16 +119,16 @@ def test_create_revision_wrong_new_repo(self): self.assertDictEqual( response.json(), { - "base_repository": ["Repository URL must match hg.mozilla.org."], - "head_repository": ["Repository URL must match hg.mozilla.org."], + "base_repository": ["Repository base URL is not allowed."], + "head_repository": ["Repository base URL is not allowed."], }, ) def test_create_revision_creates_new_repo(self): self.revision.delete() data = { - "phabricator_id": 123, - "phabricator_phid": "PHID-REV-xxx", + "provider": "phabricator", + "provider_id": 123, "title": "Bug XXX - Some bug", "bugzilla_id": 123456, "base_repository": "http://repo.test/myrepo", @@ -153,7 +152,7 @@ def test_create_diff(self): self.diff.delete() data = { "id": 1234, - "phid": "PHID-DIFF-xxx", + "provider_id": "PHID-DIFF-1234", "review_task_id": "deadbeef123", "mercurial_hash": "coffee12345", "repository": "http://repo.test/try", @@ -181,12 +180,13 @@ def test_create_diff(self): # Response should have url to create issues self.assertEqual( - response.json()["issues_url"], "http://testserver/v1/diff/1234/issues/" + response.json()["issues_url"], + "http://testserver/v1/diff/PHID-DIFF-1234/issues/", ) # Check a diff has been created self.assertEqual(Diff.objects.count(), 1) - diff = Diff.objects.get(pk=1234) + diff = Diff.objects.get(provider_id="PHID-DIFF-1234") self.assertEqual(diff.mercurial_hash, "coffee12345") self.assertEqual(diff.revision, self.revision) @@ -204,13 +204,17 @@ def test_create_issue_disabled(self): } # No auth will give a permission denied - response = self.client.post("/v1/diff/1234/issues/", data, format="json") + response = self.client.post( + "/v1/diff/PHID-DIFF-1234/issues/", data, format="json" + ) self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) # Once authenticated, creation will work self.assertEqual(Issue.objects.count(), 0) self.client.force_authenticate(user=self.user) - response = self.client.post("/v1/diff/1234/issues/", data, format="json") + response = self.client.post( + "/v1/diff/PHID-DIFF-1234/issues/", data, format="json" + ) self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) # Do not check the content of issue created as it's a random UUID @@ -281,7 +285,7 @@ def test_create_issue_bulk(self): self.assertDictEqual( issue_data, { - "diff_id": None, + "diff_provider_id": None, "issues": [ { "analyzer": "remote-flake8", @@ -334,7 +338,7 @@ def test_create_issue_bulk_with_diff(self): Check we can create issues on a revision with a reference to a diff """ data = { - "diff_id": 1234, + "diff_provider_id": "PHID-DIFF-1234", "issues": [ { "hash": "somemd5hash", @@ -354,13 +358,13 @@ def test_create_issue_bulk_with_diff(self): ) self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(Issue.objects.filter(revisions__phabricator_id=456).count(), 1) + self.assertEqual(Issue.objects.filter(revisions__provider_id=456).count(), 1) - issue = Issue.objects.filter(revisions__phabricator_id=456).get() + issue = Issue.objects.filter(revisions__provider_id=456).get() self.assertDictEqual( response.json(), { - "diff_id": 1234, + "diff_provider_id": "PHID-DIFF-1234", "issues": [ { "analyzer": "remote-flake8", @@ -551,7 +555,7 @@ def test_create_issue_bulk_duplicate(self): self.assertDictEqual( issue_data, { - "diff_id": None, + "diff_provider_id": None, # The same issue link is returned twice in the resulting payload "issues": 2 * [ @@ -665,7 +669,7 @@ def test_create_issue_already_referenced(self): self.client.force_authenticate(user=self.user) another_diff = self.revision.diffs.create( id=56748, - phid="PHID-DIFF-yyy", + provider_id="PHID-DIFF-yyy", review_task_id="deadbeef456", mercurial_hash="coffee67890", repository=self.repo_try, diff --git a/backend/code_review_backend/issues/tests/test_compare.py b/backend/code_review_backend/issues/tests/test_compare.py index 78c05ef0b..6c163633c 100644 --- a/backend/code_review_backend/issues/tests/test_compare.py +++ b/backend/code_review_backend/issues/tests/test_compare.py @@ -27,8 +27,8 @@ def setUp(self): # Create a simple stack with 2 diffs self.revision = self.repo_try.head_revisions.create( - phabricator_id=1, - phabricator_phid="PHID-DREV-1", + provider="phabricator", + provider_id=1, title="Revision XYZ", bugzilla_id=1234567, base_repository=self.repo, @@ -36,7 +36,7 @@ def setUp(self): for i in range(2): self.revision.diffs.create( id=i + 1, - phid=f"PHID-DIFF-{i+1}", + provider_id=f"PHID-DIFF-{i+1}", review_task_id=f"task-{i}", mercurial_hash=hashlib.sha1(f"hg {i}".encode()).hexdigest(), repository=self.repo_try, diff --git a/backend/code_review_backend/issues/tests/test_diff.py b/backend/code_review_backend/issues/tests/test_diff.py index 6359ec73b..8b8ae2572 100644 --- a/backend/code_review_backend/issues/tests/test_diff.py +++ b/backend/code_review_backend/issues/tests/test_diff.py @@ -30,8 +30,8 @@ def setUpTestData(cls): for i in range(2): cls.repo_try.head_revisions.create( id=i + 1, - phabricator_id=i + 1, - phabricator_phid=f"PHID-DREV-{i+1}", + provider="phabricator", + provider_id=i + 1, title=f"Revision {i+1}", bugzilla_id=10000 + i, base_repository=cls.repo, @@ -39,7 +39,7 @@ def setUpTestData(cls): for i in range(3): Diff.objects.create( id=i + 1, - phid=f"PHID-DIFF-{i+1}", + provider_id=f"PHID-DIFF-{i+1}", revision_id=(i % 2) + 1, review_task_id=f"task-{i}", mercurial_hash=hashlib.sha1(f"hg {i}".encode()).hexdigest(), @@ -58,6 +58,7 @@ def test_list_diffs(self): """ response = self.client.get("/v1/diff/") self.assertEqual(response.status_code, status.HTTP_200_OK) + self.maxDiff = None self.assertDictEqual( response.json(), { @@ -66,30 +67,29 @@ def test_list_diffs(self): "previous": None, "results": [ { - "id": 3, + "provider_id": "PHID-DIFF-3", "revision": { "id": 1, "base_repository": "http://repo.test/myrepo", "head_repository": "http://repo.test/try", "base_changeset": None, "head_changeset": None, - "phabricator_id": 1, - "phabricator_phid": "PHID-DREV-1", + "provider": "phabricator", + "provider_id": 1, "title": "Revision 1", "bugzilla_id": 10000, "diffs_url": "http://testserver/v1/revision/1/diffs/", "issues_bulk_url": "http://testserver/v1/revision/1/issues/", - "phabricator_url": "https://phabricator.services.mozilla.com/D1", + "url": "https://phabricator.services.mozilla.com/D1", }, "repository": { "id": 2, "slug": "myrepo-try", "url": "http://repo.test/try", }, - "phid": "PHID-DIFF-3", "review_task_id": "task-2", "mercurial_hash": "30b501affc4d3b9c670fc297ab903b406afd5f04", - "issues_url": "http://testserver/v1/diff/3/issues/", + "issues_url": "http://testserver/v1/diff/PHID-DIFF-3/issues/", "nb_issues": 0, "nb_issues_publishable": 0, "nb_warnings": 0, @@ -97,30 +97,29 @@ def test_list_diffs(self): "created": self.now, }, { - "id": 2, + "provider_id": "PHID-DIFF-2", "revision": { "id": 2, "base_repository": "http://repo.test/myrepo", "head_repository": "http://repo.test/try", "base_changeset": None, "head_changeset": None, - "phabricator_id": 2, - "phabricator_phid": "PHID-DREV-2", + "provider": "phabricator", + "provider_id": 2, "title": "Revision 2", "bugzilla_id": 10001, "diffs_url": "http://testserver/v1/revision/2/diffs/", "issues_bulk_url": "http://testserver/v1/revision/2/issues/", - "phabricator_url": "https://phabricator.services.mozilla.com/D2", + "url": "https://phabricator.services.mozilla.com/D2", }, "repository": { "id": 2, "slug": "myrepo-try", "url": "http://repo.test/try", }, - "phid": "PHID-DIFF-2", "review_task_id": "task-1", "mercurial_hash": "32d2a594cfef74fcb524028d1521d0d4bd98bd35", - "issues_url": "http://testserver/v1/diff/2/issues/", + "issues_url": "http://testserver/v1/diff/PHID-DIFF-2/issues/", "nb_issues": 0, "nb_issues_publishable": 0, "nb_warnings": 0, @@ -128,30 +127,29 @@ def test_list_diffs(self): "created": self.now, }, { - "id": 1, + "provider_id": "PHID-DIFF-1", "revision": { "id": 1, "base_repository": "http://repo.test/myrepo", "head_repository": "http://repo.test/try", "base_changeset": None, "head_changeset": None, - "phabricator_id": 1, - "phabricator_phid": "PHID-DREV-1", + "provider": "phabricator", + "provider_id": 1, "title": "Revision 1", "bugzilla_id": 10000, "diffs_url": "http://testserver/v1/revision/1/diffs/", "issues_bulk_url": "http://testserver/v1/revision/1/issues/", - "phabricator_url": "https://phabricator.services.mozilla.com/D1", + "url": "https://phabricator.services.mozilla.com/D1", }, "repository": { "id": 2, "slug": "myrepo-try", "url": "http://repo.test/try", }, - "phid": "PHID-DIFF-1", "review_task_id": "task-0", "mercurial_hash": "a2ac78b7d12d6e55b9b15c1c2048a16c58c6c803", - "issues_url": "http://testserver/v1/diff/1/issues/", + "issues_url": "http://testserver/v1/diff/PHID-DIFF-1/issues/", "nb_issues": 0, "nb_issues_publishable": 0, "nb_warnings": 0, @@ -171,7 +169,10 @@ def test_filter_repo(self): response = self.client.get("/v1/diff/?repository=myrepo") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.json()["count"], 3) - self.assertEqual([d["id"] for d in response.json()["results"]], [3, 2, 1]) + self.assertEqual( + [d["provider_id"] for d in response.json()["results"]], + ["PHID-DIFF-3", "PHID-DIFF-2", "PHID-DIFF-1"], + ) # Missing repo response = self.client.get("/v1/diff/?repository=missing") @@ -187,13 +188,18 @@ def test_search(self): response = self.client.get("/v1/diff/?search=10001") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.json()["count"], 1) - self.assertEqual([d["id"] for d in response.json()["results"]], [2]) + self.assertEqual( + [d["provider_id"] for d in response.json()["results"]], ["PHID-DIFF-2"] + ) # In title response = self.client.get("/v1/diff/?search=revision 1") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.json()["count"], 2) - self.assertEqual([d["id"] for d in response.json()["results"]], [3, 1]) + self.assertEqual( + [d["provider_id"] for d in response.json()["results"]], + ["PHID-DIFF-3", "PHID-DIFF-1"], + ) def test_filter_issues(self): """ @@ -204,7 +210,10 @@ def test_filter_issues(self): response = self.client.get("/v1/diff/?issues=no") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.json()["count"], 3) - self.assertEqual([d["id"] for d in response.json()["results"]], [3, 2, 1]) + self.assertEqual( + [d["provider_id"] for d in response.json()["results"]], + ["PHID-DIFF-3", "PHID-DIFF-2", "PHID-DIFF-1"], + ) # Any issues response = self.client.get("/v1/diff/?issues=any") diff --git a/backend/code_review_backend/issues/tests/test_issue.py b/backend/code_review_backend/issues/tests/test_issue.py index 070ca4679..a4c5c2311 100644 --- a/backend/code_review_backend/issues/tests/test_issue.py +++ b/backend/code_review_backend/issues/tests/test_issue.py @@ -24,16 +24,16 @@ def setUp(self): mock_now.return_value = datetime.fromisoformat("2010-01-01:10") self.revision = self.repo.base_revisions.create( id=4000, - phabricator_id=1111, - phabricator_phid="PH-1111", + provider="phabricator", + provider_id=1111, head_changeset="1" * 40, head_repository=self.repo, ) mock_now.return_value = datetime.fromisoformat("2000-01-01:10") self.old_revision = self.repo.base_revisions.create( id=4001, - phabricator_id=2222, - phabricator_phid="PH-2222", + provider="phabricator", + provider_id=2222, head_changeset="2" * 40, head_repository=self.repo, ) diff --git a/backend/code_review_backend/issues/tests/test_revision.py b/backend/code_review_backend/issues/tests/test_revision.py index fb1f93668..1fb6f4265 100644 --- a/backend/code_review_backend/issues/tests/test_revision.py +++ b/backend/code_review_backend/issues/tests/test_revision.py @@ -18,21 +18,19 @@ def setUp(self): def test_phabricator_url(self): rev = Revision.objects.create( - phabricator_id=12, - phabricator_phid="PHID-REV-12345", + provider="phabricator", + provider_id=12, base_repository=self.repo, head_repository=self.repo, ) # Default settings - self.assertEqual( - rev.phabricator_url, "https://phabricator.services.mozilla.com/D12" - ) + self.assertEqual(rev.url, "https://phabricator.services.mozilla.com/D12") # Override host with /api settings.PHABRICATOR_HOST = "http://phab.test/api" - self.assertEqual(rev.phabricator_url, "http://phab.test/D12") + self.assertEqual(rev.url, "http://phab.test/D12") # Override host with complex url settings.PHABRICATOR_HOST = "http://anotherphab.test/api123/?custom" - self.assertEqual(rev.phabricator_url, "http://anotherphab.test/D12") + self.assertEqual(rev.url, "http://anotherphab.test/D12") diff --git a/backend/code_review_backend/issues/tests/test_stats.py b/backend/code_review_backend/issues/tests/test_stats.py index 1746b64ca..191ae93fe 100644 --- a/backend/code_review_backend/issues/tests/test_stats.py +++ b/backend/code_review_backend/issues/tests/test_stats.py @@ -29,8 +29,8 @@ def setUp(self): # Create a revision revision = self.repo_try.head_revisions.create( - phabricator_id=10, - phabricator_phid="PHID-DREV-arev", + provider="phabricator", + provider_id=10, title="Revision A", bugzilla_id=None, base_repository=self.repo, @@ -40,7 +40,7 @@ def setUp(self): for i in range(10): revision.diffs.create( id=i + 1, - phid=f"PHID-DIFF-{i+1}", + provider_id=f"PHID-DIFF-{i+1}", review_task_id=f"task-{i}", mercurial_hash=hashlib.sha1(f"hg {i}".encode()).hexdigest(), repository=self.repo_try, @@ -245,18 +245,18 @@ def check_issue(issue): diffs = issue["diffs"] self.assertEqual(len(diffs), 1) diff = diffs[0] - self.assertTrue(diff["id"] > 0) self.assertEqual(diff["repository"], "http://repo.test/myrepo-try") # Revision rev = diff["revision"] self.assertTrue(rev["id"] > 0) - self.assertTrue(rev["phabricator_id"] > 0) + self.assertEqual(rev["provider"], "phabricator") + self.assertTrue(rev["provider_id"] > 0) self.assertEqual(rev["base_repository"], "http://repo.test/myrepo") self.assertEqual(rev["head_repository"], "http://repo.test/myrepo-try") self.assertEqual( - rev["phabricator_url"], - f"http://anotherphab.test/D{rev['phabricator_id']}", + rev["url"], + f"http://anotherphab.test/D{rev['provider_id']}", ) return True diff --git a/bot/README.md b/bot/README.md index 1fffb7035..3945128e6 100644 --- a/bot/README.md +++ b/bot/README.md @@ -76,6 +76,21 @@ Configuration: This reporter will send detailed information about every **publishable** issue. +## Reporter: Github + +Key `reporter` is `github` + +Configuration: + +- `client_id` : Github App client ID. +- `private_key_pem` : Content of the github App private key. +- `installation_id` : ID of the Github App [installation](https://docs.github.com/en/apps/using-github-apps/installing-your-own-github-app) (integer). +- `analyzers_skipped` : The analyzers that will **not** be published on Phabricator. + +This reporter will send detailed information about every **publishable** issue. + +You can find more details about the Github reporter setup in the [documentation](/docs/github.md). + ## Example configuration ```yaml @@ -88,5 +103,12 @@ common: bot: REPORTERS: - - reporter: phabricator + - reporter: github + client_id: xxxxxxxxxxxxxxxxxxxx + private_key_pem: |- + -----BEGIN RSA PRIVATE KEY----- + XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + -----END RSA PRIVATE KEY----- + installation_id: 123456789 ``` diff --git a/bot/code_review_bot/__init__.py b/bot/code_review_bot/__init__.py index 1bfe0c0f8..2c8d2b79c 100644 --- a/bot/code_review_bot/__init__.py +++ b/bot/code_review_bot/__init__.py @@ -191,41 +191,29 @@ def hash(self): We make the assumption that the message does not contain the line number If an error occurs reading the file content (locally or remotely), None is returned """ + from code_review_bot.revisions import GithubRevision, PhabricatorRevision + assert self.revision is not None, "Missing revision" + local_repository = None + if isinstance(self.revision, PhabricatorRevision): + if settings.mercurial_cache_checkout: + local_repository = settings.mercurial_cache_checkout + elif isinstance(self.revision, GithubRevision): + assert ( + settings.github_cache + ), "Github cache repository is mandatory to analyse a github revision" + local_repository = settings.github_cache + else: + raise Exception(self.revision.__class__) + raise NotImplementedError + # Build the hash only if the file is not autogenerated. # An autogenerated file resides in the build directory that it has the # format `obj-x86_64-pc-linux-gnu` file_content = None if "/obj-" not in self.path: - if settings.mercurial_cache_checkout: - logger.debug("Using the local repository to build issue's hash") - try: - with (settings.mercurial_cache_checkout / self.path).open() as f: - file_content = f.read() - except (FileNotFoundError, IsADirectoryError): - logger.warning( - "Failed to find issue's related file", path=self.path - ) - file_content = None - else: - try: - # Load all the lines affected by the issue - file_content = self.revision.load_file(self.path) - except ValueError: - # Build the hash with an empty content in case the path is erroneous - file_content = None - except requests.exceptions.HTTPError as e: - if e.response.status_code == 404: - logger.warning( - "Failed to download a file with an issue", path=self.path - ) - - # We still build the hash with empty content - file_content = None - else: - # When encountering another HTTP error, raise the issue - raise + file_content = self.revision.get_file_content(self.path, local_repository) if file_content is None: self._hash = None diff --git a/bot/code_review_bot/backend.py b/bot/code_review_bot/backend.py index 17019b5b3..7a81c6c9a 100644 --- a/bot/code_review_bot/backend.py +++ b/bot/code_review_bot/backend.py @@ -9,6 +9,7 @@ from code_review_bot import taskcluster from code_review_bot.config import GetAppUserAgent, settings +from code_review_bot.revisions import PhabricatorRevision from code_review_bot.tasks.lint import MozLintIssue logger = structlog.get_logger(__name__) @@ -46,38 +47,31 @@ def publish_revision(self, revision): logger.warn("Skipping revision publication on backend") return - # Check the repositories are urls - for url in (revision.base_repository, revision.head_repository): - assert isinstance(url, str), "Repository must be a string" - res = urllib.parse.urlparse(url) - assert res.scheme and res.netloc, f"Repository {url} is not an url" - - # Check the Mercurial changesets are strings - for changeset in ( - revision.base_changeset, - revision.head_changeset, - ): - assert isinstance(changeset, str), "Mercurial changeset must be a string" - - # Create revision on backend if it does not exists - data = { - "phabricator_id": revision.phabricator_id, - "phabricator_phid": revision.phabricator_phid, - "title": revision.title, - "bugzilla_id": revision.bugzilla_id, - "base_repository": revision.base_repository, - "head_repository": revision.head_repository, - "base_changeset": revision.base_changeset, - "head_changeset": revision.head_changeset, - } - - # Try to create the revision, or retrieve it in case it exists with that Phabricator ID. + elif isinstance(revision, PhabricatorRevision): + # Check the repositories are urls + for url in (revision.base_repository, revision.head_repository): + assert isinstance(url, str), "Repository must be a string" + res = urllib.parse.urlparse(url) + assert res.scheme and res.netloc, f"Repository {url} is not an url" + + # Check the Mercurial changesets are strings + for changeset in ( + revision.base_changeset, + revision.head_changeset, + ): + assert isinstance( + changeset, str + ), "Mercurial changeset must be a string" + + revision_data, diff_data = revision.serialize() + + # Try to create the revision, or retrieve it in case it exists with that provider and ID. # The backend always returns a revisions, either a new one, or a pre-existing one revision_url = "/v1/revision/" auth = (self.username, self.password) url_post = urllib.parse.urljoin(self.url, revision_url) response = requests.post( - url_post, headers=GetAppUserAgent(), json=data, auth=auth + url_post, headers=GetAppUserAgent(), json=revision_data, auth=auth ) if not response.ok: logger.warn(f"Backend rejected the payload: {response.content}") @@ -87,17 +81,14 @@ def publish_revision(self, revision): revision.issues_url = backend_revision["issues_bulk_url"] revision.id = backend_revision["id"] - # A revision may have no diff (e.g. Mozilla-central group tasks) - if not revision.diff_id: + # A revision may have no diff (e.g. Phabricator Mozilla-central group tasks) + if isinstance(revision, PhabricatorRevision) and not revision.diff_id: return backend_revision # Create diff attached to revision on backend data = { - "id": revision.diff_id, - "phid": revision.diff_phid, + **diff_data, "review_task_id": settings.taskcluster.task_id, - "mercurial_hash": revision.head_changeset, - "repository": revision.head_repository, } backend_diff = self.create(backend_revision["diffs_url"], data) diff --git a/bot/code_review_bot/cli.py b/bot/code_review_bot/cli.py index 6e2a9a875..8f590f6a0 100644 --- a/bot/code_review_bot/cli.py +++ b/bot/code_review_bot/cli.py @@ -26,7 +26,7 @@ ) from code_review_bot.config import settings from code_review_bot.report import get_reporters -from code_review_bot.revisions import PhabricatorRevision, Revision +from code_review_bot.revisions import GithubRevision, PhabricatorRevision, Revision from code_review_bot.tools.libmozdata import setup as setup_libmozdata from code_review_bot.tools.log import init_logger from code_review_bot.workflow import Workflow @@ -64,6 +64,13 @@ def parse_cli(): type=Path, default=None, ) + parser.add_argument( + "--github-repository", + help="Optional path to a up-to-date github repository matching the analyzed revision.\n" + "This argument is required for Github reviusions in order to compute issues' hashes based on file content.", + type=Path, + default=None, + ) parser.add_argument("--taskcluster-client-id", help="Taskcluster Client ID") parser.add_argument("--taskcluster-access-token", help="Taskcluster Access token") return parser.parse_args() @@ -116,6 +123,7 @@ def main(): taskcluster.secrets["repositories"], taskcluster.secrets["ssh_key"], args.mercurial_repository, + args.github_repository, ) # Setup statistics @@ -205,6 +213,11 @@ def main(): ) return 1 + if isinstance(revision, GithubRevision): + assert ( + args.github_repository is not None + ), "Girhub revision analysis requires the --github-repository argument to be set" + # Run workflow according to source w = Workflow( reporters, diff --git a/bot/code_review_bot/config.py b/bot/code_review_bot/config.py index c571aa3ef..dfa54b2cb 100644 --- a/bot/code_review_bot/config.py +++ b/bot/code_review_bot/config.py @@ -58,6 +58,7 @@ def __init__(self): # Cache to store whole repositories self.mercurial_cache = None + self.github_cache = None # SSH Key used to push on try self.ssh_key = None @@ -78,6 +79,7 @@ def setup( repositories, ssh_key=None, mercurial_cache=None, + github_cache=None, ): # Detect source from env if "TRY_TASK_ID" in os.environ and "TRY_TASK_GROUP_ID" in os.environ: @@ -148,6 +150,14 @@ def build_conf(nb, repo): # Save ssh key when mercurial cache is enabled self.ssh_key = ssh_key + # Store github cache path + if github_cache is not None: + self.github_cache = Path(github_cache) + assert ( + self.github_cache.exists() + ), f"Github cache does not exist {self.github_cache}" + logger.info("Using Github cache", path=self.mercurial_cache) + def load_user_blacklist(self, usernames, phabricator_api): """ Load all black listed users from Phabricator API diff --git a/bot/code_review_bot/github.py b/bot/code_review_bot/github.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/bot/code_review_bot/report/__init__.py b/bot/code_review_bot/report/__init__.py index f1d5294ee..ed79bb1ec 100644 --- a/bot/code_review_bot/report/__init__.py +++ b/bot/code_review_bot/report/__init__.py @@ -4,6 +4,7 @@ import structlog +from code_review_bot.report.github import GithubReporter from code_review_bot.report.lando import LandoReporter from code_review_bot.report.mail import MailReporter from code_review_bot.report.mail_builderrors import BuildErrorsReporter @@ -22,6 +23,7 @@ def get_reporters(configuration): "mail": MailReporter, "build_error": BuildErrorsReporter, "phabricator": PhabricatorReporter, + "github": GithubReporter, } out = {} diff --git a/bot/code_review_bot/report/github.py b/bot/code_review_bot/report/github.py new file mode 100644 index 000000000..25db93bf6 --- /dev/null +++ b/bot/code_review_bot/report/github.py @@ -0,0 +1,62 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import structlog + +from code_review_bot.report.base import Reporter +from code_review_bot.sources.github import GithubClient, ReviewEvent + +logger = structlog.get_logger(__name__) + + +class GithubReporter(Reporter): + # Auth to Github using a configuration (from Taskcluster secret) + + def __init__(self, configuration={}, *args, **kwargs): + for key in ("client_id", "private_key_pem", "installation_id"): + if not configuration.get(key): + raise Exception(f"Missing github reporter configuration key {key}") + + # Setup github App secret from the configuration + self.github_client = GithubClient( + client_id=configuration["client_id"], + private_key=configuration["private_key_pem"], + installation_id=configuration["installation_id"], + ) + + self.analyzers_skipped = configuration.get("analyzers_skipped", []) + assert isinstance( + self.analyzers_skipped, list + ), "analyzers_skipped must be a list" + + def publish(self, issues, revision, task_failures, notices, reviewers): + """ + Publish issues on a Github pull request. + """ + if reviewers: + raise NotImplementedError + # Avoid publishing a patch from a de-activated analyzer + publishable_issues = [ + issue + for issue in issues + if issue.is_publishable() + and issue.analyzer.name not in self.analyzers_skipped + ] + + if publishable_issues: + # Publish a review summarizing detected, unresolved and closed issues + message = f"{len(issues)} issues have been found in this revision" + event = ReviewEvent.RequestChanges + else: + # Simply approve the pull request + logger.info("No publishable issue, approving the pull request") + message = None + event = ReviewEvent.Approved + + self.github_client.publish_review( + issues=publishable_issues, + revision=revision, + message=message, + event=event, + ) diff --git a/bot/code_review_bot/report/lando.py b/bot/code_review_bot/report/lando.py index 21ed73771..a6758bec6 100644 --- a/bot/code_review_bot/report/lando.py +++ b/bot/code_review_bot/report/lando.py @@ -6,7 +6,7 @@ from code_review_bot import Level from code_review_bot.report.base import Reporter -from code_review_bot.revisions import PhabricatorRevision +from code_review_bot.revisions import GithubRevision logger = structlog.get_logger(__name__) @@ -30,10 +30,9 @@ def publish(self, issues, revision, task_failures, links, reviewers): """ Send an email to administrators """ - if not isinstance(revision, PhabricatorRevision): - raise NotImplementedError( - "Only Phabricator revisions are supported for now" - ) + if isinstance(revision, GithubRevision): + logger.warning("No Lando publication for Github yet") + return assert ( revision.phabricator_id and revision.phabricator_phid and revision.diff diff --git a/bot/code_review_bot/revisions/__init__.py b/bot/code_review_bot/revisions/__init__.py index 10d4224fa..f98bd00a7 100644 --- a/bot/code_review_bot/revisions/__init__.py +++ b/bot/code_review_bot/revisions/__init__.py @@ -3,6 +3,7 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. from code_review_bot.revisions.base import ImprovementPatch, Revision +from code_review_bot.revisions.github import GithubRevision from code_review_bot.revisions.phabricator import PhabricatorRevision -__all__ = [ImprovementPatch, Revision, PhabricatorRevision] +__all__ = [ImprovementPatch, Revision, PhabricatorRevision, GithubRevision] diff --git a/bot/code_review_bot/revisions/base.py b/bot/code_review_bot/revisions/base.py index 1cd319401..492b9a35a 100644 --- a/bot/code_review_bot/revisions/base.py +++ b/bot/code_review_bot/revisions/base.py @@ -6,6 +6,7 @@ import random from abc import ABC from datetime import timedelta +from pathlib import Path import rs_parsepatch import structlog @@ -165,6 +166,25 @@ def contains(self, issue): lines = set(range(issue.line, issue.line + issue.nb_lines)) return not lines.isdisjoint(modified_lines) + def get_file_content( + self, file_path: str, local_cache_repository: Path | None = None + ): + if local_cache_repository: + logger.debug("Using the local repository to build issue's hash") + try: + with (local_cache_repository / file_path).open() as f: + file_content = f.read() + except (FileNotFoundError, IsADirectoryError): + logger.warning("Failed to find issue's related file", path=file_path) + file_content = None + else: + try: + file_content = self.load_file(file_path) + except ValueError: + # The path is erroneous, consider as empty content + file_content = None + return file_content + @property def has_clang_files(self): """ @@ -237,17 +257,26 @@ def as_dict(self): """ raise NotImplementedError + def serialize(self): + """ + Outputs a tuple of dicts for revision and diff sent to backend + """ + raise NotImplementedError + @staticmethod def from_try_task(try_task: dict, decision_task: dict, phabricator: PhabricatorAPI): """ - Load identifiers from Phabricator, using the remote task description + Load identifiers from Phabricator or Github, using the remote task description """ + from code_review_bot.revisions.github import GithubRevision from code_review_bot.revisions.phabricator import PhabricatorRevision # Load build target phid from the task env code_review = try_task["extra"]["code-review"] - # TODO: support github revision here too - return PhabricatorRevision.from_try_task( - code_review, decision_task, phabricator - ) + if "github" in code_review: + return GithubRevision(**code_review["github"]) + else: + return PhabricatorRevision.from_try_task( + code_review, decision_task, phabricator + ) diff --git a/bot/code_review_bot/revisions/github.py b/bot/code_review_bot/revisions/github.py new file mode 100644 index 000000000..a44b4427b --- /dev/null +++ b/bot/code_review_bot/revisions/github.py @@ -0,0 +1,114 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from functools import cached_property +from urllib.parse import urlparse + +import requests +import structlog + +from code_review_bot import taskcluster +from code_review_bot.revisions import Revision + +logger = structlog.get_logger(__name__) + + +class GithubRevision(Revision): + """ + A revision from a github pull-request + """ + + def __init__(self, repo_url, branch, pull_number, pull_head_sha): + super().__init__() + + self.repo_url = repo_url + self.branch = branch + self.pull_number = pull_number + self.pull_head_sha = pull_head_sha + + # Load the patch from Github + self.patch = self.load_patch() + + def __str__(self): + return f"Github pull request {self.repo_url} #{self.pull_number} ({self.pull_head_sha[:8]})" + + def __repr__(self): + return f"GithubRevision repo_url={self.repo_url} branch={self.branch} pull_number={self.pull_number} sha={self.pull_head_sha}" + + @property + def repo_name(self): + """ + Extract the name of the repository from its URL + """ + return urlparse(self.repo_url).path.strip("/") + + @property + def repository_slug(self): + """ + Generate a slug from the Github repository. + This method copies the automatic slug creation in backend's RepositoryGetOrCreateField serializer field. + """ + base_repo_url = self.pull_request.base.repo.html_url + parsed = urlparse(base_repo_url) + return parsed.path.lstrip("/").replace("/", "-") + + def load_patch(self): + """ + Load the patch content for the current pull request HEAD + """ + # TODO: use specific sha + url = f"{self.repo_url}/pull/{self.pull_number}.diff" + logger.info("Loading github patch", url=url) + resp = requests.get(url, allow_redirects=True) + resp.raise_for_status() + return resp.content.decode() + + def as_dict(self): + return { + "repo_url": self.repo_url, + "branch": self.branch, + "pull_number": self.pull_number, + "pull_head_sha": self.pull_head_sha, + } + + @cached_property + def pull_request(self): + from code_review_bot.sources.github import GithubClient + + reporter_conf = next( + ( + reporter + for reporter in taskcluster.secrets["REPORTERS"] + if reporter["reporter"] == "github" + ), + None, + ) + # A github reporter configuration is required to perform a github Pull Request analysis + assert reporter_conf, "Github reporter secrets must be set to access information about the pull request" + client = GithubClient( + client_id=reporter_conf["client_id"], + private_key=reporter_conf["private_key_pem"], + installation_id=reporter_conf["installation_id"], + ) + return client.get_pull_request(self) + + def serialize(self): + """ + Outputs a tuple of dicts for revision and diff (empty for Github) sent to backend + """ + revision = { + "provider": "github", + "provider_id": self.pull_number, + "title": self.pull_request.title, + "bugzilla_id": None, + "base_repository": self.pull_request.base.repo.html_url, + "head_repository": self.repo_url, + } + diff = { + "provider": "github", + "provider_id": self.pull_head_sha, + "mercurial_hash": self.pull_head_sha, + "repository": self.repo_url, + } + return revision, diff diff --git a/bot/code_review_bot/revisions/phabricator.py b/bot/code_review_bot/revisions/phabricator.py index 8970e19ae..975268600 100644 --- a/bot/code_review_bot/revisions/phabricator.py +++ b/bot/code_review_bot/revisions/phabricator.py @@ -130,12 +130,13 @@ def __str__(self): return f"Phabricator #{self.diff_id} - {self.diff_phid}" @staticmethod - def from_try_task(try_task: dict, decision_task: dict, phabricator: PhabricatorAPI): + def from_try_task( + code_review: dict, decision_task: dict, phabricator: PhabricatorAPI + ): """ Load identifiers from Phabricator, using the remote task description """ # Load build target phid from the task env - code_review = try_task["extra"]["code-review"] build_target_phid = code_review.get("phabricator-diff") or code_review.get( "phabricator-build-target" ) @@ -361,8 +362,17 @@ def load_file(self, path): ) logger.info("Downloading HGMO file", url=url) - response = requests.get(url, headers=GetAppUserAgent()) - response.raise_for_status() + try: + response = requests.get(url, headers=GetAppUserAgent()) + response.raise_for_status() + except requests.exceptions.HTTPError as e: + if e.response.status_code == 404: + logger.warning("Failed to download file", path=self.path) + # Consider as empty content if the file is not found + return None + else: + # When encountering another HTTP error, raise the issue + raise e # Store in cache content = response.content.decode("utf-8") @@ -430,3 +440,26 @@ def as_dict(self): "head_changeset": self.head_changeset, "base_changeset": self.base_changeset, } + + def serialize(self): + """ + Outputs a tuple of dicts for revision and diff sent to backend + """ + revision = { + "provider": "phabricator", + "provider_id": self.phabricator_id, + "title": self.title, + "bugzilla_id": self.bugzilla_id, + "base_repository": self.base_repository, + "head_repository": self.head_repository, + "base_changeset": self.base_changeset, + "head_changeset": self.head_changeset, + } + diff = { + "id": self.diff_id, + "provider_id": self.diff_phid, + "mercurial_hash": self.head_changeset, + "repository": self.head_repository, + } + + return revision, diff diff --git a/bot/code_review_bot/sources/github.py b/bot/code_review_bot/sources/github.py new file mode 100644 index 000000000..b45f527df --- /dev/null +++ b/bot/code_review_bot/sources/github.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import enum + +import structlog +from github import Auth, GithubIntegration +from github.PullRequest import ReviewComment + +from code_review_bot import Issue +from code_review_bot.revisions import GithubRevision + +logger = structlog.get_logger(__name__) + + +class ReviewEvent(enum.Enum): + """ + Review action you want to perform. + https://docs.github.com/en/rest/pulls/reviews?apiVersion=2022-11-28#create-a-review-for-a-pull-request--parameters + """ + + Pending = "PENDING" + Approved = "APPROVE" + RequestChanges = "REQUEST_CHANGES" + Comment = "COMMENT" + + +class GithubClient: + def __init__(self, client_id: str, private_key: str, installation_id: str): + self.client_id = client_id + + # Setup auth + self.auth = Auth.AppAuth(self.client_id, private_key) + self.github_integration = GithubIntegration(auth=self.auth) + + installations = self.github_integration.get_installations() + self.installation = next( + (i for i in installations if i.id == installation_id), None + ) + if not self.installation: + raise ValueError( + f"Installation ID is not available. Available installations are {list(installations)}" + ) + # setup API + self.api = self.installation.get_github_for_installation() + + self.review_comments = [] + + def get_pull_request(self, revision: GithubRevision): + repo = self.api.get_repo(revision.repo_name) + return repo.get_pull(revision.pull_number) + + def _build_review_comment(self, issue): + return ReviewComment( + path=issue.path, + line=issue.line, + body=issue.message, + ) + + def publish_review( + self, + issues: list[Issue], + revision: GithubRevision, + event: ReviewEvent, + message: str | None = None, + ): + """ + Publish a review from a list of publishable issues, requesting changes to the author. + """ + + if not isinstance(revision, GithubRevision): + logger.warning( + f"Revision must originate from Github in order to publish a review, skipping {revision}." + ) + return + + repo = self.api.get_repo(revision.repo_name) + pull_request = repo.get_pull(revision.pull_number) + + attrs = {} + if message is None: + assert ( + event == ReviewEvent.Approved + ), "Body can be left null only when approving a pull request" + else: + attrs["body"] = message + + pull_request.create_review( + commit=repo.get_commit(revision.pull_head_sha), + comments=[self._build_review_comment(issue) for issue in issues], + # https://docs.github.com/en/rest/pulls/reviews?apiVersion=2022-11-28#create-a-review-for-a-pull-request + event=event.value, + **attrs, + ) diff --git a/bot/code_review_bot/workflow.py b/bot/code_review_bot/workflow.py index 260a638e3..b1bbb16f1 100644 --- a/bot/code_review_bot/workflow.py +++ b/bot/code_review_bot/workflow.py @@ -20,7 +20,7 @@ from code_review_bot.config import settings from code_review_bot.mercurial import MercurialWorker, Repository, robust_checkout from code_review_bot.report.debug import DebugReporter -from code_review_bot.revisions import PhabricatorRevision, Revision +from code_review_bot.revisions import GithubRevision, PhabricatorRevision, Revision from code_review_bot.sources.phabricator import ( PhabricatorActions, PhabricatorBuildState, @@ -133,7 +133,7 @@ def run(self, revision): self.clone_repository(revision) # Mark know issues to avoid publishing them on this patch - self.find_previous_issues(issues, base_rev_changeset) + self.find_previous_issues(revision, issues, base_rev_changeset) new_issues_count = sum(issue.new_issue for issue in issues) logger.info( f"Found {new_issues_count} new issues (over {len(issues)} total detected issues)", @@ -265,9 +265,11 @@ def start_analysis(self, revision): logger.warning("Blacklisted author, stopping there.") return - # Cannot run without mercurial cache configured - if not settings.mercurial_cache: - raise Exception("Mercurial cache must be configured to start analysis") + # Cannot run without either mercurial or github cache configured + if not settings.mercurial_cache and not settings.github_cache: + raise Exception( + "One of Mercurial cache or github cache must be configured to start analysis" + ) # Cannot run without ssh key if not settings.ssh_key: @@ -361,6 +363,12 @@ def clone_repository(self, revision): Clone the repo locally when configured On production this should use a Taskcluster cache """ + if not isinstance(revision, PhabricatorRevision): + logger.info( + "Mercurial clone only supports Phabricator revisions, skipping." + ) + return + if not settings.mercurial_cache: logger.debug("Local clone not required") return @@ -490,7 +498,7 @@ def index(self, revision, **kwargs): }, ) - def find_previous_issues(self, issues, base_rev_changeset=None): + def find_previous_issues(self, revision, issues, base_rev_changeset=None): """ Look for known issues in the backend matching the given list of issues @@ -513,9 +521,17 @@ def find_previous_issues(self, issues, base_rev_changeset=None): base_revision_changeset=base_rev_changeset, ) + if isinstance(revision, PhabricatorRevision): + repository_slug = "mozilla-central" + elif isinstance(revision, GithubRevision): + # TODO: Rely on the central repository for known issues + repository_slug = revision.repository_slug + else: + raise NotImplementedError + for path, group_issues in issues_groups: known_issues = self.backend_api.list_repo_issues( - "mozilla-central", + repository_slug, date=current_date, revision_changeset=base_rev_changeset, path=path, @@ -673,11 +689,12 @@ def update_status(self, revision, state): """ Update build status on HarborMaster """ - if not isinstance(revision, PhabricatorRevision): - raise NotImplementedError( - "Only Phabricator revisions are supported for now" - ) assert isinstance(state, BuildState) + + # Skip github status update, as we rely on the github reporter for publication + if isinstance(revision, GithubRevision): + return + if not revision.build_target_phid: logger.info( "No build target found, skipping HarborMaster update", state=state.value @@ -701,6 +718,7 @@ def publish_link(self, revision: Revision, slug: str, name: str, url: str): raise NotImplementedError( "Only Phabricator revisions are supported for now" ) + if not revision.build_target_phid: logger.info( "No build target found, skipping HarborMaster link creation", diff --git a/bot/requirements.txt b/bot/requirements.txt index 63a693933..9636e4bbe 100644 --- a/bot/requirements.txt +++ b/bot/requirements.txt @@ -1,6 +1,7 @@ aiohttp<4 influxdb==5.3.2 libmozdata==0.2.12 +PyGithub==2.8.1 python-hglib==2.6.2 pyyaml==6.0.3 rs_parsepatch==0.4.4 diff --git a/bot/tests/conftest.py b/bot/tests/conftest.py index 9d75614dc..8ae1326a0 100644 --- a/bot/tests/conftest.py +++ b/bot/tests/conftest.py @@ -13,6 +13,8 @@ from collections import defaultdict, namedtuple from configparser import ConfigParser from contextlib import contextmanager +from datetime import UTC, datetime, timedelta +from textwrap import dedent from unittest.mock import MagicMock import hglib @@ -282,6 +284,64 @@ def diff_search(request): yield PhabricatorAPI(url="http://phabricator.test/api/", api_key="deadbeef") +@pytest.fixture +def mock_github(mock_config): + """ + Mock default github API calls made by the client + """ + diff = dedent( + """diff --git a/path/to/test.cpp b/path/to/test.cpp + index c57eff55..980a0d5f 100644 + --- a/path/to/test.cpp + +++ b/path/to/test.cpp + @@ -1 +1 @@ + -#include + +Hello World! + """ + ) + + responses.add( + responses.GET, + "https://github.tests.com/owner/repo-name/pull/1.diff", + json=diff, + ) + responses.add( + responses.GET, + "https://api.github.com:443/app/installations", + json=[ + { + "id": 123456789, + "access_tokens_url": "https://github.tests.com/app/installations/123456789/access_tokens", + } + ], + ) + responses.add( + responses.POST, + "https://api.github.com:443/app/installations/123456789/access_tokens", + json={ + "token": "auth_token", + "expires_at": (datetime.now(UTC) + timedelta(1)).strftime( + "%Y-%m-%dT%H:%M:%S.%fZ" + ), + }, + ) + responses.add( + responses.GET, + "https://api.github.com:443/repos/owner/repo-name", + json={"url": "https://api.github.com/repos/owner/repo-name"}, + ) + responses.add( + responses.GET, + "https://api.github.com:443/repos/owner/repo-name/pulls/1", + json={"url": "https://api.github.com/repos/owner/repo-name/pulls/1"}, + ) + responses.add( + responses.GET, + "https://api.github.com:443/repos/owner/repo-name/commits/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + json={"sha": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}, + ) + + @pytest.fixture def mock_try_task(): """ @@ -290,6 +350,25 @@ def mock_try_task(): return {"extra": {"code-review": {"phabricator-diff": "PHID-HMBT-test"}}} +@pytest.fixture +def mock_github_try_task(): + """ + Mock a remote Try task definition from a github revision + """ + return { + "extra": { + "code-review": { + "github": { + "repo_url": "https://github.tests.com/owner/repo-name", + "branch": "test", + "pull_number": 1, + "pull_head_sha": "a" * 40, + } + } + } + } + + @pytest.fixture def mock_decision_task(): """ @@ -332,10 +411,10 @@ def mock_revision(mock_phabricator, mock_try_task, mock_decision_task, mock_conf """ Mock a mercurial revision """ - from code_review_bot.revisions import PhabricatorRevision + from code_review_bot.revisions import Revision with mock_phabricator as api: - return PhabricatorRevision.from_try_task(mock_try_task, mock_decision_task, api) + return Revision.from_try_task(mock_try_task, mock_decision_task, api) @pytest.fixture diff --git a/bot/tests/fixtures/private_key.pem b/bot/tests/fixtures/private_key.pem new file mode 100644 index 000000000..8f33755ca --- /dev/null +++ b/bot/tests/fixtures/private_key.pem @@ -0,0 +1,28 @@ +# THIS IS A FAKE PRIVATE KEY USED FOR TESTS +-----BEGIN RSA PRIVATE KEY----- +MIIEowIBAAKCAQEAjIf0Q38ga5TF9CbNXewI/duyPJgz/TJAvdHvexwtp3qIIWfH +1CNK0NvcmaLWcvgyVn4nj8aLexQiJZQVQYII/YMwXAg2tK75dWkP56cWL0odb4Zs +o2GU14xRGdonixVb8COC8CxiLzFGBXpY2gMfru/9di6/0hRY1o5Qd2aYrXZVHDpe +0ATuIF0gR8MbMz8y8Azqvs+89epjAmKo+6+sU/7yITm9aJ685CEiug3127Kowk07 +TYeebXlzw2eqxcIw35loAs/oqyOIEX607KtVjCBl73FoEfuLHhsi1hFxBOg+HvXn +TzdFiLN/99ZGRRMlJhZCw/DWpZ9uip5MrohneQIDAQABAoIBAD9TVELGGnngBIPM +qGZWYobiZSLhAyxpZLskyuGTBQ+fK5DCD04MyT3slS+2LSSJq0VGe9VSBrBjli+Q +1zM5wYtbfoM6QEyTPF4oBb7BkEGnCDSlQnctFcE7vaAEqiUGbvN7TRmlJmlVrtPx +GfDDz5cpFfIXhuDHwnCMmL31QX+ITV8pmrYYwhpOp+js+25wA7PFzXhgqQtqgGbM +L0O1Tn/POqTbEUBi/S95KwMnNYVEAx6lwmcpLb0KD3x2g0bkHFyeIc+pMeJa+3Kn +OpY1DTlJP3nkpiLmpWIpKk4HMo6oZhcRo9DDLQtJzE+rllEwjZBrSKW3l4d3Eh6t +itsGj7ECgYEA+9OcFcUuMVwFlknTVSoykGv4DQ2gcN/W+H70bYHgAAvjolYxnsQX +hYucxJqTBaG8l5LXjlSynoqMnbm5FY1FLOu8lzt8YhPfc0f/N1jTVL5v52mxyUNr +HjPcbbv9+vihcrhEFkhgRN/lzimv7Eweo5x4wlm8aLOIb/Eg8Y8Wth8CgYEAjtwq +hsPdf0NP/tkJFSrB7Y2I1zBruNvTuJarcv1/w61XLEBR42Odq+Y11/crY8qkwK7z +A5SOxsI6o/si2RDlSZJ5w8t+7Kz/yr5PSFcQHGsokIadQgXCP/hetS2eiNNH94vM +YvwHvSl0ey47qK0h3ugqIZJ1pBSRc0NlfJz8v2cCgYEA0hXdd0QCn2cXuiNozPnh +KR8J10nw+XmkC7dODzV0PFWu2DV0O/F3dg/c/x+9W8tsXD9C2RjL0vvfB45zXAl5 +FlqsALa9s8zEc5Yy0memFmKxVKuWiENYT+AQGvPklMVrWxtiofxLY+ot+2pHu6hd +Pz1AeVMHnYl5X3oYc61d0x0CgYBBYT9RJ8hx2rN8lYVTm5rfBdwvZ2iVVH2jx8i1 +OpDDU8xGYzVW1JsvNY9ExEimRfJ6gFaVN+LT0cYWj/OV1eapchCp67KtzErQVaJh +H/8uklghNIo50frhXeCyGCuqwM752o/yaRd9mcBGM5V4D6wloKjPboDKU+NxFdIX +Yp1FVwKBgGgG4RA3UqAY51E7zA7k3WR3sj49c6oktXVi2n7FuO3PPVTg5LAZ/c81 +vVrip+dOQD53APtrwnFpDeM+AJ03RsIfjVVfB822xRpcy7jDA04bOmJ1Skouoptx +CyIV/PUVbtmNdxJ6T1dCzAvhmK6895FK+xCwBnpaN213Nx/eG49+ +-----END RSA PRIVATE KEY----- diff --git a/bot/tests/test_backend.py b/bot/tests/test_backend.py index e6cc07bba..8d3d922cc 100644 --- a/bot/tests/test_backend.py +++ b/bot/tests/test_backend.py @@ -37,8 +37,8 @@ def test_publication(mock_clang_tidy_issues, mock_revision, mock_backend, mock_h assert revisions[1] == { "bugzilla_id": 1234567, "id": 1, - "phabricator_id": 51, - "phabricator_phid": "PHID-DREV-zzzzz", + "provider": "phabricator", + "provider_id": 51, "title": "Static Analysis tests", "diffs_url": "http://code-review-backend.test/v1/revision/1/diffs/", "issues_bulk_url": "http://code-review-backend.test/v1/revision/1/issues/", @@ -55,7 +55,7 @@ def test_publication(mock_clang_tidy_issues, mock_revision, mock_backend, mock_h "id": 42, "issues_url": "http://code-review-backend.test/v1/diff/42/issues/", "mercurial_hash": "deadbeef1234", - "phid": "PHID-DIFF-test", + "provider_id": "PHID-DIFF-test", "review_task_id": "local instance", "repository": "http://hgmo/test-try", } @@ -132,8 +132,8 @@ def test_missing_bugzilla_id(mock_revision, mock_backend, mock_hgmo): assert revisions[1] == { "id": 1, "bugzilla_id": None, - "phabricator_id": 51, - "phabricator_phid": "PHID-DREV-zzzzz", + "provider": "phabricator", + "provider_id": 51, "title": "Static Analysis tests", "diffs_url": "http://code-review-backend.test/v1/revision/1/diffs/", "issues_bulk_url": "http://code-review-backend.test/v1/revision/1/issues/", diff --git a/bot/tests/test_index.py b/bot/tests/test_index.py index 768bf06d8..fa5e6c032 100644 --- a/bot/tests/test_index.py +++ b/bot/tests/test_index.py @@ -5,7 +5,7 @@ from unittest import mock from code_review_bot.config import TaskCluster -from code_review_bot.revisions import PhabricatorRevision +from code_review_bot.revisions import PhabricatorRevision, Revision class MockPhabricatorRevision(PhabricatorRevision): @@ -199,9 +199,8 @@ def test_index_from_try( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) mock_workflow.index_service = mock.Mock() mock_config.taskcluster = TaskCluster("/tmp/dummy", "12345deadbeef", 0, False) diff --git a/bot/tests/test_reporter_github.py b/bot/tests/test_reporter_github.py new file mode 100644 index 000000000..430590443 --- /dev/null +++ b/bot/tests/test_reporter_github.py @@ -0,0 +1,163 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import json +from pathlib import Path + +import responses +from conftest import FIXTURES_DIR + +from code_review_bot.report.github import GithubReporter +from code_review_bot.revisions import GithubRevision, Revision +from code_review_bot.tasks.clang_tidy import ClangTidyIssue, ClangTidyTask +from code_review_bot.tasks.coverage import CoverageIssue, ZeroCoverageTask + + +def test_github_review( + monkeypatch, + mock_github, + mock_config, + phab, + mock_github_try_task, + mock_decision_task, + mock_task, + mock_backend_secret, +): + """ + Report 2 cland tidy issues by pushing a review to a Github pull request + """ + revision = Revision.from_try_task(mock_github_try_task, mock_decision_task, None) + assert isinstance(revision, GithubRevision) + revision.lines = { + # Add dummy lines diff + "test.txt": [0], + "path/to/test.cpp": [0], + "another_test.cpp": [41, 42, 43], + } + revision.files = ["test.txt", "test.cpp", "another_test.cpp"] + revision.id = 52 + monkeypatch.setattr(revision, "load_file", lambda x: "some_content") + + reporter = GithubReporter( + { + "client_id": "client_id", + "private_key_pem": (Path(FIXTURES_DIR) / "private_key.pem").read_text(), + "installation_id": 123456789, + } + ) + + issue_clang_tidy = ClangTidyIssue( + mock_task(ClangTidyTask, "source-test-clang-tidy"), + revision, + "another_test.cpp", + "42", + "51", + "modernize-use-nullptr", + "dummy message", + ) + assert issue_clang_tidy.is_publishable() + + issue_coverage = CoverageIssue( + mock_task(ZeroCoverageTask, "coverage"), + "path/to/test.cpp", + "1", + "This file is uncovered", + revision, + ) + assert issue_coverage.is_publishable() + + responses.add( + responses.POST, + "https://api.github.com:443/repos/owner/repo-name/pulls/1/reviews", + json={}, + ) + + reporter.publish([issue_clang_tidy, issue_coverage], revision, [], [], []) + assert [(call.request.method, call.request.url) for call in responses.calls] == [ + ("GET", "https://github.tests.com/owner/repo-name/pull/1.diff"), + ("GET", "https://api.github.com:443/app/installations"), + ( + "POST", + "https://api.github.com:443/app/installations/123456789/access_tokens", + ), + ("GET", "https://api.github.com:443/repos/owner/repo-name"), + ("GET", "https://api.github.com:443/repos/owner/repo-name/pulls/1"), + ( + "GET", + "https://api.github.com:443/repos/owner/repo-name/commits/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + ), + ("POST", "https://api.github.com:443/repos/owner/repo-name/pulls/1/reviews"), + ] + review_creation = responses.calls[-1] + assert json.loads(review_creation.request.body) == { + "body": "2 issues have been found in this revision", + "comments": [ + { + "body": "dummy message", + "path": "another_test.cpp", + "line": 42, + }, + { + "body": "This file is uncovered", + "path": "path/to/test.cpp", + "line": 1, + }, + ], + "commit_id": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "event": "REQUEST_CHANGES", + } + + +def test_github_review_approve( + monkeypatch, + mock_github, + mock_config, + phab, + mock_github_try_task, + mock_decision_task, + mock_task, + mock_backend_secret, +): + """In case no issue is found, the pull request is approved""" + revision = Revision.from_try_task(mock_github_try_task, mock_decision_task, None) + revision.lines = {} + revision.files = ["test.txt", "test.cpp", "another_test.cpp"] + revision.id = 52 + reporter = GithubReporter( + { + "client_id": "client_id", + "private_key_pem": (Path(FIXTURES_DIR) / "private_key.pem").read_text(), + "installation_id": 123456789, + } + ) + + responses.add( + responses.POST, + "https://api.github.com:443/repos/owner/repo-name/pulls/1/reviews", + json={}, + ) + + reporter.publish([], revision, [], [], []) + assert [(call.request.method, call.request.url) for call in responses.calls] == [ + ("GET", "https://github.tests.com/owner/repo-name/pull/1.diff"), + ("GET", "https://api.github.com:443/app/installations"), + ( + "POST", + "https://api.github.com:443/app/installations/123456789/access_tokens", + ), + ("GET", "https://api.github.com:443/repos/owner/repo-name"), + ("GET", "https://api.github.com:443/repos/owner/repo-name/pulls/1"), + ( + "GET", + "https://api.github.com:443/repos/owner/repo-name/commits/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + ), + ("POST", "https://api.github.com:443/repos/owner/repo-name/pulls/1/reviews"), + ] + review_creation = responses.calls[-1] + assert json.loads(review_creation.request.body) == { + "comments": [], + "commit_id": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "event": "APPROVE", + } diff --git a/bot/tests/test_reporter_phabricator.py b/bot/tests/test_reporter_phabricator.py index 9fa67ccea..cc9e88af6 100644 --- a/bot/tests/test_reporter_phabricator.py +++ b/bot/tests/test_reporter_phabricator.py @@ -13,7 +13,7 @@ from code_review_bot import Level from code_review_bot.report.phabricator import PhabricatorReporter -from code_review_bot.revisions import ImprovementPatch, PhabricatorRevision +from code_review_bot.revisions import ImprovementPatch, PhabricatorRevision, Revision from code_review_bot.tasks.clang_format import ClangFormatIssue, ClangFormatTask from code_review_bot.tasks.clang_tidy import ClangTidyIssue, ClangTidyTask from code_review_bot.tasks.clang_tidy_external import ( @@ -270,9 +270,8 @@ def test_phabricator_clang_tidy( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "another_test.cpp": [41, 42, 43] @@ -308,9 +307,8 @@ def test_phabricator_clang_format( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.cpp": [41, 42, 43], @@ -352,9 +350,8 @@ def test_phabricator_mozlint( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "python/test.py": [41, 42, 43], @@ -443,9 +440,8 @@ def test_phabricator_coverage( Test Phabricator reporter publication on a mock coverage issue """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.txt": [0], @@ -511,9 +507,8 @@ def raise_404(*args, **kwargs): raise HTTPError(response=resp_mock) with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.txt": [0], @@ -547,9 +542,8 @@ def test_phabricator_clang_tidy_and_coverage( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.txt": [0], @@ -673,9 +667,8 @@ def test_phabricator_analyzers( api.comment = unittest.mock.Mock(return_value=True) # Always use the same setup, only varies the analyzers - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = {"test.cpp": [0, 41, 42, 43], "dom/test.cpp": [42]} revision.id = 52 reporter = PhabricatorReporter( @@ -759,9 +752,8 @@ def test_phabricator_clang_tidy_build_error( from code_review_bot import Level with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.cpp": [41, 42, 43] @@ -819,9 +811,8 @@ def test_full_file( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "xx.cpp": [123, 124, 125] @@ -883,9 +874,8 @@ def test_task_failures(mock_phabricator, phab, mock_try_task, mock_decision_task """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.id = 52 reporter = PhabricatorReporter({"analyzers": ["clang-tidy"]}, api=api) @@ -910,9 +900,8 @@ def test_extra_errors( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = {"path/to/file.py": [1, 2, 3]} revision.files = ["path/to/file.py"] revision.id = 52 @@ -1003,9 +992,8 @@ def test_phabricator_notices(mock_phabricator, phab, mock_try_task, mock_decisio """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.rst": [41, 42, 43], @@ -1053,9 +1041,8 @@ def test_phabricator_tgdiff(mock_phabricator, phab, mock_try_task, mock_decision """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.rst": [41, 42, 43], @@ -1091,9 +1078,8 @@ def test_phabricator_external_tidy( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "another_test.cpp": [41, 42, 43] @@ -1144,9 +1130,8 @@ def test_phabricator_newer_diff( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.txt": [0], @@ -1224,9 +1209,8 @@ def test_phabricator_former_diff_comparison( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.txt": [0], @@ -1374,9 +1358,8 @@ def test_phabricator_before_after_comment( mock_taskcluster_config.secrets = {"BEFORE_AFTER_RATIO": 1} with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.txt": [0], diff --git a/docs/github.md b/docs/github.md new file mode 100644 index 000000000..3c657959f --- /dev/null +++ b/docs/github.md @@ -0,0 +1,33 @@ +# Github + +Starting from 2026, the project was expannded to support revisions from sources other than Phabricator, particularly Github [pull requests](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests). + +The code review bot automatically publishes a review containing the different comments, interacting with the Github API through the [App feature](https://docs.github.com/en/apps/using-github-apps/about-using-github-apps). It uses the [PyGithub](https://pypi.org/project/PyGithub/) external library to handle low level authentication steps. + +## App setup + +First, a Github App must be created and installed to your user or organisation. You can follow Github's [documentation](https://docs.github.com/en/apps/creating-github-apps/about-creating-github-apps/about-creating-github-apps#building-a-github-app) for this process. + +Once the app is installed, you can manage it from the [installation settings in github](https://github.com/settings/installations). The App must have access to the repositories to which it should be allowed to publish comments. +It should be granted a **read** and **write** scope access to pull requests to be able to publish reviews. This can be configured through the **App settings** section of your installation. +On this page, you should be able to find the App **installation ID** required to configure the bot. + +## Authentication + +Once an App is installed and has the valid access scopes, you can generate a new secret key from the [App settings](https://github.com/settings/apps). In the private key section, click the **Generate a private key** button and save the generated `.pem` file. +On this page, you should be able to find the App **Client ID** required to configure the bot. + +The code review bot YAML configuration should then be updated with the corresponding information: + +```yaml +bot: + REPORTERS: + - reporter: github + client_id: xxxxxxxxxxxxxxxxxxxx + private_key_pem: |- + -----BEGIN RSA PRIVATE KEY----- + XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + -----END RSA PRIVATE KEY----- + installation_id: 123456789 +``` diff --git a/docs/phabricator.md b/docs/phabricator.md index 76f5e9885..d1524eff1 100644 --- a/docs/phabricator.md +++ b/docs/phabricator.md @@ -1,6 +1,6 @@ # Phabricator -The main goal of the project is to publish issues on Phabricator, so we need a good integration with their API and interface. +The main goal of the project was to publish issues on Phabricator, so we need a good integration with their API and interface. ## Structure diff --git a/docs/publication.md b/docs/publication.md index 999ab8aad..ce21101ea 100644 --- a/docs/publication.md +++ b/docs/publication.md @@ -23,14 +23,14 @@ worker: Here, the analyzer produces its JSON output as `/builds/worker/clang-tidy.json`, but Taskcluster will expose it on its own public hostname as `https://taskcluster-artifacts.net///public/code-review/clang-tidy.json` -## Publish results on Phabricator +## Publish results Once your task is triggered with the `code-review` attribute, its analysis artifact will be retrieved automatically by the bot. All issues found will be filtered using those basic rules: - if the issue is not in a modifided line of a file in the patch, it will be discarded. - if the issue is in a third party path, it will be discarded. -We have [plans](https://bugzilla.mozilla.org/show_bug.cgi?id=1555721) to remove the first filter, by using a two pass approach and comparing the issues found before vs. after applying the patch. +The bot supports publishing a review to either a Phabricator revision or a Github pull request. ## Troubleshooting diff --git a/frontend/src/Diffs.vue b/frontend/src/Diffs.vue index 5aa43c888..1a7fc769e 100644 --- a/frontend/src/Diffs.vue +++ b/frontend/src/Diffs.vue @@ -156,7 +156,7 @@ export default { name: 'revision', params: { revisionId: diff.revision.id }, }" - >D{{ diff.revision.phabricator_id }}Revision {{ diff.revision.provider_id }} @ base: {{ diff.revision.base_repository | short_repo }} - head: {{ diff.revision.head_repository | short_repo }} @@ -217,10 +217,18 @@ export default {