CI Failure Comment #8078
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Copyright (c) Facebook, Inc. and its affiliates. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| name: CI Failure Comment | |
| # This workflow runs in the context of the base repo (not the fork) and has | |
| # write access to post PR comments. It is triggered when the Linux Build or | |
| # Fuzzer Jobs workflow completes with a failure and uses Claude to analyze | |
| # failure logs, correlate with the PR diff and open issues, and post a rich | |
| # diagnostic comment. | |
| # | |
| # Security: This workflow never checks out or executes PR code. It reads | |
| # failure metadata from artifacts uploaded by our own status jobs, then uses | |
| # Claude with read-only gh CLI access to fetch logs and produce comments. | |
| # | |
| # zizmor:disable:dangerous-triggers -- only reads artifacts and posts comments, no PR code execution. | |
| # Two entry points: a manual dispatch that re-analyzes a given run for a | |
| # given PR, and an automatic trigger when one of the listed CI workflows | |
| # finishes. | |
| on: | |
| workflow_dispatch: | |
| inputs: | |
| run_id: | |
| description: Workflow run ID to analyze (from a failed CI run) | |
| required: true | |
| pr_number: | |
| description: PR number to comment on | |
| required: true | |
| workflow_run: | |
| # `completed` fires for every conclusion; the job-level `if` narrows this | |
| # to failed runs that were themselves triggered by a pull_request event. | |
| workflows: [Linux Build using GCC, Fuzzer Jobs] | |
| types: | |
| - completed | |
| # Scopes granted to github.token for every job in this workflow. | |
| permissions: | |
| contents: read | |
| # Write access is what lets this workflow post the diagnostic PR comment. | |
| pull-requests: write | |
| issues: write | |
| # Read access to the triggering run: its job list and its ci-failure-* artifacts. | |
| actions: read | |
| # NOTE(review): presumably required by the Claude action for OIDC; confirm it | |
| # is still needed now that github_token is passed explicitly. | |
| id-token: write | |
| jobs: | |
| analyze-and-comment: | |
| # Run only in the upstream repository, and only for a manual dispatch or | |
| # for a failed upstream run that was triggered by a pull request. | |
| if: > | |
| github.repository == 'facebookincubator/velox' && | |
| (github.event_name == 'workflow_dispatch' || | |
| (github.event.workflow_run.event == 'pull_request' && | |
| github.event.workflow_run.conclusion == 'failure')) | |
| runs-on: ubuntu-latest | |
| steps: | |
| # workflow_run-triggered jobs start with an empty workspace; check | |
| # out just the helper-scripts dir so the sourced gh-api-retry.sh | |
| # is on disk for the gh_api callers below. Sparse-checkout keeps | |
| # this fast; the later "Checkout for Claude context and prompt" | |
| # step swaps to a different sparse set (CLAUDE.md + skill files) | |
| # but only after gh_api usage is done. | |
| - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 | |
| with: | |
| # Do not keep the token in the local git config; the steps that call | |
| # the GitHub API receive GH_TOKEN through their own env instead. | |
| persist-credentials: false | |
| sparse-checkout: .github/scripts | |
| # Resolve the PR to comment on and publish it as the `number` output. | |
| # Later steps are gated on that output, so leaving it unset (no open PR) | |
| # quietly skips the rest of the job. | |
| - name: Get PR number | |
| id: pr | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| REPO: ${{ github.repository }} | |
| HEAD_OWNER: ${{ github.event.workflow_run.head_repository.owner.login }} | |
| HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }} | |
| INPUT_PR_NUMBER: ${{ inputs.pr_number }} | |
| run: | | |
| # For workflow_dispatch, use the provided PR number directly. | |
| if [ -n "$INPUT_PR_NUMBER" ]; then | |
| # The value is free-form user input that is later substituted into the | |
| # prompt template; accept digits only and fail early otherwise. | |
| if ! [[ "$INPUT_PR_NUMBER" =~ ^[0-9]+$ ]]; then | |
| echo "::error::pr_number must be a positive integer, got '${INPUT_PR_NUMBER}'" | |
| exit 1 | |
| fi | |
| echo "number=$INPUT_PR_NUMBER" >> "$GITHUB_OUTPUT" | |
| exit 0 | |
| fi | |
| source .github/scripts/gh-api-retry.sh | |
| # The branch name comes from the (possibly forked) head repository and may | |
| # contain characters such as '&', '#', '+' or '%'. Percent-encode the | |
| # owner:branch pair so it cannot truncate the query or inject parameters. | |
| head_ref=$(jq -rn --arg v "${HEAD_OWNER}:${HEAD_BRANCH}" '$v | @uri') | |
| pr_number=$(gh_api \ | |
| "/repos/${REPO}/pulls?head=${head_ref}&state=open" \ | |
| -q '.[0].number // empty') | |
| if [ -z "$pr_number" ]; then | |
| echo "No open PR found for branch ${HEAD_BRANCH}" | |
| exit 0 | |
| fi | |
| echo "number=$pr_number" >> "$GITHUB_OUTPUT" | |
| # Fetch the ci-failure-* artifacts uploaded by the analyzed run. Pinned to a | |
| # commit SHA like every other action in this workflow, so a moved tag cannot | |
| # change what runs with this workflow's write-scoped token. | |
| - name: Download failure artifacts | |
| if: steps.pr.outputs.number | |
| uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 | |
| with: | |
| github-token: ${{ github.token }} | |
| # Manual dispatch supplies run_id; otherwise use the run that triggered us. | |
| run-id: ${{ inputs.run_id || github.event.workflow_run.id }} | |
| pattern: ci-failure-* | |
| path: /tmp/ci-failures | |
| # Keep one sub-directory per artifact; the next step globs | |
| # ci-failure-*/failure.json under this path. | |
| merge-multiple: false | |
| # Build a JSON array describing the failures and publish it as the | |
| # `failure_metadata` output, together with a `has_failures` flag that gates | |
| # the remaining steps. Preferred source: failure.json files from downloaded | |
| # artifacts. Fallback: the list of failed jobs in the analyzed run. | |
| - name: Collect failure metadata | |
| if: steps.pr.outputs.number | |
| id: metadata | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| REPO: ${{ github.repository }} | |
| RUN_ID: ${{ inputs.run_id || github.event.workflow_run.id }} | |
| run: | | |
| FAILURES_DIR="/tmp/ci-failures" | |
| METADATA="" | |
| # Gather the failure.json files that actually exist (the glob stays | |
| # literal when nothing matches, hence the -f test). | |
| entries=() | |
| for entry in "$FAILURES_DIR"/ci-failure-*/failure.json; do | |
| if [ -f "$entry" ]; then | |
| entries+=("$entry") | |
| fi | |
| done | |
| if [ "${#entries[@]}" -gt 0 ]; then | |
| # jq --slurp parses every file and emits one compact, valid JSON array. | |
| # A malformed file no longer corrupts the result: we warn and fall | |
| # through to the job-based metadata below. | |
| METADATA=$(jq -c -s '.' "${entries[@]}" 2>/dev/null) || { | |
| echo "::warning::Could not parse failure.json artifacts; falling back to failed jobs." | |
| METADATA="" | |
| } | |
| fi | |
| if [ -z "$METADATA" ] || [ "$METADATA" = "[]" ]; then | |
| # No usable failure artifacts: build metadata from failed jobs in the run. | |
| # This handles workflows (e.g., Fuzzer Jobs) that don't upload | |
| # ci-failure-* artifacts, and artifact sets without any failure.json. | |
| source .github/scripts/gh-api-retry.sh | |
| # With --paginate the --jq filter runs once per page, so emit one object | |
| # per failed job and merge all pages into a single array afterwards. | |
| # Kept as two commands so a gh_api failure still fails the step. | |
| FAILED_JOBS=$(gh_api "repos/${REPO}/actions/runs/${RUN_ID}/jobs" \ | |
| --paginate --jq '.jobs[] | select(.conclusion == "failure") | {job: .name, type: "unknown"}') | |
| METADATA=$(printf '%s\n' "$FAILED_JOBS" | jq -c -s '.') | |
| if [ "$METADATA" = "[]" ] || [ -z "$METADATA" ]; then | |
| echo "No failure artifacts or failed jobs found." | |
| echo "has_failures=false" >> "$GITHUB_OUTPUT" | |
| exit 0 | |
| fi | |
| fi | |
| echo "has_failures=true" >> "$GITHUB_OUTPUT" | |
| # Artifact content is not trusted: use a random heredoc delimiter so a line | |
| # inside the metadata can never end the value early or add other outputs. | |
| DELIM="ghadelimiter_$(od -An -N16 -tx1 /dev/urandom | tr -d ' \n')" | |
| { | |
| echo "failure_metadata<<${DELIM}" | |
| echo "$METADATA" | |
| echo "${DELIM}" | |
| } >> "$GITHUB_OUTPUT" | |
| # Second sparse checkout, run only when there is something to analyze: it | |
| # brings in the Claude context files and the skill file that the next step | |
| # reads as the prompt template. | |
| - name: Checkout for Claude context and prompt | |
| if: steps.metadata.outputs.has_failures == 'true' | |
| uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 | |
| with: | |
| sparse-checkout: | | |
| CLAUDE.md | |
| .claude/CLAUDE.md | |
| .claude/skills/ci-failure-analysis/SKILL.md | |
| # Cone mode (the default) rejects file paths as of git 2.54; the | |
| # entries above are files, not directories, so disable cone mode. | |
| sparse-checkout-cone-mode: false | |
| persist-credentials: false | |
| # Render the skill file into the final prompt by filling in its {{...}} | |
| # placeholders, and publish the result as the `value` output. | |
| - name: Prepare analysis prompt | |
| if: steps.metadata.outputs.has_failures == 'true' | |
| id: prompt | |
| env: | |
| PR_NUMBER: ${{ steps.pr.outputs.number }} | |
| REPOSITORY: ${{ github.repository }} | |
| RUN_ID: ${{ inputs.run_id || github.event.workflow_run.id }} | |
| FAILURE_METADATA: ${{ steps.metadata.outputs.failure_metadata }} | |
| run: | | |
| # Read the prompt template and interpolate variables. | |
| PROMPT=$(sed \ | |
| -e "s|{{PR_NUMBER}}|${PR_NUMBER}|g" \ | |
| -e "s|{{REPOSITORY}}|${REPOSITORY}|g" \ | |
| -e "s|{{RUN_ID}}|${RUN_ID}|g" \ | |
| .claude/skills/ci-failure-analysis/SKILL.md) | |
| # Failure metadata may contain newlines/special chars, so use bash | |
| # parameter expansion instead of sed to handle multiline content. | |
| # The replacement must be quoted: bash >= 5.2 enables patsub_replacement | |
| # by default, which turns an unquoted '&' in the replacement into the | |
| # matched text and would rewrite every '&' in the metadata to the | |
| # placeholder. The pattern is single-quoted so the braces match literally. | |
| PROMPT=${PROMPT//'{{FAILURE_METADATA}}'/"${FAILURE_METADATA}"} | |
| # Random heredoc delimiter: a line in the prompt (which embeds untrusted | |
| # failure metadata) can never end the value early or add other outputs. | |
| DELIM="ghadelimiter_$(od -An -N16 -tx1 /dev/urandom | tr -d ' \n')" | |
| { | |
| echo "value<<${DELIM}" | |
| echo "$PROMPT" | |
| echo "${DELIM}" | |
| } >> "$GITHUB_OUTPUT" | |
| # Hand the rendered prompt to the Claude action. Runs only when the metadata | |
| # step reported failures. | |
| - name: Analyze failures with Claude | |
| if: steps.metadata.outputs.has_failures == 'true' | |
| uses: izaitsevfb/claude-code-action@ececd56fb999d06b4dd2477437bc408938295d76 # forked-pr-fix | |
| with: | |
| anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} | |
| github_token: ${{ github.token }} | |
| # NOTE(review): Bash is allowed without a command allowlist and GH_TOKEN | |
| # below carries this workflow's write scopes, so the "read-only gh CLI | |
| # access" described in the file header depends on the prompt rather than | |
| # on tool restrictions. Confirm this is intended. | |
| claude_args: --model claude-opus-4-6 --allowedTools Bash Read Grep Glob | |
| prompt: ${{ steps.prompt.outputs.value }} | |
| # Most Velox PRs are exported from Phabricator and pushed to GitHub | |
| # by the meta-codesync bot, so the upstream workflow_run's `actor` | |
| # is a Bot. Without this allowlist, claude-code-action refuses to | |
| # run for those PRs and the failure analysis is silently dropped. | |
| allowed_bots: meta-codesync | |
| env: | |
| # Token used by gh commands that Claude runs through the Bash tool. | |
| GH_TOKEN: ${{ github.token }} | |