slinky/cmd/check.go
Luke Hagar · eb6a7a4366 · Update action.yml and check.go to clarify PR comment behavior
Enhance the description for the 'comment_pr' input in action.yml to specify that it defaults to true when GITHUB_TOKEN is present. In check.go, update the logic to explicitly disable PR commenting only when the input is set to "false", improving clarity on the commenting behavior during PR checks.
2025-11-14 20:36:58 +00:00

588 lines · 17 KiB · Go

package cmd

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"github.com/spf13/cobra"

	"slinky/internal/fsurls"
	"slinky/internal/report"
	"slinky/internal/web"
)

// SerializableResult mirrors web.Result but carries the error as a plain
// string (ErrMsg) so results serialize cleanly to JSON.
type SerializableResult struct {
	URL         string   `json:"url"`
	OK          bool     `json:"ok"`
	Status      int      `json:"status"`
	ErrMsg      string   `json:"error"`
	Method      string   `json:"method"`
	ContentType string   `json:"contentType"`
	Sources     []string `json:"sources"`
}
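
// init registers the check command and its flags on rootCmd.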
func init() {
	checkCmd := &cobra.Command{
		Use:   "check [targets...]",
		Short: "Scan for URLs and validate them (headless)",
		Args:  cobra.ArbitraryArgs,
		RunE: func(cmd *cobra.Command, args []string) error {
			// Parse targets: allow comma-separated chunks
			var raw []string
			for _, a := range args {
				for part := range strings.SplitSeq(a, ",") {
					p := strings.TrimSpace(part)
					if p != "" {
						raw = append(raw, toSlash(p))
					}
				}
			}
			if len(raw) == 0 {
				raw = []string{"**/*"}
			}
			// Separate into globs (relative to ".") and concrete paths (dirs/files)
			var globPatterns []string
			type pathRoot struct {
				path  string
				isDir bool
			}
			var roots []pathRoot
			for _, t := range raw {
				if hasGlobMeta(t) {
					globPatterns = append(globPatterns, t)
					continue
				}
				if fi, err := os.Stat(t); err == nil {
					roots = append(roots, pathRoot{path: t, isDir: fi.IsDir()})
				} else {
					// If stat fails, treat as glob pattern under "."
					globPatterns = append(globPatterns, t)
				}
			}
			// Debug: show effective targets
			if shouldDebug() {
				fmt.Printf("::debug:: Roots: %s\n", strings.Join(func() []string {
					var out []string
					for _, r := range roots {
						out = append(out, r.path)
					}
					return out
				}(), ","))
				fmt.Printf("::debug:: Glob patterns: %s\n", strings.Join(globPatterns, ","))
			}
			// Load ignore configurations once for all targets
			gitIgnore := fsurls.LoadGitIgnore(".")
			slPathIgnore, slURLPatterns := fsurls.LoadSlinkyIgnore(".")
			// Aggregate URL->files across all targets
			agg := make(map[string]map[string]struct{})
			merge := func(res map[string][]string, prefix string, isDir bool) {
				for u, files := range res {
					set, ok := agg[u]
					if !ok {
						set = make(map[string]struct{})
						agg[u] = set
					}
					for _, fp := range files {
						var merged string
						if prefix == "" {
							merged = fp
						} else if isDir {
							merged = toSlash(filepath.Join(prefix, fp))
						} else {
							// File root: keep the concrete file path
							merged = toSlash(prefix)
						}
						set[merged] = struct{}{}
					}
				}
			}
			// 1) Collect for globs under current dir
			if len(globPatterns) > 0 {
				res, err := fsurls.CollectURLsWithIgnoreConfig(".", globPatterns, respectGitignore, gitIgnore, slPathIgnore, slURLPatterns)
				if err != nil {
					return err
				}
				merge(res, "", true)
			}
			// 2) Collect for each concrete root
			for _, r := range roots {
				clean := toSlash(filepath.Clean(r.path))
				if r.isDir {
					res, err := fsurls.CollectURLsWithIgnoreConfig(r.path, []string{"**/*"}, respectGitignore, gitIgnore, slPathIgnore, slURLPatterns)
					if err != nil {
						return err
					}
					merge(res, clean, true)
				} else {
					res, err := fsurls.CollectURLsWithIgnoreConfig(r.path, nil, respectGitignore, gitIgnore, slPathIgnore, slURLPatterns)
					if err != nil {
						return err
					}
					merge(res, clean, false)
				}
			}
			// Convert aggregator to final map with sorted file lists
			urlToFiles := make(map[string][]string, len(agg))
			for u, set := range agg {
				var files []string
				for f := range set {
					files = append(files, f)
				}
				sort.Strings(files)
				urlToFiles[u] = files
			}
			// Derive display root; fall back to "." when there are multiple roots to avoid confusion
			displayRoot := "."
			if len(roots) == 1 && len(globPatterns) == 0 {
				displayRoot = roots[0].path
			}
			if shouldDebug() {
				fmt.Printf("::debug:: Root: %s\n", displayRoot)
			}
			// Validate and clamp numeric inputs
			if maxConcurrency < 1 {
				maxConcurrency = 1
			} else if maxConcurrency > 100 {
				maxConcurrency = 100
			}
			if timeoutSeconds < 1 {
				timeoutSeconds = 1
			} else if timeoutSeconds > 300 {
				timeoutSeconds = 300 // Max 5 minutes
			}
			// Build config
			timeout := time.Duration(timeoutSeconds) * time.Second
			// Set up URL cache if cache path is provided via environment variable
			var urlCache *web.URLCache
			if cachePath := os.Getenv("SLINKY_CACHE_PATH"); cachePath != "" {
				cacheTTL := 24 // Default 24 hours
				if ttlStr := os.Getenv("SLINKY_CACHE_TTL_HOURS"); ttlStr != "" {
					if ttl, err := time.ParseDuration(ttlStr + "h"); err == nil && ttl > 0 {
						cacheTTL = int(ttl.Hours())
					}
				}
				urlCache = web.NewURLCache(cachePath, cacheTTL)
				if err := urlCache.Load(); err != nil {
					if shouldDebug() {
						fmt.Printf("::debug:: Failed to load cache: %v\n", err)
					}
				}
				// Save cache when done
				defer func() {
					if err := urlCache.Save(); err != nil {
						if shouldDebug() {
							fmt.Printf("::debug:: Failed to save cache: %v\n", err)
						}
					}
				}()
			}
			cfg := web.Config{
				MaxConcurrency: maxConcurrency,
				RequestTimeout: timeout,
				Cache:          urlCache,
			}
			// Prepare URL list
			var urls []string
			for u := range urlToFiles {
				urls = append(urls, u)
			}
			sort.Strings(urls)
			// If no URLs found, exit early
			if len(urls) == 0 {
				fmt.Println("No URLs found.")
				return nil
			}
			// Run checks
			startedAt := time.Now()
			ctx, cancel := context.WithCancel(context.Background())
			defer cancel()
			results := make(chan web.Result, 256)
			go web.CheckURLs(ctx, urls, urlToFiles, results, nil, cfg)
			var total, okCount, failCount int
			totalURLs := len(urls)
			lastPctLogged := 0
			var failures []SerializableResult
			var failedResults []web.Result
			for r := range results {
				total++
				if r.OK {
					okCount++
				} else {
					failCount++
				}
				// Progress notices every 5%
				if totalURLs > 0 {
					pct := (total * 100) / totalURLs
					for pct >= lastPctLogged+5 && lastPctLogged < 100 {
						lastPctLogged += 5
						fmt.Printf("::progress:: %d%% (%d/%d)\n", lastPctLogged, total, totalURLs)
					}
				}
				// Emit GitHub Actions debug log for each URL.
				// These lines appear only when step debug logging is enabled via the
				// repository/organization secret ACTIONS_STEP_DEBUG=true.
				if shouldDebug() {
					fmt.Printf("::debug:: Scanned URL: %s status=%d ok=%v err=%s sources=%d\n", r.URL, r.Status, r.OK, r.ErrMsg, len(r.Sources))
				}
				if jsonOut != "" && !r.OK {
					failures = append(failures, SerializableResult{
						URL:         r.URL,
						OK:          r.OK,
						Status:      r.Status,
						ErrMsg:      r.ErrMsg,
						Method:      r.Method,
						ContentType: r.ContentType,
						Sources:     r.Sources,
					})
				}
				if !r.OK {
					failedResults = append(failedResults, r)
				}
			}
			// Write JSON if requested (failures only)
			if jsonOut != "" {
				f, ferr := os.Create(jsonOut)
				if ferr != nil {
					return ferr
				}
				enc := json.NewEncoder(f)
				enc.SetIndent("", " ")
				if err := enc.Encode(failures); err != nil {
					_ = f.Close()
					return err
				}
				_ = f.Close()
			}
			// Build report summary
			base := repoBlobBase
			if strings.TrimSpace(base) == "" {
				base = os.Getenv("SLINKY_REPO_BLOB_BASE_URL")
			}
			summary := report.Summary{
				RootPath:        displayRoot,
				StartedAt:       startedAt,
				FinishedAt:      time.Now(),
				Processed:       total,
				OK:              okCount,
				Fail:            failCount,
				FilesScanned:    countFiles(urlToFiles),
				JSONPath:        jsonOut,
				RepoBlobBaseURL: base,
			}
			// Ensure we have a markdown file if needed for PR comment
			mdPath := mdOut
			ghRepo, ghPR, ghToken, ghOK := detectGitHubPR()
			var finalMDPath string
			if strings.TrimSpace(mdPath) != "" {
				if _, err := report.WriteMarkdown(mdPath, failedResults, summary); err != nil {
					return err
				}
				finalMDPath = mdPath
			} else if ghOK {
				p, err := report.WriteMarkdown("", failedResults, summary)
				if err != nil {
					return err
				}
				finalMDPath = p
			}
			// If running on a PR, post or update the comment(s), chunking as needed
			// PR comments are enabled by default when token is present
			// Only disable if explicitly set to "false"
			commentPR := true // Default: enabled
			if val := os.Getenv("INPUT_COMMENT_PR"); val != "" {
				// Explicitly check for "false" to disable, everything else enables
				commentPR = !strings.EqualFold(strings.TrimSpace(val), "false")
			}
			// Only post comments if: GitHub PR detected, commenting enabled, and report exists
			if ghOK && commentPR && strings.TrimSpace(finalMDPath) != "" {
				b, rerr := os.ReadFile(finalMDPath)
				if rerr != nil {
					fmt.Printf("::warning:: Failed to read markdown report for PR comment: %v\n", rerr)
				} else {
					full := string(b)
					if shouldDebug() {
						fmt.Printf("::debug:: Report size (chars): %d\n", len(full))
					}
					chunks := chunkMarkdownByURL(full)
					if shouldDebug() {
						fmt.Printf("::debug:: Posting %d chunk(s)\n", len(chunks))
					}
					if err := upsertPRComments(ghRepo, ghPR, ghToken, chunks); err != nil {
						// Non-critical error: log warning but don't fail the run
						fmt.Printf("::warning:: Failed to post PR comment: %v\n", err)
					}
				}
			}
			fmt.Printf("Checked %d URLs: %d OK, %d failed\n", total, okCount, failCount)
			if failOnFailures && failCount > 0 {
				return fmt.Errorf("%d links failed", failCount)
			}
			return nil
		},
	}
	checkCmd.Flags().IntVar(&maxConcurrency, "concurrency", 16, "maximum concurrent requests")
	checkCmd.Flags().StringVar(&jsonOut, "json-out", "", "path to write JSON results for failed links (array)")
	checkCmd.Flags().StringVar(&mdOut, "md-out", "", "path to write Markdown report for PR comment")
	checkCmd.Flags().StringVar(&repoBlobBase, "repo-blob-base", "", "override GitHub blob base URL (e.g. https://github.com/owner/repo/blob/<sha>)")
	checkCmd.Flags().IntVar(&timeoutSeconds, "timeout", 10, "HTTP request timeout in seconds")
	checkCmd.Flags().BoolVar(&failOnFailures, "fail-on-failures", true, "exit non-zero if any links fail")
	checkCmd.Flags().BoolVar(&respectGitignore, "respect-gitignore", true, "respect .gitignore while scanning (default true)")
	rootCmd.AddCommand(checkCmd)
}
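
// Flag values specific to the check command. Other flags referenced above
// (maxConcurrency, jsonOut, mdOut) are declared in other files of this package.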
var (
	timeoutSeconds   int
	failOnFailures   bool
	repoBlobBase     string
	respectGitignore bool
)
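
// toSlash normalizes a target path: trims whitespace, converts separators to
// forward slashes, and strips a leading "./".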
func toSlash(p string) string {
	p = strings.TrimSpace(p)
	if p == "" {
		return p
	}
	p = filepath.ToSlash(p)
	if after, ok := strings.CutPrefix(p, "./"); ok {
		p = after
	}
	return p
}
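
// hasGlobMeta reports whether s contains glob metacharacters (*, ? or [).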
func hasGlobMeta(s string) bool {
	return strings.ContainsAny(s, "*?[")
}
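
// countFiles returns the number of distinct files referenced across all URLs.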
func countFiles(urlToFiles map[string][]string) int {
	seen := make(map[string]struct{})
	for _, files := range urlToFiles {
		for _, f := range files {
			seen[f] = struct{}{}
		}
	}
	return len(seen)
}
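
// detectGitHubPR reads GITHUB_REPOSITORY, GITHUB_TOKEN, and GITHUB_EVENT_PATH
// to determine whether the run is for a pull request; ok is false when any of
// them is missing or the event payload has no PR number.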
func detectGitHubPR() (repo string, prNumber int, token string, ok bool) {
	repo = os.Getenv("GITHUB_REPOSITORY")
	token = os.Getenv("GITHUB_TOKEN")
	eventPath := os.Getenv("GITHUB_EVENT_PATH")
	if repo == "" || eventPath == "" || token == "" {
		return "", 0, "", false
	}
	data, err := os.ReadFile(eventPath)
	if err != nil {
		return "", 0, "", false
	}
	var ev struct {
		PullRequest struct {
			Number int `json:"number"`
		} `json:"pull_request"`
	}
	_ = json.Unmarshal(data, &ev)
	if ev.PullRequest.Number == 0 {
		return "", 0, "", false
	}
	return repo, ev.PullRequest.Number, token, true
}
// chunkMarkdownByURL splits markdown into chunks under GitHub's comment body limit,
// keeping whole URL entries together. Only the first chunk includes the original
// header and the "Failures by URL" section header. Subsequent chunks have no headers.
func chunkMarkdownByURL(body string) []string {
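	// GitHub caps issue/PR comment bodies at roughly 65,536 characters;
	// 65000 leaves headroom for the marker line added when posting.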
	const maxBody = 65000
	lines := strings.Split(body, "\n")
	// locate failures header
	failIdx := -1
	for i, ln := range lines {
		if strings.TrimSpace(ln) == "### Failures by URL" {
			failIdx = i
			break
		}
	}
	if failIdx < 0 {
		// no failures section; return as single chunk
		return []string{body}
	}
	preamble := strings.Join(lines[:failIdx+1], "\n") + "\n"
	entryLines := lines[failIdx+1:]
	// build entries by URL block, starting at lines with "- " at column 0
	type entry struct {
		text   string
		length int
	}
	var entries []entry
	for i := 0; i < len(entryLines); {
		// skip leading blank lines
		for i < len(entryLines) && strings.TrimSpace(entryLines[i]) == "" {
			i++
		}
		if i >= len(entryLines) {
			break
		}
		if !strings.HasPrefix(entryLines[i], "- ") {
			// if unexpected, include line as is
			entries = append(entries, entry{text: entryLines[i] + "\n", length: len(entryLines[i]) + 1})
			i++
			continue
		}
		start := i
		i++
		for i < len(entryLines) && !strings.HasPrefix(entryLines[i], "- ") {
			i++
		}
		block := strings.Join(entryLines[start:i], "\n") + "\n"
		entries = append(entries, entry{text: block, length: len(block)})
	}
	var chunks []string
	// start first chunk with full preamble
	cur := preamble
	curLen := len(cur)
	for _, e := range entries {
		if curLen+e.length > maxBody && curLen > len(preamble) {
			// flush current chunk, start new without headers
			chunks = append(chunks, cur)
			cur = ""
			curLen = 0
		}
		// A single entry that alone exceeds maxBody is still placed as-is;
		// GitHub will likely accept it since entries are typically much smaller.
		cur += e.text
		curLen += e.length
	}
	if strings.TrimSpace(cur) != "" {
		chunks = append(chunks, cur)
	}
	return chunks
}
// upsertPRComments deletes any existing slinky comments and posts the new chunked comments in order.
// Returns error if critical failures occur, but individual comment failures are logged and ignored.
func upsertPRComments(repo string, prNumber int, token string, chunks []string) error {
	apiBase := "https://api.github.com"
	listURL := fmt.Sprintf("%s/repos/%s/issues/%d/comments?per_page=100", apiBase, repo, prNumber)
	req, err := http.NewRequest(http.MethodGet, listURL, nil)
	if err != nil {
		return fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer "+token)
	req.Header.Set("Accept", "application/vnd.github+json")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("failed to list comments: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode >= 400 {
		return fmt.Errorf("failed to list comments: HTTP %d", resp.StatusCode)
	}
	var comments []struct {
		ID   int    `json:"id"`
		Body string `json:"body"`
	}
	b, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("failed to read comments response: %w", err)
	}
	if err := json.Unmarshal(b, &comments); err != nil {
		// Non-critical: continue even if we can't parse existing comments
		if shouldDebug() {
			fmt.Printf("::debug:: Failed to parse comments: %v\n", err)
		}
	}
	// Delete all existing slinky-report comments to avoid stale entries
	for _, c := range comments {
		if strings.Contains(c.Body, "<!-- slinky-report -->") {
			delURL := fmt.Sprintf("%s/repos/%s/issues/comments/%d", apiBase, repo, c.ID)
			dReq, err := http.NewRequest(http.MethodDelete, delURL, nil)
			if err != nil {
				continue // Skip if we can't create request
			}
			dReq.Header.Set("Authorization", "Bearer "+token)
			dReq.Header.Set("Accept", "application/vnd.github+json")
			if dResp, derr := http.DefaultClient.Do(dReq); derr == nil {
				dResp.Body.Close() // Non-critical: ignore delete errors, but close the response body
			}
		}
	}
	// Post new comments in order
	for idx, chunk := range chunks {
		body := fmt.Sprintf("%s\n%s", "<!-- slinky-report -->", chunk)
		postURL := fmt.Sprintf("%s/repos/%s/issues/%d/comments", apiBase, repo, prNumber)
		payload, err := json.Marshal(map[string]string{"body": body})
		if err != nil {
			if shouldDebug() {
				fmt.Printf("::debug:: Failed to marshal comment payload: %v\n", err)
			}
			continue
		}
		req, err := http.NewRequest(http.MethodPost, postURL, bytes.NewReader(payload))
		if err != nil {
			if shouldDebug() {
				fmt.Printf("::debug:: Failed to create POST request: %v\n", err)
			}
			continue
		}
		req.Header.Set("Authorization", "Bearer "+token)
		req.Header.Set("Accept", "application/vnd.github+json")
		req.Header.Set("Content-Type", "application/json")
		res, err := http.DefaultClient.Do(req)
		if err != nil {
			if shouldDebug() {
				fmt.Printf("::debug:: Failed to post chunk %d/%d: %v\n", idx+1, len(chunks), err)
			}
			continue
		}
		res.Body.Close()
		if res.StatusCode >= 400 {
			if shouldDebug() {
				fmt.Printf("::debug:: Failed to post chunk %d/%d: HTTP %d\n", idx+1, len(chunks), res.StatusCode)
			}
			continue
		}
		if shouldDebug() {
			fmt.Printf("::debug:: Posted chunk %d/%d successfully\n", idx+1, len(chunks))
		}
	}
	return nil
}