package cmd

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"github.com/spf13/cobra"

	"slinky/internal/fsurls"
	"slinky/internal/report"
	"slinky/internal/web"
)

// SerializableResult mirrors web.Result for JSON output: the Go error value is
// omitted and only its message string (ErrMsg) is serialized.
type SerializableResult struct {
	URL         string   `json:"url"`
	OK          bool     `json:"ok"`
	Status      int      `json:"status"`
	ErrMsg      string   `json:"error"`
	Method      string   `json:"method"`
	ContentType string   `json:"contentType"`
	Sources     []string `json:"sources"`
}

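// init wires up the "check" command defined below. Targets may be directories,
// files, comma-separated lists, or globs, and default to "**/*". An illustrative
// invocation (binary name, paths, and values are examples only):
//
//	slinky check docs README.md "**/*.md" --concurrency 32 --timeout 5 --json-out failures.json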
func init() {
	checkCmd := &cobra.Command{
		Use:   "check [targets...]",
		Short: "Scan for URLs and validate them (headless)",
		Args:  cobra.ArbitraryArgs,
		RunE: func(cmd *cobra.Command, args []string) error {
			// Parse targets: allow comma-separated chunks
			var raw []string
			for _, a := range args {
				for part := range strings.SplitSeq(a, ",") {
					p := strings.TrimSpace(part)
					if p != "" {
						raw = append(raw, toSlash(p))
					}
				}
			}
			if len(raw) == 0 {
				raw = []string{"**/*"}
			}

			// Separate into globs (relative to ".") and concrete paths (dirs/files)
			var globPatterns []string
			type pathRoot struct {
				path  string
				isDir bool
			}
			var roots []pathRoot
			for _, t := range raw {
				if hasGlobMeta(t) {
					globPatterns = append(globPatterns, t)
					continue
				}
				if fi, err := os.Stat(t); err == nil {
					roots = append(roots, pathRoot{path: t, isDir: fi.IsDir()})
				} else {
					// If stat fails, treat as glob pattern under "."
					globPatterns = append(globPatterns, t)
				}
			}

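			// At this point, for example (illustrative paths), an existing
			// directory "docs" sits in roots, while "docs/**/*.md", or any
			// target that fails to stat, is kept in globPatterns and resolved
			// relative to ".".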
			// Debug: show effective targets
			if shouldDebug() {
				fmt.Printf("::debug:: Roots: %s\n", strings.Join(func() []string {
					var out []string
					for _, r := range roots {
						out = append(out, r.path)
					}
					return out
				}(), ","))
				fmt.Printf("::debug:: Glob patterns: %s\n", strings.Join(globPatterns, ","))
			}

			// Aggregate URL->files across all targets
			agg := make(map[string]map[string]struct{})
			merge := func(res map[string][]string, prefix string, isDir bool) {
				for u, files := range res {
					set, ok := agg[u]
					if !ok {
						set = make(map[string]struct{})
						agg[u] = set
					}
					for _, fp := range files {
						var merged string
						if prefix == "" {
							merged = fp
						} else if isDir {
							merged = toSlash(filepath.Join(prefix, fp))
						} else {
							// File root: keep the concrete file path
							merged = toSlash(prefix)
						}
						set[merged] = struct{}{}
					}
				}
			}

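			// merge is invoked once per scan below. A prefix of "" means the
			// paths in res are already relative to "."; a directory prefix is
			// joined onto each relative path (illustratively, "guide.md" found
			// under the root "docs" is stored as "docs/guide.md"); a file
			// prefix replaces the path with that concrete file.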
			// 1) Collect for globs under current dir
			if len(globPatterns) > 0 {
				res, err := fsurls.CollectURLs(".", globPatterns, respectGitignore)
				if err != nil {
					return err
				}
				merge(res, "", true)
			}

			// 2) Collect for each concrete root
			for _, r := range roots {
				clean := toSlash(filepath.Clean(r.path))
				if r.isDir {
					res, err := fsurls.CollectURLs(r.path, []string{"**/*"}, respectGitignore)
					if err != nil {
						return err
					}
					merge(res, clean, true)
				} else {
					res, err := fsurls.CollectURLs(r.path, nil, respectGitignore)
					if err != nil {
						return err
					}
					merge(res, clean, false)
				}
			}

			// Convert aggregator to final map with sorted file lists
			urlToFiles := make(map[string][]string, len(agg))
			for u, set := range agg {
				var files []string
				for f := range set {
					files = append(files, f)
				}
				sort.Strings(files)
				urlToFiles[u] = files
			}

			// Derive display root; we use "." when multiple roots to avoid confusion
			displayRoot := "."
			if len(roots) == 1 && len(globPatterns) == 0 {
				displayRoot = roots[0].path
			}
			if shouldDebug() {
				fmt.Printf("::debug:: Root: %s\n", displayRoot)
			}

			// Build config
			timeout := time.Duration(timeoutSeconds) * time.Second
			cfg := web.Config{MaxConcurrency: maxConcurrency, RequestTimeout: timeout}

			// Prepare URL list
			var urls []string
			for u := range urlToFiles {
				urls = append(urls, u)
			}
			sort.Strings(urls)

			// If no URLs found, exit early
			if len(urls) == 0 {
				fmt.Println("No URLs found.")
				return nil
			}

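			// Checking runs in a background goroutine; web.CheckURLs is
			// expected to close the results channel once every URL has been
			// processed, so ranging over it below terminates.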
			// Run checks
			startedAt := time.Now()
			ctx, cancel := context.WithCancel(context.Background())
			defer cancel()
			results := make(chan web.Result, 256)
			go web.CheckURLs(ctx, urls, urlToFiles, results, nil, cfg)

			var total, okCount, failCount int
			totalURLs := len(urls)
			lastPctLogged := 0
			var failures []SerializableResult
			var failedResults []web.Result

			for r := range results {
				total++
				if r.OK {
					okCount++
				} else {
					failCount++
				}
				// Progress notices every 5%
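				// (With fewer than 20 URLs a single result advances the
				// percentage by more than 5%, so the inner loop below prints
				// each intermediate 5% step it skipped over.)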
				if totalURLs > 0 {
					pct := (total * 100) / totalURLs
					for pct >= lastPctLogged+5 && lastPctLogged < 100 {
						lastPctLogged += 5
						fmt.Printf("::progress:: %d%% (%d/%d)\n", lastPctLogged, total, totalURLs)
					}
				}
				// Emit GitHub Actions debug log for each URL.
				// These lines appear only when step debug logging is enabled via the
				// repository/organization secret ACTIONS_STEP_DEBUG=true.
				if shouldDebug() {
					fmt.Printf("::debug:: Scanned URL: %s status=%d ok=%v err=%s sources=%d\n", r.URL, r.Status, r.OK, r.ErrMsg, len(r.Sources))
				}
				if jsonOut != "" && !r.OK {
					failures = append(failures, SerializableResult{
						URL:         r.URL,
						OK:          r.OK,
						Status:      r.Status,
						ErrMsg:      r.ErrMsg,
						Method:      r.Method,
						ContentType: r.ContentType,
						Sources:     r.Sources,
					})
				}
				if !r.OK {
					failedResults = append(failedResults, r)
				}
			}

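			// The file written below holds a JSON array of SerializableResult,
			// for example (values are illustrative):
			//
			//	[{"url": "https://example.com/broken", "ok": false, "status": 404,
			//	  "error": "", "method": "GET", "contentType": "", "sources": ["README.md"]}]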
			// Write JSON if requested (failures only)
			if jsonOut != "" {
				f, ferr := os.Create(jsonOut)
				if ferr != nil {
					return ferr
				}
				enc := json.NewEncoder(f)
				enc.SetIndent("", " ")
				if err := enc.Encode(failures); err != nil {
					_ = f.Close()
					return err
				}
				_ = f.Close()
			}

			// Build report summary
			base := repoBlobBase
			if strings.TrimSpace(base) == "" {
				base = os.Getenv("SLINKY_REPO_BLOB_BASE_URL")
			}
			summary := report.Summary{
				RootPath:        displayRoot,
				StartedAt:       startedAt,
				FinishedAt:      time.Now(),
				Processed:       total,
				OK:              okCount,
				Fail:            failCount,
				FilesScanned:    countFiles(urlToFiles),
				JSONPath:        jsonOut,
				RepoBlobBaseURL: base,
			}

			// Ensure we have a markdown file if needed for PR comment
			mdPath := mdOut
			ghRepo, ghPR, ghToken, ghOK := detectGitHubPR()
			var finalMDPath string
			if strings.TrimSpace(mdPath) != "" {
				if _, err := report.WriteMarkdown(mdPath, failedResults, summary); err != nil {
					return err
				}
				finalMDPath = mdPath
			} else if ghOK {
				p, err := report.WriteMarkdown("", failedResults, summary)
				if err != nil {
					return err
				}
				finalMDPath = p
			}

			// If running on a PR, post or update the comment(s), chunking as needed
			if ghOK && strings.TrimSpace(finalMDPath) != "" {
				b, rerr := os.ReadFile(finalMDPath)
				if rerr == nil {
					full := string(b)
					if shouldDebug() {
						fmt.Printf("::debug:: Report size (chars): %d\n", len(full))
					}
					chunks := chunkMarkdownByURL(full)
					if shouldDebug() {
						fmt.Printf("::debug:: Posting %d chunk(s)\n", len(chunks))
					}
					_ = upsertPRComments(ghRepo, ghPR, ghToken, chunks)
				}
			}

			fmt.Printf("Checked %d URLs: %d OK, %d failed\n", total, okCount, failCount)
			if failOnFailures && failCount > 0 {
				return fmt.Errorf("%d links failed", failCount)
			}
			return nil
		},
	}

	checkCmd.Flags().IntVar(&maxConcurrency, "concurrency", 16, "maximum concurrent requests")
	checkCmd.Flags().StringVar(&jsonOut, "json-out", "", "path to write failed results as a JSON array")
	checkCmd.Flags().StringVar(&mdOut, "md-out", "", "path to write Markdown report for PR comment")
	checkCmd.Flags().StringVar(&repoBlobBase, "repo-blob-base", "", "override GitHub blob base URL (e.g. https://github.com/owner/repo/blob/<sha>)")
	checkCmd.Flags().IntVar(&timeoutSeconds, "timeout", 10, "HTTP request timeout in seconds")
	checkCmd.Flags().BoolVar(&failOnFailures, "fail-on-failures", true, "exit non-zero if any links fail")
	checkCmd.Flags().BoolVar(&respectGitignore, "respect-gitignore", true, "respect .gitignore while scanning")

	rootCmd.AddCommand(checkCmd)
}

var (
	timeoutSeconds   int
	failOnFailures   bool
	repoBlobBase     string
	respectGitignore bool
)

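// toSlash normalizes a user-supplied target path: it trims whitespace,
// converts separators to forward slashes, and strips a leading "./".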
func toSlash(p string) string {
	p = strings.TrimSpace(p)
	if p == "" {
		return p
	}
	p = filepath.ToSlash(p)
	if after, ok := strings.CutPrefix(p, "./"); ok {
		p = after
	}
	return p
}

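// hasGlobMeta reports whether s contains glob metacharacters.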
func hasGlobMeta(s string) bool {
	return strings.ContainsAny(s, "*?[")
}

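// countFiles returns the number of distinct files that contributed at least
// one URL.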
func countFiles(urlToFiles map[string][]string) int {
	seen := make(map[string]struct{})
	for _, files := range urlToFiles {
		for _, f := range files {
			seen[f] = struct{}{}
		}
	}
	return len(seen)
}

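// detectGitHubPR reports whether the process appears to be running in a
// GitHub Actions pull request job, based on GITHUB_REPOSITORY, GITHUB_TOKEN,
// and the event payload at GITHUB_EVENT_PATH.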
func detectGitHubPR() (repo string, prNumber int, token string, ok bool) {
	repo = os.Getenv("GITHUB_REPOSITORY")
	token = os.Getenv("GITHUB_TOKEN")
	eventPath := os.Getenv("GITHUB_EVENT_PATH")
	if repo == "" || eventPath == "" || token == "" {
		return "", 0, "", false
	}
	data, err := os.ReadFile(eventPath)
	if err != nil {
		return "", 0, "", false
	}
	var ev struct {
		PullRequest struct {
			Number int `json:"number"`
		} `json:"pull_request"`
	}
	_ = json.Unmarshal(data, &ev)
	if ev.PullRequest.Number == 0 {
		return "", 0, "", false
	}
	return repo, ev.PullRequest.Number, token, true
}

// chunkMarkdownByURL splits markdown into chunks under GitHub's comment body limit,
// keeping whole URL entries together. Only the first chunk includes the original
// header and the "Failures by URL" section header. Subsequent chunks have no headers.
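// The chunker assumes the report written by report.WriteMarkdown lists each
// failing URL as a top-level "- " bullet beneath a "### Failures by URL"
// heading, with any detail lines indented under the bullet.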
func chunkMarkdownByURL(body string) []string {
	const maxBody = 65000
	lines := strings.Split(body, "\n")
	// locate failures header
	failIdx := -1
	for i, ln := range lines {
		if strings.TrimSpace(ln) == "### Failures by URL" {
			failIdx = i
			break
		}
	}
	if failIdx < 0 {
		// no failures section; return as single chunk
		return []string{body}
	}
	preamble := strings.Join(lines[:failIdx+1], "\n") + "\n"
	entryLines := lines[failIdx+1:]

	// build entries by URL block, starting at lines with "- " at column 0
	type entry struct {
		text   string
		length int
	}
	var entries []entry
	for i := 0; i < len(entryLines); {
		// skip leading blank lines
		for i < len(entryLines) && strings.TrimSpace(entryLines[i]) == "" {
			i++
		}
		if i >= len(entryLines) {
			break
		}
		if !strings.HasPrefix(entryLines[i], "- ") {
			// if unexpected, include the line as is
			entries = append(entries, entry{text: entryLines[i] + "\n", length: len(entryLines[i]) + 1})
			i++
			continue
		}
		start := i
		i++
		for i < len(entryLines) && !strings.HasPrefix(entryLines[i], "- ") {
			i++
		}
		block := strings.Join(entryLines[start:i], "\n") + "\n"
		entries = append(entries, entry{text: block, length: len(block)})
	}

	var chunks []string
	// start first chunk with full preamble
	cur := preamble
	curLen := len(cur)
	for _, e := range entries {
		if curLen+e.length > maxBody && curLen > len(preamble) {
			// flush current chunk, start new without headers
			chunks = append(chunks, cur)
			cur = ""
			curLen = 0
		}
		// A single entry larger than maxBody cannot be split further; it is
		// appended as-is below (entries are typically far smaller than the limit).
		cur += e.text
		curLen += e.length
	}
	if strings.TrimSpace(cur) != "" {
		chunks = append(chunks, cur)
	}
	return chunks
}

// upsertPRComments deletes any existing slinky comments and posts the new chunked comments in order.
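// Comments are matched via the "<!-- slinky-report -->" marker embedded in each
// posted body. Errors from the delete and post requests are ignored: commenting
// is best-effort and never fails the check run.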
func upsertPRComments(repo string, prNumber int, token string, chunks []string) error {
	apiBase := "https://api.github.com"
	listURL := fmt.Sprintf("%s/repos/%s/issues/%d/comments?per_page=100", apiBase, repo, prNumber)
	req, _ := http.NewRequest(http.MethodGet, listURL, nil)
	req.Header.Set("Authorization", "Bearer "+token)
	req.Header.Set("Accept", "application/vnd.github+json")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	var comments []struct {
		ID   int    `json:"id"`
		Body string `json:"body"`
	}
	b, _ := io.ReadAll(resp.Body)
	_ = json.Unmarshal(b, &comments)

	// Delete all existing slinky-report comments to avoid stale entries
	for _, c := range comments {
		if strings.Contains(c.Body, "<!-- slinky-report -->") {
			delURL := fmt.Sprintf("%s/repos/%s/issues/comments/%d", apiBase, repo, c.ID)
			dReq, _ := http.NewRequest(http.MethodDelete, delURL, nil)
			dReq.Header.Set("Authorization", "Bearer "+token)
			dReq.Header.Set("Accept", "application/vnd.github+json")
			// Best-effort delete; close the body to avoid leaking the connection.
			if dRes, dErr := http.DefaultClient.Do(dReq); dErr == nil {
				_ = dRes.Body.Close()
			}
		}
	}

	// Post new comments in order
	for idx, chunk := range chunks {
		body := fmt.Sprintf("%s\n%s", "<!-- slinky-report -->", chunk)
		postURL := fmt.Sprintf("%s/repos/%s/issues/%d/comments", apiBase, repo, prNumber)
		payload, _ := json.Marshal(map[string]string{"body": body})
		req, _ = http.NewRequest(http.MethodPost, postURL, bytes.NewReader(payload))
		req.Header.Set("Authorization", "Bearer "+token)
		req.Header.Set("Accept", "application/vnd.github+json")
		req.Header.Set("Content-Type", "application/json")
		res, _ := http.DefaultClient.Do(req)
		if shouldDebug() {
			fmt.Printf("::debug:: Posted chunk %d/%d: %v\n", idx+1, len(chunks), res)
		}
		if res != nil {
			_ = res.Body.Close()
		}
	}
	return nil
}