package cmd

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"github.com/spf13/cobra"

	"slinky/internal/fsurls"
	"slinky/internal/report"
	"slinky/internal/web"
)

// SerializableResult mirrors web.Result but omits the error field for JSON.
type SerializableResult struct {
	URL         string   `json:"url"`
	OK          bool     `json:"ok"`
	Status      int      `json:"status"`
	ErrMsg      string   `json:"error"`
	Method      string   `json:"method"`
	ContentType string   `json:"contentType"`
	Sources     []string `json:"sources"`
}
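
// init registers the "check" subcommand, wires up its flags, and attaches it to
// the root command. Illustrative invocation (assuming the binary is named slinky):
//
//	slinky check "docs/**/*.md" README.md --json-out failures.json --md-out report.md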
func init() {
	checkCmd := &cobra.Command{
		Use:   "check [targets...]",
		Short: "Scan for URLs and validate them (headless)",
		Args:  cobra.ArbitraryArgs,
		RunE: func(cmd *cobra.Command, args []string) error {
			// Parse targets: allow comma-separated chunks
			var raw []string
			for _, a := range args {
				for _, part := range strings.Split(a, ",") {
					p := strings.TrimSpace(part)
					if p != "" {
						raw = append(raw, toSlash(p))
					}
				}
			}
			if len(raw) == 0 {
				raw = []string{"**/*"}
			}

			// Separate into globs (relative to ".") and concrete paths (dirs/files)
			var globPatterns []string
			type pathRoot struct {
				path  string
				isDir bool
			}
			var roots []pathRoot
			for _, t := range raw {
				if hasGlobMeta(t) {
					globPatterns = append(globPatterns, t)
					continue
				}
				if fi, err := os.Stat(t); err == nil {
					roots = append(roots, pathRoot{path: t, isDir: fi.IsDir()})
				} else {
					// If stat fails, treat as glob pattern under "."
					globPatterns = append(globPatterns, t)
				}
			}

			// Debug: show effective targets
			if shouldDebug() {
				fmt.Printf("::debug:: Roots: %s\n", strings.Join(func() []string {
					var out []string
					for _, r := range roots {
						out = append(out, r.path)
					}
					return out
				}(), ","))
				fmt.Printf("::debug:: Glob patterns: %s\n", strings.Join(globPatterns, ","))
			}

			// Aggregate URL->files across all targets
			agg := make(map[string]map[string]struct{})
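			// merge folds one target's URL->files result into agg, re-rooting
			// relative file paths under the concrete root they came from (or keeping
			// the root's own path when the target was a single file).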
			merge := func(res map[string][]string, prefix string, isDir bool) {
				for u, files := range res {
					set, ok := agg[u]
					if !ok {
						set = make(map[string]struct{})
						agg[u] = set
					}
					for _, fp := range files {
						var merged string
						if prefix == "" {
							merged = fp
						} else if isDir {
							merged = toSlash(filepath.Join(prefix, fp))
						} else {
							// File root: keep the concrete file path
							merged = toSlash(prefix)
						}
						set[merged] = struct{}{}
					}
				}
			}

			// 1) Collect for globs under current dir
			if len(globPatterns) > 0 {
				res, err := fsurls.CollectURLs(".", globPatterns, respectGitignore)
				if err != nil {
					return err
				}
				merge(res, "", true)
			}
			// 2) Collect for each concrete root
			for _, r := range roots {
				clean := toSlash(filepath.Clean(r.path))
				if r.isDir {
					res, err := fsurls.CollectURLs(r.path, []string{"**/*"}, respectGitignore)
					if err != nil {
						return err
					}
					merge(res, clean, true)
				} else {
					res, err := fsurls.CollectURLs(r.path, nil, respectGitignore)
					if err != nil {
						return err
					}
					merge(res, clean, false)
				}
			}

			// Convert aggregator to final map with sorted file lists
			urlToFiles := make(map[string][]string, len(agg))
			for u, set := range agg {
				var files []string
				for f := range set {
					files = append(files, f)
				}
				sort.Strings(files)
				urlToFiles[u] = files
			}

			// Derive display root; we use "." when multiple roots to avoid confusion
			displayRoot := "."
			if len(roots) == 1 && len(globPatterns) == 0 {
				displayRoot = roots[0].path
			}
			if shouldDebug() {
				fmt.Printf("::debug:: Root: %s\n", displayRoot)
			}

			// Build config
			timeout := time.Duration(timeoutSeconds) * time.Second
			cfg := web.Config{MaxConcurrency: maxConcurrency, RequestTimeout: timeout}

			// Prepare URL list
			var urls []string
			for u := range urlToFiles {
				urls = append(urls, u)
			}
			sort.Strings(urls)

			// If no URLs found, exit early
			if len(urls) == 0 {
				fmt.Println("No URLs found.")
				return nil
			}

			// Run checks
			startedAt := time.Now()
			ctx, cancel := context.WithCancel(context.Background())
			defer cancel()
			results := make(chan web.Result, 256)
			go web.CheckURLs(ctx, urls, urlToFiles, results, nil, cfg)
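
			// Tallies and failure accumulators filled while draining the results channel.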
			var total, okCount, failCount int
			totalURLs := len(urls)
			lastPctLogged := 0
			var failures []SerializableResult
			var failedResults []web.Result

			for r := range results {
				total++
				if r.OK {
					okCount++
				} else {
					failCount++
				}
				// Progress notices every 5%
				if totalURLs > 0 {
					pct := (total * 100) / totalURLs
					for pct >= lastPctLogged+5 && lastPctLogged < 100 {
						lastPctLogged += 5
						fmt.Printf("::notice:: Checking progress: %d%% (%d/%d)\n", lastPctLogged, total, totalURLs)
					}
				}
				// Emit GitHub Actions debug log for each URL.
				// These lines appear only when step debug logging is enabled via the
				// repository/organization secret ACTIONS_STEP_DEBUG=true.
				if shouldDebug() {
					fmt.Printf("::debug:: Scanned URL: %s status=%d ok=%v err=%s sources=%d\n", r.URL, r.Status, r.OK, r.ErrMsg, len(r.Sources))
				}
				if jsonOut != "" && !r.OK {
					failures = append(failures, SerializableResult{
						URL:         r.URL,
						OK:          r.OK,
						Status:      r.Status,
						ErrMsg:      r.ErrMsg,
						Method:      r.Method,
						ContentType: r.ContentType,
						Sources:     r.Sources,
					})
				}
				if !r.OK {
					failedResults = append(failedResults, r)
				}
			}

			// Write JSON if requested (failures only)
			if jsonOut != "" {
				f, ferr := os.Create(jsonOut)
				if ferr != nil {
					return ferr
				}
				enc := json.NewEncoder(f)
				enc.SetIndent("", " ")
				if err := enc.Encode(failures); err != nil {
					_ = f.Close()
					return err
				}
				_ = f.Close()
			}

			// Build report summary
			base := repoBlobBase
			if strings.TrimSpace(base) == "" {
				base = os.Getenv("SLINKY_REPO_BLOB_BASE_URL")
			}
			summary := report.Summary{
				RootPath:        displayRoot,
				StartedAt:       startedAt,
				FinishedAt:      time.Now(),
				Processed:       total,
				OK:              okCount,
				Fail:            failCount,
				FilesScanned:    countFiles(urlToFiles),
				JSONPath:        jsonOut,
				RepoBlobBaseURL: base,
			}

			// Ensure we have a markdown file if needed for PR comment
			mdPath := mdOut
			ghRepo, ghPR, ghToken, ghOK := detectGitHubPR()
			var finalMDPath string
			if strings.TrimSpace(mdPath) != "" {
				if _, err := report.WriteMarkdown(mdPath, failedResults, summary); err != nil {
					return err
				}
				finalMDPath = mdPath
			} else if ghOK {
				p, err := report.WriteMarkdown("", failedResults, summary)
				if err != nil {
					return err
				}
				finalMDPath = p
			}

			// If running on a PR, post or update the comment(s), chunking as needed
			if ghOK && strings.TrimSpace(finalMDPath) != "" {
				b, rerr := os.ReadFile(finalMDPath)
				if rerr == nil {
					full := string(b)
					if shouldDebug() {
						fmt.Printf("::debug:: Report size (chars): %d\n", len(full))
					}
					chunks := chunkMarkdownByURL(full)
					if shouldDebug() {
						fmt.Printf("::debug:: Posting %d chunk(s)\n", len(chunks))
					}
					_ = upsertPRComments(ghRepo, ghPR, ghToken, chunks)
				}
			}

			fmt.Printf("Checked %d URLs: %d OK, %d failed\n", total, okCount, failCount)
			if failOnFailures && failCount > 0 {
				return fmt.Errorf("%d links failed", failCount)
			}
			return nil
		},
	}
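
	// Register flags. Defaults: 16 concurrent requests, 10s timeout, fail the run
	// when any link is broken, and honor .gitignore while scanning.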
	checkCmd.Flags().IntVar(&maxConcurrency, "concurrency", 16, "maximum concurrent requests")
	checkCmd.Flags().StringVar(&jsonOut, "json-out", "", "path to write full JSON results (array)")
	checkCmd.Flags().StringVar(&mdOut, "md-out", "", "path to write Markdown report for PR comment")
	checkCmd.Flags().StringVar(&repoBlobBase, "repo-blob-base", "", "override GitHub blob base URL (e.g. https://github.com/owner/repo/blob/<sha>)")
	checkCmd.Flags().IntVar(&timeoutSeconds, "timeout", 10, "HTTP request timeout in seconds")
	checkCmd.Flags().BoolVar(&failOnFailures, "fail-on-failures", true, "exit non-zero if any links fail")
	checkCmd.Flags().BoolVar(&respectGitignore, "respect-gitignore", true, "respect .gitignore while scanning (default true)")

	rootCmd.AddCommand(checkCmd)
}
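
// Flag values bound by the check command. The other flag targets used above
// (maxConcurrency, jsonOut, mdOut) and the shouldDebug helper are presumably
// declared elsewhere in this package.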
var (
	timeoutSeconds   int
	failOnFailures   bool
	repoBlobBase     string
	respectGitignore bool
)
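
// toSlash normalizes a user-supplied path: it trims whitespace, converts the
// path to forward slashes, and strips a leading "./".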
func toSlash(p string) string {
	p = strings.TrimSpace(p)
	if p == "" {
		return p
	}
	p = filepath.ToSlash(p)
	if after, ok := strings.CutPrefix(p, "./"); ok {
		p = after
	}
	return p
}
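
// hasGlobMeta reports whether s contains glob metacharacters (*, ?, or [).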
func hasGlobMeta(s string) bool {
	return strings.ContainsAny(s, "*?[")
}
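
// countFiles returns the number of distinct files that contributed at least one URL.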
func countFiles(urlToFiles map[string][]string) int {
	seen := make(map[string]struct{})
	for _, files := range urlToFiles {
		for _, f := range files {
			seen[f] = struct{}{}
		}
	}
	return len(seen)
}
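
// detectGitHubPR returns the repository, pull request number, and token when
// running inside a GitHub Actions pull_request workflow; ok is false otherwise.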
func detectGitHubPR() (repo string, prNumber int, token string, ok bool) {
	repo = os.Getenv("GITHUB_REPOSITORY")
	token = os.Getenv("GITHUB_TOKEN")
	eventPath := os.Getenv("GITHUB_EVENT_PATH")
	if repo == "" || eventPath == "" || token == "" {
		return "", 0, "", false
	}
	data, err := os.ReadFile(eventPath)
	if err != nil {
		return "", 0, "", false
	}
	var ev struct {
		PullRequest struct {
			Number int `json:"number"`
		} `json:"pull_request"`
	}
	_ = json.Unmarshal(data, &ev)
	if ev.PullRequest.Number == 0 {
		return "", 0, "", false
	}
	return repo, ev.PullRequest.Number, token, true
}

// chunkMarkdownByURL splits markdown into chunks under GitHub's comment body limit,
// keeping whole URL entries together. Only the first chunk includes the original
// header and the "Failures by URL" section header. Subsequent chunks have no headers.
func chunkMarkdownByURL(body string) []string {
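	// maxBody is kept a little under GitHub's comment body limit (roughly 65,536
	// characters) to leave headroom for the marker line prepended when posting.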
	const maxBody = 65000
	lines := strings.Split(body, "\n")
	// locate failures header
	failIdx := -1
	for i, ln := range lines {
		if strings.TrimSpace(ln) == "### Failures by URL" {
			failIdx = i
			break
		}
	}
	if failIdx < 0 {
		// no failures section; return as single chunk
		return []string{body}
	}
	preamble := strings.Join(lines[:failIdx+1], "\n") + "\n"
	entryLines := lines[failIdx+1:]

	// build entries by URL block, starting at lines with "- " at column 0
	type entry struct {
		text   string
		length int
	}
	var entries []entry
	for i := 0; i < len(entryLines); {
		// skip leading blank lines
		for i < len(entryLines) && strings.TrimSpace(entryLines[i]) == "" {
			i++
		}
		if i >= len(entryLines) {
			break
		}
		if !strings.HasPrefix(entryLines[i], "- ") {
			// if unexpected, include line as is
			entries = append(entries, entry{text: entryLines[i] + "\n", length: len(entryLines[i]) + 1})
			i++
			continue
		}
		start := i
		i++
		for i < len(entryLines) && !strings.HasPrefix(entryLines[i], "- ") {
			i++
		}
		block := strings.Join(entryLines[start:i], "\n") + "\n\n"
		entries = append(entries, entry{text: block, length: len(block)})
	}

	var chunks []string
	// start first chunk with full preamble
	cur := preamble
	curLen := len(cur)
	for _, e := range entries {
		if curLen+e.length > maxBody && curLen > len(preamble) {
			// flush current chunk, start new without headers
			chunks = append(chunks, cur)
			cur = ""
			curLen = 0
		}
		// if new chunk and would still exceed, force place the single large entry
		if curLen == 0 && e.length > maxBody {
			// fallback: include as is; GitHub will still likely accept since entries are typically smaller
		}
		cur += e.text
		curLen += e.length
	}
	if strings.TrimSpace(cur) != "" {
		chunks = append(chunks, cur)
	}
	return chunks
}

// upsertPRComments deletes any existing slinky comments and posts the new chunked comments in order.
func upsertPRComments(repo string, prNumber int, token string, chunks []string) error {
	apiBase := "https://api.github.com"
	listURL := fmt.Sprintf("%s/repos/%s/issues/%d/comments?per_page=100", apiBase, repo, prNumber)
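	// Note: only the first page (up to 100 comments) is listed, which is assumed
	// to be enough to find any previously posted slinky report comments.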
	req, _ := http.NewRequest(http.MethodGet, listURL, nil)
	req.Header.Set("Authorization", "Bearer "+token)
	req.Header.Set("Accept", "application/vnd.github+json")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	var comments []struct {
		ID   int    `json:"id"`
		Body string `json:"body"`
	}
	b, _ := io.ReadAll(resp.Body)
	_ = json.Unmarshal(b, &comments)

	// Delete all existing slinky-report comments to avoid stale entries
	for _, c := range comments {
		if strings.Contains(c.Body, "<!-- slinky-report -->") {
			delURL := fmt.Sprintf("%s/repos/%s/issues/comments/%d", apiBase, repo, c.ID)
			dReq, _ := http.NewRequest(http.MethodDelete, delURL, nil)
			dReq.Header.Set("Authorization", "Bearer "+token)
			dReq.Header.Set("Accept", "application/vnd.github+json")
			// Close the response body when the request succeeds so connections are not leaked.
			if dResp, dErr := http.DefaultClient.Do(dReq); dErr == nil {
				_ = dResp.Body.Close()
			}
		}
	}

	// Post new comments in order
	for idx, chunk := range chunks {
		body := fmt.Sprintf("%s\n%s", "<!-- slinky-report -->", chunk)
		postURL := fmt.Sprintf("%s/repos/%s/issues/%d/comments", apiBase, repo, prNumber)
		payload, _ := json.Marshal(map[string]string{"body": body})
		req, _ = http.NewRequest(http.MethodPost, postURL, bytes.NewReader(payload))
		req.Header.Set("Authorization", "Bearer "+token)
		req.Header.Set("Accept", "application/vnd.github+json")
		req.Header.Set("Content-Type", "application/json")
		res, _ := http.DefaultClient.Do(req)
		if shouldDebug() {
			fmt.Printf("::debug:: Posted chunk %d/%d: %v\n", idx+1, len(chunks), res)
		}
		// Close the response body when present so the underlying connection is released.
		if res != nil {
			_ = res.Body.Close()
		}
	}
	return nil
}