package cmd

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"github.com/spf13/cobra"

	"slinky/internal/fsurls"
	"slinky/internal/report"
	"slinky/internal/web"
)

// SerializableResult mirrors web.Result but omits the error field for JSON.
type SerializableResult struct {
	URL         string   `json:"url"`
	OK          bool     `json:"ok"`
	Status      int      `json:"status"`
	ErrMsg      string   `json:"error"`
	Method      string   `json:"method"`
	ContentType string   `json:"contentType"`
	Sources     []string `json:"sources"`
}

func init() {
	checkCmd := &cobra.Command{
		Use:   "check [targets...]",
		Short: "Scan for URLs and validate them (headless)",
		Args:  cobra.ArbitraryArgs,
		RunE: func(cmd *cobra.Command, args []string) error {
			// Parse targets: allow comma-separated chunks
			var raw []string
			for _, a := range args {
				for part := range strings.SplitSeq(a, ",") {
					p := strings.TrimSpace(part)
					if p != "" {
						raw = append(raw, toSlash(p))
					}
				}
			}
			if len(raw) == 0 {
				raw = []string{"**/*"}
			}
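
			// e.g. a hypothetical "slinky check docs,README.md" invocation yields
			// two concrete roots below, while "docs/**/*.md" (or any target that
			// fails os.Stat) is routed to globPatterns and matched relative to ".".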

			// Separate into globs (relative to ".") and concrete paths (dirs/files)
			var globPatterns []string
			type pathRoot struct {
				path  string
				isDir bool
			}
			var roots []pathRoot
			for _, t := range raw {
				if hasGlobMeta(t) {
					globPatterns = append(globPatterns, t)
					continue
				}
				if fi, err := os.Stat(t); err == nil {
					roots = append(roots, pathRoot{path: t, isDir: fi.IsDir()})
				} else {
					// If stat fails, treat as glob pattern under "."
					globPatterns = append(globPatterns, t)
				}
			}

			// Debug: show effective targets
			if shouldDebug() {
				fmt.Printf("::debug:: Roots: %s\n", strings.Join(func() []string {
					var out []string
					for _, r := range roots {
						out = append(out, r.path)
					}
					return out
				}(), ","))
				fmt.Printf("::debug:: Glob patterns: %s\n", strings.Join(globPatterns, ","))
			}

			// Load ignore configurations once for all targets
			gitIgnore := fsurls.LoadGitIgnore(".")
			slPathIgnore, slURLPatterns := fsurls.LoadSlinkyIgnore(".")
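			// Loading these once up front avoids re-reading .gitignore and
			// .slinkignore for every root and glob target below.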

			// Aggregate URL->files across all targets
			agg := make(map[string]map[string]struct{})
			merge := func(res map[string][]string, prefix string, isDir bool) {
				for u, files := range res {
					set, ok := agg[u]
					if !ok {
						set = make(map[string]struct{})
						agg[u] = set
					}
					for _, fp := range files {
						var merged string
						if prefix == "" {
							merged = fp
						} else if isDir {
							merged = toSlash(filepath.Join(prefix, fp))
						} else {
							// File root: keep the concrete file path
							merged = toSlash(prefix)
						}
						set[merged] = struct{}{}
					}
				}
			}
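
			// merge is invoked three ways below: prefix "" for glob results
			// (paths already relative to "."), a directory prefix for dir roots
			// (paths relative to that root), and a file path for file roots.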

			// 1) Collect for globs under current dir
			if len(globPatterns) > 0 {
				res, err := fsurls.CollectURLsWithIgnoreConfig(".", globPatterns, respectGitignore, gitIgnore, slPathIgnore, slURLPatterns)
				if err != nil {
					return err
				}
				merge(res, "", true)
			}

			// 2) Collect for each concrete root
			for _, r := range roots {
				clean := toSlash(filepath.Clean(r.path))
				if r.isDir {
					res, err := fsurls.CollectURLsWithIgnoreConfig(r.path, []string{"**/*"}, respectGitignore, gitIgnore, slPathIgnore, slURLPatterns)
					if err != nil {
						return err
					}
					merge(res, clean, true)
				} else {
					res, err := fsurls.CollectURLsWithIgnoreConfig(r.path, nil, respectGitignore, gitIgnore, slPathIgnore, slURLPatterns)
					if err != nil {
						return err
					}
					merge(res, clean, false)
				}
			}

			// Convert aggregator to final map with sorted file lists
			urlToFiles := make(map[string][]string, len(agg))
			for u, set := range agg {
				var files []string
				for f := range set {
					files = append(files, f)
				}
				sort.Strings(files)
				urlToFiles[u] = files
			}

			// Derive display root; use "." when there are multiple roots to avoid confusion
			displayRoot := "."
			if len(roots) == 1 && len(globPatterns) == 0 {
				displayRoot = roots[0].path
			}
			if shouldDebug() {
				fmt.Printf("::debug:: Root: %s\n", displayRoot)
			}

			// Build config
			timeout := time.Duration(timeoutSeconds) * time.Second
			cfg := web.Config{MaxConcurrency: maxConcurrency, RequestTimeout: timeout}

			// Prepare URL list
			var urls []string
			for u := range urlToFiles {
				urls = append(urls, u)
			}
			sort.Strings(urls)

			// If no URLs found, exit early
			if len(urls) == 0 {
				fmt.Println("No URLs found.")
				return nil
			}

			// Run checks
			startedAt := time.Now()
			ctx, cancel := context.WithCancel(context.Background())
			defer cancel()
			results := make(chan web.Result, 256)
			go web.CheckURLs(ctx, urls, urlToFiles, results, nil, cfg)
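			// The range below relies on web.CheckURLs closing the results channel
			// once all URLs are processed; the 256-slot buffer lets the checkers
			// run ahead of this consumer without blocking.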

			var total, okCount, failCount int
			totalURLs := len(urls)
			lastPctLogged := 0
			var failures []SerializableResult
			var failedResults []web.Result

			for r := range results {
				total++
				if r.OK {
					okCount++
				} else {
					failCount++
				}
				// Progress notices every 5%; the inner loop can emit several
				// notices at once when a single result advances past more than
				// one 5% bucket.
				if totalURLs > 0 {
					pct := (total * 100) / totalURLs
					for pct >= lastPctLogged+5 && lastPctLogged < 100 {
						lastPctLogged += 5
						fmt.Printf("::progress:: %d%% (%d/%d)\n", lastPctLogged, total, totalURLs)
					}
				}
				// Emit GitHub Actions debug log for each URL.
				// These lines appear only when step debug logging is enabled via the
				// repository/organization secret ACTIONS_STEP_DEBUG=true.
				if shouldDebug() {
					fmt.Printf("::debug:: Scanned URL: %s status=%d ok=%v err=%s sources=%d\n", r.URL, r.Status, r.OK, r.ErrMsg, len(r.Sources))
				}
				if jsonOut != "" && !r.OK {
					failures = append(failures, SerializableResult{
						URL:         r.URL,
						OK:          r.OK,
						Status:      r.Status,
						ErrMsg:      r.ErrMsg,
						Method:      r.Method,
						ContentType: r.ContentType,
						Sources:     r.Sources,
					})
				}
				if !r.OK {
					failedResults = append(failedResults, r)
				}
			}

			// Write JSON if requested (failures only)
			if jsonOut != "" {
				f, ferr := os.Create(jsonOut)
				if ferr != nil {
					return ferr
				}
				enc := json.NewEncoder(f)
				enc.SetIndent("", " ")
				if err := enc.Encode(failures); err != nil {
					_ = f.Close()
					return err
				}
				_ = f.Close()
			}
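
			// The --json-out file is an array of SerializableResult, e.g.
			// (illustrative values):
			//   [{"url":"https://example.com/broken","ok":false,"status":404,
			//     "error":"","method":"GET","contentType":"text/html",
			//     "sources":["README.md"]}]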

			// Build report summary
			base := repoBlobBase
			if strings.TrimSpace(base) == "" {
				base = os.Getenv("SLINKY_REPO_BLOB_BASE_URL")
			}
			summary := report.Summary{
				RootPath:        displayRoot,
				StartedAt:       startedAt,
				FinishedAt:      time.Now(),
				Processed:       total,
				OK:              okCount,
				Fail:            failCount,
				FilesScanned:    countFiles(urlToFiles),
				JSONPath:        jsonOut,
				RepoBlobBaseURL: base,
			}

			// Ensure we have a markdown file if one is needed for the PR comment
			mdPath := mdOut
			ghRepo, ghPR, ghToken, ghOK := detectGitHubPR()
			var finalMDPath string
			if strings.TrimSpace(mdPath) != "" {
				if _, err := report.WriteMarkdown(mdPath, failedResults, summary); err != nil {
					return err
				}
				finalMDPath = mdPath
			} else if ghOK {
				// No explicit --md-out: let WriteMarkdown pick the output path
				p, err := report.WriteMarkdown("", failedResults, summary)
				if err != nil {
					return err
				}
				finalMDPath = p
			}

			// If running on a PR, post or update the comment(s), chunking as needed
			if ghOK && strings.TrimSpace(finalMDPath) != "" {
				b, rerr := os.ReadFile(finalMDPath)
				if rerr == nil {
					full := string(b)
					if shouldDebug() {
						fmt.Printf("::debug:: Report size (chars): %d\n", len(full))
					}
					chunks := chunkMarkdownByURL(full)
					if shouldDebug() {
						fmt.Printf("::debug:: Posting %d chunk(s)\n", len(chunks))
					}
					_ = upsertPRComments(ghRepo, ghPR, ghToken, chunks)
				}
			}

			fmt.Printf("Checked %d URLs: %d OK, %d failed\n", total, okCount, failCount)
			if failOnFailures && failCount > 0 {
				return fmt.Errorf("%d links failed", failCount)
			}
			return nil
		},
	}

	checkCmd.Flags().IntVar(&maxConcurrency, "concurrency", 16, "maximum concurrent requests")
	checkCmd.Flags().StringVar(&jsonOut, "json-out", "", "path to write JSON results for failed URLs (array)")
	checkCmd.Flags().StringVar(&mdOut, "md-out", "", "path to write Markdown report for PR comment")
	checkCmd.Flags().StringVar(&repoBlobBase, "repo-blob-base", "", "override GitHub blob base URL (e.g. https://github.com/owner/repo/blob/<sha>)")
	checkCmd.Flags().IntVar(&timeoutSeconds, "timeout", 10, "HTTP request timeout in seconds")
	checkCmd.Flags().BoolVar(&failOnFailures, "fail-on-failures", true, "exit non-zero if any links fail")
	checkCmd.Flags().BoolVar(&respectGitignore, "respect-gitignore", true, "respect .gitignore while scanning")

	rootCmd.AddCommand(checkCmd)
}

var (
	timeoutSeconds   int
	failOnFailures   bool
	repoBlobBase     string
	respectGitignore bool
)
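
// toSlash normalizes a user-supplied target path: it trims whitespace,
// converts OS separators to forward slashes, and strips a leading "./" so
// equivalent spellings of a path dedupe in the URL->files aggregation.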
func toSlash(p string) string {
	p = strings.TrimSpace(p)
	if p == "" {
		return p
	}
	p = filepath.ToSlash(p)
	if after, ok := strings.CutPrefix(p, "./"); ok {
		p = after
	}
	return p
}
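
// hasGlobMeta reports whether s contains a glob metacharacter ('*', '?' or
// '['), which routes the target to pattern matching instead of os.Stat.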
func hasGlobMeta(s string) bool {
	return strings.ContainsAny(s, "*?[")
}
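
// countFiles returns the number of distinct files that contributed at least
// one URL, deduplicating across the per-URL source lists.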
func countFiles(urlToFiles map[string][]string) int {
	seen := make(map[string]struct{})
	for _, files := range urlToFiles {
		for _, f := range files {
			seen[f] = struct{}{}
		}
	}
	return len(seen)
}
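
// detectGitHubPR returns the repo slug, PR number, and token when running in
// a GitHub Actions pull_request job; ok is false whenever the env vars are
// missing or the event payload carries no pull_request.number.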
func detectGitHubPR() (repo string, prNumber int, token string, ok bool) {
	repo = os.Getenv("GITHUB_REPOSITORY")
	token = os.Getenv("GITHUB_TOKEN")
	eventPath := os.Getenv("GITHUB_EVENT_PATH")
	if repo == "" || eventPath == "" || token == "" {
		return "", 0, "", false
	}
	data, err := os.ReadFile(eventPath)
	if err != nil {
		return "", 0, "", false
	}
	var ev struct {
		PullRequest struct {
			Number int `json:"number"`
		} `json:"pull_request"`
	}
	_ = json.Unmarshal(data, &ev)
	if ev.PullRequest.Number == 0 {
		return "", 0, "", false
	}
	return repo, ev.PullRequest.Number, token, true
}

// chunkMarkdownByURL splits markdown into chunks under GitHub's comment body limit,
// keeping whole URL entries together. Only the first chunk includes the original
// header and the "Failures by URL" section header. Subsequent chunks have no headers.
func chunkMarkdownByURL(body string) []string {
	const maxBody = 65000
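	// maxBody sits just below GitHub's 65536-character limit on issue/PR
	// comment bodies, leaving headroom for the marker line added on post.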
	lines := strings.Split(body, "\n")
	// locate failures header
	failIdx := -1
	for i, ln := range lines {
		if strings.TrimSpace(ln) == "### Failures by URL" {
			failIdx = i
			break
		}
	}
	if failIdx < 0 {
		// no failures section; return as single chunk
		return []string{body}
	}
	preamble := strings.Join(lines[:failIdx+1], "\n") + "\n"
	entryLines := lines[failIdx+1:]

	// build entries by URL block, starting at lines with "- " at column 0
	type entry struct {
		text   string
		length int
	}
	var entries []entry
	for i := 0; i < len(entryLines); {
		// skip leading blank lines
		for i < len(entryLines) && strings.TrimSpace(entryLines[i]) == "" {
			i++
		}
		if i >= len(entryLines) {
			break
		}
		if !strings.HasPrefix(entryLines[i], "- ") {
			// if unexpected, include line as is
			entries = append(entries, entry{text: entryLines[i] + "\n", length: len(entryLines[i]) + 1})
			i++
			continue
		}
		start := i
		i++
		for i < len(entryLines) && !strings.HasPrefix(entryLines[i], "- ") {
			i++
		}
		block := strings.Join(entryLines[start:i], "\n") + "\n"
		entries = append(entries, entry{text: block, length: len(block)})
	}

	var chunks []string
	// start first chunk with full preamble
	cur := preamble
	curLen := len(cur)
	for _, e := range entries {
		if curLen+e.length > maxBody && curLen > len(preamble) {
			// flush current chunk, start new without headers
			chunks = append(chunks, cur)
			cur = ""
			curLen = 0
		}
		// A single entry longer than maxBody is still emitted in one chunk
		// (which may then exceed the limit); entries are typically far smaller,
		// so this is a best-effort fallback rather than a hard guarantee.
		cur += e.text
		curLen += e.length
	}
	if strings.TrimSpace(cur) != "" {
		chunks = append(chunks, cur)
	}
	return chunks
}

// upsertPRComments deletes any existing slinky comments and posts the new chunked comments in order.
func upsertPRComments(repo string, prNumber int, token string, chunks []string) error {
	apiBase := "https://api.github.com"
	listURL := fmt.Sprintf("%s/repos/%s/issues/%d/comments?per_page=100", apiBase, repo, prNumber)
	req, _ := http.NewRequest(http.MethodGet, listURL, nil)
	req.Header.Set("Authorization", "Bearer "+token)
	req.Header.Set("Accept", "application/vnd.github+json")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	var comments []struct {
		ID   int    `json:"id"`
		Body string `json:"body"`
	}
	b, _ := io.ReadAll(resp.Body)
	_ = json.Unmarshal(b, &comments)
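	// Only the first page (up to 100 comments) is listed; older slinky
	// comments beyond that page would survive a run. Unmarshal failures are
	// tolerated, matching the ignored return at the call site.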

	// Delete all existing slinky-report comments to avoid stale entries
	for _, c := range comments {
		if strings.Contains(c.Body, "<!-- slinky-report -->") {
			delURL := fmt.Sprintf("%s/repos/%s/issues/comments/%d", apiBase, repo, c.ID)
			dReq, _ := http.NewRequest(http.MethodDelete, delURL, nil)
			dReq.Header.Set("Authorization", "Bearer "+token)
			dReq.Header.Set("Accept", "application/vnd.github+json")
			// Close the response body (when present) so the connection is not leaked
			if dRes, dErr := http.DefaultClient.Do(dReq); dErr == nil {
				_ = dRes.Body.Close()
			}
		}
	}

	// Post new comments in order
	for idx, chunk := range chunks {
		body := fmt.Sprintf("%s\n%s", "<!-- slinky-report -->", chunk)
		postURL := fmt.Sprintf("%s/repos/%s/issues/%d/comments", apiBase, repo, prNumber)
		payload, _ := json.Marshal(map[string]string{"body": body})
		pReq, _ := http.NewRequest(http.MethodPost, postURL, bytes.NewReader(payload))
		pReq.Header.Set("Authorization", "Bearer "+token)
		pReq.Header.Set("Accept", "application/vnd.github+json")
		pReq.Header.Set("Content-Type", "application/json")
		res, perr := http.DefaultClient.Do(pReq)
		if shouldDebug() {
			fmt.Printf("::debug:: Posted chunk %d/%d: %v\n", idx+1, len(chunks), res)
		}
		// Close the response body on success to avoid leaking the connection
		if perr == nil {
			_ = res.Body.Close()
		}
	}
	return nil
}