Enhance URL scanning functionality in check.go by allowing comma-separated targets and improving debug output. Refactor root.go and run.go to support new target handling and display scanned file counts in the TUI. Introduce CollectURLsProgress in fsurls.go for detailed file processing feedback.

Luke Hagar
2025-09-12 16:50:40 +00:00
parent 1085baa766
commit 64c05b380c
6 changed files with 325 additions and 52 deletions
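
With this change, targets passed to the headless scan may mix comma-separated globs, directories, and plain files. A couple of hypothetical invocations (the "check" subcommand name is assumed from check.go):

    slinky check docs,README.md
    slinky check "**/*.md","**/*.go"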

View File

@@ -34,35 +34,127 @@ func init() {
Short: "Scan for URLs and validate them (headless)", Short: "Scan for URLs and validate them (headless)",
Args: cobra.ArbitraryArgs, Args: cobra.ArbitraryArgs,
RunE: func(cmd *cobra.Command, args []string) error { RunE: func(cmd *cobra.Command, args []string) error {
path := "." // Parse targets: allow comma-separated chunks
var raw []string
var gl []string for _, a := range args {
if len(args) > 0 { for _, part := range strings.Split(a, ",") {
for _, a := range args { p := strings.TrimSpace(part)
for _, part := range strings.Split(a, ",") { if p != "" {
p := strings.TrimSpace(part) raw = append(raw, toSlash(p))
if p != "" {
gl = append(gl, toSlash(p))
}
} }
} }
} else { }
gl = []string{"**/*"} if len(raw) == 0 {
raw = []string{"**/*"}
} }
gl = expandDirectories(path, gl) // Separate into globs (relative to ".") and concrete paths (dirs/files)
var globPatterns []string
type pathRoot struct {
path string
isDir bool
}
var roots []pathRoot
for _, t := range raw {
if hasGlobMeta(t) {
globPatterns = append(globPatterns, t)
continue
}
if fi, err := os.Stat(t); err == nil {
roots = append(roots, pathRoot{path: t, isDir: fi.IsDir()})
} else {
// If stat fails, treat as glob pattern under "."
globPatterns = append(globPatterns, t)
}
}
// Emit normalized patterns for debugging // Debug: show effective targets
fmt.Printf("::debug:: Effective patterns: %s\n", strings.Join(gl, ",")) if shouldDebug() {
fmt.Printf("::debug:: Roots: %s\n", strings.Join(func() []string {
var out []string
for _, r := range roots {
out = append(out, r.path)
}
return out
}(), ","))
fmt.Printf("::debug:: Glob patterns: %s\n", strings.Join(globPatterns, ","))
}
// Aggregate URL->files across all targets
agg := make(map[string]map[string]struct{})
merge := func(res map[string][]string, prefix string, isDir bool) {
for u, files := range res {
set, ok := agg[u]
if !ok {
set = make(map[string]struct{})
agg[u] = set
}
for _, fp := range files {
var merged string
if prefix == "" {
merged = fp
} else if isDir {
merged = toSlash(filepath.Join(prefix, fp))
} else {
// File root: keep the concrete file path
merged = toSlash(prefix)
}
set[merged] = struct{}{}
}
}
}
// 1) Collect for globs under current dir
if len(globPatterns) > 0 {
res, err := fsurls.CollectURLs(".", globPatterns, respectGitignore)
if err != nil {
return err
}
merge(res, "", true)
}
// 2) Collect for each concrete root
for _, r := range roots {
clean := toSlash(filepath.Clean(r.path))
if r.isDir {
res, err := fsurls.CollectURLs(r.path, []string{"**/*"}, respectGitignore)
if err != nil {
return err
}
merge(res, clean, true)
} else {
res, err := fsurls.CollectURLs(r.path, nil, respectGitignore)
if err != nil {
return err
}
merge(res, clean, false)
}
}
// Convert aggregator to final map with sorted file lists
urlToFiles := make(map[string][]string, len(agg))
for u, set := range agg {
var files []string
for f := range set {
files = append(files, f)
}
sort.Strings(files)
urlToFiles[u] = files
}
// Derive display root; we use "." when multiple roots to avoid confusion
displayRoot := "."
if len(roots) == 1 && len(globPatterns) == 0 {
displayRoot = roots[0].path
}
if shouldDebug() {
fmt.Printf("::debug:: Root: %s\n", displayRoot)
}
// Build config
timeout := time.Duration(timeoutSeconds) * time.Second timeout := time.Duration(timeoutSeconds) * time.Second
cfg := web.Config{MaxConcurrency: maxConcurrency, RequestTimeout: timeout} cfg := web.Config{MaxConcurrency: maxConcurrency, RequestTimeout: timeout}
// Collect URLs // Prepare URL list
urlToFiles, err := fsurls.CollectURLs(path, gl, respectGitignore)
if err != nil {
return err
}
var urls []string var urls []string
for u := range urlToFiles { for u := range urlToFiles {
urls = append(urls, u) urls = append(urls, u)
@@ -96,7 +188,9 @@ func init() {
 			// Emit GitHub Actions debug log for each URL.
 			// These lines appear only when step debug logging is enabled via the
 			// repository/organization secret ACTIONS_STEP_DEBUG=true.
-			fmt.Printf("::debug:: Scanned URL: %s status=%d ok=%v err=%s sources=%d\n", r.URL, r.Status, r.OK, r.ErrMsg, len(r.Sources))
+			if shouldDebug() {
+				fmt.Printf("::debug:: Scanned URL: %s status=%d ok=%v err=%s sources=%d\n", r.URL, r.Status, r.OK, r.ErrMsg, len(r.Sources))
+			}
 			if jsonOut != "" && !r.OK {
 				failures = append(failures, SerializableResult{
 					URL: r.URL,
@@ -135,7 +229,7 @@ func init() {
 				base = os.Getenv("SLINKY_REPO_BLOB_BASE_URL")
 			}
 			summary := report.Summary{
-				RootPath:   path,
+				RootPath:   displayRoot,
 				StartedAt:  startedAt,
 				FinishedAt: time.Now(),
 				Processed:  total,
@@ -190,24 +284,3 @@ func toSlash(p string) string {
 func hasGlobMeta(s string) bool {
 	return strings.ContainsAny(s, "*?[")
 }
-func expandDirectories(root string, pats []string) []string {
-	var out []string
-	for _, p := range pats {
-		pp := strings.TrimSpace(p)
-		if pp == "" {
-			continue
-		}
-		if hasGlobMeta(pp) {
-			out = append(out, pp)
-			continue
-		}
-		abs := filepath.Join(root, filepath.FromSlash(pp))
-		if fi, err := os.Stat(abs); err == nil && fi.IsDir() {
-			out = append(out, strings.TrimSuffix(pp, "/")+"/**/*")
-		} else {
-			out = append(out, pp)
-		}
-	}
-	return out
-}
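
For clarity, the new target classification reduces to three cases: tokens with glob metacharacters (*?[) stay glob patterns under ".", tokens that stat successfully become concrete roots, and anything else falls back to being treated as a glob. A minimal standalone sketch of that logic, with hypothetical inputs (not part of the commit):

package main

import (
	"fmt"
	"os"
	"strings"
)

// hasGlobMeta mirrors the helper in check.go.
func hasGlobMeta(s string) bool {
	return strings.ContainsAny(s, "*?[")
}

func main() {
	// Hypothetical targets; "docs" is assumed to exist on disk, "missing.md" is not.
	for _, t := range []string{"**/*.md", "docs", "missing.md"} {
		if hasGlobMeta(t) {
			fmt.Printf("%s -> glob pattern under \".\"\n", t)
		} else if fi, err := os.Stat(t); err == nil {
			fmt.Printf("%s -> concrete root (dir=%v)\n", t, fi.IsDir())
		} else {
			// Stat failed: fall back to treating the token as a glob, as check.go does.
			fmt.Printf("%s -> treated as glob pattern\n", t)
		}
	}
}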

View File

@@ -3,16 +3,23 @@ package cmd
 import (
 	"fmt"
 	"os"
+	"strings"
 
 	"github.com/spf13/cobra"
 )
 
+var debugLogs bool
+
 var rootCmd = &cobra.Command{
 	Use:   "slinky",
 	Short: "Link checker for repos/directories and webpages (TUI)",
 	Long:  "Slinky scans a directory/repo for URLs in files or crawls a URL, then validates links concurrently in a TUI.",
 }
 
+func init() {
+	rootCmd.PersistentFlags().BoolVar(&debugLogs, "debug", false, "enable debug logs")
+}
+
 func Execute() {
 	if err := rootCmd.Execute(); err != nil {
 		fmt.Fprintln(os.Stderr, err)
@@ -20,4 +27,15 @@ func Execute() {
 	}
 }
+
+func shouldDebug() bool {
+	if debugLogs {
+		return true
+	}
+	if strings.EqualFold(os.Getenv("ACTIONS_STEP_DEBUG"), "true") {
+		return true
+	}
+	if os.Getenv("RUNNER_DEBUG") == "1" {
+		return true
+	}
+	return false
+}
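
With shouldDebug() in place, debug lines can be enabled three ways: the new --debug flag, or either GitHub Actions debug environment variable. Example invocations (shapes assumed, not from the commit):

    slinky check --debug docs
    ACTIONS_STEP_DEBUG=true slinky check docs
    RUNNER_DEBUG=1 slinky check docs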

View File

@@ -1,6 +1,7 @@
 package cmd
 
 import (
+	"os"
 	"strings"
 
 	"github.com/spf13/cobra"
@@ -29,7 +30,22 @@ func init() {
 			} else {
 				gl = []string{"**/*"}
 			}
-			return tui.Run(".", gl, cfg, jsonOut, mdOut)
+
+			root := "."
+			if len(gl) == 1 && !hasGlobMeta(gl[0]) {
+				candidate := gl[0]
+				if fi, err := os.Stat(candidate); err == nil {
+					if fi.IsDir() {
+						root = candidate
+						gl = []string{"**/*"}
+					} else {
+						root = candidate
+						gl = nil
+					}
+				}
+			}
+			return tui.Run(root, gl, cfg, jsonOut, mdOut)
 		},
 	}
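
The effect: passing a single concrete path to the TUI command now roots the scan at that path, so (assuming the subcommand is "run") slinky run docs scans **/* beneath docs/, and slinky run README.md scans just that file, instead of both being treated as glob patterns relative to the current directory.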

View File

@@ -26,6 +26,19 @@ var htmlSrcRegex = regexp.MustCompile(`(?i)src\s*=\s*"([^"]+)"|src\s*=\s*'([^']+
 // Strict hostname validation: labels 1-63 chars, alnum & hyphen, not start/end hyphen, at least one dot, simple TLD
 var hostnameRegex = regexp.MustCompile(`^(?i)([a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)+$`)
 
+func isDebugEnv() bool {
+	if os.Getenv("SLINKY_DEBUG") == "1" {
+		return true
+	}
+	if strings.EqualFold(os.Getenv("ACTIONS_STEP_DEBUG"), "true") {
+		return true
+	}
+	if os.Getenv("RUNNER_DEBUG") == "1" {
+		return true
+	}
+	return false
+}
+
 // CollectURLs walks the directory tree rooted at rootPath and collects URLs found in
 // text-based files matching any of the provided glob patterns (doublestar ** supported).
 // If globs is empty, all files are considered. Respects .gitignore if present and respectGitignore=true.
@@ -73,7 +86,9 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
 	// Walk the filesystem
 	walkFn := func(path string, d os.DirEntry, err error) error {
-		fmt.Printf("::debug:: Walking path: %s\n", path)
+		if isDebugEnv() {
+			fmt.Printf("::debug:: Walking path: %s\n", path)
+		}
 		if err != nil {
 			return nil
@@ -108,7 +123,9 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
 		}
 		// Debug: announce file being parsed; GitHub shows ::debug only in debug runs
-		fmt.Printf("::debug:: Scanned File: %s\n", rel)
+		if isDebugEnv() {
+			fmt.Printf("::debug:: Scanned File: %s\n", rel)
+		}
 		f, ferr := os.Open(path)
 		if ferr != nil {
@@ -170,6 +187,142 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
 	return result, nil
 }
 
+// CollectURLsProgress is like CollectURLs but invokes onFile(relPath) for each included file.
+func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool, onFile func(string)) (map[string][]string, error) {
+	if strings.TrimSpace(rootPath) == "" {
+		rootPath = "."
+	}
+	cleanRoot := filepath.Clean(rootPath)
+	st, _ := os.Stat(cleanRoot)
+	isFileRoot := st != nil && !st.IsDir()
+	var ign *ignore.GitIgnore
+	if !isFileRoot && respectGitignore {
+		ign = loadGitIgnore(cleanRoot)
+	}
+	var patterns []string
+	for _, g := range globs {
+		g = strings.TrimSpace(g)
+		if g == "" {
+			continue
+		}
+		patterns = append(patterns, g)
+	}
+	shouldInclude := func(rel string) bool {
+		if len(patterns) == 0 {
+			return true
+		}
+		for _, p := range patterns {
+			ok, _ := doublestar.PathMatch(p, rel)
+			if ok {
+				return true
+			}
+		}
+		return false
+	}
+	urlToFiles := make(map[string]map[string]struct{})
+	// 2 MiB max file size to avoid huge/binary files
+	const maxSize = 2 * 1024 * 1024
+	walkFn := func(path string, d os.DirEntry, err error) error {
+		if err != nil {
+			return nil
+		}
+		rel, rerr := filepath.Rel(cleanRoot, path)
+		if rerr != nil {
+			rel = path
+		}
+		rel = filepath.ToSlash(rel)
+		if d.IsDir() {
+			base := filepath.Base(path)
+			if base == ".git" {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		if ign != nil && ign.MatchesPath(rel) {
+			return nil
+		}
+		info, ierr := d.Info()
+		if ierr != nil {
+			return nil
+		}
+		if info.Size() > maxSize {
+			return nil
+		}
+		if isFileRoot && rel == "." {
+			rel = filepath.ToSlash(filepath.Base(path))
+		}
+		if !shouldInclude(rel) {
+			return nil
+		}
+		if onFile != nil {
+			onFile(rel)
+		}
+		f, ferr := os.Open(path)
+		if ferr != nil {
+			return nil
+		}
+		defer f.Close()
+		br := bufio.NewReader(f)
+		var b strings.Builder
+		read := int64(0)
+		for {
+			chunk, cerr := br.ReadString('\n')
+			b.WriteString(chunk)
+			read += int64(len(chunk))
+			if cerr == io.EOF || read > maxSize {
+				break
+			}
+			if cerr != nil {
+				break
+			}
+		}
+		content := b.String()
+		if strings.IndexByte(content, '\x00') >= 0 {
+			return nil
+		}
+		candidates := extractCandidates(content)
+		if len(candidates) == 0 {
+			return nil
+		}
+		for _, raw := range candidates {
+			u := sanitizeURLToken(raw)
+			if u == "" {
+				continue
+			}
+			fileSet, ok := urlToFiles[u]
+			if !ok {
+				fileSet = make(map[string]struct{})
+				urlToFiles[u] = fileSet
+			}
+			fileSet[rel] = struct{}{}
+		}
+		return nil
+	}
+	_ = filepath.WalkDir(cleanRoot, walkFn)
+	result := make(map[string][]string, len(urlToFiles))
+	for u, files := range urlToFiles {
+		var list []string
+		for fp := range files {
+			list = append(list, fp)
+		}
+		sort.Strings(list)
+		result[u] = list
+	}
+	return result, nil
+}
+
 func sanitizeURLToken(s string) string {
 	s = strings.TrimSpace(s)
 	// Strip surrounding angle brackets or quotes
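
A minimal caller sketch for the new API, assuming it is invoked from inside this module (the import path below is a placeholder, not the commit's actual path):

package main

import (
	"fmt"

	// Placeholder import path; substitute the module's real fsurls package.
	"example.com/slinky/internal/fsurls"
)

func main() {
	scanned := 0
	// Walk "." for Markdown files, honoring .gitignore, and report each file as it is read.
	urls, err := fsurls.CollectURLsProgress(".", []string{"**/*.md"}, true, func(rel string) {
		scanned++
		fmt.Println("scanning:", rel)
	})
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Printf("scanned %d files, found %d unique URLs\n", scanned, len(urls))
}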

View File

@@ -8,3 +8,7 @@ import (
 func fsCollect(root string, globs []string) (map[string][]string, error) {
 	return fsurls.CollectURLs(root, globs, true)
 }
+
+func fsCollectProgress(root string, globs []string, onFile func(string)) (map[string][]string, error) {
+	return fsurls.CollectURLsProgress(root, globs, true, onFile)
+}

View File

@@ -25,6 +25,8 @@ type crawlDoneMsg struct{}
 type statsMsg struct{ s web.Stats }
 type tickMsg struct{ t time.Time }
 
+type fileScannedMsg struct{ rel string }
+
 type model struct {
 	rootPath string
 	cfg      web.Config
@@ -48,14 +50,15 @@ type model struct {
 	ok            int
 	fail          int
 	pending       int
 	processed     int
 	lastProcessed int
 	rps           float64
 	peakRPS       float64
 	lowRPS        float64
+	filesScanned  int
 
 	allResults []web.Result
 	jsonPath   string
 	mdPath     string
@@ -83,7 +86,12 @@ func (m *model) Init() tea.Cmd {
 	ctx, cancel := context.WithCancel(context.Background())
 	go func() {
 		defer cancel()
-		urlsMap, _ := fsCollect(m.rootPath, m.globs)
+		urlsMap, _ := fsCollectProgress(m.rootPath, m.globs, func(rel string) {
+			m.filesScanned++
+			// Emit a short event line per file to show activity
+			m.lines = append(m.lines, fmt.Sprintf("📄 %s", rel))
+			m.refreshViewport()
+		})
 		var urls []string
 		for u := range urlsMap {
 			urls = append(urls, u)
@@ -280,6 +288,7 @@ func (m *model) View() string {
fmt.Sprintf("Duration: %s", dur.Truncate(time.Millisecond)), fmt.Sprintf("Duration: %s", dur.Truncate(time.Millisecond)),
fmt.Sprintf("Processed: %d OK:%d Fail:%d", m.processed, m.ok, m.fail), fmt.Sprintf("Processed: %d OK:%d Fail:%d", m.processed, m.ok, m.fail),
fmt.Sprintf("Rates: avg %.1f/s peak %.1f/s low %.1f/s", avg, m.peakRPS, m.lowRPS), fmt.Sprintf("Rates: avg %.1f/s peak %.1f/s low %.1f/s", avg, m.peakRPS, m.lowRPS),
fmt.Sprintf("Files scanned: %d", m.filesScanned),
} }
if m.jsonPath != "" { if m.jsonPath != "" {
summary = append(summary, fmt.Sprintf("JSON: %s", m.jsonPath)) summary = append(summary, fmt.Sprintf("JSON: %s", m.jsonPath))
@@ -297,7 +306,7 @@ func (m *model) View() string {
 		percent = float64(m.processed) / float64(totalWork)
 	}
 	progressLine := m.prog.ViewAs(percent)
-	stats := fmt.Sprintf("%s total:%d ok:%d fail:%d pending:%d processed:%d rps:%.1f/s", m.spin.View(), m.total, m.ok, m.fail, m.pending, m.processed, m.rps)
+	stats := fmt.Sprintf("%s total:%d ok:%d fail:%d pending:%d processed:%d rps:%.1f/s files:%d", m.spin.View(), m.total, m.ok, m.fail, m.pending, m.processed, m.rps, m.filesScanned)
 	body := m.vp.View()
 	footer := lipgloss.NewStyle().Faint(true).Render("Controls: [q] quit [f] toggle fails")
 	container := lipgloss.NewStyle().Padding(1)
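
For illustration only (values invented), the updated stats line renders as something like:

    ⠋ total:120 ok:95 fail:3 pending:22 processed:98 rps:14.2/s files:57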