Enhance URL scanning in check.go by accepting comma-separated targets and improving debug output. Refactor root.go and run.go to support the new target handling and display scanned-file counts in the TUI. Introduce CollectURLsProgress in fsurls.go for per-file progress callbacks.

Luke Hagar
2025-09-12 16:50:40 +00:00
parent 1085baa766
commit 64c05b380c
6 changed files with 325 additions and 52 deletions

View File

@@ -34,35 +34,127 @@ func init() {
Short: "Scan for URLs and validate them (headless)",
Args: cobra.ArbitraryArgs,
RunE: func(cmd *cobra.Command, args []string) error {
path := "."
var gl []string
if len(args) > 0 {
// Parse targets: allow comma-separated chunks
var raw []string
for _, a := range args {
for _, part := range strings.Split(a, ",") {
p := strings.TrimSpace(part)
if p != "" {
gl = append(gl, toSlash(p))
raw = append(raw, toSlash(p))
}
}
}
if len(raw) == 0 {
raw = []string{"**/*"}
}
// Separate into globs (relative to ".") and concrete paths (dirs/files)
var globPatterns []string
type pathRoot struct {
path string
isDir bool
}
var roots []pathRoot
for _, t := range raw {
if hasGlobMeta(t) {
globPatterns = append(globPatterns, t)
continue
}
if fi, err := os.Stat(t); err == nil {
roots = append(roots, pathRoot{path: t, isDir: fi.IsDir()})
} else {
// If stat fails, treat as glob pattern under "."
globPatterns = append(globPatterns, t)
}
}
// Debug: show effective targets
if shouldDebug() {
fmt.Printf("::debug:: Roots: %s\n", strings.Join(func() []string {
var out []string
for _, r := range roots {
out = append(out, r.path)
}
return out
}(), ","))
fmt.Printf("::debug:: Glob patterns: %s\n", strings.Join(globPatterns, ","))
}
// Aggregate URL->files across all targets
agg := make(map[string]map[string]struct{})
merge := func(res map[string][]string, prefix string, isDir bool) {
for u, files := range res {
set, ok := agg[u]
if !ok {
set = make(map[string]struct{})
agg[u] = set
}
for _, fp := range files {
var merged string
if prefix == "" {
merged = fp
} else if isDir {
merged = toSlash(filepath.Join(prefix, fp))
} else {
// File root: keep the concrete file path
merged = toSlash(prefix)
}
set[merged] = struct{}{}
}
}
}
// 1) Collect for globs under current dir
if len(globPatterns) > 0 {
res, err := fsurls.CollectURLs(".", globPatterns, respectGitignore)
if err != nil {
return err
}
merge(res, "", true)
}
// 2) Collect for each concrete root
for _, r := range roots {
clean := toSlash(filepath.Clean(r.path))
if r.isDir {
res, err := fsurls.CollectURLs(r.path, []string{"**/*"}, respectGitignore)
if err != nil {
return err
}
merge(res, clean, true)
} else {
res, err := fsurls.CollectURLs(r.path, nil, respectGitignore)
if err != nil {
return err
}
merge(res, clean, false)
}
}
// Convert aggregator to final map with sorted file lists
urlToFiles := make(map[string][]string, len(agg))
for u, set := range agg {
var files []string
for f := range set {
files = append(files, f)
}
sort.Strings(files)
urlToFiles[u] = files
}
// Derive display root; fall back to "." when there are multiple roots or glob targets to avoid confusion
displayRoot := "."
if len(roots) == 1 && len(globPatterns) == 0 {
displayRoot = roots[0].path
}
if shouldDebug() {
fmt.Printf("::debug:: Root: %s\n", displayRoot)
}
// Build config
timeout := time.Duration(timeoutSeconds) * time.Second
cfg := web.Config{MaxConcurrency: maxConcurrency, RequestTimeout: timeout}
// Prepare URL list
var urls []string
for u := range urlToFiles {
urls = append(urls, u)
@@ -96,7 +188,9 @@ func init() {
// Emit GitHub Actions debug log for each URL.
// These lines appear only when step debug logging is enabled via the
// repository/organization secret ACTIONS_STEP_DEBUG=true.
if shouldDebug() {
fmt.Printf("::debug:: Scanned URL: %s status=%d ok=%v err=%s sources=%d\n", r.URL, r.Status, r.OK, r.ErrMsg, len(r.Sources))
}
if jsonOut != "" && !r.OK {
failures = append(failures, SerializableResult{
URL: r.URL,
@@ -135,7 +229,7 @@ func init() {
base = os.Getenv("SLINKY_REPO_BLOB_BASE_URL")
}
summary := report.Summary{
RootPath: displayRoot,
StartedAt: startedAt,
FinishedAt: time.Now(),
Processed: total,
@@ -190,24 +284,3 @@ func toSlash(p string) string {
func hasGlobMeta(s string) bool {
return strings.ContainsAny(s, "*?[")
}
func expandDirectories(root string, pats []string) []string {
var out []string
for _, p := range pats {
pp := strings.TrimSpace(p)
if pp == "" {
continue
}
if hasGlobMeta(pp) {
out = append(out, pp)
continue
}
abs := filepath.Join(root, filepath.FromSlash(pp))
if fi, err := os.Stat(abs); err == nil && fi.IsDir() {
out = append(out, strings.TrimSuffix(pp, "/")+"/**/*")
} else {
out = append(out, pp)
}
}
return out
}
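For reference, here is the URL aggregation pattern used by the merge closure above as a standalone sketch: a set (map[string]struct{}) nested per URL absorbs duplicate source files, and the final conversion flattens each set into a sorted slice. Names below are illustrative, not from the diff.

package main

import (
	"fmt"
	"sort"
)

func main() {
	// URL -> set of source files; the inner map dedupes repeated sightings.
	agg := make(map[string]map[string]struct{})
	add := func(url, file string) {
		set, ok := agg[url]
		if !ok {
			set = make(map[string]struct{})
			agg[url] = set
		}
		set[file] = struct{}{}
	}
	add("https://example.com", "docs/a.md")
	add("https://example.com", "docs/a.md") // duplicate: absorbed by the set
	add("https://example.com", "README.md")

	// Flatten each set into a sorted slice, as the final urlToFiles conversion does.
	for url, set := range agg {
		files := make([]string, 0, len(set))
		for f := range set {
			files = append(files, f)
		}
		sort.Strings(files)
		fmt.Println(url, files) // https://example.com [README.md docs/a.md]
	}
}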

View File

@@ -3,16 +3,23 @@ package cmd
import (
"fmt"
"os"
"strings"
"github.com/spf13/cobra"
)
var debugLogs bool
var rootCmd = &cobra.Command{
Use: "slinky",
Short: "Link checker for repos/directories and webpages (TUI)",
Long: "Slinky scans a directory/repo for URLs in files or crawls a URL, then validates links concurrently in a TUI.",
}
func init() {
rootCmd.PersistentFlags().BoolVar(&debugLogs, "debug", false, "enable debug logs")
}
func Execute() {
if err := rootCmd.Execute(); err != nil {
fmt.Fprintln(os.Stderr, err)
@@ -20,4 +27,15 @@ func Execute() {
}
}
func shouldDebug() bool {
if debugLogs {
return true
}
if strings.EqualFold(os.Getenv("ACTIONS_STEP_DEBUG"), "true") {
return true
}
if os.Getenv("RUNNER_DEBUG") == "1" {
return true
}
return false
}
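shouldDebug turns on debug output when the --debug flag is set, or when the GitHub Actions step-debug environment variables (ACTIONS_STEP_DEBUG, RUNNER_DEBUG) are present. A hypothetical test sketch, not part of the commit, assuming it sits in package cmd:

package cmd

import "testing"

func TestShouldDebug(t *testing.T) {
	// EqualFold makes the ACTIONS_STEP_DEBUG comparison case-insensitive.
	t.Setenv("ACTIONS_STEP_DEBUG", "TRUE")
	if !shouldDebug() {
		t.Fatal("expected debug logging to be enabled")
	}
}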

View File

@@ -1,6 +1,7 @@
package cmd
import (
"os"
"strings"
"github.com/spf13/cobra"
@@ -29,7 +30,22 @@ func init() {
} else {
gl = []string{"**/*"}
}
return tui.Run(".", gl, cfg, jsonOut, mdOut)
root := "."
if len(gl) == 1 && !hasGlobMeta(gl[0]) {
candidate := gl[0]
if fi, err := os.Stat(candidate); err == nil {
if fi.IsDir() {
root = candidate
gl = []string{"**/*"}
} else {
root = candidate
gl = nil
}
}
}
return tui.Run(root, gl, cfg, jsonOut, mdOut)
},
}
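The branch above promotes a single non-glob target to the walk root: an existing directory becomes the root with a **/* pattern, an existing file becomes the root with no globs, and anything else is matched as a glob under ".". The same logic extracted into a hypothetical helper (not in the diff; hasGlobMeta is the helper defined in check.go):

func resolveRoot(gl []string) (string, []string) {
	if len(gl) == 1 && !hasGlobMeta(gl[0]) {
		if fi, err := os.Stat(gl[0]); err == nil {
			if fi.IsDir() {
				return gl[0], []string{"**/*"} // walk everything under the directory
			}
			return gl[0], nil // a single file needs no glob filtering
		}
	}
	return ".", gl // globs are matched relative to the current directory
}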

View File

@@ -26,6 +26,19 @@ var htmlSrcRegex = regexp.MustCompile(`(?i)src\s*=\s*"([^"]+)"|src\s*=\s*'([^']+
// Strict hostname validation: labels are 1-63 alphanumeric/hyphen chars with no leading or trailing hyphen, at least one dot, simple TLD
var hostnameRegex = regexp.MustCompile(`^(?i)([a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)+$`)
func isDebugEnv() bool {
if os.Getenv("SLINKY_DEBUG") == "1" {
return true
}
if strings.EqualFold(os.Getenv("ACTIONS_STEP_DEBUG"), "true") {
return true
}
if os.Getenv("RUNNER_DEBUG") == "1" {
return true
}
return false
}
// CollectURLs walks the directory tree rooted at rootPath and collects URLs found in
// text-based files matching any of the provided glob patterns (doublestar ** supported).
// If globs is empty, all files are considered. Respects .gitignore if present and respectGitignore=true.
@@ -73,7 +86,9 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
// Walk the filesystem
walkFn := func(path string, d os.DirEntry, err error) error {
if isDebugEnv() {
fmt.Printf("::debug:: Walking path: %s\n", path)
}
if err != nil {
return nil
@@ -108,7 +123,9 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
}
// Debug: announce file being parsed; GitHub shows ::debug only in debug runs
if isDebugEnv() {
fmt.Printf("::debug:: Scanned File: %s\n", rel)
}
f, ferr := os.Open(path)
if ferr != nil {
@@ -170,6 +187,142 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
return result, nil
}
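Both collectors match files by running doublestar patterns against root-relative, slash-separated paths. A quick sketch of the matching semantics, assuming the bmatcuk/doublestar package (the import path and version are not shown in this diff):

package main

import (
	"fmt"

	doublestar "github.com/bmatcuk/doublestar/v4" // assumed import path
)

func main() {
	ok, _ := doublestar.PathMatch("**/*.md", "docs/guide/intro.md")
	fmt.Println(ok) // true: ** matches across directory separators
	ok, _ = doublestar.PathMatch("*.md", "docs/intro.md")
	fmt.Println(ok) // false: a single * stops at separators
}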
// CollectURLsProgress is like CollectURLs but invokes onFile(relPath) for each included file.
func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool, onFile func(string)) (map[string][]string, error) {
if strings.TrimSpace(rootPath) == "" {
rootPath = "."
}
cleanRoot := filepath.Clean(rootPath)
st, _ := os.Stat(cleanRoot)
isFileRoot := st != nil && !st.IsDir()
var ign *ignore.GitIgnore
if !isFileRoot && respectGitignore {
ign = loadGitIgnore(cleanRoot)
}
var patterns []string
for _, g := range globs {
g = strings.TrimSpace(g)
if g == "" {
continue
}
patterns = append(patterns, g)
}
shouldInclude := func(rel string) bool {
if len(patterns) == 0 {
return true
}
for _, p := range patterns {
ok, _ := doublestar.PathMatch(p, rel)
if ok {
return true
}
}
return false
}
urlToFiles := make(map[string]map[string]struct{})
// 2 MiB max file size to avoid huge/binary files
const maxSize = 2 * 1024 * 1024
walkFn := func(path string, d os.DirEntry, err error) error {
if err != nil {
return nil
}
rel, rerr := filepath.Rel(cleanRoot, path)
if rerr != nil {
rel = path
}
rel = filepath.ToSlash(rel)
if d.IsDir() {
base := filepath.Base(path)
if base == ".git" {
return filepath.SkipDir
}
return nil
}
if ign != nil && ign.MatchesPath(rel) {
return nil
}
info, ierr := d.Info()
if ierr != nil {
return nil
}
if info.Size() > maxSize {
return nil
}
if isFileRoot && rel == "." {
rel = filepath.ToSlash(filepath.Base(path))
}
if !shouldInclude(rel) {
return nil
}
if onFile != nil {
onFile(rel)
}
f, ferr := os.Open(path)
if ferr != nil {
return nil
}
defer f.Close()
br := bufio.NewReader(f)
var b strings.Builder
read := int64(0)
for {
chunk, cerr := br.ReadString('\n')
b.WriteString(chunk)
read += int64(len(chunk))
if cerr == io.EOF || read > maxSize {
break
}
if cerr != nil {
break
}
}
content := b.String()
if strings.IndexByte(content, '\x00') >= 0 {
return nil
}
candidates := extractCandidates(content)
if len(candidates) == 0 {
return nil
}
for _, raw := range candidates {
u := sanitizeURLToken(raw)
if u == "" {
continue
}
fileSet, ok := urlToFiles[u]
if !ok {
fileSet = make(map[string]struct{})
urlToFiles[u] = fileSet
}
fileSet[rel] = struct{}{}
}
return nil
}
_ = filepath.WalkDir(cleanRoot, walkFn)
result := make(map[string][]string, len(urlToFiles))
for u, files := range urlToFiles {
var list []string
for fp := range files {
list = append(list, fp)
}
sort.Strings(list)
result[u] = list
}
return result, nil
}
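A minimal caller sketch for the new API (the root, pattern, and printing are illustrative; run it inside any function that can return an error and imports fsurls and fmt):

urls, err := fsurls.CollectURLsProgress(".", []string{"**/*.md"}, true, func(rel string) {
	fmt.Printf("scanning %s\n", rel) // invoked once per included file, before parsing
})
if err != nil {
	return err
}
for u, files := range urls {
	fmt.Println(u, "found in", files)
}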
func sanitizeURLToken(s string) string {
s = strings.TrimSpace(s)
// Strip surrounding angle brackets or quotes

View File

@@ -8,3 +8,7 @@ import (
func fsCollect(root string, globs []string) (map[string][]string, error) {
return fsurls.CollectURLs(root, globs, true)
}
func fsCollectProgress(root string, globs []string, onFile func(string)) (map[string][]string, error) {
return fsurls.CollectURLsProgress(root, globs, true, onFile)
}

View File

@@ -25,6 +25,8 @@ type crawlDoneMsg struct{}
type statsMsg struct{ s web.Stats }
type tickMsg struct{ t time.Time }
type fileScannedMsg struct{ rel string }
type model struct {
rootPath string
cfg web.Config
@@ -50,12 +52,13 @@ type model struct {
pending int
processed int
lastProcessed int
rps float64
peakRPS float64
lowRPS float64
filesScanned int
allResults []web.Result
jsonPath string
mdPath string
@@ -83,7 +86,12 @@ func (m *model) Init() tea.Cmd {
ctx, cancel := context.WithCancel(context.Background())
go func() {
defer cancel()
urlsMap, _ := fsCollectProgress(m.rootPath, m.globs, func(rel string) {
m.filesScanned++
// Emit a short event line per file to show activity
m.lines = append(m.lines, fmt.Sprintf("📄 %s", rel))
m.refreshViewport()
})
var urls []string
for u := range urlsMap {
urls = append(urls, u)
@@ -280,6 +288,7 @@ func (m *model) View() string {
fmt.Sprintf("Duration: %s", dur.Truncate(time.Millisecond)),
fmt.Sprintf("Processed: %d OK:%d Fail:%d", m.processed, m.ok, m.fail),
fmt.Sprintf("Rates: avg %.1f/s peak %.1f/s low %.1f/s", avg, m.peakRPS, m.lowRPS),
fmt.Sprintf("Files scanned: %d", m.filesScanned),
}
if m.jsonPath != "" {
summary = append(summary, fmt.Sprintf("JSON: %s", m.jsonPath))
@@ -297,7 +306,7 @@ func (m *model) View() string {
percent = float64(m.processed) / float64(totalWork)
}
progressLine := m.prog.ViewAs(percent)
stats := fmt.Sprintf("%s total:%d ok:%d fail:%d pending:%d processed:%d rps:%.1f/s", m.spin.View(), m.total, m.ok, m.fail, m.pending, m.processed, m.rps)
stats := fmt.Sprintf("%s total:%d ok:%d fail:%d pending:%d processed:%d rps:%.1f/s files:%d", m.spin.View(), m.total, m.ok, m.fail, m.pending, m.processed, m.rps, m.filesScanned)
body := m.vp.View()
footer := lipgloss.NewStyle().Faint(true).Render("Controls: [q] quit [f] toggle fails")
container := lipgloss.NewStyle().Padding(1)
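One caveat: the Init callback above mutates m.filesScanned and m.lines from the collector goroutine, while Bubble Tea's update loop normally owns the model, and the new fileScannedMsg type is declared but not used in these hunks. A hedged sketch of the message-passing alternative the type suggests, fed by a channel-backed tea.Cmd (the fileCh field and collectCmd helper are hypothetical):

// Deliver one per-file event per invocation; Update re-arms the command.
func (m *model) collectCmd() tea.Cmd {
	return func() tea.Msg {
		rel, ok := <-m.fileCh // hypothetical chan string fed by the collector goroutine
		if !ok {
			return nil // channel closed: collection finished
		}
		return fileScannedMsg{rel: rel}
	}
}

// In Update, the message is then handled on the UI goroutine:
// case fileScannedMsg:
//     m.filesScanned++
//     m.lines = append(m.lines, fmt.Sprintf("📄 %s", msg.rel))
//     m.refreshViewport()
//     return m, m.collectCmd()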