mirror of
https://github.com/LukeHagar/slinky.git
synced 2025-12-06 04:21:20 +00:00
Enhance URL scanning functionality in check.go by allowing comma-separated targets and improving debug output. Refactor root.go and run.go to support new target handling and display scanned file counts in the TUI. Introduce CollectURLsProgress in fsurls.go for detailed file processing feedback.
This commit is contained in:
159
cmd/check.go
159
cmd/check.go
@@ -34,35 +34,127 @@ func init() {
|
||||
Short: "Scan for URLs and validate them (headless)",
|
||||
Args: cobra.ArbitraryArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
path := "."
|
||||
|
||||
var gl []string
|
||||
if len(args) > 0 {
|
||||
for _, a := range args {
|
||||
for _, part := range strings.Split(a, ",") {
|
||||
p := strings.TrimSpace(part)
|
||||
if p != "" {
|
||||
gl = append(gl, toSlash(p))
|
||||
}
|
||||
// Parse targets: allow comma-separated chunks
|
||||
var raw []string
|
||||
for _, a := range args {
|
||||
for _, part := range strings.Split(a, ",") {
|
||||
p := strings.TrimSpace(part)
|
||||
if p != "" {
|
||||
raw = append(raw, toSlash(p))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
gl = []string{"**/*"}
|
||||
}
|
||||
if len(raw) == 0 {
|
||||
raw = []string{"**/*"}
|
||||
}
|
||||
|
||||
gl = expandDirectories(path, gl)
|
||||
// Separate into globs (relative to ".") and concrete paths (dirs/files)
|
||||
var globPatterns []string
|
||||
type pathRoot struct {
|
||||
path string
|
||||
isDir bool
|
||||
}
|
||||
var roots []pathRoot
|
||||
for _, t := range raw {
|
||||
if hasGlobMeta(t) {
|
||||
globPatterns = append(globPatterns, t)
|
||||
continue
|
||||
}
|
||||
if fi, err := os.Stat(t); err == nil {
|
||||
roots = append(roots, pathRoot{path: t, isDir: fi.IsDir()})
|
||||
} else {
|
||||
// If stat fails, treat as glob pattern under "."
|
||||
globPatterns = append(globPatterns, t)
|
||||
}
|
||||
}
|
||||
|
||||
// Emit normalized patterns for debugging
|
||||
fmt.Printf("::debug:: Effective patterns: %s\n", strings.Join(gl, ","))
|
||||
// Debug: show effective targets
|
||||
if shouldDebug() {
|
||||
fmt.Printf("::debug:: Roots: %s\n", strings.Join(func() []string {
|
||||
var out []string
|
||||
for _, r := range roots {
|
||||
out = append(out, r.path)
|
||||
}
|
||||
return out
|
||||
}(), ","))
|
||||
fmt.Printf("::debug:: Glob patterns: %s\n", strings.Join(globPatterns, ","))
|
||||
}
|
||||
|
||||
// Aggregate URL->files across all targets
|
||||
agg := make(map[string]map[string]struct{})
|
||||
merge := func(res map[string][]string, prefix string, isDir bool) {
|
||||
for u, files := range res {
|
||||
set, ok := agg[u]
|
||||
if !ok {
|
||||
set = make(map[string]struct{})
|
||||
agg[u] = set
|
||||
}
|
||||
for _, fp := range files {
|
||||
var merged string
|
||||
if prefix == "" {
|
||||
merged = fp
|
||||
} else if isDir {
|
||||
merged = toSlash(filepath.Join(prefix, fp))
|
||||
} else {
|
||||
// File root: keep the concrete file path
|
||||
merged = toSlash(prefix)
|
||||
}
|
||||
set[merged] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 1) Collect for globs under current dir
|
||||
if len(globPatterns) > 0 {
|
||||
res, err := fsurls.CollectURLs(".", globPatterns, respectGitignore)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
merge(res, "", true)
|
||||
}
|
||||
// 2) Collect for each concrete root
|
||||
for _, r := range roots {
|
||||
clean := toSlash(filepath.Clean(r.path))
|
||||
if r.isDir {
|
||||
res, err := fsurls.CollectURLs(r.path, []string{"**/*"}, respectGitignore)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
merge(res, clean, true)
|
||||
} else {
|
||||
res, err := fsurls.CollectURLs(r.path, nil, respectGitignore)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
merge(res, clean, false)
|
||||
}
|
||||
}
|
||||
|
||||
// Convert aggregator to final map with sorted file lists
|
||||
urlToFiles := make(map[string][]string, len(agg))
|
||||
for u, set := range agg {
|
||||
var files []string
|
||||
for f := range set {
|
||||
files = append(files, f)
|
||||
}
|
||||
sort.Strings(files)
|
||||
urlToFiles[u] = files
|
||||
}
|
||||
|
||||
// Derive display root; we use "." when multiple roots to avoid confusion
|
||||
displayRoot := "."
|
||||
if len(roots) == 1 && len(globPatterns) == 0 {
|
||||
displayRoot = roots[0].path
|
||||
}
|
||||
if shouldDebug() {
|
||||
fmt.Printf("::debug:: Root: %s\n", displayRoot)
|
||||
}
|
||||
|
||||
// Build config
|
||||
timeout := time.Duration(timeoutSeconds) * time.Second
|
||||
cfg := web.Config{MaxConcurrency: maxConcurrency, RequestTimeout: timeout}
|
||||
|
||||
// Collect URLs
|
||||
urlToFiles, err := fsurls.CollectURLs(path, gl, respectGitignore)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Prepare URL list
|
||||
var urls []string
|
||||
for u := range urlToFiles {
|
||||
urls = append(urls, u)
|
||||
@@ -96,7 +188,9 @@ func init() {
|
||||
// Emit GitHub Actions debug log for each URL.
|
||||
// These lines appear only when step debug logging is enabled via the
|
||||
// repository/organization secret ACTIONS_STEP_DEBUG=true.
|
||||
fmt.Printf("::debug:: Scanned URL: %s status=%d ok=%v err=%s sources=%d\n", r.URL, r.Status, r.OK, r.ErrMsg, len(r.Sources))
|
||||
if shouldDebug() {
|
||||
fmt.Printf("::debug:: Scanned URL: %s status=%d ok=%v err=%s sources=%d\n", r.URL, r.Status, r.OK, r.ErrMsg, len(r.Sources))
|
||||
}
|
||||
if jsonOut != "" && !r.OK {
|
||||
failures = append(failures, SerializableResult{
|
||||
URL: r.URL,
|
||||
@@ -135,7 +229,7 @@ func init() {
|
||||
base = os.Getenv("SLINKY_REPO_BLOB_BASE_URL")
|
||||
}
|
||||
summary := report.Summary{
|
||||
RootPath: path,
|
||||
RootPath: displayRoot,
|
||||
StartedAt: startedAt,
|
||||
FinishedAt: time.Now(),
|
||||
Processed: total,
|
||||
@@ -190,24 +284,3 @@ func toSlash(p string) string {
|
||||
// hasGlobMeta reports whether s contains at least one of the glob
// metacharacters '*', '?' or '['.
func hasGlobMeta(s string) bool {
	const metaChars = "*?["
	return strings.IndexAny(s, metaChars) != -1
}
|
||||
|
||||
func expandDirectories(root string, pats []string) []string {
|
||||
var out []string
|
||||
for _, p := range pats {
|
||||
pp := strings.TrimSpace(p)
|
||||
if pp == "" {
|
||||
continue
|
||||
}
|
||||
if hasGlobMeta(pp) {
|
||||
out = append(out, pp)
|
||||
continue
|
||||
}
|
||||
abs := filepath.Join(root, filepath.FromSlash(pp))
|
||||
if fi, err := os.Stat(abs); err == nil && fi.IsDir() {
|
||||
out = append(out, strings.TrimSuffix(pp, "/")+"/**/*")
|
||||
} else {
|
||||
out = append(out, pp)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
20
cmd/root.go
20
cmd/root.go
@@ -3,16 +3,23 @@ package cmd
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// debugLogs is bound to the persistent "debug" flag on rootCmd; when true,
// shouldDebug reports that debug output is enabled.
var debugLogs bool
|
||||
|
||||
// rootCmd is the top-level "slinky" cobra command. Its help text describes
// scanning a directory/repo for URLs or crawling a URL and validating the
// links concurrently in a TUI.
var rootCmd = &cobra.Command{
|
||||
Use: "slinky",
|
||||
Short: "Link checker for repos/directories and webpages (TUI)",
|
||||
Long: "Slinky scans a directory/repo for URLs in files or crawls a URL, then validates links concurrently in a TUI.",
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.PersistentFlags().BoolVar(&debugLogs, "debug", false, "enable debug logs")
|
||||
}
|
||||
|
||||
func Execute() {
|
||||
if err := rootCmd.Execute(); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
@@ -20,4 +27,15 @@ func Execute() {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func shouldDebug() bool {
|
||||
if debugLogs {
|
||||
return true
|
||||
}
|
||||
if strings.EqualFold(os.Getenv("ACTIONS_STEP_DEBUG"), "true") {
|
||||
return true
|
||||
}
|
||||
if os.Getenv("RUNNER_DEBUG") == "1" {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
18
cmd/run.go
18
cmd/run.go
@@ -1,6 +1,7 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
@@ -29,7 +30,22 @@ func init() {
|
||||
} else {
|
||||
gl = []string{"**/*"}
|
||||
}
|
||||
return tui.Run(".", gl, cfg, jsonOut, mdOut)
|
||||
|
||||
root := "."
|
||||
if len(gl) == 1 && !hasGlobMeta(gl[0]) {
|
||||
candidate := gl[0]
|
||||
if fi, err := os.Stat(candidate); err == nil {
|
||||
if fi.IsDir() {
|
||||
root = candidate
|
||||
gl = []string{"**/*"}
|
||||
} else {
|
||||
root = candidate
|
||||
gl = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tui.Run(root, gl, cfg, jsonOut, mdOut)
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -26,6 +26,19 @@ var htmlSrcRegex = regexp.MustCompile(`(?i)src\s*=\s*"([^"]+)"|src\s*=\s*'([^']+
|
||||
// Strict hostname validation: labels 1-63 chars, alnum & hyphen, not start/end hyphen, at least one dot, simple TLD
|
||||
var hostnameRegex = regexp.MustCompile(`^(?i)([a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)+$`)
|
||||
|
||||
// isDebugEnv reports whether the environment requests debug output: either
// SLINKY_DEBUG is "1", ACTIONS_STEP_DEBUG is "true" (case-insensitive), or
// RUNNER_DEBUG is "1".
func isDebugEnv() bool {
	switch {
	case os.Getenv("SLINKY_DEBUG") == "1":
		return true
	case strings.EqualFold(os.Getenv("ACTIONS_STEP_DEBUG"), "true"):
		return true
	default:
		return os.Getenv("RUNNER_DEBUG") == "1"
	}
}
|
||||
|
||||
// CollectURLs walks the directory tree rooted at rootPath and collects URLs found in
|
||||
// text-based files matching any of the provided glob patterns (doublestar ** supported).
|
||||
// If globs is empty, all files are considered. Respects .gitignore if present and respectGitignore=true.
|
||||
@@ -73,7 +86,9 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
|
||||
|
||||
// Walk the filesystem
|
||||
walkFn := func(path string, d os.DirEntry, err error) error {
|
||||
fmt.Printf("::debug:: Walking path: %s\n", path)
|
||||
if isDebugEnv() {
|
||||
fmt.Printf("::debug:: Walking path: %s\n", path)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil
|
||||
@@ -108,7 +123,9 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
|
||||
}
|
||||
|
||||
// Debug: announce file being parsed; GitHub shows ::debug only in debug runs
|
||||
fmt.Printf("::debug:: Scanned File: %s\n", rel)
|
||||
if isDebugEnv() {
|
||||
fmt.Printf("::debug:: Scanned File: %s\n", rel)
|
||||
}
|
||||
|
||||
f, ferr := os.Open(path)
|
||||
if ferr != nil {
|
||||
@@ -170,6 +187,142 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// CollectURLsProgress is like CollectURLs but invokes onFile(relPath) for each included file.
|
||||
func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool, onFile func(string)) (map[string][]string, error) {
|
||||
if strings.TrimSpace(rootPath) == "" {
|
||||
rootPath = "."
|
||||
}
|
||||
cleanRoot := filepath.Clean(rootPath)
|
||||
|
||||
st, _ := os.Stat(cleanRoot)
|
||||
isFileRoot := st != nil && !st.IsDir()
|
||||
|
||||
var ign *ignore.GitIgnore
|
||||
if !isFileRoot && respectGitignore {
|
||||
ign = loadGitIgnore(cleanRoot)
|
||||
}
|
||||
|
||||
var patterns []string
|
||||
for _, g := range globs {
|
||||
g = strings.TrimSpace(g)
|
||||
if g == "" {
|
||||
continue
|
||||
}
|
||||
patterns = append(patterns, g)
|
||||
}
|
||||
|
||||
shouldInclude := func(rel string) bool {
|
||||
if len(patterns) == 0 {
|
||||
return true
|
||||
}
|
||||
for _, p := range patterns {
|
||||
ok, _ := doublestar.PathMatch(p, rel)
|
||||
if ok {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
urlToFiles := make(map[string]map[string]struct{})
|
||||
|
||||
// 2 MiB max file size to avoid huge/binary files
|
||||
const maxSize = 2 * 1024 * 1024
|
||||
|
||||
walkFn := func(path string, d os.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
rel, rerr := filepath.Rel(cleanRoot, path)
|
||||
if rerr != nil {
|
||||
rel = path
|
||||
}
|
||||
rel = filepath.ToSlash(rel)
|
||||
if d.IsDir() {
|
||||
base := filepath.Base(path)
|
||||
if base == ".git" {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if ign != nil && ign.MatchesPath(rel) {
|
||||
return nil
|
||||
}
|
||||
info, ierr := d.Info()
|
||||
if ierr != nil {
|
||||
return nil
|
||||
}
|
||||
if info.Size() > maxSize {
|
||||
return nil
|
||||
}
|
||||
if isFileRoot && rel == "." {
|
||||
rel = filepath.ToSlash(filepath.Base(path))
|
||||
}
|
||||
if !shouldInclude(rel) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if onFile != nil {
|
||||
onFile(rel)
|
||||
}
|
||||
|
||||
f, ferr := os.Open(path)
|
||||
if ferr != nil {
|
||||
return nil
|
||||
}
|
||||
defer f.Close()
|
||||
br := bufio.NewReader(f)
|
||||
var b strings.Builder
|
||||
read := int64(0)
|
||||
for {
|
||||
chunk, cerr := br.ReadString('\n')
|
||||
b.WriteString(chunk)
|
||||
read += int64(len(chunk))
|
||||
if cerr == io.EOF || read > maxSize {
|
||||
break
|
||||
}
|
||||
if cerr != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
content := b.String()
|
||||
if strings.IndexByte(content, '\x00') >= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
candidates := extractCandidates(content)
|
||||
if len(candidates) == 0 {
|
||||
return nil
|
||||
}
|
||||
for _, raw := range candidates {
|
||||
u := sanitizeURLToken(raw)
|
||||
if u == "" {
|
||||
continue
|
||||
}
|
||||
fileSet, ok := urlToFiles[u]
|
||||
if !ok {
|
||||
fileSet = make(map[string]struct{})
|
||||
urlToFiles[u] = fileSet
|
||||
}
|
||||
fileSet[rel] = struct{}{}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
_ = filepath.WalkDir(cleanRoot, walkFn)
|
||||
|
||||
result := make(map[string][]string, len(urlToFiles))
|
||||
for u, files := range urlToFiles {
|
||||
var list []string
|
||||
for fp := range files {
|
||||
list = append(list, fp)
|
||||
}
|
||||
sort.Strings(list)
|
||||
result[u] = list
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func sanitizeURLToken(s string) string {
|
||||
s = strings.TrimSpace(s)
|
||||
// Strip surrounding angle brackets or quotes
|
||||
|
||||
@@ -8,3 +8,7 @@ import (
|
||||
func fsCollect(root string, globs []string) (map[string][]string, error) {
|
||||
return fsurls.CollectURLs(root, globs, true)
|
||||
}
|
||||
|
||||
func fsCollectProgress(root string, globs []string, onFile func(string)) (map[string][]string, error) {
|
||||
return fsurls.CollectURLsProgress(root, globs, true, onFile)
|
||||
}
|
||||
|
||||
@@ -25,6 +25,8 @@ type crawlDoneMsg struct{}
|
||||
type statsMsg struct{ s web.Stats }
|
||||
type tickMsg struct{ t time.Time }
|
||||
|
||||
// fileScannedMsg is a message type carrying a single string field, rel.
// NOTE(review): presumably the relative path of a file reported during
// scanning — confirm against the code that sends this message.
type fileScannedMsg struct{ rel string }
|
||||
|
||||
type model struct {
|
||||
rootPath string
|
||||
cfg web.Config
|
||||
@@ -48,14 +50,15 @@ type model struct {
|
||||
ok int
|
||||
fail int
|
||||
|
||||
pending int
|
||||
processed int
|
||||
|
||||
pending int
|
||||
processed int
|
||||
lastProcessed int
|
||||
rps float64
|
||||
peakRPS float64
|
||||
lowRPS float64
|
||||
|
||||
filesScanned int
|
||||
|
||||
allResults []web.Result
|
||||
jsonPath string
|
||||
mdPath string
|
||||
@@ -83,7 +86,12 @@ func (m *model) Init() tea.Cmd {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
go func() {
|
||||
defer cancel()
|
||||
urlsMap, _ := fsCollect(m.rootPath, m.globs)
|
||||
urlsMap, _ := fsCollectProgress(m.rootPath, m.globs, func(rel string) {
|
||||
m.filesScanned++
|
||||
// Emit a short event line per file to show activity
|
||||
m.lines = append(m.lines, fmt.Sprintf("📄 %s", rel))
|
||||
m.refreshViewport()
|
||||
})
|
||||
var urls []string
|
||||
for u := range urlsMap {
|
||||
urls = append(urls, u)
|
||||
@@ -280,6 +288,7 @@ func (m *model) View() string {
|
||||
fmt.Sprintf("Duration: %s", dur.Truncate(time.Millisecond)),
|
||||
fmt.Sprintf("Processed: %d OK:%d Fail:%d", m.processed, m.ok, m.fail),
|
||||
fmt.Sprintf("Rates: avg %.1f/s peak %.1f/s low %.1f/s", avg, m.peakRPS, m.lowRPS),
|
||||
fmt.Sprintf("Files scanned: %d", m.filesScanned),
|
||||
}
|
||||
if m.jsonPath != "" {
|
||||
summary = append(summary, fmt.Sprintf("JSON: %s", m.jsonPath))
|
||||
@@ -297,7 +306,7 @@ func (m *model) View() string {
|
||||
percent = float64(m.processed) / float64(totalWork)
|
||||
}
|
||||
progressLine := m.prog.ViewAs(percent)
|
||||
stats := fmt.Sprintf("%s total:%d ok:%d fail:%d pending:%d processed:%d rps:%.1f/s", m.spin.View(), m.total, m.ok, m.fail, m.pending, m.processed, m.rps)
|
||||
stats := fmt.Sprintf("%s total:%d ok:%d fail:%d pending:%d processed:%d rps:%.1f/s files:%d", m.spin.View(), m.total, m.ok, m.fail, m.pending, m.processed, m.rps, m.filesScanned)
|
||||
body := m.vp.View()
|
||||
footer := lipgloss.NewStyle().Faint(true).Render("Controls: [q] quit [f] toggle fails")
|
||||
container := lipgloss.NewStyle().Padding(1)
|
||||
|
||||
Reference in New Issue
Block a user