From 54d7797089bcafc3fd23ba546d6d0d88e0132b25 Mon Sep 17 00:00:00 2001 From: Luke Hagar Date: Fri, 12 Sep 2025 20:56:45 +0000 Subject: [PATCH] Add .slinkignore support for URL and path exclusions Introduce a new .slinkignore file format to allow users to specify paths and URLs to ignore during scanning. Update the CollectURLs and CollectURLsProgress functions to respect these ignore rules. Add tests to verify the functionality of the .slinkignore file, ensuring that specified paths and URLs are excluded from results. Update README.md to document the new feature and its usage. --- README.md | 25 +++++++++++++ cmd/check.go | 10 ++++++ internal/fsurls/fsurls.go | 64 ++++++++++++++++++++++++++++++++-- internal/fsurls/fsurls_test.go | 14 ++++++++ internal/report/markdown.go | 1 - testdata/.slinkignore | 10 ++++++ testdata/ignore-me/ignored.md | 1 + 7 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 testdata/.slinkignore create mode 100644 testdata/ignore-me/ignored.md diff --git a/README.md b/README.md index ed52c90..316649d 100644 --- a/README.md +++ b/README.md @@ -70,3 +70,28 @@ Notes: - Skips likely binary files and files > 2 MiB. - Uses a browser-like User-Agent to reduce false negatives. +### .slinkignore + +Place a `.slinkignore` file at the repository root to exclude paths and/or specific URLs from scanning and reporting. The format is JSON with two optional arrays: + +```json +{ + "ignorePaths": [ + "**/vendor/**", + "**/*.bak" + ], + "ignoreURLs": [ + "https://example.com/this/path/does/not/exist", + "*localhost:*", + "*internal.example.com*" + ] +} +``` + +- ignorePaths: gitignore-style patterns evaluated against repository-relative paths (uses doublestar `**`). +- ignoreURLs: patterns applied to the full URL string. Supports exact matches, substring contains, and doublestar-style wildcard matches. + +Examples: +- Ignore generated folders: `"**/dist/**"`, backups: `"**/*.bak"`. +- Ignore known example or placeholder links: `"*example.com*"`, `"https://example.com/foo"`. + diff --git a/cmd/check.go b/cmd/check.go index af40c8c..f8ff310 100644 --- a/cmd/check.go +++ b/cmd/check.go @@ -178,6 +178,8 @@ func init() { go web.CheckURLs(ctx, urls, urlToFiles, results, nil, cfg) var total, okCount, failCount int + totalURLs := len(urls) + lastPctLogged := 0 var failures []SerializableResult var failedResults []web.Result @@ -188,6 +190,14 @@ func init() { } else { failCount++ } + // Progress notices every 5% + if totalURLs > 0 { + pct := (total * 100) / totalURLs + for pct >= lastPctLogged+5 && lastPctLogged < 100 { + lastPctLogged += 5 + fmt.Printf("::notice:: Checking progress: %d%% (%d/%d)\n", lastPctLogged, total, totalURLs) + } + } // Emit GitHub Actions debug log for each URL. // These lines appear only when step debug logging is enabled via the // repository/organization secret ACTIONS_STEP_DEBUG=true. diff --git a/internal/fsurls/fsurls.go b/internal/fsurls/fsurls.go index 68ac136..9f971c3 100644 --- a/internal/fsurls/fsurls.go +++ b/internal/fsurls/fsurls.go @@ -2,6 +2,7 @@ package fsurls import ( "bufio" + "encoding/json" "fmt" "io" "net/url" @@ -60,6 +61,8 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st if !isFileRoot && respectGitignore { ign = loadGitIgnore(cleanRoot) } + // Load optional .slinkignore config + slPathIgnore, slURLPatterns := loadSlinkyIgnore(cleanRoot) var patterns []string for _, g := range globs { @@ -109,7 +112,7 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st } return nil } - if ign != nil && ign.MatchesPath(rel) { + if (ign != nil && ign.MatchesPath(rel)) || (slPathIgnore != nil && slPathIgnore.MatchesPath(rel)) { return nil } info, ierr := d.Info() @@ -166,6 +169,9 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st if u == "" { continue } + if isURLIgnored(u, slURLPatterns) { + continue + } fileSet, ok := urlToFiles[u] if !ok { fileSet = make(map[string]struct{}) @@ -205,6 +211,7 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool, if !isFileRoot && respectGitignore { ign = loadGitIgnore(cleanRoot) } + slPathIgnore, slURLPatterns := loadSlinkyIgnore(cleanRoot) var patterns []string for _, g := range globs { @@ -249,7 +256,7 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool, } return nil } - if ign != nil && ign.MatchesPath(rel) { + if (ign != nil && ign.MatchesPath(rel)) || (slPathIgnore != nil && slPathIgnore.MatchesPath(rel)) { return nil } info, ierr := d.Info() @@ -303,6 +310,9 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool, if u == "" { continue } + if isURLIgnored(u, slURLPatterns) { + continue + } fileSet, ok := urlToFiles[u] if !ok { fileSet = make(map[string]struct{}) @@ -558,3 +568,53 @@ func loadGitIgnore(root string) *ignore.GitIgnore { } return ignore.CompileIgnoreLines(lines...) } + +// .slinkignore support +type slinkyIgnore struct { + IgnorePaths []string `json:"ignorePaths"` + IgnoreURLs []string `json:"ignoreURLs"` +} + +func loadSlinkyIgnore(root string) (*ignore.GitIgnore, []string) { + cfgPath := filepath.Join(root, ".slinkignore") + b, err := os.ReadFile(cfgPath) + if err != nil || len(b) == 0 { + return nil, nil + } + var cfg slinkyIgnore + if jerr := json.Unmarshal(b, &cfg); jerr != nil { + return nil, nil + } + var ign *ignore.GitIgnore + if len(cfg.IgnorePaths) > 0 { + ign = ignore.CompileIgnoreLines(cfg.IgnorePaths...) + } + var urlPatterns []string + for _, p := range cfg.IgnoreURLs { + p = strings.TrimSpace(p) + if p != "" { + urlPatterns = append(urlPatterns, p) + } + } + return ign, urlPatterns +} + +func isURLIgnored(u string, patterns []string) bool { + if len(patterns) == 0 { + return false + } + for _, p := range patterns { + if p == "" { + continue + } + // simple contains or wildcard suffix/prefix match + if p == u || strings.Contains(u, p) { + return true + } + // doublestar path-like match for full URL string + if ok, _ := doublestar.PathMatch(p, u); ok { + return true + } + } + return false +} diff --git a/internal/fsurls/fsurls_test.go b/internal/fsurls/fsurls_test.go index 3006c10..a0221e8 100644 --- a/internal/fsurls/fsurls_test.go +++ b/internal/fsurls/fsurls_test.go @@ -39,4 +39,18 @@ func TestCollectURLs_FromTestFiles(t *testing.T) { if len(srcs) == 0 { t.Fatalf("expected sources for https://example.com, got none") } + + // Verify .slinkignore URL ignores + if _, ok := urls["https://example.com/this/path/does/not/exist"]; ok { + t.Fatalf("expected URL ignored by .slinkignore to be absent") + } + + // Verify .slinkignore path ignores: file under ignore-me should not contribute + for u, files := range urls { + for _, f := range files { + if strings.Contains(f, "ignore-me/") { + t.Fatalf("file %s should have been ignored via .slinkignore, but contributed to URL %s", f, u) + } + } + } } diff --git a/internal/report/markdown.go b/internal/report/markdown.go index 47f6493..0d1b6f3 100644 --- a/internal/report/markdown.go +++ b/internal/report/markdown.go @@ -151,7 +151,6 @@ func WriteMarkdown(path string, results []web.Result, s Summary) (string, error) buf.WriteString(fmt.Sprintf(" - [%s](./%s)\n", escapeMD(fn), escapeLinkPath(fn))) } } - buf.WriteString("\n") } f, err := os.Create(path) diff --git a/testdata/.slinkignore b/testdata/.slinkignore new file mode 100644 index 0000000..18019b0 --- /dev/null +++ b/testdata/.slinkignore @@ -0,0 +1,10 @@ +{ + "ignorePaths": [ + "**/ignore-me/**", + "**/*.bak" + ], + "ignoreURLs": [ + "https://example.com/this/path/does/not/exist", + "*notarealwebsite.com*" + ] +} diff --git a/testdata/ignore-me/ignored.md b/testdata/ignore-me/ignored.md new file mode 100644 index 0000000..23c9be3 --- /dev/null +++ b/testdata/ignore-me/ignored.md @@ -0,0 +1 @@ +This file should be ignored entirely. URL here should not be collected: https://example.com/ignored