diff --git a/README.md b/README.md index ed52c90..316649d 100644 --- a/README.md +++ b/README.md @@ -70,3 +70,28 @@ Notes: - Skips likely binary files and files > 2 MiB. - Uses a browser-like User-Agent to reduce false negatives. +### .slinkignore + +Place a `.slinkignore` file at the repository root to exclude paths and/or specific URLs from scanning and reporting. The format is JSON with two optional arrays: + +```json +{ + "ignorePaths": [ + "**/vendor/**", + "**/*.bak" + ], + "ignoreURLs": [ + "https://example.com/this/path/does/not/exist", + "*localhost:*", + "*internal.example.com*" + ] +} +``` + +- ignorePaths: gitignore-style patterns evaluated against repository-relative paths (`**` is supported). +- ignoreURLs: patterns applied to the full URL string. Supports exact matches, substring matches, and doublestar-style wildcard matches. + +Examples: +- Ignore generated folders: `"**/dist/**"`, backups: `"**/*.bak"`. +- Ignore known example or placeholder links: `"*example.com*"`, `"https://example.com/foo"`. + diff --git a/cmd/check.go b/cmd/check.go index af40c8c..f8ff310 100644 --- a/cmd/check.go +++ b/cmd/check.go @@ -178,6 +178,8 @@ func init() { go web.CheckURLs(ctx, urls, urlToFiles, results, nil, cfg) var total, okCount, failCount int + totalURLs := len(urls) + lastPctLogged := 0 var failures []SerializableResult var failedResults []web.Result @@ -188,6 +190,14 @@ func init() { } else { failCount++ } + // Progress notices every 5% + if totalURLs > 0 { + pct := (total * 100) / totalURLs + for pct >= lastPctLogged+5 && lastPctLogged < 100 { + lastPctLogged += 5 + fmt.Printf("::notice:: Checking progress: %d%% (%d/%d)\n", lastPctLogged, total, totalURLs) + } + } // Emit GitHub Actions debug log for each URL. // These lines appear only when step debug logging is enabled via the // repository/organization secret ACTIONS_STEP_DEBUG=true. 
diff --git a/internal/fsurls/fsurls.go b/internal/fsurls/fsurls.go index 68ac136..9f971c3 100644 --- a/internal/fsurls/fsurls.go +++ b/internal/fsurls/fsurls.go @@ -2,6 +2,7 @@ package fsurls import ( "bufio" + "encoding/json" "fmt" "io" "net/url" @@ -60,6 +61,8 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st if !isFileRoot && respectGitignore { ign = loadGitIgnore(cleanRoot) } + // Load optional .slinkignore config + slPathIgnore, slURLPatterns := loadSlinkyIgnore(cleanRoot) var patterns []string for _, g := range globs { @@ -109,7 +112,7 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st } return nil } - if ign != nil && ign.MatchesPath(rel) { + if (ign != nil && ign.MatchesPath(rel)) || (slPathIgnore != nil && slPathIgnore.MatchesPath(rel)) { return nil } info, ierr := d.Info() @@ -166,6 +169,9 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st if u == "" { continue } + if isURLIgnored(u, slURLPatterns) { + continue + } fileSet, ok := urlToFiles[u] if !ok { fileSet = make(map[string]struct{}) @@ -205,6 +211,7 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool, if !isFileRoot && respectGitignore { ign = loadGitIgnore(cleanRoot) } + slPathIgnore, slURLPatterns := loadSlinkyIgnore(cleanRoot) var patterns []string for _, g := range globs { @@ -249,7 +256,7 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool, } return nil } - if ign != nil && ign.MatchesPath(rel) { + if (ign != nil && ign.MatchesPath(rel)) || (slPathIgnore != nil && slPathIgnore.MatchesPath(rel)) { return nil } info, ierr := d.Info() @@ -303,6 +310,9 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool, if u == "" { continue } + if isURLIgnored(u, slURLPatterns) { + continue + } fileSet, ok := urlToFiles[u] if !ok { fileSet = make(map[string]struct{}) @@ -558,3 +568,53 @@ func loadGitIgnore(root string) 
*ignore.GitIgnore { } return ignore.CompileIgnoreLines(lines...) } + +// .slinkignore support +type slinkyIgnore struct { + IgnorePaths []string `json:"ignorePaths"` + IgnoreURLs []string `json:"ignoreURLs"` +} + +func loadSlinkyIgnore(root string) (*ignore.GitIgnore, []string) { + cfgPath := filepath.Join(root, ".slinkignore") + b, err := os.ReadFile(cfgPath) + if err != nil || len(b) == 0 { + return nil, nil + } + var cfg slinkyIgnore + if jerr := json.Unmarshal(b, &cfg); jerr != nil { + return nil, nil + } + var ign *ignore.GitIgnore + if len(cfg.IgnorePaths) > 0 { + ign = ignore.CompileIgnoreLines(cfg.IgnorePaths...) + } + var urlPatterns []string + for _, p := range cfg.IgnoreURLs { + p = strings.TrimSpace(p) + if p != "" { + urlPatterns = append(urlPatterns, p) + } + } + return ign, urlPatterns +} + +func isURLIgnored(u string, patterns []string) bool { + if len(patterns) == 0 { + return false + } + for _, p := range patterns { + if p == "" { + continue + } + // simple contains or wildcard suffix/prefix match + if p == u || strings.Contains(u, p) { + return true + } + // doublestar path-like match for full URL string + if ok, _ := doublestar.PathMatch(p, u); ok { + return true + } + } + return false +} diff --git a/internal/fsurls/fsurls_test.go b/internal/fsurls/fsurls_test.go index 3006c10..a0221e8 100644 --- a/internal/fsurls/fsurls_test.go +++ b/internal/fsurls/fsurls_test.go @@ -39,4 +39,18 @@ func TestCollectURLs_FromTestFiles(t *testing.T) { if len(srcs) == 0 { t.Fatalf("expected sources for https://example.com, got none") } + + // Verify .slinkignore URL ignores + if _, ok := urls["https://example.com/this/path/does/not/exist"]; ok { + t.Fatalf("expected URL ignored by .slinkignore to be absent") + } + + // Verify .slinkignore path ignores: file under ignore-me should not contribute + for u, files := range urls { + for _, f := range files { + if strings.Contains(f, "ignore-me/") { + t.Fatalf("file %s should have been ignored via .slinkignore, but 
contributed to URL %s", f, u) + } + } + } } diff --git a/internal/report/markdown.go b/internal/report/markdown.go index 47f6493..0d1b6f3 100644 --- a/internal/report/markdown.go +++ b/internal/report/markdown.go @@ -151,7 +151,6 @@ func WriteMarkdown(path string, results []web.Result, s Summary) (string, error) buf.WriteString(fmt.Sprintf(" - [%s](./%s)\n", escapeMD(fn), escapeLinkPath(fn))) } } - buf.WriteString("\n") } f, err := os.Create(path) diff --git a/testdata/.slinkignore b/testdata/.slinkignore new file mode 100644 index 0000000..18019b0 --- /dev/null +++ b/testdata/.slinkignore @@ -0,0 +1,10 @@ +{ + "ignorePaths": [ + "**/ignore-me/**", + "**/*.bak" + ], + "ignoreURLs": [ + "https://example.com/this/path/does/not/exist", + "*notarealwebsite.com*" + ] +} diff --git a/testdata/ignore-me/ignored.md b/testdata/ignore-me/ignored.md new file mode 100644 index 0000000..23c9be3 --- /dev/null +++ b/testdata/ignore-me/ignored.md @@ -0,0 +1 @@ +This file should be ignored entirely. URL here should not be collected: https://example.com/ignored