Add .slinkignore support for URL and path exclusions

Introduce a new .slinkignore file format to allow users to specify paths and URLs to ignore during scanning. Update the CollectURLs and CollectURLsProgress functions to respect these ignore rules. Add tests to verify the functionality of the .slinkignore file, ensuring that specified paths and URLs are excluded from results. Update README.md to document the new feature and its usage.
This commit is contained in:
Luke Hagar
2025-09-12 20:56:45 +00:00
parent ae5fcf868c
commit 54d7797089
7 changed files with 122 additions and 3 deletions

View File

@@ -70,3 +70,28 @@ Notes:
- Skips likely binary files and files > 2 MiB. - Skips likely binary files and files > 2 MiB.
- Uses a browser-like User-Agent to reduce false negatives. - Uses a browser-like User-Agent to reduce false negatives.
### .slinkignore
Place a `.slinkignore` file at the repository root to exclude paths and/or specific URLs from scanning and reporting. The format is JSON with two optional arrays:
```json
{
"ignorePaths": [
"**/vendor/**",
"**/*.bak"
],
"ignoreURLs": [
"https://example.com/this/path/does/not/exist",
"*localhost:*",
"*internal.example.com*"
]
}
```
- ignorePaths: gitignore-style patterns evaluated against repository-relative paths (supports `**` recursive globs).
- ignoreURLs: patterns applied to the full URL string. Supports exact matches, substring contains, and doublestar-style wildcard matches.
Examples:
- Ignore generated folders: `"**/dist/**"`, backups: `"**/*.bak"`.
- Ignore known example or placeholder links: `"*example.com*"`, `"https://example.com/foo"`.

View File

@@ -178,6 +178,8 @@ func init() {
go web.CheckURLs(ctx, urls, urlToFiles, results, nil, cfg) go web.CheckURLs(ctx, urls, urlToFiles, results, nil, cfg)
var total, okCount, failCount int var total, okCount, failCount int
totalURLs := len(urls)
lastPctLogged := 0
var failures []SerializableResult var failures []SerializableResult
var failedResults []web.Result var failedResults []web.Result
@@ -188,6 +190,14 @@ func init() {
} else { } else {
failCount++ failCount++
} }
// Progress notices every 5%
if totalURLs > 0 {
pct := (total * 100) / totalURLs
for pct >= lastPctLogged+5 && lastPctLogged < 100 {
lastPctLogged += 5
fmt.Printf("::notice:: Checking progress: %d%% (%d/%d)\n", lastPctLogged, total, totalURLs)
}
}
// Emit GitHub Actions debug log for each URL. // Emit GitHub Actions debug log for each URL.
// These lines appear only when step debug logging is enabled via the // These lines appear only when step debug logging is enabled via the
// repository/organization secret ACTIONS_STEP_DEBUG=true. // repository/organization secret ACTIONS_STEP_DEBUG=true.

View File

@@ -2,6 +2,7 @@ package fsurls
import ( import (
"bufio" "bufio"
"encoding/json"
"fmt" "fmt"
"io" "io"
"net/url" "net/url"
@@ -60,6 +61,8 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
if !isFileRoot && respectGitignore { if !isFileRoot && respectGitignore {
ign = loadGitIgnore(cleanRoot) ign = loadGitIgnore(cleanRoot)
} }
// Load optional .slinkignore config
slPathIgnore, slURLPatterns := loadSlinkyIgnore(cleanRoot)
var patterns []string var patterns []string
for _, g := range globs { for _, g := range globs {
@@ -109,7 +112,7 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
} }
return nil return nil
} }
if ign != nil && ign.MatchesPath(rel) { if (ign != nil && ign.MatchesPath(rel)) || (slPathIgnore != nil && slPathIgnore.MatchesPath(rel)) {
return nil return nil
} }
info, ierr := d.Info() info, ierr := d.Info()
@@ -166,6 +169,9 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
if u == "" { if u == "" {
continue continue
} }
if isURLIgnored(u, slURLPatterns) {
continue
}
fileSet, ok := urlToFiles[u] fileSet, ok := urlToFiles[u]
if !ok { if !ok {
fileSet = make(map[string]struct{}) fileSet = make(map[string]struct{})
@@ -205,6 +211,7 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool,
if !isFileRoot && respectGitignore { if !isFileRoot && respectGitignore {
ign = loadGitIgnore(cleanRoot) ign = loadGitIgnore(cleanRoot)
} }
slPathIgnore, slURLPatterns := loadSlinkyIgnore(cleanRoot)
var patterns []string var patterns []string
for _, g := range globs { for _, g := range globs {
@@ -249,7 +256,7 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool,
} }
return nil return nil
} }
if ign != nil && ign.MatchesPath(rel) { if (ign != nil && ign.MatchesPath(rel)) || (slPathIgnore != nil && slPathIgnore.MatchesPath(rel)) {
return nil return nil
} }
info, ierr := d.Info() info, ierr := d.Info()
@@ -303,6 +310,9 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool,
if u == "" { if u == "" {
continue continue
} }
if isURLIgnored(u, slURLPatterns) {
continue
}
fileSet, ok := urlToFiles[u] fileSet, ok := urlToFiles[u]
if !ok { if !ok {
fileSet = make(map[string]struct{}) fileSet = make(map[string]struct{})
@@ -558,3 +568,53 @@ func loadGitIgnore(root string) *ignore.GitIgnore {
} }
return ignore.CompileIgnoreLines(lines...) return ignore.CompileIgnoreLines(lines...)
} }
// .slinkignore support.
//
// slinkyIgnore mirrors the JSON schema of an optional .slinkignore file at
// the repository root; both arrays are optional and may be empty or absent.
type slinkyIgnore struct {
	// IgnorePaths holds gitignore-style patterns matched against
	// repository-relative file paths.
	IgnorePaths []string `json:"ignorePaths"`
	// IgnoreURLs holds patterns matched against full URL strings
	// (exact, substring, or doublestar-style wildcard).
	IgnoreURLs []string `json:"ignoreURLs"`
}
// loadSlinkyIgnore reads the optional .slinkignore file under root and
// returns a compiled path-ignore matcher together with the trimmed URL
// ignore patterns. A missing, empty, or malformed file is treated as
// "nothing ignored" and yields (nil, nil) so scanning stays best-effort.
func loadSlinkyIgnore(root string) (*ignore.GitIgnore, []string) {
	data, readErr := os.ReadFile(filepath.Join(root, ".slinkignore"))
	if readErr != nil || len(data) == 0 {
		return nil, nil
	}
	var cfg slinkyIgnore
	if err := json.Unmarshal(data, &cfg); err != nil {
		// Malformed JSON: silently fall back to "no ignores".
		return nil, nil
	}
	var pathIgnore *ignore.GitIgnore
	if len(cfg.IgnorePaths) > 0 {
		pathIgnore = ignore.CompileIgnoreLines(cfg.IgnorePaths...)
	}
	// Drop blank entries so downstream matching never sees empty patterns.
	var urlPatterns []string
	for _, raw := range cfg.IgnoreURLs {
		if trimmed := strings.TrimSpace(raw); trimmed != "" {
			urlPatterns = append(urlPatterns, trimmed)
		}
	}
	return pathIgnore, urlPatterns
}
// isURLIgnored reports whether URL u matches any pattern from the
// .slinkignore ignoreURLs list. A pattern matches when it occurs as a
// substring of u (which also covers exact equality) or when it matches u
// as a doublestar-style glob (e.g. "*localhost:*"). Empty patterns never
// match. An empty or nil pattern list ignores nothing.
func isURLIgnored(u string, patterns []string) bool {
	if len(patterns) == 0 {
		return false
	}
	for _, p := range patterns {
		if p == "" {
			continue
		}
		// Substring match subsumes the exact-match case, so a separate
		// p == u comparison is unnecessary.
		if strings.Contains(u, p) {
			return true
		}
		// Wildcard match against the full URL string. An invalid glob
		// simply fails to match; the error is intentionally discarded
		// to keep ignore handling best-effort.
		if ok, _ := doublestar.PathMatch(p, u); ok {
			return true
		}
	}
	return false
}

View File

@@ -39,4 +39,18 @@ func TestCollectURLs_FromTestFiles(t *testing.T) {
if len(srcs) == 0 { if len(srcs) == 0 {
t.Fatalf("expected sources for https://example.com, got none") t.Fatalf("expected sources for https://example.com, got none")
} }
// Verify .slinkignore URL ignores
if _, ok := urls["https://example.com/this/path/does/not/exist"]; ok {
t.Fatalf("expected URL ignored by .slinkignore to be absent")
}
// Verify .slinkignore path ignores: file under ignore-me should not contribute
for u, files := range urls {
for _, f := range files {
if strings.Contains(f, "ignore-me/") {
t.Fatalf("file %s should have been ignored via .slinkignore, but contributed to URL %s", f, u)
}
}
}
} }

View File

@@ -151,7 +151,6 @@ func WriteMarkdown(path string, results []web.Result, s Summary) (string, error)
buf.WriteString(fmt.Sprintf(" - [%s](./%s)\n", escapeMD(fn), escapeLinkPath(fn))) buf.WriteString(fmt.Sprintf(" - [%s](./%s)\n", escapeMD(fn), escapeLinkPath(fn)))
} }
} }
buf.WriteString("\n")
} }
f, err := os.Create(path) f, err := os.Create(path)

10
testdata/.slinkignore vendored Normal file
View File

@@ -0,0 +1,10 @@
{
"ignorePaths": [
"**/ignore-me/**",
"**/*.bak"
],
"ignoreURLs": [
"https://example.com/this/path/does/not/exist",
"*notarealwebsite.com*"
]
}

1
testdata/ignore-me/ignored.md vendored Normal file
View File

@@ -0,0 +1 @@
This file should be ignored entirely. URL here should not be collected: https://example.com/ignored