Add .slinkignore support for URL and path exclusions

Introduce a new .slinkignore file format to allow users to specify paths and URLs to ignore during scanning. Update the CollectURLs and CollectURLsProgress functions to respect these ignore rules. Add tests to verify the functionality of the .slinkignore file, ensuring that specified paths and URLs are excluded from results. Update README.md to document the new feature and its usage.
This commit is contained in:
Luke Hagar
2025-09-12 20:56:45 +00:00
parent ae5fcf868c
commit 54d7797089
7 changed files with 122 additions and 3 deletions

View File

@@ -2,6 +2,7 @@ package fsurls
import (
"bufio"
"encoding/json"
"fmt"
"io"
"net/url"
@@ -60,6 +61,8 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
if !isFileRoot && respectGitignore {
ign = loadGitIgnore(cleanRoot)
}
// Load optional .slinkignore config
slPathIgnore, slURLPatterns := loadSlinkyIgnore(cleanRoot)
var patterns []string
for _, g := range globs {
@@ -109,7 +112,7 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
}
return nil
}
if ign != nil && ign.MatchesPath(rel) {
if (ign != nil && ign.MatchesPath(rel)) || (slPathIgnore != nil && slPathIgnore.MatchesPath(rel)) {
return nil
}
info, ierr := d.Info()
@@ -166,6 +169,9 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
if u == "" {
continue
}
if isURLIgnored(u, slURLPatterns) {
continue
}
fileSet, ok := urlToFiles[u]
if !ok {
fileSet = make(map[string]struct{})
@@ -205,6 +211,7 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool,
if !isFileRoot && respectGitignore {
ign = loadGitIgnore(cleanRoot)
}
slPathIgnore, slURLPatterns := loadSlinkyIgnore(cleanRoot)
var patterns []string
for _, g := range globs {
@@ -249,7 +256,7 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool,
}
return nil
}
if ign != nil && ign.MatchesPath(rel) {
if (ign != nil && ign.MatchesPath(rel)) || (slPathIgnore != nil && slPathIgnore.MatchesPath(rel)) {
return nil
}
info, ierr := d.Info()
@@ -303,6 +310,9 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool,
if u == "" {
continue
}
if isURLIgnored(u, slURLPatterns) {
continue
}
fileSet, ok := urlToFiles[u]
if !ok {
fileSet = make(map[string]struct{})
@@ -558,3 +568,53 @@ func loadGitIgnore(root string) *ignore.GitIgnore {
}
return ignore.CompileIgnoreLines(lines...)
}
// slinkyIgnore mirrors the JSON document stored in a .slinkignore file.
type slinkyIgnore struct {
	// IgnorePaths holds the entries of the "ignorePaths" JSON array.
	IgnorePaths []string `json:"ignorePaths"`
	// IgnoreURLs holds the entries of the "ignoreURLs" JSON array.
	IgnoreURLs []string `json:"ignoreURLs"`
}
// loadSlinkyIgnore reads the optional .slinkignore file found directly inside
// root and decodes it as JSON.
//
// The first result is a matcher built with ignore.CompileIgnoreLines from the
// "ignorePaths" entries, or nil when that list is empty. The second result is
// the "ignoreURLs" entries with surrounding whitespace trimmed and blank
// entries dropped.
//
// The file is optional and handled on a best-effort basis: if it cannot be
// read, is empty, or does not contain valid JSON, (nil, nil) is returned and
// no error is reported.
func loadSlinkyIgnore(root string) (*ignore.GitIgnore, []string) {
	raw, readErr := os.ReadFile(filepath.Join(root, ".slinkignore"))
	if readErr != nil {
		return nil, nil
	}
	if len(raw) == 0 {
		return nil, nil
	}

	var parsed slinkyIgnore
	if json.Unmarshal(raw, &parsed) != nil {
		return nil, nil
	}

	// Leave the matcher nil when no path rules were configured so callers can
	// skip path matching with a simple nil check.
	var pathMatcher *ignore.GitIgnore
	if len(parsed.IgnorePaths) != 0 {
		pathMatcher = ignore.CompileIgnoreLines(parsed.IgnorePaths...)
	}

	var urls []string
	for _, entry := range parsed.IgnoreURLs {
		if trimmed := strings.TrimSpace(entry); trimmed != "" {
			urls = append(urls, trimmed)
		}
	}
	return pathMatcher, urls
}
// isURLIgnored reports whether u is matched by any of the ignore patterns.
//
// A pattern matches when it occurs anywhere inside u as a plain substring, or
// when it matches the whole of u as a doublestar glob. Empty patterns are
// skipped, and a nil or empty pattern list never matches.
func isURLIgnored(u string, patterns []string) bool {
	for _, p := range patterns {
		if p == "" {
			continue
		}
		// Plain substring test; this also covers p being exactly equal to u.
		if strings.Contains(u, p) {
			return true
		}
		// Glob test against the full URL string. URLs are always
		// slash-separated, so use Match rather than PathMatch: PathMatch
		// splits on the operating system's path separator, which would change
		// the result on platforms where that separator is not '/'.
		// The error is intentionally dropped: a pattern that is not a valid
		// glob simply does not match as one.
		if ok, _ := doublestar.Match(p, u); ok {
			return true
		}
	}
	return false
}

View File

@@ -39,4 +39,18 @@ func TestCollectURLs_FromTestFiles(t *testing.T) {
if len(srcs) == 0 {
t.Fatalf("expected sources for https://example.com, got none")
}
// Verify .slinkignore URL ignores
if _, ok := urls["https://example.com/this/path/does/not/exist"]; ok {
t.Fatalf("expected URL ignored by .slinkignore to be absent")
}
// Verify .slinkignore path ignores: file under ignore-me should not contribute
for u, files := range urls {
for _, f := range files {
if strings.Contains(f, "ignore-me/") {
t.Fatalf("file %s should have been ignored via .slinkignore, but contributed to URL %s", f, u)
}
}
}
}

View File

@@ -151,7 +151,6 @@ func WriteMarkdown(path string, results []web.Result, s Summary) (string, error)
buf.WriteString(fmt.Sprintf(" - [%s](./%s)\n", escapeMD(fn), escapeLinkPath(fn)))
}
}
buf.WriteString("\n")
}
f, err := os.Create(path)