Mirror of https://github.com/LukeHagar/slinky.git
Add .slinkignore support for URL and path exclusions
Introduce a new .slinkignore file format to allow users to specify paths and URLs to ignore during scanning. Update the CollectURLs and CollectURLsProgress functions to respect these ignore rules. Add tests to verify the functionality of the .slinkignore file, ensuring that specified paths and URLs are excluded from results. Update README.md to document the new feature and its usage.
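For orientation, here is a hypothetical caller sketch. The parameter list matches the `CollectURLs` signature visible in the hunks below; the return types (assumed to be `map[string][]string` plus an `error`), the glob pattern, and the `github.com/LukeHagar/slinky/fsurls` import path are inferences, not confirmed by this diff.

```go
package main

import (
	"fmt"

	// Import path assumed from the repository name; adjust to the module's real path.
	"github.com/LukeHagar/slinky/fsurls"
)

func main() {
	// With a .slinkignore at the root of ./testdata, files matching ignorePaths never
	// contribute URLs, and URLs matching ignoreURLs are dropped from the result map.
	// Return types are assumed here: URL -> contributing files, plus an error.
	urls, err := fsurls.CollectURLs("./testdata", []string{"**/*.md"}, true)
	if err != nil {
		panic(err)
	}
	for u, files := range urls {
		fmt.Println(u, files)
	}
}
```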
README.md (+25)
@@ -70,3 +70,28 @@ Notes:
- Skips likely binary files and files > 2 MiB.
- Uses a browser-like User-Agent to reduce false negatives.

### .slinkignore

Place a `.slinkignore` file at the repository root to exclude paths and/or specific URLs from scanning and reporting. The format is JSON with two optional arrays:

```json
{
  "ignorePaths": [
    "**/vendor/**",
    "**/*.bak"
  ],
  "ignoreURLs": [
    "https://example.com/this/path/does/not/exist",
    "*localhost:*",
    "*internal.example.com*"
  ]
}
```

- ignorePaths: gitignore-style patterns evaluated against repository-relative paths (uses doublestar `**`).
- ignoreURLs: patterns applied to the full URL string. Supports exact matches, substring contains, and doublestar-style wildcard matches.

Examples:
- Ignore generated folders: `"**/dist/**"`, backups: `"**/*.bak"`.
- Ignore known example or placeholder links: `"*example.com*"`, `"https://example.com/foo"`.
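To make the ignorePaths semantics above concrete, here is a standalone sketch using the same `CompileIgnoreLines`/`MatchesPath` calls that appear in the diff below. The `github.com/sabhiram/go-gitignore` import path is an assumption; the diff only shows the package alias `ignore`.

```go
package main

import (
	"fmt"

	// Import path assumed; the diff below only shows the package alias "ignore".
	ignore "github.com/sabhiram/go-gitignore"
)

func main() {
	// Patterns taken from the README example above.
	ign := ignore.CompileIgnoreLines("**/vendor/**", "**/*.bak")

	// Evaluated against repository-relative paths, as the README describes.
	fmt.Println(ign.MatchesPath("third_party/vendor/lib/README.md")) // true: under a vendor/ directory
	fmt.Println(ign.MatchesPath("notes/old.bak"))                    // true: *.bak anywhere
	fmt.Println(ign.MatchesPath("docs/index.md"))                    // false: not ignored
}
```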
cmd/check.go (+10)
@@ -178,6 +178,8 @@ func init() {
go web.CheckURLs(ctx, urls, urlToFiles, results, nil, cfg)

var total, okCount, failCount int
totalURLs := len(urls)
lastPctLogged := 0
var failures []SerializableResult
var failedResults []web.Result
@@ -188,6 +190,14 @@ func init() {
} else {
	failCount++
}
// Progress notices every 5%
if totalURLs > 0 {
	pct := (total * 100) / totalURLs
	for pct >= lastPctLogged+5 && lastPctLogged < 100 {
		lastPctLogged += 5
		fmt.Printf("::notice:: Checking progress: %d%% (%d/%d)\n", lastPctLogged, total, totalURLs)
	}
}
// Emit GitHub Actions debug log for each URL.
// These lines appear only when step debug logging is enabled via the
// repository/organization secret ACTIONS_STEP_DEBUG=true.
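The progress notices added above catch up in 5% steps, so a single checked URL can emit several notices when the total is small. A standalone illustration (not project code), using the same arithmetic:

```go
package main

import "fmt"

func main() {
	// Mirrors the catch-up loop in the hunk above: after each checked URL,
	// emit one notice per 5% boundary crossed since the last notice.
	totalURLs := 7
	lastPctLogged := 0
	for total := 1; total <= totalURLs; total++ {
		pct := (total * 100) / totalURLs
		for pct >= lastPctLogged+5 && lastPctLogged < 100 {
			lastPctLogged += 5
			fmt.Printf("::notice:: Checking progress: %d%% (%d/%d)\n", lastPctLogged, total, totalURLs)
		}
	}
}
```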
@@ -2,6 +2,7 @@ package fsurls

import (
	"bufio"
	"encoding/json"
	"fmt"
	"io"
	"net/url"
@@ -60,6 +61,8 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
if !isFileRoot && respectGitignore {
	ign = loadGitIgnore(cleanRoot)
}
// Load optional .slinkignore config
slPathIgnore, slURLPatterns := loadSlinkyIgnore(cleanRoot)

var patterns []string
for _, g := range globs {
@@ -109,7 +112,7 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
}
return nil
}
if ign != nil && ign.MatchesPath(rel) {
if (ign != nil && ign.MatchesPath(rel)) || (slPathIgnore != nil && slPathIgnore.MatchesPath(rel)) {
	return nil
}
info, ierr := d.Info()
@@ -166,6 +169,9 @@ func CollectURLs(rootPath string, globs []string, respectGitignore bool) (map[st
if u == "" {
	continue
}
if isURLIgnored(u, slURLPatterns) {
	continue
}
fileSet, ok := urlToFiles[u]
if !ok {
	fileSet = make(map[string]struct{})
@@ -205,6 +211,7 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool,
if !isFileRoot && respectGitignore {
	ign = loadGitIgnore(cleanRoot)
}
slPathIgnore, slURLPatterns := loadSlinkyIgnore(cleanRoot)

var patterns []string
for _, g := range globs {
@@ -249,7 +256,7 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool,
}
return nil
}
if ign != nil && ign.MatchesPath(rel) {
if (ign != nil && ign.MatchesPath(rel)) || (slPathIgnore != nil && slPathIgnore.MatchesPath(rel)) {
	return nil
}
info, ierr := d.Info()
@@ -303,6 +310,9 @@ func CollectURLsProgress(rootPath string, globs []string, respectGitignore bool,
if u == "" {
	continue
}
if isURLIgnored(u, slURLPatterns) {
	continue
}
fileSet, ok := urlToFiles[u]
if !ok {
	fileSet = make(map[string]struct{})
@@ -558,3 +568,53 @@ func loadGitIgnore(root string) *ignore.GitIgnore {
	}
	return ignore.CompileIgnoreLines(lines...)
}

// .slinkignore support
type slinkyIgnore struct {
	IgnorePaths []string `json:"ignorePaths"`
	IgnoreURLs []string `json:"ignoreURLs"`
}

func loadSlinkyIgnore(root string) (*ignore.GitIgnore, []string) {
	cfgPath := filepath.Join(root, ".slinkignore")
	b, err := os.ReadFile(cfgPath)
	if err != nil || len(b) == 0 {
		return nil, nil
	}
	var cfg slinkyIgnore
	if jerr := json.Unmarshal(b, &cfg); jerr != nil {
		return nil, nil
	}
	var ign *ignore.GitIgnore
	if len(cfg.IgnorePaths) > 0 {
		ign = ignore.CompileIgnoreLines(cfg.IgnorePaths...)
	}
	var urlPatterns []string
	for _, p := range cfg.IgnoreURLs {
		p = strings.TrimSpace(p)
		if p != "" {
			urlPatterns = append(urlPatterns, p)
		}
	}
	return ign, urlPatterns
}

func isURLIgnored(u string, patterns []string) bool {
	if len(patterns) == 0 {
		return false
	}
	for _, p := range patterns {
		if p == "" {
			continue
		}
		// simple contains or wildcard suffix/prefix match
		if p == u || strings.Contains(u, p) {
			return true
		}
		// doublestar path-like match for full URL string
		if ok, _ := doublestar.PathMatch(p, u); ok {
			return true
		}
	}
	return false
}
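A hypothetical in-package example of the helper added above, mirroring how CollectURLs uses it. It sticks to the exact-match and substring cases and writes the substring pattern without wildcards; it is a sketch, not part of the commit.

```go
package fsurls

import "testing"

// Hypothetical example: exercises isURLIgnored directly with patterns shaped
// like the README's ignoreURLs examples (exact URL and plain substring).
func TestIsURLIgnored_Example(t *testing.T) {
	patterns := []string{
		"https://example.com/this/path/does/not/exist", // exact match
		"internal.example.com",                         // substring containment
	}
	if !isURLIgnored("https://example.com/this/path/does/not/exist", patterns) {
		t.Errorf("expected exact pattern to match")
	}
	if !isURLIgnored("https://internal.example.com/status", patterns) {
		t.Errorf("expected substring pattern to match")
	}
	if isURLIgnored("https://example.com/docs", patterns) {
		t.Errorf("did not expect unrelated URL to match")
	}
}
```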
@@ -39,4 +39,18 @@ func TestCollectURLs_FromTestFiles(t *testing.T) {
if len(srcs) == 0 {
	t.Fatalf("expected sources for https://example.com, got none")
}

// Verify .slinkignore URL ignores
if _, ok := urls["https://example.com/this/path/does/not/exist"]; ok {
	t.Fatalf("expected URL ignored by .slinkignore to be absent")
}

// Verify .slinkignore path ignores: file under ignore-me should not contribute
for u, files := range urls {
	for _, f := range files {
		if strings.Contains(f, "ignore-me/") {
			t.Fatalf("file %s should have been ignored via .slinkignore, but contributed to URL %s", f, u)
		}
	}
}
}
@@ -151,7 +151,6 @@ func WriteMarkdown(path string, results []web.Result, s Summary) (string, error)
buf.WriteString(fmt.Sprintf(" - [%s](./%s)\n", escapeMD(fn), escapeLinkPath(fn)))
}
}
buf.WriteString("\n")
}

f, err := os.Create(path)
testdata/.slinkignore (new file, +10)
@@ -0,0 +1,10 @@
{
  "ignorePaths": [
    "**/ignore-me/**",
    "**/*.bak"
  ],
  "ignoreURLs": [
    "https://example.com/this/path/does/not/exist",
    "*notarealwebsite.com*"
  ]
}
testdata/ignore-me/ignored.md (new file, +1)
@@ -0,0 +1 @@
This file should be ignored entirely. URL here should not be collected: https://example.com/ignored