Mirror of https://github.com/LukeHagar/slinky.git (synced 2025-12-06 04:21:20 +00:00)

Commit: pushing the rest
.github/actions/slinky/Dockerfile (new file, 19 lines, vendored)
@@ -0,0 +1,19 @@
FROM golang:1.24 AS build
WORKDIR /app
# Expect the repository root as build context when building this image
COPY go.mod go.sum ./
RUN go mod download
COPY . .
RUN CGO_ENABLED=0 go build -o /usr/local/bin/slinky ./

FROM alpine:3.20
RUN adduser -D -u 10001 appuser \
    && apk add --no-cache curl jq ca-certificates
COPY --from=build /usr/local/bin/slinky /usr/local/bin/slinky
COPY .github/actions/slinky/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
USER appuser
ENTRYPOINT ["/entrypoint.sh"]
.github/actions/slinky/action.yml (new file, 72 lines, vendored)
@@ -0,0 +1,72 @@
name: "Slinky Link Checker"
description: "Slink through your repository looking for dead links"
author: "LukeHagar"
branding:
  icon: "link"
  color: "blue"

inputs:
  path:
    description: "Root path to scan"
    required: false
    default: "."
  patterns:
    description: "Comma-separated doublestar patterns. Ex: docs/**/*.md,**/*.go; default **/*"
    required: false
    default: "**/*"
  concurrency:
    description: "Maximum concurrent requests"
    required: false
    default: "16"
  timeout:
    description: "HTTP timeout seconds"
    required: false
    default: "10"
  json-out:
    description: "Optional path to write JSON results"
    required: false
    default: "results.json"
  md-out:
    description: "Optional path to write Markdown report for PR comment"
    required: false
    default: "results.md"
  repo-blob-base:
    description: "Override GitHub blob base URL (https://github.com/<owner>/<repo>/blob/<sha>)"
    required: false
    default: ""
  fail-on-failures:
    description: "Fail the job if any links fail"
    required: false
    default: "true"
  comment-pr:
    description: "If running on a PR, post a comment with the report"
    required: false
    default: "true"
  step-summary:
    description: "Append the report to the GitHub Step Summary"
    required: false
    default: "true"

runs:
  using: "docker"
  image: "Dockerfile"
  args: []
  env:
    # GitHub expression string literals must use single quotes
    INPUT_PATH: ${{ inputs.path }}
    INPUT_PATTERNS: ${{ inputs.patterns }}
    INPUT_CONCURRENCY: ${{ inputs.concurrency }}
    INPUT_TIMEOUT: ${{ inputs.timeout }}
    INPUT_JSON_OUT: ${{ inputs['json-out'] }}
    INPUT_MD_OUT: ${{ inputs['md-out'] }}
    INPUT_REPO_BLOB_BASE: ${{ inputs['repo-blob-base'] }}
    INPUT_FAIL_ON_FAILURES: ${{ inputs['fail-on-failures'] }}
    INPUT_COMMENT_PR: ${{ inputs['comment-pr'] }}
    INPUT_STEP_SUMMARY: ${{ inputs['step-summary'] }}

outputs:
  json-path:
    description: "Path to JSON results file"
  md-path:
    description: "Path to Markdown report file"
.github/actions/slinky/entrypoint.sh (new file, 91 lines, vendored)
@@ -0,0 +1,91 @@
#!/bin/sh
set -eu

PATH_ARG="${INPUT_PATH:-.}"
PATTERNS_ARG="${INPUT_PATTERNS:-**/*}"
CONCURRENCY_ARG="${INPUT_CONCURRENCY:-16}"
TIMEOUT_ARG="${INPUT_TIMEOUT:-10}"
JSON_OUT_ARG="${INPUT_JSON_OUT:-results.json}"
MD_OUT_ARG="${INPUT_MD_OUT:-results.md}"
REPO_BLOB_BASE_ARG="${INPUT_REPO_BLOB_BASE:-}"
FAIL_ON_FAILURES_ARG="${INPUT_FAIL_ON_FAILURES:-true}"
COMMENT_PR_ARG="${INPUT_COMMENT_PR:-true}"
STEP_SUMMARY_ARG="${INPUT_STEP_SUMMARY:-true}"

ARGS="check \"${PATH_ARG}\" --concurrency ${CONCURRENCY_ARG} --timeout ${TIMEOUT_ARG}"
if [ "${FAIL_ON_FAILURES_ARG}" = "true" ]; then
  ARGS="$ARGS --fail-on-failures true"
else
  ARGS="$ARGS --fail-on-failures false"
fi
if [ -n "${PATTERNS_ARG}" ]; then
  # Normalize by removing spaces after commas ([[:space:]] is POSIX; \s is a GNU extension)
  NORM_PATTERNS=$(printf "%s" "${PATTERNS_ARG}" | sed 's/,[[:space:]]*/,/g')
  IFS=','
  set -- $NORM_PATTERNS
  unset IFS
  for pat in "$@"; do
    ARGS="$ARGS --patterns \"$pat\""
  done
fi
if [ -n "${JSON_OUT_ARG}" ]; then
  ARGS="$ARGS --json-out \"${JSON_OUT_ARG}\""
fi
if [ -n "${MD_OUT_ARG}" ]; then
  ARGS="$ARGS --md-out \"${MD_OUT_ARG}\""
fi

# Compute the GitHub blob base URL for file links used in the Markdown report
if [ -n "${REPO_BLOB_BASE_ARG}" ]; then
  export SLINKY_REPO_BLOB_BASE_URL="${REPO_BLOB_BASE_ARG}"
elif [ -n "${GITHUB_REPOSITORY:-}" ]; then
  COMMIT_SHA="${GITHUB_SHA:-}"
  if [ -n "${GITHUB_EVENT_PATH:-}" ] && command -v jq >/dev/null 2>&1; then
    PR_HEAD_SHA="$(jq -r '.pull_request.head.sha // empty' "$GITHUB_EVENT_PATH" || true)"
    if [ -n "$PR_HEAD_SHA" ]; then
      COMMIT_SHA="$PR_HEAD_SHA"
    fi
  fi
  if [ -n "$COMMIT_SHA" ]; then
    export SLINKY_REPO_BLOB_BASE_URL="https://github.com/${GITHUB_REPOSITORY}/blob/${COMMIT_SHA}"
  fi
fi

# Run the check, but defer the exit status so outputs, the step summary,
# and the PR comment are still produced when links fail (set -e is active)
RC=0
eval slinky ${ARGS} || RC=$?

# Expose outputs
if [ -n "${GITHUB_OUTPUT:-}" ]; then
  if [ -n "${JSON_OUT_ARG}" ]; then
    echo "json-path=${JSON_OUT_ARG}" >> "$GITHUB_OUTPUT"
  fi
  if [ -n "${MD_OUT_ARG}" ]; then
    echo "md-path=${MD_OUT_ARG}" >> "$GITHUB_OUTPUT"
  fi
fi

# Append report to job summary if requested
if [ "${STEP_SUMMARY_ARG}" = "true" ] && [ -n "${GITHUB_STEP_SUMMARY:-}" ] && [ -n "${MD_OUT_ARG}" ] && [ -f "${MD_OUT_ARG}" ]; then
  cat "${MD_OUT_ARG}" >> "$GITHUB_STEP_SUMMARY"
fi

# Post a PR comment if this is a PR and requested
if [ "${COMMENT_PR_ARG}" = "true" ] && [ -n "${MD_OUT_ARG}" ] && [ -f "${MD_OUT_ARG}" ]; then
  PR_NUMBER=""
  if [ -n "${GITHUB_EVENT_PATH:-}" ] && command -v jq >/dev/null 2>&1; then
    PR_NUMBER="$(jq -r '.pull_request.number // empty' "$GITHUB_EVENT_PATH" || true)"
  fi
  if [ -n "${PR_NUMBER}" ] && [ -n "${GITHUB_REPOSITORY:-}" ] && [ -n "${GITHUB_TOKEN:-}" ]; then
    # Build the JSON payload with jq so the Markdown body is safely escaped
    PAYLOAD="$(jq -Rs '{body: .}' "${MD_OUT_ARG}")"
    curl -sS -H "Authorization: Bearer ${GITHUB_TOKEN}" \
      -H "Accept: application/vnd.github+json" \
      -H "X-GitHub-Api-Version: 2022-11-28" \
      -X POST "https://api.github.com/repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \
      -d "${PAYLOAD}" >/dev/null || true
  fi
fi

exit $RC
.github/workflows/ci.yml (new file, 70 lines, vendored)
@@ -0,0 +1,70 @@
name: CI

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.24.x'

      - name: Cache go build
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/go-build
            ~/go/pkg/mod
          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
          restore-keys: |
            ${{ runner.os }}-go-

      - name: Build
        run: go build ./...

      - name: Run unit tests
        run: go test ./...

  action-self-test:
    runs-on: ubuntu-latest
    needs: test
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Build action image
        run: |
          docker build -t slinky-action -f .github/actions/slinky/Dockerfile .

      - name: Run action container (expect nonzero if failures)
        id: run_action
        run: |
          set -e
          # Override the image ENTRYPOINT so the INPUT_* assignments below take effect
          docker run --rm -v "$PWD:/repo" -w /repo -e GITHUB_STEP_SUMMARY="/tmp/summary.md" --entrypoint sh slinky-action -lc \
            'INPUT_PATH=. INPUT_PATTERNS="test files/**" INPUT_CONCURRENCY=8 INPUT_TIMEOUT=5 INPUT_JSON_OUT=results.json INPUT_MD_OUT=results.md INPUT_FAIL_ON_FAILURES=true INPUT_COMMENT_PR=false INPUT_STEP_SUMMARY=true /entrypoint.sh'

      - name: Upload results.json
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: results
          path: results.json

      - name: Upload results.md
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: results-md
          path: results.md
.github/workflows/example-slinky.yml (new file, 40 lines, vendored)
@@ -0,0 +1,40 @@
name: Slinky Example

on:
  pull_request:
    branches: [ main ]
  workflow_dispatch: {}

jobs:
  slinky:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Run Slinky link checker
        uses: ./.github/actions/slinky
        # GITHUB_TOKEN is not injected into the container automatically,
        # so pass it explicitly for comment-pr to work
        env:
          GITHUB_TOKEN: ${{ github.token }}
        with:
          path: .
          patterns: "**/*"
          concurrency: "16"
          timeout: "10"
          json-out: results.json
          md-out: results.md
          fail-on-failures: "true"
          comment-pr: "true"
          step-summary: "true"

      - name: Upload results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: slinky-results
          path: |
            results.json
            results.md
Makefile (new file, 33 lines)
@@ -0,0 +1,33 @@
GO ?= go
PKG := ./...
BIN_DIR ?= bin
BINARY ?= slinky
BIN := $(BIN_DIR)/$(BINARY)

.PHONY: build test clean check action-image action-run

build: $(BIN)

$(BIN):
	@mkdir -p $(BIN_DIR)
	CGO_ENABLED=0 $(GO) build -o $(BIN) ./

test:
	$(GO) test -v $(PKG)

# Convenience: run the headless check against local test files
check: build
	./$(BIN) check . --glob "test files/**" --json-out results.json --fail-on-failures true

# Build the Docker-based GitHub Action locally
action-image:
	docker build -t slinky-action -f .github/actions/slinky/Dockerfile .

# Run the Action container against the current repo (override the image
# ENTRYPOINT so the INPUT_* assignments take effect)
action-run: action-image
	docker run --rm -v "$(PWD):/repo" -w /repo -e GITHUB_STEP_SUMMARY="/tmp/summary.md" --entrypoint sh slinky-action -lc 'INPUT_PATH=. INPUT_PATTERNS="test files/**" INPUT_CONCURRENCY=8 INPUT_TIMEOUT=5 INPUT_JSON_OUT=results.json INPUT_MD_OUT=results.md INPUT_FAIL_ON_FAILURES=true INPUT_COMMENT_PR=false INPUT_STEP_SUMMARY=true /entrypoint.sh'

clean:
	rm -rf $(BIN_DIR) results.json results.md
cmd/check.go (new file, 165 lines)
@@ -0,0 +1,165 @@
package cmd

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"sort"
	"strings"
	"time"

	"github.com/spf13/cobra"

	"slinky/internal/fsurls"
	"slinky/internal/report"
	"slinky/internal/web"
)

// SerializableResult mirrors web.Result but omits the error field for JSON.
type SerializableResult struct {
	URL         string   `json:"url"`
	OK          bool     `json:"ok"`
	Status      int      `json:"status"`
	ErrMsg      string   `json:"error"`
	Method      string   `json:"method"`
	ContentType string   `json:"contentType"`
	Sources     []string `json:"sources"`
}

func init() {
	checkCmd := &cobra.Command{
		Use:   "check [path]",
		Short: "Scan a directory for URLs and validate them (headless)",
		Args:  cobra.MaximumNArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			path := "."
			if len(args) == 1 {
				path = args[0]
			}

			var gl []string
			if len(patterns) > 0 {
				gl = append(gl, patterns...)
			} else if globPat != "" {
				gl = strings.Split(globPat, ",")
			} else {
				gl = []string{"**/*"}
			}

			timeout := time.Duration(timeoutSeconds) * time.Second
			cfg := web.Config{MaxConcurrency: maxConcurrency, RequestTimeout: timeout}

			// Collect URLs
			urlToFiles, err := fsurls.CollectURLs(path, gl)
			if err != nil {
				return err
			}
			var urls []string
			for u := range urlToFiles {
				urls = append(urls, u)
			}
			sort.Strings(urls)

			// If no URLs were found, exit early
			if len(urls) == 0 {
				fmt.Println("No URLs found.")
				return nil
			}

			// Run checks
			startedAt := time.Now()
			ctx, cancel := context.WithCancel(context.Background())
			defer cancel()
			results := make(chan web.Result, 256)
			web.CheckURLs(ctx, urls, urlToFiles, results, nil, cfg)

			var total, okCount, failCount int
			var failures []SerializableResult
			var failedResults []web.Result

			for r := range results {
				total++
				if r.OK {
					okCount++
				} else {
					failCount++
				}
				if jsonOut != "" && !r.OK {
					failures = append(failures, SerializableResult{
						URL:         r.URL,
						OK:          r.OK,
						Status:      r.Status,
						ErrMsg:      r.ErrMsg,
						Method:      r.Method,
						ContentType: r.ContentType,
						Sources:     r.Sources,
					})
				}
				if !r.OK {
					failedResults = append(failedResults, r)
				}
			}

			// Write JSON if requested (failures only)
			if jsonOut != "" {
				f, ferr := os.Create(jsonOut)
				if ferr != nil {
					return ferr
				}
				enc := json.NewEncoder(f)
				enc.SetIndent("", "  ")
				if err := enc.Encode(failures); err != nil {
					_ = f.Close()
					return err
				}
				_ = f.Close()
			}

			// Optionally write a Markdown report for PR comment consumption
			if mdOut != "" {
				base := repoBlobBase
				if strings.TrimSpace(base) == "" {
					base = os.Getenv("SLINKY_REPO_BLOB_BASE_URL")
				}
				summary := report.Summary{
					RootPath:        path,
					StartedAt:       startedAt,
					FinishedAt:      time.Now(),
					Processed:       total,
					OK:              okCount,
					Fail:            failCount,
					JSONPath:        jsonOut,
					RepoBlobBaseURL: base,
				}
				if _, err := report.WriteMarkdown(mdOut, failedResults, summary); err != nil {
					return err
				}
			}

			fmt.Printf("Checked %d URLs: %d OK, %d failed\n", total, okCount, failCount)
			if failOnFailures && failCount > 0 {
				return fmt.Errorf("%d links failed", failCount)
			}
			return nil
		},
	}

	checkCmd.Flags().StringVar(&globPat, "glob", "", "comma-separated glob patterns for files (doublestar); empty = all files")
	checkCmd.Flags().StringSliceVar(&patterns, "patterns", nil, "file match patterns (doublestar). Examples: docs/**/*.md,**/*.go; defaults to **/*")
	checkCmd.Flags().IntVar(&maxConcurrency, "concurrency", 16, "maximum concurrent requests")
	checkCmd.Flags().StringVar(&jsonOut, "json-out", "", "path to write JSON results (failing links only, as an array)")
	checkCmd.Flags().StringVar(&mdOut, "md-out", "", "path to write Markdown report for PR comment")
	checkCmd.Flags().StringVar(&repoBlobBase, "repo-blob-base", "", "override GitHub blob base URL (e.g. https://github.com/owner/repo/blob/<sha>)")
	checkCmd.Flags().IntVar(&timeoutSeconds, "timeout", 10, "HTTP request timeout in seconds")
	checkCmd.Flags().BoolVar(&failOnFailures, "fail-on-failures", true, "exit non-zero if any links fail")

	rootCmd.AddCommand(checkCmd)
}

var (
	timeoutSeconds int
	failOnFailures bool
	patterns       []string
	repoBlobBase   string
)
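The failures-only JSON written by --json-out is an array of objects whose keys come from the json tags on SerializableResult above. A minimal downstream consumer might look like this sketch; the local failure type is redeclared here purely for illustration, and only the field names are taken from the tags in cmd/check.go:

package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// failure mirrors the json tags on SerializableResult in cmd/check.go.
type failure struct {
	URL     string   `json:"url"`
	Status  int      `json:"status"`
	ErrMsg  string   `json:"error"`
	Sources []string `json:"sources"`
}

func main() {
	b, err := os.ReadFile("results.json")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	var fails []failure
	if err := json.Unmarshal(b, &fails); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	for _, f := range fails {
		fmt.Printf("%d %s (seen in %d files)\n", f.Status, f.URL, len(f.Sources))
	}
}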
cmd/root.go (new file, 23 lines)
@@ -0,0 +1,23 @@
package cmd

import (
	"fmt"
	"os"

	"github.com/spf13/cobra"
)

var rootCmd = &cobra.Command{
	Use:   "slinky",
	Short: "Link checker for repos/directories and webpages (TUI)",
	Long:  "Slinky scans a directory/repo for URLs in files or crawls a URL, then validates links concurrently in a TUI.",
}

func Execute() {
	if err := rootCmd.Execute(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
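This commit does not include main.go, but cmd.Execute is the conventional Cobra entry point, so the package main wrapper presumably amounts to a sketch like this:

package main

import "slinky/cmd"

func main() {
	// Delegate to the Cobra root command defined in cmd/root.go
	cmd.Execute()
}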
cmd/run.go (new file, 49 lines)
@@ -0,0 +1,49 @@
package cmd

import (
	"strings"

	"github.com/spf13/cobra"

	"slinky/internal/tui"
	"slinky/internal/web"
)

func init() {
	runCmd := &cobra.Command{
		Use:   "run [path]",
		Short: "Scan a directory/repo for URLs in files and validate them (TUI)",
		Args:  cobra.MaximumNArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			path := "."
			if len(args) == 1 {
				path = args[0]
			}
			cfg := web.Config{MaxConcurrency: maxConcurrency}
			var gl []string
			if len(patterns) > 0 {
				gl = append(gl, patterns...)
			} else if globPat != "" {
				gl = strings.Split(globPat, ",")
			} else {
				gl = []string{"**/*"}
			}
			return tui.Run(path, gl, cfg, jsonOut, mdOut)
		},
	}

	runCmd.Flags().StringVar(&globPat, "glob", "", "comma-separated glob patterns for files (doublestar); empty = all files")
	runCmd.Flags().StringSliceVar(&patterns, "patterns", nil, "file match patterns (doublestar). Examples: docs/**/*.md,**/*.go; defaults to **/*")
	runCmd.Flags().IntVar(&maxConcurrency, "concurrency", 16, "maximum concurrent requests")
	runCmd.Flags().StringVar(&jsonOut, "json-out", "", "path to write JSON results (failing links only, as an array)")
	runCmd.Flags().StringVar(&mdOut, "md-out", "", "path to write Markdown report for PR comment")
	runCmd.Flags().StringVar(&repoBlobBase, "repo-blob-base", "", "override GitHub blob base URL (e.g. https://github.com/owner/repo/blob/<sha>)")
	rootCmd.AddCommand(runCmd)
}

var (
	maxConcurrency int
	jsonOut        string
	globPat        string
	mdOut          string
)
go.mod (new file, 37 lines)
@@ -0,0 +1,37 @@
module slinky

go 1.24.0

toolchain go1.24.7

require (
	github.com/bmatcuk/doublestar/v4 v4.6.1
	github.com/charmbracelet/bubbles v0.21.0
	github.com/charmbracelet/bubbletea v1.3.8
	github.com/charmbracelet/lipgloss v1.1.0
	github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
	github.com/spf13/cobra v1.10.1
)

require (
	github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
	github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect
	github.com/charmbracelet/harmonica v0.2.0 // indirect
	github.com/charmbracelet/x/ansi v0.10.1 // indirect
	github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd // indirect
	github.com/charmbracelet/x/term v0.2.1 // indirect
	github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
	github.com/inconshreveable/mousetrap v1.1.0 // indirect
	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
	github.com/mattn/go-isatty v0.0.20 // indirect
	github.com/mattn/go-localereader v0.0.1 // indirect
	github.com/mattn/go-runewidth v0.0.16 // indirect
	github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
	github.com/muesli/cancelreader v0.2.2 // indirect
	github.com/muesli/termenv v0.16.0 // indirect
	github.com/rivo/uniseg v0.4.7 // indirect
	github.com/spf13/pflag v1.0.9 // indirect
	github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
	golang.org/x/sys v0.36.0 // indirect
	golang.org/x/text v0.24.0 // indirect
)
go.sum (new file, 70 lines)
@@ -0,0 +1,70 @@
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwNy7PA4I=
github.com/bmatcuk/doublestar/v4 v4.6.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc=
github.com/charmbracelet/bubbles v0.21.0 h1:9TdC97SdRVg/1aaXNVWfFH3nnLAwOXr8Fn6u6mfQdFs=
github.com/charmbracelet/bubbles v0.21.0/go.mod h1:HF+v6QUR4HkEpz62dx7ym2xc71/KBHg+zKwJtMw+qtg=
github.com/charmbracelet/bubbletea v1.3.8 h1:DJlh6UUPhobzomqCtnLJRmhBSxwUJoPPi6iCToUDr4g=
github.com/charmbracelet/bubbletea v1.3.8/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4=
github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc h1:4pZI35227imm7yK2bGPcfpFEmuY1gc2YSTShr4iJBfs=
github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc/go.mod h1:X4/0JoqgTIPSFcRA/P6INZzIuyqdFY5rm8tb41s9okk=
github.com/charmbracelet/harmonica v0.2.0 h1:8NxJWRWg/bzKqqEaaeFNipOu77YR5t8aSwG4pgaUBiQ=
github.com/charmbracelet/harmonica v0.2.0/go.mod h1:KSri/1RMQOZLbw7AHqgcBycp8pgJnQMYYT8QZRqZ1Ao=
github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
github.com/charmbracelet/x/ansi v0.10.1 h1:rL3Koar5XvX0pHGfovN03f5cxLbCF2YvLeyz7D2jVDQ=
github.com/charmbracelet/x/ansi v0.10.1/go.mod h1:3RQDQ6lDnROptfpWuUVIUG64bD2g2BgntdxH0Ya5TeE=
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd h1:vy0GVL4jeHEwG5YOXDmi86oYw2yuYUGqz6a8sLwg0X8=
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd/go.mod h1:xe0nKWGd3eJgtqZRaN9RjMtK7xUYchjzPr7q6kcvCCs=
github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ=
github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg=
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI=
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs=
github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s=
github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0=
github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E=
golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
internal/fsurls/fsurls.go (new file, 287 lines)
@@ -0,0 +1,287 @@
package fsurls

import (
	"bufio"
	"io"
	"net/url"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/bmatcuk/doublestar/v4"
	ignore "github.com/sabhiram/go-gitignore"
)

// URL patterns from various contexts
var bareURLRegex = regexp.MustCompile(`(?i)\bhttps?://[^\s<>()\[\]{}"']+`)
var mdLinkRegex = regexp.MustCompile(`(?is)!?\[[^\]]*\]\((.*?)\)`) // captures (url)
var angleURLRegex = regexp.MustCompile(`(?i)<(https?://[^>\s]+)>`)
var quotedURLRegex = regexp.MustCompile(`(?i)"(https?://[^"\s]+)"|'(https?://[^'\s]+)'`)
var htmlHrefRegex = regexp.MustCompile(`(?i)href\s*=\s*"([^"]+)"|href\s*=\s*'([^']+)'`)
var htmlSrcRegex = regexp.MustCompile(`(?i)src\s*=\s*"([^"]+)"|src\s*=\s*'([^']+)'`)

// Strict hostname validation: labels 1-63 chars, alnum & hyphen, not start/end hyphen, at least one dot, simple TLD
var hostnameRegex = regexp.MustCompile(`^(?i)([a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)+$`)

// CollectURLs walks the directory tree rooted at rootPath and collects URLs found in
// text-based files matching any of the provided glob patterns (doublestar ** supported).
// If globs is empty, all files are considered. Respects .gitignore if present.
// Returns a map from URL -> sorted unique list of file paths that contained it.
func CollectURLs(rootPath string, globs []string) (map[string][]string, error) {
	if strings.TrimSpace(rootPath) == "" {
		rootPath = "."
	}
	cleanRoot := filepath.Clean(rootPath)

	st, _ := os.Stat(cleanRoot)
	isFileRoot := st != nil && !st.IsDir()
	var ign *ignore.GitIgnore
	if !isFileRoot {
		ign = loadGitIgnore(cleanRoot)
	}

	var patterns []string
	for _, g := range globs {
		g = strings.TrimSpace(g)
		if g == "" {
			continue
		}
		patterns = append(patterns, g)
	}

	shouldInclude := func(rel string) bool {
		if len(patterns) == 0 {
			return true
		}
		for _, p := range patterns {
			ok, _ := doublestar.PathMatch(p, rel)
			if ok {
				return true
			}
		}
		return false
	}

	urlToFiles := make(map[string]map[string]struct{})

	// 2 MiB max file size to avoid huge/binary files
	const maxSize = 2 * 1024 * 1024

	// Walk the filesystem
	walkFn := func(path string, d os.DirEntry, err error) error {
		if err != nil {
			return nil
		}
		rel, rerr := filepath.Rel(cleanRoot, path)
		if rerr != nil {
			rel = path
		}
		rel = filepath.ToSlash(rel)
		if d.IsDir() {
			base := filepath.Base(path)
			if base == ".git" {
				return filepath.SkipDir
			}
			if ign != nil && ign.MatchesPath(rel) {
				return filepath.SkipDir
			}
			return nil
		}
		if ign != nil && ign.MatchesPath(rel) {
			return nil
		}
		info, ierr := d.Info()
		if ierr != nil {
			return nil
		}
		if info.Size() > maxSize {
			return nil
		}
		if isFileRoot && rel == "." {
			rel = filepath.ToSlash(filepath.Base(path))
		}
		if !shouldInclude(rel) {
			return nil
		}

		f, ferr := os.Open(path)
		if ferr != nil {
			return nil
		}
		defer f.Close()
		br := bufio.NewReader(f)
		// Read up to maxSize bytes
		var b strings.Builder
		read := int64(0)
		for {
			chunk, cerr := br.ReadString('\n')
			b.WriteString(chunk)
			read += int64(len(chunk))
			if cerr == io.EOF || read > maxSize {
				break
			}
			if cerr != nil {
				break
			}
		}
		content := b.String()
		// Skip if likely binary (NUL present)
		if strings.IndexByte(content, '\x00') >= 0 {
			return nil
		}

		candidates := extractCandidates(content)
		if len(candidates) == 0 {
			return nil
		}
		for _, raw := range candidates {
			u := sanitizeURLToken(raw)
			if u == "" {
				continue
			}
			fileSet, ok := urlToFiles[u]
			if !ok {
				fileSet = make(map[string]struct{})
				urlToFiles[u] = fileSet
			}
			fileSet[rel] = struct{}{}
		}
		return nil
	}

	_ = filepath.WalkDir(cleanRoot, walkFn)

	// Convert to sorted slices
	result := make(map[string][]string, len(urlToFiles))
	for u, files := range urlToFiles {
		var list []string
		for fp := range files {
			list = append(list, fp)
		}
		sort.Strings(list)
		result[u] = list
	}
	return result, nil
}

func sanitizeURLToken(s string) string {
	s = strings.TrimSpace(s)
	// Strip surrounding angle brackets or quotes
	if strings.HasPrefix(s, "<") && strings.HasSuffix(s, ">") {
		s = strings.TrimSuffix(strings.TrimPrefix(s, "<"), ">")
	}
	if (strings.HasPrefix(s, "\"") && strings.HasSuffix(s, "\"")) || (strings.HasPrefix(s, "'") && strings.HasSuffix(s, "'")) {
		s = strings.TrimSuffix(strings.TrimPrefix(s, string(s[0])), string(s[0]))
	}
	// Trim trailing punctuation and balance parentheses
	s = trimTrailingDelimiters(s)
	low := strings.ToLower(s)
	if !(strings.HasPrefix(low, "http://") || strings.HasPrefix(low, "https://")) {
		return ""
	}
	// Parse and validate hostname strictly
	u, err := url.Parse(s)
	if err != nil || u == nil {
		return ""
	}
	host := u.Hostname()
	if host == "" {
		return ""
	}
	// Reject placeholders like [tenant] or {tenant}
	if strings.ContainsAny(host, "[]{}") {
		return ""
	}
	// Must match strict hostname rules
	if !hostnameRegex.MatchString(host) {
		return ""
	}
	return s
}

func trimTrailingDelimiters(s string) string {
	for {
		if s == "" {
			return s
		}
		last := s[len(s)-1]
		// ')' is excluded here and handled below, so balanced parentheses survive
		if strings.ContainsRune(".,;:!?]'\"}", rune(last)) {
			s = s[:len(s)-1]
			continue
		}
		if last == ')' {
			open := strings.Count(s, "(")
			closeCount := strings.Count(s, ")")
			if closeCount > open {
				s = s[:len(s)-1]
				continue
			}
		}
		return s
	}
}

func extractCandidates(content string) []string {
	var out []string
	for _, m := range mdLinkRegex.FindAllStringSubmatch(content, -1) {
		if len(m) > 1 {
			out = append(out, m[1])
		}
	}
	for _, m := range htmlHrefRegex.FindAllStringSubmatch(content, -1) {
		if len(m) > 2 {
			if m[1] != "" {
				out = append(out, m[1])
			} else if m[2] != "" {
				out = append(out, m[2])
			}
		}
	}
	for _, m := range htmlSrcRegex.FindAllStringSubmatch(content, -1) {
		if len(m) > 2 {
			if m[1] != "" {
				out = append(out, m[1])
			} else if m[2] != "" {
				out = append(out, m[2])
			}
		}
	}
	for _, m := range angleURLRegex.FindAllStringSubmatch(content, -1) {
		if len(m) > 1 {
			out = append(out, m[1])
		}
	}
	for _, m := range quotedURLRegex.FindAllStringSubmatch(content, -1) {
		if len(m) > 2 {
			if m[1] != "" {
				out = append(out, m[1])
			} else if m[2] != "" {
				out = append(out, m[2])
			}
		}
	}
	out = append(out, bareURLRegex.FindAllString(content, -1)...)
	return out
}

func loadGitIgnore(root string) *ignore.GitIgnore {
	var lines []string
	gi := filepath.Join(root, ".gitignore")
	if b, err := os.ReadFile(gi); err == nil {
		lines = append(lines, strings.Split(string(b), "\n")...)
	}
	ge := filepath.Join(root, ".git", "info", "exclude")
	if b, err := os.ReadFile(ge); err == nil {
		lines = append(lines, strings.Split(string(b), "\n")...)
	}
	if len(lines) == 0 {
		return nil
	}
	return ignore.CompileIgnoreLines(lines...)
}
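CollectURLs is usable on its own outside the CLI. A small sketch driving it directly within this module (the root path and pattern here are illustrative):

package main

import (
	"fmt"

	"slinky/internal/fsurls"
)

func main() {
	// Scan Markdown files under docs/ (illustrative path and pattern)
	urls, err := fsurls.CollectURLs("docs", []string{"**/*.md"})
	if err != nil {
		panic(err)
	}
	// Each URL maps to the sorted, deduplicated list of files that contained it
	for u, files := range urls {
		fmt.Printf("%s found in %v\n", u, files)
	}
}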
internal/fsurls/fsurls_test.go (new file, 43 lines)
@@ -0,0 +1,43 @@
package fsurls

import (
	"path/filepath"
	"strings"
	"testing"
)

func TestCollectURLs_FromTestFiles(t *testing.T) {
	root := filepath.Join("..", "..", "test files")

	urls, err := CollectURLs(root, []string{"**/*"})
	if err != nil {
		t.Fatalf("CollectURLs error: %v", err)
	}

	// Spot-check presence of some known URLs
	mustContain := []string{
		"https://example.com",
		"https://en.wikipedia.org/wiki/Main_Page",
		"http://example.com:8080",
		"http://example..com", // appears in multiple files
		"https://this-domain-does-not-exist-123456789.com",
	}
	for _, u := range mustContain {
		if _, ok := urls[u]; !ok {
			// Show nearby URLs to aid debugging if it fails.
			var sample []string
			for seen := range urls {
				if strings.Contains(seen, "example") {
					sample = append(sample, seen)
				}
			}
			t.Fatalf("expected URL %q to be collected; example URLs seen: %v", u, sample)
		}
	}

	// Ensure sources are recorded for a known URL
	srcs := urls["https://example.com"]
	if len(srcs) == 0 {
		t.Fatalf("expected sources for https://example.com, got none")
	}
}
internal/fsurls/lang_files_test.go (new file, 42 lines)
@@ -0,0 +1,42 @@
package fsurls

import (
	"path/filepath"
	"testing"
)

func TestCollectURLs_FromCodeFiles(t *testing.T) {
	root := filepath.Join("..", "..", "test files")
	urls, err := CollectURLs(root, []string{"**/*"})
	if err != nil {
		t.Fatalf("CollectURLs error: %v", err)
	}

	// Valid URLs from various languages should be present (including a known nonexistent-but-well-formed one)
	valids := []string{
		"https://example.com",
		"https://en.wikipedia.org/wiki/Main_Page",
		"https://developer.mozilla.org",
		"https://svelte.dev",
		"https://go.dev/doc/",
		"https://this-domain-does-not-exist-123456789.com",
	}
	for _, u := range valids {
		if _, ok := urls[u]; !ok {
			t.Fatalf("expected valid URL %q to be collected", u)
		}
	}

	// Placeholder patterns should be excluded by strict validation
	placeholders := []string{
		"https://[tenant].api.identitynow.com",
		"https://{tenant}.api.identitynow.com",
		"https://[tenant].[domain].com",
		"https://{tenant}.api.ideidentitynow.com/v3/transforms",
	}
	for _, u := range placeholders {
		if _, ok := urls[u]; ok {
			t.Fatalf("did not expect placeholder URL %q to be collected", u)
		}
	}
}
internal/report/markdown.go (new file, 168 lines)
@@ -0,0 +1,168 @@
package report

import (
	"bytes"
	"fmt"
	"html"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"slinky/internal/web"
)

// Summary captures high-level run details for the report.
type Summary struct {
	RootPath        string
	StartedAt       time.Time
	FinishedAt      time.Time
	Processed       int
	OK              int
	Fail            int
	AvgRPS          float64
	PeakRPS         float64
	LowRPS          float64
	JSONPath        string
	RepoBlobBaseURL string // e.g. https://github.com/owner/repo/blob/<sha>
}

// WriteMarkdown writes a GitHub-flavored Markdown report to path. If path is empty,
// it derives a safe filename from s.RootPath.
func WriteMarkdown(path string, results []web.Result, s Summary) (string, error) {
	if strings.TrimSpace(path) == "" {
		base := filepath.Base(s.RootPath)
		if strings.TrimSpace(base) == "" || base == "." || base == string(filepath.Separator) {
			base = "results"
		}
		var b strings.Builder
		for _, r := range strings.ToLower(base) {
			if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_' || r == '.' {
				b.WriteRune(r)
			} else {
				b.WriteByte('_')
			}
		}
		path = fmt.Sprintf("%s.md", b.String())
	}

	var buf bytes.Buffer
	// Title and summary
	buf.WriteString("## Slinky Test Report\n\n")
	buf.WriteString(fmt.Sprintf("- **Root**: %s\n", escapeMD(s.RootPath)))
	buf.WriteString(fmt.Sprintf("- **Started**: %s\n", s.StartedAt.Format("2006-01-02 15:04:05 MST")))
	buf.WriteString(fmt.Sprintf("- **Finished**: %s\n", s.FinishedAt.Format("2006-01-02 15:04:05 MST")))
	buf.WriteString(fmt.Sprintf("- **Processed**: %d • **OK**: %d • **Fail**: %d\n", s.Processed, s.OK, s.Fail))
	buf.WriteString(fmt.Sprintf("- **Rates**: avg %.1f/s • peak %.1f/s • low %.1f/s\n", s.AvgRPS, s.PeakRPS, s.LowRPS))
	if s.JSONPath != "" {
		base := filepath.Base(s.JSONPath)
		buf.WriteString(fmt.Sprintf("- **JSON**: %s\n", escapeMD(base)))
	}
	buf.WriteString("\n")

	// Failures by URL
	buf.WriteString("### Failures by URL\n\n")

	// Gather issues per URL with list of files
	type fileRef struct {
		Path string
	}
	type urlIssue struct {
		Status int
		Method string
		ErrMsg string
		Files  []fileRef
	}
	byURL := make(map[string]*urlIssue)
	for _, r := range results {
		ui, ok := byURL[r.URL]
		if !ok {
			ui = &urlIssue{Status: r.Status, Method: r.Method, ErrMsg: r.ErrMsg}
			byURL[r.URL] = ui
		}
		for _, src := range r.Sources {
			ui.Files = append(ui.Files, fileRef{Path: src})
		}
	}

	// Sort URLs
	var urls []string
	for u := range byURL {
		urls = append(urls, u)
	}
	sort.Strings(urls)

	for _, u := range urls {
		ui := byURL[u]
		// Header line for URL
		if ui.Status > 0 {
			buf.WriteString(fmt.Sprintf("- %d %s `%s` — %s\n", ui.Status, escapeMD(ui.Method), escapeMD(u), escapeMD(ui.ErrMsg)))
		} else {
			buf.WriteString(fmt.Sprintf("- %s `%s` — %s\n", escapeMD(ui.Method), escapeMD(u), escapeMD(ui.ErrMsg)))
		}
		// Files list (collapsible)
		buf.WriteString("  <details><summary>files</summary>\n\n")
		// Deduplicate and sort file paths
		seen := make(map[string]struct{})
		var files []string
		for _, fr := range ui.Files {
			if _, ok := seen[fr.Path]; ok {
				continue
			}
			seen[fr.Path] = struct{}{}
			files = append(files, fr.Path)
		}
		sort.Strings(files)
		for _, fn := range files {
			if strings.TrimSpace(s.RepoBlobBaseURL) != "" {
				buf.WriteString(fmt.Sprintf("  - [%s](%s/%s)\n", escapeMD(fn), strings.TrimRight(s.RepoBlobBaseURL, "/"), escapeLinkPath(fn)))
			} else {
				buf.WriteString(fmt.Sprintf("  - [%s](./%s)\n", escapeMD(fn), escapeLinkPath(fn)))
			}
		}
		buf.WriteString("\n  </details>\n\n")
	}

	f, err := os.Create(path)
	if err != nil {
		return "", err
	}
	defer f.Close()
	if _, err := f.Write(buf.Bytes()); err != nil {
		return "", err
	}
	return path, nil
}

func escapeMD(s string) string {
	// Basic HTML escape to be safe in GitHub Markdown table cells
	return html.EscapeString(s)
}

// formatSourcesList renders a list of file paths as an HTML unordered list suitable
// for inclusion in a Markdown table cell. Individual entries are escaped.
func formatSourcesList(srcs []string) string {
	if len(srcs) == 0 {
		return ""
	}
	var b strings.Builder
	b.WriteString("<ul>\n")
	for _, s := range srcs {
		b.WriteString("  <li><code>")
		b.WriteString(escapeMD(s))
		b.WriteString("</code></li>\n")
	}
	b.WriteString("</ul>")
	return b.String()
}

// escapeLinkPath escapes a relative path for inclusion in a Markdown link URL.
// We keep it simple and only escape parentheses and spaces.
func escapeLinkPath(p string) string {
	// Replace spaces with %20 and parentheses with encoded forms
	p = strings.ReplaceAll(p, " ", "%20")
	p = strings.ReplaceAll(p, "(", "%28")
	p = strings.ReplaceAll(p, ")", "%29")
	return p
}
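WriteMarkdown only needs a Summary and the failing results, so it can be exercised directly. A sketch of standalone use within this module (the counts are illustrative; an empty failure list still yields a valid report containing just the summary block):

package main

import (
	"fmt"
	"time"

	"slinky/internal/report"
	"slinky/internal/web"
)

func main() {
	now := time.Now()
	s := report.Summary{
		RootPath:   ".",
		StartedAt:  now.Add(-5 * time.Second),
		FinishedAt: now,
		Processed:  42, // illustrative values
		OK:         40,
		Fail:       2,
	}
	// Passing no failures produces a report with only the summary section
	path, err := report.WriteMarkdown("results.md", []web.Result{}, s)
	if err != nil {
		panic(err)
	}
	fmt.Println("wrote", path)
}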
internal/tui/fs_bridge.go (new file, 10 lines)
@@ -0,0 +1,10 @@
package tui

import (
	"slinky/internal/fsurls"
)

// fsCollect is a tiny bridge to avoid importing fsurls directly in tui.go
func fsCollect(root string, globs []string) (map[string][]string, error) {
	return fsurls.CollectURLs(root, globs)
}
internal/tui/tui.go (new file, 319 lines; diff shown here is truncated)
@@ -0,0 +1,319 @@
package tui

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"time"

	"github.com/charmbracelet/bubbles/progress"
	"github.com/charmbracelet/bubbles/spinner"
	"github.com/charmbracelet/bubbles/viewport"
	tea "github.com/charmbracelet/bubbletea"
	"github.com/charmbracelet/lipgloss"

	"slinky/internal/report"
	"slinky/internal/web"
)

type linkResultMsg struct{ res web.Result }
type crawlDoneMsg struct{}
type statsMsg struct{ s web.Stats }
type tickMsg struct{ t time.Time }

type model struct {
	rootPath string
	cfg      web.Config
	jsonOut  string
	mdOut    string
	globs    []string

	results    chan web.Result
	stats      chan web.Stats
	started    time.Time
	finishedAt time.Time
	done       bool

	spin spinner.Model
	prog progress.Model
	vp   viewport.Model

	lines []string

	total int
	ok    int
	fail  int

	pending   int
	processed int

	lastProcessed int
	rps           float64
	peakRPS       float64
	lowRPS        float64

	allResults []web.Result
	jsonPath   string
	mdPath     string

	showFail bool
}

// Run scans files under rootPath matching globs, extracts URLs, and checks them.
func Run(rootPath string, globs []string, cfg web.Config, jsonOut string, mdOut string) error {
	m := &model{rootPath: rootPath, cfg: cfg, jsonOut: jsonOut, mdOut: mdOut, globs: globs}
	p := tea.NewProgram(m, tea.WithAltScreen())
	// Program.Start is deprecated in bubbletea v1; Run returns the final model and error
	_, err := p.Run()
	return err
}

func (m *model) Init() tea.Cmd {
	m.spin = spinner.New()
	m.spin.Spinner = spinner.Dot
	m.spin.Style = lipgloss.NewStyle().Foreground(lipgloss.Color("205"))
	m.prog = progress.New(progress.WithDefaultGradient())
	m.started = time.Now()
	m.lowRPS = -1
	m.results = make(chan web.Result, 256)
	m.stats = make(chan web.Stats, 64)

	ctx, cancel := context.WithCancel(context.Background())
	go func() {
		defer cancel()
		urlsMap, _ := fsCollect(m.rootPath, m.globs)
		var urls []string
		for u := range urlsMap {
			urls = append(urls, u)
		}
		web.CheckURLs(ctx, urls, urlsMap, m.results, m.stats, m.cfg)
	}()

	return tea.Batch(m.spin.Tick, m.waitForEvent(), tickCmd())
}

func tickCmd() tea.Cmd {
	return tea.Tick(time.Second, func(t time.Time) tea.Msg { return tickMsg{t: t} })
}

func (m *model) waitForEvent() tea.Cmd {
	return func() tea.Msg {
		if m.results == nil {
			return crawlDoneMsg{}
		}
		select {
		case res, ok := <-m.results:
			if ok {
				return linkResultMsg{res: res}
			}
			return crawlDoneMsg{}
		case s := <-m.stats:
			return statsMsg{s: s}
		}
	}
}

func (m *model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
	switch msg := msg.(type) {
	case tea.KeyMsg:
		switch msg.String() {
		case "q", "ctrl+c":
			return m, tea.Quit
		case "f":
			m.showFail = !m.showFail
			m.refreshViewport()
			return m, nil
		}
	case tea.WindowSizeMsg:
		// Reserve space for header (1), stats (1), progress (1), spacer (1), footer (1)
		reserved := 5
		if m.vp.Width == 0 {
			m.vp = viewport.Model{Width: msg.Width, Height: max(msg.Height-reserved, 3)}
		} else {
			m.vp.Width = msg.Width
			m.vp.Height = max(msg.Height-reserved, 3)
		}
		m.prog.Width = max(msg.Width-4, 10)
		m.refreshViewport()
		return m, nil
	case linkResultMsg:
		// Show every event in the log
		prefix := statusEmoji(msg.res.OK, msg.res.Err)
		if msg.res.CacheHit {
			prefix = "🗃"
		}
		line := fmt.Sprintf("%s %3d %s", prefix, msg.res.Status, msg.res.URL)
		m.lines = append(m.lines, line)
		// Only count non-cache-hit in totals and JSON export
		if !msg.res.CacheHit {
			m.total++
			if msg.res.OK && msg.res.Err == nil {
				m.ok++
			} else {
				m.fail++
			}
			m.allResults = append(m.allResults, msg.res)
		}
		m.refreshViewport()
		return m, m.waitForEvent()
	case statsMsg:
		m.pending = msg.s.Pending
		m.processed = msg.s.Processed
		return m, m.waitForEvent()
	case tickMsg:
		// compute requests/sec over the last tick
		delta := m.processed - m.lastProcessed
		m.lastProcessed = m.processed
		m.rps = float64(delta)
		if m.rps > m.peakRPS {
			m.peakRPS = m.rps
		}
		if m.lowRPS < 0 || m.rps < m.lowRPS {
			m.lowRPS = m.rps
		}
		return m, tickCmd()
	case crawlDoneMsg:
		m.done = true
		m.finishedAt = time.Now()
		m.results = nil
		m.writeJSON()
		m.writeMarkdown()
		return m, tea.Quit
	}

	var cmd tea.Cmd
	m.spin, cmd = m.spin.Update(msg)
	return m, cmd
}

func (m *model) refreshViewport() {
	var filtered []string
	if m.showFail {
		for _, l := range m.lines {
			if strings.HasPrefix(l, "❌") {
				filtered = append(filtered, l)
			}
		}
	} else {
		filtered = m.lines
	}
	m.vp.SetContent(strings.Join(filtered, "\n"))
	m.vp.GotoBottom()
}

func (m *model) writeJSON() {
	path := m.jsonOut
	if strings.TrimSpace(path) == "" {
		base := filepath.Base(m.rootPath)
		if strings.TrimSpace(base) == "" || base == "." || base == string(filepath.Separator) {
			base = "results"
		}
		re := regexp.MustCompile(`[^a-zA-Z0-9.-]+`)
		safe := re.ReplaceAllString(strings.ToLower(base), "_")
		path = fmt.Sprintf("%s.json", safe)
	}
	f, err := os.Create(path)
	if err != nil {
		return
	}
	defer f.Close()
	// Only write failing results
	var fails []web.Result
	for _, r := range m.allResults {
		if !(r.OK && r.Err == nil) {
			fails = append(fails, r)
		}
	}
	enc := json.NewEncoder(f)
	enc.SetIndent("", "  ")
	_ = enc.Encode(fails)
	m.jsonPath = path
}

func (m *model) writeMarkdown() {
	// Compute average RPS over the entire run
	dur := m.finishedAt.Sub(m.started)
	avg := 0.0
	if dur.Seconds() > 0 {
		avg = float64(m.processed) / dur.Seconds()
	}
	s := report.Summary{
		RootPath:        m.rootPath,
		StartedAt:       m.started,
		FinishedAt:      m.finishedAt,
		Processed:       m.processed,
		OK:              m.ok,
		Fail:            m.fail,
		AvgRPS:          avg,
		PeakRPS:         m.peakRPS,
		LowRPS:          m.lowRPS,
		JSONPath:        m.jsonPath,
		RepoBlobBaseURL: os.Getenv("SLINKY_REPO_BLOB_BASE_URL"),
	}
	// Only include failing results in the markdown report
	var failsMD []web.Result
	for _, r := range m.allResults {
		if !(r.OK && r.Err == nil) {
			failsMD = append(failsMD, r)
		}
	}
	p, err := report.WriteMarkdown(m.mdOut, failsMD, s)
	if err == nil {
		m.mdPath = p
	}
}

func (m *model) View() string {
	header := lipgloss.NewStyle().Bold(true).Render(fmt.Sprintf(" Scanning %s ", m.rootPath))
	if m.done {
		dur := time.Since(m.started)
		if !m.finishedAt.IsZero() {
			dur = m.finishedAt.Sub(m.started)
		}
		avg := 0.0
		if dur.Seconds() > 0 {
			avg = float64(m.processed) / dur.Seconds()
		}
		summary := []string{
			fmt.Sprintf("Duration: %s", dur.Truncate(time.Millisecond)),
			fmt.Sprintf("Processed: %d OK:%d Fail:%d", m.processed, m.ok, m.fail),
			fmt.Sprintf("Rates: avg %.1f/s peak %.1f/s low %.1f/s", avg, m.peakRPS, m.lowRPS),
		}
		if m.jsonPath != "" {
			summary = append(summary, fmt.Sprintf("JSON: %s", m.jsonPath))
		}
		if m.mdPath != "" {
			summary = append(summary, fmt.Sprintf("Markdown: %s", m.mdPath))
		}
		footer := lipgloss.NewStyle().Faint(true).Render("Controls: [q] quit [f] toggle fails")
		container := lipgloss.NewStyle().Padding(1)
		return container.Render(strings.Join(append([]string{header}, append(summary, footer)...), "\n"))
	}

	percent := 0.0
	totalWork := m.processed + m.pending
	if totalWork > 0 {
		percent = float64(m.processed) / float64(totalWork)
	}
	progressLine := m.prog.ViewAs(percent)
	stats := fmt.Sprintf("%s total:%d ok:%d fail:%d pending:%d processed:%d rps:%.1f/s", m.spin.View(), m.total, m.ok, m.fail, m.pending, m.processed, m.rps)
|
||||||
|
body := m.vp.View()
|
||||||
|
footer := lipgloss.NewStyle().Faint(true).Render("Controls: [q] quit [f] toggle fails")
|
||||||
|
container := lipgloss.NewStyle().Padding(1)
|
||||||
|
return container.Render(strings.Join([]string{header, stats, progressLine, "", body, footer}, "\n"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func statusEmoji(ok bool, err error) string {
|
||||||
|
if ok && err == nil {
|
||||||
|
return "✅"
|
||||||
|
}
|
||||||
|
return "❌"
|
||||||
|
}
|
||||||
|
|
||||||
|
func max(a, b int) int {
|
||||||
|
if a > b {
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
103
internal/web/checker.go
Normal file
@@ -0,0 +1,103 @@
package web

import (
	"context"
	"net"
	"net/http"
	"sort"
	"sync/atomic"
	"time"
)

// CheckURLs performs concurrent GET requests for each URL and emits Result events.
// sources maps URL -> list of file paths where it was found.
func CheckURLs(ctx context.Context, urls []string, sources map[string][]string, out chan<- Result, stats chan<- Stats, cfg Config) {
	defer close(out)

	// Build an HTTP client similar to the crawler's
	transport := &http.Transport{
		Proxy:                 http.ProxyFromEnvironment,
		DialContext:           (&net.Dialer{Timeout: 2 * time.Second, KeepAlive: 30 * time.Second}).DialContext,
		TLSHandshakeTimeout:   5 * time.Second,
		ExpectContinueTimeout: 1 * time.Second,
		MaxIdleConns:          cfg.MaxConcurrency * 2,
		MaxIdleConnsPerHost:   cfg.MaxConcurrency,
		MaxConnsPerHost:       cfg.MaxConcurrency,
		IdleConnTimeout:       30 * time.Second,
		ResponseHeaderTimeout: cfg.RequestTimeout,
	}
	client := &http.Client{Timeout: cfg.RequestTimeout, Transport: transport}

	type job struct{ url string }
	jobs := make(chan job, len(urls))
	done := make(chan struct{})

	// Seed jobs, de-duplicating the input list
	unique := make(map[string]struct{}, len(urls))
	for _, u := range urls {
		if u == "" {
			continue
		}
		if _, ok := unique[u]; ok {
			continue
		}
		unique[u] = struct{}{}
		jobs <- job{url: u}
	}
	close(jobs)

	concurrency := cfg.MaxConcurrency
	if concurrency <= 0 {
		concurrency = 8
	}
	// These counters are updated by every worker, so use atomics to avoid a data race.
	var processed int64
	pending := int64(len(unique))

	worker := func() {
		for j := range jobs {
			select {
			case <-ctx.Done():
				return
			default:
			}
			ok, status, resp, err := fetchWithMethod(ctx, client, http.MethodGet, j.url)
			if resp != nil && resp.Body != nil {
				resp.Body.Close()
			}
			// Treat 401/403 as valid links (they exist but require authorization)
			if status == http.StatusUnauthorized || status == http.StatusForbidden {
				ok = true
				err = nil
			}
			var srcs []string
			if sources != nil {
				srcs = sources[j.url]
			}
			out <- Result{URL: j.url, OK: ok, Status: status, Err: err, ErrMsg: errString(err), Method: http.MethodGet, Sources: cloneAndSort(srcs)}
			p := atomic.AddInt64(&processed, 1)
			left := atomic.AddInt64(&pending, -1)
			if stats != nil {
				select {
				case stats <- Stats{Pending: int(left), Processed: int(p)}:
				default:
				}
			}
		}
		done <- struct{}{}
	}

	for i := 0; i < concurrency; i++ {
		go worker()
	}
	for i := 0; i < concurrency; i++ {
		<-done
	}
}

func cloneAndSort(in []string) []string {
	if len(in) == 0 {
		return nil
	}
	out := append([]string(nil), in...)
	sort.Strings(out)
	return out
}
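For orientation, here is a minimal caller sketch (not part of the commit) showing how the API above is meant to be driven. It assumes the module path `slinky` from main.go; the URLs, buffer size, and Config values are illustrative.

// Hypothetical caller (illustrative only, not part of this commit).
package main

import (
	"context"
	"fmt"
	"time"

	"slinky/internal/web"
)

func main() {
	urls := []string{"https://example.com", "http://example..com"}
	out := make(chan web.Result, len(urls)) // buffered so emits never block the workers
	cfg := web.Config{MaxConcurrency: 4, RequestTimeout: 5 * time.Second}

	// sources and stats may be nil; CheckURLs nil-checks both.
	go web.CheckURLs(context.Background(), urls, nil, out, nil, cfg)

	// CheckURLs closes out when every worker has finished, so this loop terminates.
	for r := range out {
		fmt.Printf("ok=%v status=%d url=%s\n", r.OK, r.Status, r.URL)
	}
}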
53
internal/web/checker_test.go
Normal file
@@ -0,0 +1,53 @@
package web

import (
	"context"
	"testing"
	"time"
)

// This test exercises CheckURLs with a mix of known-good and invalid URLs.
// It does real network calls; keep timeouts short to avoid long CI runs.
func TestCheckURLs_Basic(t *testing.T) {
	urls := []string{
		"https://example.com",                              // should be OK
		"https://en.wikipedia.org/wiki/Main_Page",          // should be OK
		"http://example..com",                              // invalid hostname
		"https://this-domain-does-not-exist-123456789.com", // NXDOMAIN/nonexistent
	}

	sources := map[string][]string{
		"https://example.com":                               {"test files/test2.txt"},
		"https://en.wikipedia.org/wiki/Main_Page":           {"test files/test5.html"},
		"http://example..com":                               {"test files/test5.html"},
		"https://this-domain-does-not-exist-123456789.com":  {"test files/test5.html"},
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	out := make(chan Result, 16)
	cfg := Config{MaxConcurrency: 8, RequestTimeout: 5 * time.Second}

	go CheckURLs(ctx, urls, sources, out, nil, cfg)

	seen := 0
	var okCount, failCount int
	for r := range out {
		seen++
		if r.OK {
			okCount++
		} else {
			failCount++
		}
	}

	if seen != len(urls) {
		t.Fatalf("expected %d results, got %d", len(urls), seen)
	}
	if okCount == 0 {
		t.Fatalf("expected at least one OK result")
	}
	if failCount == 0 {
		t.Fatalf("expected at least one failure result")
	}
}
68
internal/web/http.go
Normal file
@@ -0,0 +1,68 @@
package web

import (
	"context"
	"errors"
	"net"
	"net/http"
	"strings"
)

const browserUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0 Safari/537.36"

func fetchWithMethod(ctx context.Context, client *http.Client, method string, raw string) (bool, int, *http.Response, error) {
	req, err := http.NewRequestWithContext(ctx, method, raw, nil)
	if err != nil {
		return false, 0, nil, err
	}
	req.Header.Set("User-Agent", browserUA)
	req.Header.Set("Accept", "*/*")
	resp, err := client.Do(req)
	if err != nil {
		if isDNSError(err) {
			return false, 404, nil, simpleError("host not found")
		}
		if isTimeout(err) {
			return false, 408, nil, simpleError("request timeout")
		}
		if isRefused(err) {
			return false, 503, nil, simpleError("connection refused")
		}
		return false, 0, nil, err
	}
	return resp.StatusCode >= 200 && resp.StatusCode < 400, resp.StatusCode, resp, nil
}

func errString(e error) string {
	if e == nil {
		return ""
	}
	return e.Error()
}

func isTimeout(err error) bool {
	if err == nil {
		return false
	}
	if errors.Is(err, context.DeadlineExceeded) {
		return true
	}
	if ne, ok := err.(net.Error); ok && ne.Timeout() {
		return true
	}
	return false
}

func isDNSError(err error) bool {
	msg := strings.ToLower(err.Error())
	return strings.Contains(msg, "no such host") || strings.Contains(msg, "server misbehaving")
}

func isRefused(err error) bool {
	msg := strings.ToLower(err.Error())
	return strings.Contains(msg, "connection refused")
}

type simpleError string

func (e simpleError) Error() string { return string(e) }
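fetchWithMethod folds transport-level failures into synthetic HTTP statuses: a DNS lookup failure becomes 404 "host not found", a timeout becomes 408, and a refused connection becomes 503. A rough sketch of how that mapping could be exercised from inside package web; the test name is an assumption, and like checker_test.go it hits real DNS.

package web

import (
	"context"
	"net/http"
	"testing"
	"time"
)

// Hypothetical companion test (not in the commit) for the DNS branch above.
func TestFetchWithMethod_DNSFailureMapsTo404(t *testing.T) {
	client := &http.Client{Timeout: 2 * time.Second}
	ok, status, resp, err := fetchWithMethod(context.Background(), client, http.MethodGet,
		"https://this-domain-does-not-exist-123456789.com")
	if resp != nil && resp.Body != nil {
		resp.Body.Close()
	}
	if ok || status != 404 || err == nil {
		t.Fatalf("expected the 404 %q mapping, got ok=%v status=%d err=%v", "host not found", ok, status, err)
	}
}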
29
internal/web/types.go
Normal file
@@ -0,0 +1,29 @@
package web

import "time"

// Result is the outcome of checking a single URL.
type Result struct {
	URL         string
	OK          bool
	Status      int
	Err         error
	ErrMsg      string
	Depth       int
	CacheHit    bool
	Method      string
	ContentType string
	Sources     []string
}

// Stats is a point-in-time progress snapshot emitted while a run is in flight.
type Stats struct {
	Pending   int
	Processed int
}

// Config holds the knobs for a scan: depth, parallelism, timeouts, and exclusions.
type Config struct {
	MaxDepth       int
	MaxConcurrency int
	RequestTimeout time.Duration
	MaxRetries429  int
	Exclude        []string
}
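To make the shape of Config concrete, here is a hypothetical literal (not part of the commit); every value is illustrative, and the comments only reflect what the code shown in this commit does with each field.

package main

import (
	"time"

	"slinky/internal/web"
)

// Illustrative only: one way a caller might assemble a Config.
func exampleConfig() web.Config {
	return web.Config{
		MaxConcurrency: 16,               // worker count; CheckURLs falls back to 8 when <= 0
		RequestTimeout: 10 * time.Second, // applied via http.Client.Timeout and ResponseHeaderTimeout
		MaxDepth:       2,                // not used by CheckURLs; consumed elsewhere in the codebase
		MaxRetries429:  1,                // not used by CheckURLs; consumed elsewhere in the codebase
		Exclude:        []string{"example.internal"}, // not used by CheckURLs; consumed elsewhere
	}
}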
9
main.go
Normal file
@@ -0,0 +1,9 @@
package main

import "slinky/cmd"

func main() {
	cmd.Execute()
}
102
test files/test nesting/test more nesting/test1.md
Normal file
@@ -0,0 +1,102 @@
# Invalid URL Test Cases

Here are some invalid URLs using various Markdown link and image syntaxes:

- [Broken Protocol](htp://invalid-url.com)
  *Reason: Misspelled protocol ("htp" instead of "http")*

- [No Domain](http://)
  *Reason: Missing domain*

- [Missing Name Before TLD](http://.com)
  *Reason: Missing domain name before TLD*

- [Underscore in Domain](http://invalid_domain)
  *Reason: Underscore in domain, not allowed in DNS hostnames*

- [Domain Starts with Hyphen](http://-example.com)
  *Reason: Domain cannot start with a hyphen*

- [Double Dot in Domain](http://example..com)
  *Reason: Double dot in domain*

- [Non-numeric Port](http://example.com:abc)
  *Reason: Invalid port (non-numeric)*

- [Unsupported Protocol](ftp://example.com)
  *Reason: Unsupported protocol (should be http/https)*

- [Space in Domain](http://example .com)
  *Reason: Space in domain*

- [Extra Slash in Protocol](http:///example.com)
  *Reason: Extra slash in protocol separator*

- 
  *Reason: Image with missing domain*

- 
  *Reason: Image with misspelled protocol*

- 
  *Reason: Image with double dot in domain*

- [](htp://invalid-url.com)
  *Reason: Image and link both with invalid URLs*

---

# Correctly Formatted but Nonexistent URLs

These URLs are syntactically correct but do not point to real sites:

- [Nonexistent Domain](https://this-domain-does-not-exist-123456789.com)

- [Fake Subdomain](https://foo.bar.baz.nonexistent-tld)

- [Unused TLD](https://example.madeuptld)

- [Long Random String](https://abcdefg1234567890.example.com)

- [Fake Image](https://notarealwebsite.com/image.png)

- 

- [](https://notarealwebsite.com/page)

- [Unregistered Domain](https://unregistered-website-xyz.com)

- [Fake Path](https://example.com/this/path/does/not/exist)

- [Nonexistent Page](https://example.com/404notfound)

---

# Valid URLs

These URLs are well-formed and point to known good sites:

- [Example Domain](https://example.com)

- [Wikipedia](https://en.wikipedia.org/wiki/Main_Page)

- [GitHub](https://github.com)

- [Google](https://www.google.com)

- [Mozilla Developer Network](https://developer.mozilla.org)

- [Go Documentation](https://go.dev/doc/)

- 

- [](https://github.com)

- [Svelte](https://svelte.dev)

- [OpenAI](https://openai.com)
11
test files/test nesting/test2.txt
Normal file
@@ -0,0 +1,11 @@
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed euismod, urna eu tincidunt consectetur, nisi nisl aliquam enim, eget facilisis quam felis id mauris.
Check out this website: https://example.com for more information.
Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Mauris non tempor quam.

Phasellus euismod, justo at dictum placerat, sapien erat ultricies eros, ac porta sem ex ac nisi.
For documentation, visit https://docs.something.org or refer to https://github.com/example/repo for the source code.
Nullam ac urna eu felis dapibus condimentum sit amet a augue.

Curabitur non nulla sit amet nisl tempus convallis quis ac lectus.
You might also find https://news.ycombinator.com interesting for tech news.
Vivamus magna justo, lacinia eget consectetur sed, convallis at tellus.
11
test files/test10.log
Normal file
@@ -0,0 +1,11 @@
2025-09-10 10:00:00 INFO Fetching https://example.com
2025-09-10 10:00:01 INFO Fetching https://github.com/example/repo
2025-09-10 10:00:02 WARN Retrying htp://bad-protocol.com
2025-09-10 10:00:03 ERROR Failed to fetch http:///example.com
2025-09-10 10:00:04 ERROR DNS error for https://this-domain-does-not-exist-123456789.com
2025-09-10 10:00:05 INFO Fetching http://example.com:8080/api/status
2025-09-10 10:00:06 ERROR Invalid host http://example..com
2025-09-10 10:00:07 ERROR Missing domain https://
2025-09-10 10:00:08 INFO Fetching https://en.wikipedia.org/wiki/Main_Page
10
test files/test11.go
Normal file
@@ -0,0 +1,10 @@
package testfiles

// Sample Go file with URLs
var url1 = "https://example.com"
var url2 = "https://en.wikipedia.org/wiki/Main_Page"
var urlBad = "http://example..com"
var urlMissing = "https://"
var urlNonexistent = "https://this-domain-does-not-exist-123456789.com"
var urlPlaceholder1 = "https://[tenant].api.identitynow.com"
var urlPlaceholder2 = "https://{tenant}.api.identitynow.com"
11
test files/test12.php
Normal file
@@ -0,0 +1,11 @@
<?php
// Sample PHP file with URLs
$url1 = "https://developer.mozilla.org";
$url2 = "https://github.com/example/repo";
$urlBad = "http://example..com";
$urlMissing = "https://";
$urlNonexistent = "https://notarealwebsite.com/image.png";
$urlPlaceholder = "https://{tenant}.api.identitynow.com";
?>
9
test files/test13.ps1
Normal file
@@ -0,0 +1,9 @@
# Sample PowerShell file with URLs
$url1 = "https://example.com"
$url2 = "https://news.ycombinator.com"
$urlBad = "http://example..com"
$urlMissing = "https://"
$urlNonexistent = "https://example.madeuptld"
$urlPlaceholder = "https://[tenant].api.identitynow.com"
9
test files/test14.py
Normal file
@@ -0,0 +1,9 @@
# Sample Python file with URLs
url1 = "https://example.com"
url2 = "https://developer.mozilla.org"
url_bad = "http://example..com"
url_missing = "https://"
url_nonexistent = "https://this-image-domain-should-not-exist-xyz.example"
url_placeholder = "https://{tenant}.api.identitynow.com/v3/transforms"
11
test files/test15.java
Normal file
@@ -0,0 +1,11 @@
public class Test15 {
    // Sample Java file with URLs
    String url1 = "https://example.com";
    String url2 = "https://svelte.dev";
    String urlBad = "http://example..com";
    String urlMissing = "https://";
    String urlNonexistent = "https://unregistered-website-xyz.com";
    String urlPlaceholder = "https://[tenant].[domain].com";
}
9
test files/test16.ts
Normal file
@@ -0,0 +1,9 @@
// Sample TypeScript file with URLs
const url1: string = "https://example.com";
const url2: string = "https://go.dev/doc/";
const urlBad: string = "http://example..com";
const urlMissing: string = "https://";
const urlNonexistent: string = "https://this-domain-does-not-exist-987654321.com";
const urlPlaceholder: string = "https://{tenant}.api.ideidentitynow.com/v3/transforms";
9
test files/test17.rb
Normal file
@@ -0,0 +1,9 @@
# Sample Ruby file with URLs
url1 = "https://example.com"
url2 = "https://github.com"
url_bad = "http://example..com"
url_missing = "https://"
url_nonexistent = "https://totally-made-up-host-tld-abcdef123.com"
url_placeholder = "https://[tenant].api.identitynow.com"
5
test files/test3.js
Normal file
@@ -0,0 +1,5 @@
const link1 = "https://example.com";
const link2 = "https://docs.something.org";
const link3 = "https://github.com/example/repo";
const link4 = "https://news.ycombinator.com";
const link5 = "http://example.com:8080";
7
test files/test4.xml
Normal file
@@ -0,0 +1,7 @@
<links>
  <link>https://example.com</link>
  <link>https://docs.something.org</link>
  <link>https://github.com/example/repo</link>
  <link>https://news.ycombinator.com</link>
  <link>http://example.com:8080</link>
</links>
25
test files/test5.html
Normal file
@@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>URL Test HTML</title>
</head>
<body>
  <h1>URL Test Links</h1>
  <a href="https://example.com">Example</a>
  <a href="http://example.com:8080/path?query=1">With Port</a>
  <a href="htp://bad-protocol.com">Bad Protocol</a>
  <a href="https://">Missing Domain</a>
  <a href="http:///example.com">Extra Slash</a>
  <a href="http://example..com">Double Dot</a>
  <a href="https://this-domain-does-not-exist-123456789.com">Nonexistent Domain</a>
  <a href="https://en.wikipedia.org/wiki/Main_Page">Wikipedia</a>

  <h2>Images</h2>
  <img src="https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png" alt="Valid Image">
  <img src="http://example..com/pic.jpg" alt="Double Dot Image">
  <img src="htp://invalid-url.com/image.png" alt="Bad Protocol Image">
</body>
</html>
21
test files/test6.json
Normal file
@@ -0,0 +1,21 @@
{
  "valid": [
    "https://example.com",
    "https://github.com",
    "http://example.com:8080/path",
    "https://en.wikipedia.org/wiki/Main_Page"
  ],
  "invalid": [
    "htp://invalid-url.com",
    "http:///example.com",
    "http://example..com",
    "https://"
  ],
  "nonexistent": [
    "https://this-domain-does-not-exist-123456789.com",
    "https://notarealwebsite.com/image.png",
    "https://example.madeuptld"
  ]
}
15
test files/test7.yaml
Normal file
@@ -0,0 +1,15 @@
valid:
  - https://example.com
  - https://docs.something.org
  - http://example.com:8080/path
invalid:
  - htp://bad-protocol.com
  - http:///too-many-slashes.com
  - http://example..com
  - https://
nonexistent:
  - https://notarealwebsite.com/page
  - https://unregistered-website-xyz.com
  - https://example.madeuptld
13
test files/test8.csv
Normal file
@@ -0,0 +1,13 @@
id,name,url
1,Example,https://example.com
2,Docs,https://docs.something.org
3,Repo,https://github.com/example/repo
4,HN,https://news.ycombinator.com
5,WithPort,http://example.com:8080
6,BadProtocol,htp://invalid-url.com
7,ExtraSlash,http:///example.com
8,DoubleDot,http://example..com
9,MissingDomain,https://
10,Nonexistent,https://this-domain-does-not-exist-123456789.com
12
test files/test9.ini
Normal file
@@ -0,0 +1,12 @@
[links]
good1 = https://example.com
good2 = https://en.wikipedia.org/wiki/Main_Page
good3 = http://example.com:8080/path
bad1 = htp://bad-protocol.com
bad2 = http:///example.com
bad3 = http://example..com
missing = https://
nonexistent1 = https://notarealwebsite.com/image.png
nonexistent2 = https://example.madeuptld