Mirror of https://github.com/LukeHagar/slinky.git (synced 2025-12-06 04:21:20 +00:00)

Commit: pushing the rest
.github/actions/slinky/Dockerfile (vendored, new file, 19 lines)
@@ -0,0 +1,19 @@
FROM golang:1.24 AS build
WORKDIR /app
# Expect the repository root as build context when building this image
COPY go.mod go.sum ./
RUN go mod download
COPY . .
RUN CGO_ENABLED=0 go build -o /usr/local/bin/slinky ./

FROM alpine:3.20
RUN adduser -D -u 10001 appuser \
    && apk add --no-cache curl jq ca-certificates
COPY --from=build /usr/local/bin/slinky /usr/local/bin/slinky
COPY .github/actions/slinky/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
USER appuser
ENTRYPOINT ["/entrypoint.sh"]
.github/actions/slinky/action.yml (vendored, new file, 72 lines)
@@ -0,0 +1,72 @@
name: "Slinky Link Checker"
description: "Slink through your repository looking for dead links"
author: "LukeHagar"
branding:
  icon: "link"
  color: "blue"

inputs:
  path:
    description: "Root path to scan"
    required: false
    default: "."
  patterns:
    description: "Comma-separated doublestar patterns, e.g. docs/**/*.md,**/*.go; defaults to **/*"
    required: false
    default: "**/*"
  concurrency:
    description: "Maximum concurrent requests"
    required: false
    default: "16"
  timeout:
    description: "HTTP timeout in seconds"
    required: false
    default: "10"
  json-out:
    description: "Optional path to write JSON results"
    required: false
    default: "results.json"
  md-out:
    description: "Optional path to write the Markdown report for the PR comment"
    required: false
    default: "results.md"
  repo-blob-base:
    description: "Override the GitHub blob base URL (https://github.com/<owner>/<repo>/blob/<sha>)"
    required: false
    default: ""
  fail-on-failures:
    description: "Fail the job if any links fail"
    required: false
    default: "true"
  comment-pr:
    description: "If running on a PR, post a comment with the report"
    required: false
    default: "true"
  step-summary:
    description: "Append the report to the GitHub Step Summary"
    required: false
    default: "true"

runs:
  using: "docker"
  image: "Dockerfile"
  args: []
  env:
    # Expression string literals must use single quotes; double quotes are invalid here.
    INPUT_PATH: ${{ inputs.path }}
    INPUT_PATTERNS: ${{ inputs.patterns }}
    INPUT_CONCURRENCY: ${{ inputs.concurrency }}
    INPUT_TIMEOUT: ${{ inputs.timeout }}
    INPUT_JSON_OUT: ${{ inputs['json-out'] }}
    INPUT_MD_OUT: ${{ inputs['md-out'] }}
    INPUT_REPO_BLOB_BASE: ${{ inputs['repo-blob-base'] }}
    INPUT_FAIL_ON_FAILURES: ${{ inputs['fail-on-failures'] }}
    INPUT_COMMENT_PR: ${{ inputs['comment-pr'] }}
    INPUT_STEP_SUMMARY: ${{ inputs['step-summary'] }}

outputs:
  json-path:
    description: "Path to the JSON results file"
  md-path:
    description: "Path to the Markdown report file"
.github/actions/slinky/entrypoint.sh (vendored, new file, 91 lines)
@@ -0,0 +1,91 @@
#!/bin/sh
set -eu

PATH_ARG="${INPUT_PATH:-.}"
PATTERNS_ARG="${INPUT_PATTERNS:-**/*}"
CONCURRENCY_ARG="${INPUT_CONCURRENCY:-16}"
TIMEOUT_ARG="${INPUT_TIMEOUT:-10}"
JSON_OUT_ARG="${INPUT_JSON_OUT:-results.json}"
MD_OUT_ARG="${INPUT_MD_OUT:-results.md}"
REPO_BLOB_BASE_ARG="${INPUT_REPO_BLOB_BASE:-}"
FAIL_ON_FAILURES_ARG="${INPUT_FAIL_ON_FAILURES:-true}"
COMMENT_PR_ARG="${INPUT_COMMENT_PR:-true}"
STEP_SUMMARY_ARG="${INPUT_STEP_SUMMARY:-true}"

ARGS="check \"${PATH_ARG}\" --concurrency ${CONCURRENCY_ARG} --timeout ${TIMEOUT_ARG}"
# Boolean flags must use the attached --flag=value form; pflag does not consume
# a separate value word, which would leave a stray positional argument.
if [ "${FAIL_ON_FAILURES_ARG}" = "true" ]; then
  ARGS="$ARGS --fail-on-failures=true"
else
  ARGS="$ARGS --fail-on-failures=false"
fi
if [ -n "${PATTERNS_ARG}" ]; then
  # Normalize by removing spaces after commas (plain POSIX sed; \s is not portable)
  NORM_PATTERNS=$(printf "%s" "${PATTERNS_ARG}" | sed 's/, */,/g')
  IFS=','
  set -- $NORM_PATTERNS
  unset IFS
  for pat in "$@"; do
    ARGS="$ARGS --patterns \"$pat\""
  done
fi
if [ -n "${JSON_OUT_ARG}" ]; then
  ARGS="$ARGS --json-out \"${JSON_OUT_ARG}\""
fi
if [ -n "${MD_OUT_ARG}" ]; then
  ARGS="$ARGS --md-out \"${MD_OUT_ARG}\""
fi

# Compute the GitHub blob base URL for file links used in the Markdown report
if [ -n "${REPO_BLOB_BASE_ARG}" ]; then
  export SLINKY_REPO_BLOB_BASE_URL="${REPO_BLOB_BASE_ARG}"
elif [ -n "${GITHUB_REPOSITORY:-}" ]; then
  COMMIT_SHA="${GITHUB_SHA:-}"
  if [ -n "${GITHUB_EVENT_PATH:-}" ] && command -v jq >/dev/null 2>&1; then
    PR_HEAD_SHA="$(jq -r '.pull_request.head.sha // empty' "$GITHUB_EVENT_PATH" || true)"
    if [ -n "$PR_HEAD_SHA" ]; then
      COMMIT_SHA="$PR_HEAD_SHA"
    fi
  fi
  if [ -n "$COMMIT_SHA" ]; then
    export SLINKY_REPO_BLOB_BASE_URL="https://github.com/${GITHUB_REPOSITORY}/blob/${COMMIT_SHA}"
  fi
fi

# Capture the exit code instead of letting set -e abort here, so the report
# is still exposed, summarized, and commented even when links fail.
SLINKY_EXIT=0
eval slinky ${ARGS} || SLINKY_EXIT=$?

# Expose outputs
if [ -n "${GITHUB_OUTPUT:-}" ]; then
  if [ -n "${JSON_OUT_ARG}" ]; then
    echo "json-path=${JSON_OUT_ARG}" >> "$GITHUB_OUTPUT"
  fi
  if [ -n "${MD_OUT_ARG}" ]; then
    echo "md-path=${MD_OUT_ARG}" >> "$GITHUB_OUTPUT"
  fi
fi

# Append the report to the job summary if requested
if [ "${STEP_SUMMARY_ARG}" = "true" ] && [ -n "${GITHUB_STEP_SUMMARY:-}" ] && [ -n "${MD_OUT_ARG}" ] && [ -f "${MD_OUT_ARG}" ]; then
  cat "${MD_OUT_ARG}" >> "$GITHUB_STEP_SUMMARY"
fi

# Post a PR comment if this is a PR and commenting is enabled
if [ "${COMMENT_PR_ARG}" = "true" ] && [ -n "${MD_OUT_ARG}" ] && [ -f "${MD_OUT_ARG}" ]; then
  PR_NUMBER=""
  if [ -n "${GITHUB_EVENT_PATH:-}" ] && command -v jq >/dev/null 2>&1; then
    PR_NUMBER="$(jq -r '.pull_request.number // empty' "$GITHUB_EVENT_PATH" || true)"
  fi
  if [ -n "${PR_NUMBER}" ] && [ -n "${GITHUB_REPOSITORY:-}" ] && [ -n "${GITHUB_TOKEN:-}" ]; then
    # jq -Rs JSON-encodes the whole report file as a single string for the comment body
    curl -sS -H "Authorization: Bearer ${GITHUB_TOKEN}" \
      -H "Accept: application/vnd.github+json" \
      -H "X-GitHub-Api-Version: 2022-11-28" \
      -X POST "https://api.github.com/repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \
      -d "{\"body\": $(jq -Rs . "${MD_OUT_ARG}")}" >/dev/null || true
  fi
fi

exit "${SLINKY_EXIT}"
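Aside: the entrypoint uses the attached --fail-on-failures=true form because pflag, which cobra uses for flag parsing, does not let a boolean flag consume a separate value word. A minimal sketch of that behavior; the flag set name and argument list are illustrative:

package main

import (
	"fmt"

	"github.com/spf13/pflag"
)

func main() {
	fs := pflag.NewFlagSet("demo", pflag.ContinueOnError)
	fail := fs.Bool("fail-on-failures", true, "exit non-zero on failures")
	// "--fail-on-failures false" would leave "false" behind as a positional
	// argument and trip cobra's MaximumNArgs(1) check; the attached form parses
	// as intended.
	_ = fs.Parse([]string{".", "--fail-on-failures=false"})
	fmt.Println(*fail)     // false
	fmt.Println(fs.Args()) // [.]
}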
.github/workflows/ci.yml (vendored, new file, 70 lines)
@@ -0,0 +1,70 @@
name: CI

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.24.x'

      - name: Cache go build
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/go-build
            ~/go/pkg/mod
          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
          restore-keys: |
            ${{ runner.os }}-go-

      - name: Build
        run: go build ./...

      - name: Run unit tests
        run: go test ./...

  action-self-test:
    runs-on: ubuntu-latest
    needs: test
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Build action image
        run: |
          docker build -t slinky-action -f .github/actions/slinky/Dockerfile .

      - name: Run action container (expect nonzero if failures)
        id: run_action
        run: |
          set -e
          # --entrypoint is required: without it, "sh -lc ..." would be passed as
          # arguments to the image's /entrypoint.sh ENTRYPOINT instead of a shell.
          docker run --rm -v "$PWD:/repo" -w /repo -e GITHUB_STEP_SUMMARY="/tmp/summary.md" --entrypoint sh slinky-action -lc \
            'INPUT_PATH=. INPUT_PATTERNS="test files/**" INPUT_CONCURRENCY=8 INPUT_TIMEOUT=5 INPUT_JSON_OUT=results.json INPUT_MD_OUT=results.md INPUT_FAIL_ON_FAILURES=true INPUT_COMMENT_PR=false INPUT_STEP_SUMMARY=true /entrypoint.sh'

      - name: Upload results.json
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: results
          path: results.json

      - name: Upload results.md
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: results-md
          path: results.md
.github/workflows/example-slinky.yml (vendored, new file, 40 lines)
@@ -0,0 +1,40 @@
name: Slinky Example

on:
  pull_request:
    branches: [ main ]
  workflow_dispatch: {}

jobs:
  slinky:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Run Slinky link checker
        uses: ./.github/actions/slinky
        env:
          # Needed by the entrypoint to post the PR comment
          GITHUB_TOKEN: ${{ github.token }}
        with:
          path: .
          patterns: "**/*"
          concurrency: "16"
          timeout: "10"
          json-out: results.json
          md-out: results.md
          fail-on-failures: "true"
          comment-pr: "true"
          step-summary: "true"

      - name: Upload results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: slinky-results
          path: |
            results.json
            results.md
Makefile (new file, 33 lines)
@@ -0,0 +1,33 @@
GO ?= go
PKG := ./...
BIN_DIR ?= bin
BINARY ?= slinky
BIN := $(BIN_DIR)/$(BINARY)

.PHONY: build test clean check action-image action-run

build: $(BIN)

$(BIN):
	@mkdir -p $(BIN_DIR)
	CGO_ENABLED=0 $(GO) build -o $(BIN) ./

test:
	$(GO) test -v $(PKG)

# Convenience: run the headless check against local test files
check: build
	./$(BIN) check . --glob "test files/**" --json-out results.json --fail-on-failures=true

# Build the Docker-based GitHub Action locally
action-image:
	docker build -t slinky-action -f .github/actions/slinky/Dockerfile .

# Run the action container against the current repo (override the image
# ENTRYPOINT so the INPUT_* assignments run in a shell)
action-run: action-image
	docker run --rm -v "$(PWD):/repo" -w /repo -e GITHUB_STEP_SUMMARY="/tmp/summary.md" --entrypoint sh slinky-action -lc 'INPUT_PATH=. INPUT_PATTERNS="test files/**" INPUT_CONCURRENCY=8 INPUT_TIMEOUT=5 INPUT_JSON_OUT=results.json INPUT_MD_OUT=results.md INPUT_FAIL_ON_FAILURES=true INPUT_COMMENT_PR=false INPUT_STEP_SUMMARY=true /entrypoint.sh'

clean:
	rm -rf $(BIN_DIR) results.json results.md
cmd/check.go (new file, 165 lines)
@@ -0,0 +1,165 @@
package cmd

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"sort"
	"strings"
	"time"

	"github.com/spf13/cobra"

	"slinky/internal/fsurls"
	"slinky/internal/report"
	"slinky/internal/web"
)

// SerializableResult mirrors web.Result but omits the error field for JSON.
type SerializableResult struct {
	URL         string   `json:"url"`
	OK          bool     `json:"ok"`
	Status      int      `json:"status"`
	ErrMsg      string   `json:"error"`
	Method      string   `json:"method"`
	ContentType string   `json:"contentType"`
	Sources     []string `json:"sources"`
}

func init() {
	checkCmd := &cobra.Command{
		Use:   "check [path]",
		Short: "Scan a directory for URLs and validate them (headless)",
		Args:  cobra.MaximumNArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			path := "."
			if len(args) == 1 {
				path = args[0]
			}

			var gl []string
			if len(patterns) > 0 {
				gl = append(gl, patterns...)
			} else if globPat != "" {
				gl = strings.Split(globPat, ",")
			} else {
				gl = []string{"**/*"}
			}

			timeout := time.Duration(timeoutSeconds) * time.Second
			cfg := web.Config{MaxConcurrency: maxConcurrency, RequestTimeout: timeout}

			// Collect URLs
			urlToFiles, err := fsurls.CollectURLs(path, gl)
			if err != nil {
				return err
			}
			var urls []string
			for u := range urlToFiles {
				urls = append(urls, u)
			}
			sort.Strings(urls)

			// If no URLs were found, exit early
			if len(urls) == 0 {
				fmt.Println("No URLs found.")
				return nil
			}

			// Run the checks in a goroutine; CheckURLs closes the results channel
			// when done. Running it synchronously would deadlock once the buffered
			// channel fills up.
			startedAt := time.Now()
			ctx, cancel := context.WithCancel(context.Background())
			defer cancel()
			results := make(chan web.Result, 256)
			go web.CheckURLs(ctx, urls, urlToFiles, results, nil, cfg)

			var total, okCount, failCount int
			var failures []SerializableResult
			var failedResults []web.Result

			for r := range results {
				total++
				if r.OK {
					okCount++
				} else {
					failCount++
				}
				if jsonOut != "" && !r.OK {
					failures = append(failures, SerializableResult{
						URL:         r.URL,
						OK:          r.OK,
						Status:      r.Status,
						ErrMsg:      r.ErrMsg,
						Method:      r.Method,
						ContentType: r.ContentType,
						Sources:     r.Sources,
					})
				}
				if !r.OK {
					failedResults = append(failedResults, r)
				}
			}

			// Write JSON if requested (failures only)
			if jsonOut != "" {
				f, ferr := os.Create(jsonOut)
				if ferr != nil {
					return ferr
				}
				enc := json.NewEncoder(f)
				enc.SetIndent("", "  ")
				if err := enc.Encode(failures); err != nil {
					_ = f.Close()
					return err
				}
				_ = f.Close()
			}

			// Optionally write a Markdown report for PR comment consumption
			if mdOut != "" {
				base := repoBlobBase
				if strings.TrimSpace(base) == "" {
					base = os.Getenv("SLINKY_REPO_BLOB_BASE_URL")
				}
				summary := report.Summary{
					RootPath:        path,
					StartedAt:       startedAt,
					FinishedAt:      time.Now(),
					Processed:       total,
					OK:              okCount,
					Fail:            failCount,
					JSONPath:        jsonOut,
					RepoBlobBaseURL: base,
				}
				if _, err := report.WriteMarkdown(mdOut, failedResults, summary); err != nil {
					return err
				}
			}

			fmt.Printf("Checked %d URLs: %d OK, %d failed\n", total, okCount, failCount)
			if failOnFailures && failCount > 0 {
				return fmt.Errorf("%d links failed", failCount)
			}
			return nil
		},
	}

	checkCmd.Flags().StringVar(&globPat, "glob", "", "comma-separated glob patterns for files (doublestar); empty = all files")
	checkCmd.Flags().StringSliceVar(&patterns, "patterns", nil, "file match patterns (doublestar), e.g. docs/**/*.md,**/*.go; defaults to **/*")
	checkCmd.Flags().IntVar(&maxConcurrency, "concurrency", 16, "maximum concurrent requests")
	checkCmd.Flags().StringVar(&jsonOut, "json-out", "", "path to write JSON results for failing links (array)")
	checkCmd.Flags().StringVar(&mdOut, "md-out", "", "path to write a Markdown report for the PR comment")
	checkCmd.Flags().StringVar(&repoBlobBase, "repo-blob-base", "", "override the GitHub blob base URL (e.g. https://github.com/owner/repo/blob/<sha>)")
	checkCmd.Flags().IntVar(&timeoutSeconds, "timeout", 10, "HTTP request timeout in seconds")
	checkCmd.Flags().BoolVar(&failOnFailures, "fail-on-failures", true, "exit non-zero if any links fail")

	rootCmd.AddCommand(checkCmd)
}

var (
	timeoutSeconds int
	failOnFailures bool
	patterns       []string
	repoBlobBase   string
)
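For reference, a sketch of what a single failing entry in the results.json array looks like once encoded. The struct mirrors SerializableResult above so the snippet is self-contained; the URL and error text are illustrative:

package main

import (
	"encoding/json"
	"fmt"
)

// Local copy of cmd.SerializableResult for illustration only.
type serializableResult struct {
	URL         string   `json:"url"`
	OK          bool     `json:"ok"`
	Status      int      `json:"status"`
	ErrMsg      string   `json:"error"`
	Method      string   `json:"method"`
	ContentType string   `json:"contentType"`
	Sources     []string `json:"sources"`
}

func main() {
	r := serializableResult{
		URL:     "https://this-domain-does-not-exist-123456789.com",
		Method:  "GET",
		ErrMsg:  "no such host",
		Sources: []string{"test files/test5.html"},
	}
	b, _ := json.MarshalIndent(r, "", "  ")
	fmt.Println(string(b)) // one element of the failures array
}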
cmd/root.go (new file, 23 lines)
@@ -0,0 +1,23 @@
package cmd

import (
	"fmt"
	"os"

	"github.com/spf13/cobra"
)

var rootCmd = &cobra.Command{
	Use:   "slinky",
	Short: "Link checker for repos/directories and webpages (TUI)",
	Long:  "Slinky scans a directory/repo for URLs in files or crawls a URL, then validates links concurrently in a TUI.",
}

func Execute() {
	if err := rootCmd.Execute(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
cmd/run.go (new file, 49 lines)
@@ -0,0 +1,49 @@
package cmd

import (
	"strings"

	"github.com/spf13/cobra"

	"slinky/internal/tui"
	"slinky/internal/web"
)

func init() {
	runCmd := &cobra.Command{
		Use:   "run [path]",
		Short: "Scan a directory/repo for URLs in files and validate them (TUI)",
		Args:  cobra.MaximumNArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			path := "."
			if len(args) == 1 {
				path = args[0]
			}
			cfg := web.Config{MaxConcurrency: maxConcurrency}
			var gl []string
			if len(patterns) > 0 {
				gl = append(gl, patterns...)
			} else if globPat != "" {
				gl = strings.Split(globPat, ",")
			} else {
				gl = []string{"**/*"}
			}
			return tui.Run(path, gl, cfg, jsonOut, mdOut)
		},
	}

	runCmd.Flags().StringVar(&globPat, "glob", "", "comma-separated glob patterns for files (doublestar); empty = all files")
	runCmd.Flags().StringSliceVar(&patterns, "patterns", nil, "file match patterns (doublestar), e.g. docs/**/*.md,**/*.go; defaults to **/*")
	runCmd.Flags().IntVar(&maxConcurrency, "concurrency", 16, "maximum concurrent requests")
	runCmd.Flags().StringVar(&jsonOut, "json-out", "", "path to write JSON results for failing links (array)")
	runCmd.Flags().StringVar(&mdOut, "md-out", "", "path to write a Markdown report for the PR comment")
	runCmd.Flags().StringVar(&repoBlobBase, "repo-blob-base", "", "override the GitHub blob base URL (e.g. https://github.com/owner/repo/blob/<sha>)")
	rootCmd.AddCommand(runCmd)
}

var (
	maxConcurrency int
	jsonOut        string
	globPat        string
	mdOut          string
)
go.mod (new file, 37 lines)
@@ -0,0 +1,37 @@
module slinky

go 1.24.0

toolchain go1.24.7

require (
	github.com/bmatcuk/doublestar/v4 v4.6.1
	github.com/charmbracelet/bubbles v0.21.0
	github.com/charmbracelet/bubbletea v1.3.8
	github.com/charmbracelet/lipgloss v1.1.0
	github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
	github.com/spf13/cobra v1.10.1
)

require (
	github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
	github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect
	github.com/charmbracelet/harmonica v0.2.0 // indirect
	github.com/charmbracelet/x/ansi v0.10.1 // indirect
	github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd // indirect
	github.com/charmbracelet/x/term v0.2.1 // indirect
	github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
	github.com/inconshreveable/mousetrap v1.1.0 // indirect
	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
	github.com/mattn/go-isatty v0.0.20 // indirect
	github.com/mattn/go-localereader v0.0.1 // indirect
	github.com/mattn/go-runewidth v0.0.16 // indirect
	github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
	github.com/muesli/cancelreader v0.2.2 // indirect
	github.com/muesli/termenv v0.16.0 // indirect
	github.com/rivo/uniseg v0.4.7 // indirect
	github.com/spf13/pflag v1.0.9 // indirect
	github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
	golang.org/x/sys v0.36.0 // indirect
	golang.org/x/text v0.24.0 // indirect
)
go.sum (new file, 70 lines)
@@ -0,0 +1,70 @@
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwNy7PA4I=
github.com/bmatcuk/doublestar/v4 v4.6.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc=
github.com/charmbracelet/bubbles v0.21.0 h1:9TdC97SdRVg/1aaXNVWfFH3nnLAwOXr8Fn6u6mfQdFs=
github.com/charmbracelet/bubbles v0.21.0/go.mod h1:HF+v6QUR4HkEpz62dx7ym2xc71/KBHg+zKwJtMw+qtg=
github.com/charmbracelet/bubbletea v1.3.8 h1:DJlh6UUPhobzomqCtnLJRmhBSxwUJoPPi6iCToUDr4g=
github.com/charmbracelet/bubbletea v1.3.8/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4=
github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc h1:4pZI35227imm7yK2bGPcfpFEmuY1gc2YSTShr4iJBfs=
github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc/go.mod h1:X4/0JoqgTIPSFcRA/P6INZzIuyqdFY5rm8tb41s9okk=
github.com/charmbracelet/harmonica v0.2.0 h1:8NxJWRWg/bzKqqEaaeFNipOu77YR5t8aSwG4pgaUBiQ=
github.com/charmbracelet/harmonica v0.2.0/go.mod h1:KSri/1RMQOZLbw7AHqgcBycp8pgJnQMYYT8QZRqZ1Ao=
github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
github.com/charmbracelet/x/ansi v0.10.1 h1:rL3Koar5XvX0pHGfovN03f5cxLbCF2YvLeyz7D2jVDQ=
github.com/charmbracelet/x/ansi v0.10.1/go.mod h1:3RQDQ6lDnROptfpWuUVIUG64bD2g2BgntdxH0Ya5TeE=
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd h1:vy0GVL4jeHEwG5YOXDmi86oYw2yuYUGqz6a8sLwg0X8=
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd/go.mod h1:xe0nKWGd3eJgtqZRaN9RjMtK7xUYchjzPr7q6kcvCCs=
github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ=
github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg=
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI=
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs=
github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s=
github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0=
github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E=
golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
internal/fsurls/fsurls.go (new file, 287 lines)
@@ -0,0 +1,287 @@
package fsurls

import (
	"bufio"
	"io"
	"net/url"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/bmatcuk/doublestar/v4"
	ignore "github.com/sabhiram/go-gitignore"
)

// URL patterns from various contexts
var bareURLRegex = regexp.MustCompile(`(?i)\bhttps?://[^\s<>()\[\]{}"']+`)
var mdLinkRegex = regexp.MustCompile(`(?is)!?\[[^\]]*\]\((.*?)\)`) // captures (url)
var angleURLRegex = regexp.MustCompile(`(?i)<(https?://[^>\s]+)>`)
var quotedURLRegex = regexp.MustCompile(`(?i)"(https?://[^"\s]+)"|'(https?://[^'\s]+)'`)
var htmlHrefRegex = regexp.MustCompile(`(?i)href\s*=\s*"([^"]+)"|href\s*=\s*'([^']+)'`)
var htmlSrcRegex = regexp.MustCompile(`(?i)src\s*=\s*"([^"]+)"|src\s*=\s*'([^']+)'`)

// Strict hostname validation: labels of 1-63 alphanumeric/hyphen characters that
// neither start nor end with a hyphen, with at least one dot between labels.
var hostnameRegex = regexp.MustCompile(`^(?i)([a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)+$`)

// CollectURLs walks the directory tree rooted at rootPath and collects URLs found in
// text-based files matching any of the provided glob patterns (doublestar ** supported).
// If globs is empty, all files are considered. Respects .gitignore if present.
// Returns a map from URL -> sorted unique list of file paths that contained it.
func CollectURLs(rootPath string, globs []string) (map[string][]string, error) {
	if strings.TrimSpace(rootPath) == "" {
		rootPath = "."
	}
	cleanRoot := filepath.Clean(rootPath)

	st, _ := os.Stat(cleanRoot)
	isFileRoot := st != nil && !st.IsDir()
	var ign *ignore.GitIgnore
	if !isFileRoot {
		ign = loadGitIgnore(cleanRoot)
	}

	var patterns []string
	for _, g := range globs {
		g = strings.TrimSpace(g)
		if g == "" {
			continue
		}
		patterns = append(patterns, g)
	}

	shouldInclude := func(rel string) bool {
		if len(patterns) == 0 {
			return true
		}
		for _, p := range patterns {
			ok, _ := doublestar.PathMatch(p, rel)
			if ok {
				return true
			}
		}
		return false
	}

	urlToFiles := make(map[string]map[string]struct{})

	// 2 MiB max file size to avoid huge/binary files
	const maxSize = 2 * 1024 * 1024

	// Walk the filesystem
	walkFn := func(path string, d os.DirEntry, err error) error {
		if err != nil {
			return nil
		}
		rel, rerr := filepath.Rel(cleanRoot, path)
		if rerr != nil {
			rel = path
		}
		rel = filepath.ToSlash(rel)
		if d.IsDir() {
			base := filepath.Base(path)
			if base == ".git" {
				return filepath.SkipDir
			}
			if ign != nil && ign.MatchesPath(rel) {
				return filepath.SkipDir
			}
			return nil
		}
		if ign != nil && ign.MatchesPath(rel) {
			return nil
		}
		info, ierr := d.Info()
		if ierr != nil {
			return nil
		}
		if info.Size() > maxSize {
			return nil
		}
		if isFileRoot && rel == "." {
			rel = filepath.ToSlash(filepath.Base(path))
		}
		if !shouldInclude(rel) {
			return nil
		}

		f, ferr := os.Open(path)
		if ferr != nil {
			return nil
		}
		defer f.Close()
		br := bufio.NewReader(f)
		// Read up to maxSize bytes
		var b strings.Builder
		read := int64(0)
		for {
			chunk, cerr := br.ReadString('\n')
			b.WriteString(chunk)
			read += int64(len(chunk))
			if cerr == io.EOF || read > maxSize {
				break
			}
			if cerr != nil {
				break
			}
		}
		content := b.String()
		// Skip if likely binary (NUL byte present)
		if strings.IndexByte(content, '\x00') >= 0 {
			return nil
		}

		candidates := extractCandidates(content)
		if len(candidates) == 0 {
			return nil
		}
		for _, raw := range candidates {
			u := sanitizeURLToken(raw)
			if u == "" {
				continue
			}
			fileSet, ok := urlToFiles[u]
			if !ok {
				fileSet = make(map[string]struct{})
				urlToFiles[u] = fileSet
			}
			fileSet[rel] = struct{}{}
		}
		return nil
	}

	_ = filepath.WalkDir(cleanRoot, walkFn)

	// Convert to sorted slices
	result := make(map[string][]string, len(urlToFiles))
	for u, files := range urlToFiles {
		var list []string
		for fp := range files {
			list = append(list, fp)
		}
		sort.Strings(list)
		result[u] = list
	}
	return result, nil
}

func sanitizeURLToken(s string) string {
	s = strings.TrimSpace(s)
	// Strip surrounding angle brackets or quotes
	if strings.HasPrefix(s, "<") && strings.HasSuffix(s, ">") {
		s = strings.TrimSuffix(strings.TrimPrefix(s, "<"), ">")
	}
	if (strings.HasPrefix(s, "\"") && strings.HasSuffix(s, "\"")) || (strings.HasPrefix(s, "'") && strings.HasSuffix(s, "'")) {
		s = strings.TrimSuffix(strings.TrimPrefix(s, string(s[0])), string(s[0]))
	}
	// Trim trailing punctuation and balance parentheses
	s = trimTrailingDelimiters(s)
	low := strings.ToLower(s)
	if !(strings.HasPrefix(low, "http://") || strings.HasPrefix(low, "https://")) {
		return ""
	}
	// Parse and validate the hostname strictly
	u, err := url.Parse(s)
	if err != nil || u == nil {
		return ""
	}
	host := u.Hostname()
	if host == "" {
		return ""
	}
	// Reject placeholders like [tenant] or {tenant}
	if strings.ContainsAny(host, "[]{}") {
		return ""
	}
	// Must match strict hostname rules
	if !hostnameRegex.MatchString(host) {
		return ""
	}
	return s
}

func trimTrailingDelimiters(s string) string {
	for {
		if s == "" {
			return s
		}
		last := s[len(s)-1]
		// ')' is excluded from this set so the parenthesis-balancing check below
		// can run; including it would strip every trailing ')' unconditionally and
		// break URLs such as Wikipedia pages with parentheses in the path.
		if strings.ContainsRune(".,;:!?]'\"}", rune(last)) {
			s = s[:len(s)-1]
			continue
		}
		if last == ')' {
			open := strings.Count(s, "(")
			closed := strings.Count(s, ")")
			if closed > open {
				s = s[:len(s)-1]
				continue
			}
		}
		return s
	}
}

func extractCandidates(content string) []string {
	var out []string
	for _, m := range mdLinkRegex.FindAllStringSubmatch(content, -1) {
		if len(m) > 1 {
			out = append(out, m[1])
		}
	}
	for _, m := range htmlHrefRegex.FindAllStringSubmatch(content, -1) {
		if len(m) > 2 {
			if m[1] != "" {
				out = append(out, m[1])
			} else if m[2] != "" {
				out = append(out, m[2])
			}
		}
	}
	for _, m := range htmlSrcRegex.FindAllStringSubmatch(content, -1) {
		if len(m) > 2 {
			if m[1] != "" {
				out = append(out, m[1])
			} else if m[2] != "" {
				out = append(out, m[2])
			}
		}
	}
	for _, m := range angleURLRegex.FindAllStringSubmatch(content, -1) {
		if len(m) > 1 {
			out = append(out, m[1])
		}
	}
	for _, m := range quotedURLRegex.FindAllStringSubmatch(content, -1) {
		if len(m) > 2 {
			if m[1] != "" {
				out = append(out, m[1])
			} else if m[2] != "" {
				out = append(out, m[2])
			}
		}
	}
	out = append(out, bareURLRegex.FindAllString(content, -1)...)
	return out
}

func loadGitIgnore(root string) *ignore.GitIgnore {
	var lines []string
	gi := filepath.Join(root, ".gitignore")
	if b, err := os.ReadFile(gi); err == nil {
		lines = append(lines, strings.Split(string(b), "\n")...)
	}
	ge := filepath.Join(root, ".git", "info", "exclude")
	if b, err := os.ReadFile(ge); err == nil {
		lines = append(lines, strings.Split(string(b), "\n")...)
	}
	if len(lines) == 0 {
		return nil
	}
	return ignore.CompileIgnoreLines(lines...)
}
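A minimal sketch of calling CollectURLs directly; since the package is internal it only compiles from within this module, and the root path and patterns below are illustrative:

package main

import (
	"fmt"

	"slinky/internal/fsurls"
)

func main() {
	// Returns URL -> sorted, deduplicated list of files that referenced it.
	urlToFiles, err := fsurls.CollectURLs(".", []string{"docs/**/*.md", "**/*.go"})
	if err != nil {
		panic(err)
	}
	for u, files := range urlToFiles {
		fmt.Printf("%s (%d files): %v\n", u, len(files), files)
	}
}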
internal/fsurls/fsurls_test.go (new file, 43 lines)
@@ -0,0 +1,43 @@
package fsurls

import (
	"path/filepath"
	"strings"
	"testing"
)

func TestCollectURLs_FromTestFiles(t *testing.T) {
	root := filepath.Join("..", "..", "test files")

	urls, err := CollectURLs(root, []string{"**/*"})
	if err != nil {
		t.Fatalf("CollectURLs error: %v", err)
	}

	// Spot-check presence of some known URLs.
	// (http://example..com is deliberately absent: strict hostname validation
	// rejects consecutive dots, so it is exercised directly in checker_test.go.)
	mustContain := []string{
		"https://example.com",
		"https://en.wikipedia.org/wiki/Main_Page",
		"http://example.com:8080",
		"https://this-domain-does-not-exist-123456789.com",
	}
	for _, u := range mustContain {
		if _, ok := urls[u]; !ok {
			// Show nearby URLs to aid debugging if it fails.
			var sample []string
			for seen := range urls {
				if strings.Contains(seen, "example") {
					sample = append(sample, seen)
				}
			}
			t.Fatalf("expected URL %q to be collected; example URLs seen: %v", u, sample)
		}
	}

	// Ensure sources are recorded for a known URL
	srcs := urls["https://example.com"]
	if len(srcs) == 0 {
		t.Fatalf("expected sources for https://example.com, got none")
	}
}
internal/fsurls/lang_files_test.go (new file, 42 lines)
@@ -0,0 +1,42 @@
package fsurls

import (
	"path/filepath"
	"testing"
)

func TestCollectURLs_FromCodeFiles(t *testing.T) {
	root := filepath.Join("..", "..", "test files")
	urls, err := CollectURLs(root, []string{"**/*"})
	if err != nil {
		t.Fatalf("CollectURLs error: %v", err)
	}

	// Valid URLs from various languages should be present (including a known
	// nonexistent-but-well-formed domain)
	valids := []string{
		"https://example.com",
		"https://en.wikipedia.org/wiki/Main_Page",
		"https://developer.mozilla.org",
		"https://svelte.dev",
		"https://go.dev/doc/",
		"https://this-domain-does-not-exist-123456789.com",
	}
	for _, u := range valids {
		if _, ok := urls[u]; !ok {
			t.Fatalf("expected valid URL %q to be collected", u)
		}
	}

	// Placeholder patterns should be excluded by strict validation
	placeholders := []string{
		"https://[tenant].api.identitynow.com",
		"https://{tenant}.api.identitynow.com",
		"https://[tenant].[domain].com",
		"https://{tenant}.api.ideidentitynow.com/v3/transforms",
	}
	for _, u := range placeholders {
		if _, ok := urls[u]; ok {
			t.Fatalf("did not expect placeholder URL %q to be collected", u)
		}
	}
}
internal/report/markdown.go (new file, 168 lines)
@@ -0,0 +1,168 @@
package report

import (
	"bytes"
	"fmt"
	"html"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"slinky/internal/web"
)

// Summary captures high-level run details for the report.
type Summary struct {
	RootPath        string
	StartedAt       time.Time
	FinishedAt      time.Time
	Processed       int
	OK              int
	Fail            int
	AvgRPS          float64
	PeakRPS         float64
	LowRPS          float64
	JSONPath        string
	RepoBlobBaseURL string // e.g. https://github.com/owner/repo/blob/<sha>
}

// WriteMarkdown writes a GitHub-flavored Markdown report to path. If path is empty,
// it derives a safe filename from s.RootPath.
func WriteMarkdown(path string, results []web.Result, s Summary) (string, error) {
	if strings.TrimSpace(path) == "" {
		base := filepath.Base(s.RootPath)
		if strings.TrimSpace(base) == "" || base == "." || base == string(filepath.Separator) {
			base = "results"
		}
		var b strings.Builder
		for _, r := range strings.ToLower(base) {
			if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_' || r == '.' {
				b.WriteRune(r)
			} else {
				b.WriteByte('_')
			}
		}
		path = fmt.Sprintf("%s.md", b.String())
	}

	var buf bytes.Buffer
	// Title and summary
	buf.WriteString("## Slinky Test Report\n\n")
	buf.WriteString(fmt.Sprintf("- **Root**: %s\n", escapeMD(s.RootPath)))
	buf.WriteString(fmt.Sprintf("- **Started**: %s\n", s.StartedAt.Format("2006-01-02 15:04:05 MST")))
	buf.WriteString(fmt.Sprintf("- **Finished**: %s\n", s.FinishedAt.Format("2006-01-02 15:04:05 MST")))
	buf.WriteString(fmt.Sprintf("- **Processed**: %d • **OK**: %d • **Fail**: %d\n", s.Processed, s.OK, s.Fail))
	buf.WriteString(fmt.Sprintf("- **Rates**: avg %.1f/s • peak %.1f/s • low %.1f/s\n", s.AvgRPS, s.PeakRPS, s.LowRPS))
	if s.JSONPath != "" {
		base := filepath.Base(s.JSONPath)
		buf.WriteString(fmt.Sprintf("- **JSON**: %s\n", escapeMD(base)))
	}
	buf.WriteString("\n")

	// Failures by URL
	buf.WriteString("### Failures by URL\n\n")

	// Gather issues per URL with the list of files that referenced it
	type fileRef struct {
		Path string
	}
	type urlIssue struct {
		Status int
		Method string
		ErrMsg string
		Files  []fileRef
	}
	byURL := make(map[string]*urlIssue)
	for _, r := range results {
		ui, ok := byURL[r.URL]
		if !ok {
			ui = &urlIssue{Status: r.Status, Method: r.Method, ErrMsg: r.ErrMsg}
			byURL[r.URL] = ui
		}
		for _, src := range r.Sources {
			ui.Files = append(ui.Files, fileRef{Path: src})
		}
	}

	// Sort URLs
	var urls []string
	for u := range byURL {
		urls = append(urls, u)
	}
	sort.Strings(urls)

	for _, u := range urls {
		ui := byURL[u]
		// Header line for the URL
		if ui.Status > 0 {
			buf.WriteString(fmt.Sprintf("- %d %s `%s` — %s\n", ui.Status, escapeMD(ui.Method), escapeMD(u), escapeMD(ui.ErrMsg)))
		} else {
			buf.WriteString(fmt.Sprintf("- %s `%s` — %s\n", escapeMD(ui.Method), escapeMD(u), escapeMD(ui.ErrMsg)))
		}
		// Files list (collapsible)
		buf.WriteString("  <details><summary>files</summary>\n\n")
		// Deduplicate and sort file paths
		seen := make(map[string]struct{})
		var files []string
		for _, fr := range ui.Files {
			if _, ok := seen[fr.Path]; ok {
				continue
			}
			seen[fr.Path] = struct{}{}
			files = append(files, fr.Path)
		}
		sort.Strings(files)
		for _, fn := range files {
			if strings.TrimSpace(s.RepoBlobBaseURL) != "" {
				buf.WriteString(fmt.Sprintf("  - [%s](%s/%s)\n", escapeMD(fn), strings.TrimRight(s.RepoBlobBaseURL, "/"), escapeLinkPath(fn)))
			} else {
				buf.WriteString(fmt.Sprintf("  - [%s](./%s)\n", escapeMD(fn), escapeLinkPath(fn)))
			}
		}
		buf.WriteString("\n  </details>\n\n")
	}

	f, err := os.Create(path)
	if err != nil {
		return "", err
	}
	defer f.Close()
	if _, err := f.Write(buf.Bytes()); err != nil {
		return "", err
	}
	return path, nil
}

func escapeMD(s string) string {
	// Basic HTML escape to be safe in GitHub Markdown table cells
	return html.EscapeString(s)
}

// formatSourcesList renders a list of file paths as an HTML unordered list suitable
// for inclusion in a Markdown table cell. Individual entries are escaped.
func formatSourcesList(srcs []string) string {
	if len(srcs) == 0 {
		return ""
	}
	var b strings.Builder
	b.WriteString("<ul>\n")
	for _, s := range srcs {
		b.WriteString("  <li><code>")
		b.WriteString(escapeMD(s))
		b.WriteString("</code></li>\n")
	}
	b.WriteString("</ul>")
	return b.String()
}

// escapeLinkPath escapes a relative path for inclusion in a Markdown link URL.
// We keep it simple and only escape parentheses and spaces.
func escapeLinkPath(p string) string {
	p = strings.ReplaceAll(p, " ", "%20")
	p = strings.ReplaceAll(p, "(", "%28")
	p = strings.ReplaceAll(p, ")", "%29")
	return p
}
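A sketch of producing a report by hand with made-up failure data; in the real flow the results slice comes from web.CheckURLs and the summary from the run loop:

package main

import (
	"fmt"
	"time"

	"slinky/internal/report"
	"slinky/internal/web"
)

func main() {
	// Hypothetical failure record; field values are illustrative.
	fails := []web.Result{{
		URL:     "https://this-domain-does-not-exist-123456789.com",
		Method:  "GET",
		ErrMsg:  "no such host",
		Sources: []string{"README.md"},
	}}
	s := report.Summary{
		RootPath:   ".",
		StartedAt:  time.Now().Add(-2 * time.Second),
		FinishedAt: time.Now(),
		Processed:  1,
		Fail:       1,
	}
	// An empty path would derive a filename from RootPath; pass one explicitly here.
	path, err := report.WriteMarkdown("results.md", fails, s)
	if err != nil {
		panic(err)
	}
	fmt.Println("wrote", path)
}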
internal/tui/fs_bridge.go (new file, 10 lines)
@@ -0,0 +1,10 @@
package tui

import (
	"slinky/internal/fsurls"
)

// fsCollect is a tiny bridge to avoid importing fsurls directly in tui.go
func fsCollect(root string, globs []string) (map[string][]string, error) {
	return fsurls.CollectURLs(root, globs)
}
internal/tui/tui.go (new file, 319 lines)
@@ -0,0 +1,319 @@
package tui

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"time"

	"github.com/charmbracelet/bubbles/progress"
	"github.com/charmbracelet/bubbles/spinner"
	"github.com/charmbracelet/bubbles/viewport"
	tea "github.com/charmbracelet/bubbletea"
	"github.com/charmbracelet/lipgloss"

	"slinky/internal/report"
	"slinky/internal/web"
)

type linkResultMsg struct{ res web.Result }
type crawlDoneMsg struct{}
type statsMsg struct{ s web.Stats }
type tickMsg struct{ t time.Time }

type model struct {
	rootPath string
	cfg      web.Config
	jsonOut  string
	mdOut    string
	globs    []string

	results    chan web.Result
	stats      chan web.Stats
	started    time.Time
	finishedAt time.Time
	done       bool

	spin spinner.Model
	prog progress.Model
	vp   viewport.Model

	lines []string

	total int
	ok    int
	fail  int

	pending   int
	processed int

	lastProcessed int
	rps           float64
	peakRPS       float64
	lowRPS        float64

	allResults []web.Result
	jsonPath   string
	mdPath     string

	showFail bool
}

// Run scans files under rootPath matching globs, extracts URLs, and checks them.
func Run(rootPath string, globs []string, cfg web.Config, jsonOut string, mdOut string) error {
	m := &model{rootPath: rootPath, cfg: cfg, jsonOut: jsonOut, mdOut: mdOut, globs: globs}
	p := tea.NewProgram(m, tea.WithAltScreen())
	// Run is the supported entry point; Start is deprecated in bubbletea v1.
	_, err := p.Run()
	return err
}

func (m *model) Init() tea.Cmd {
	m.spin = spinner.New()
	m.spin.Spinner = spinner.Dot
	m.spin.Style = lipgloss.NewStyle().Foreground(lipgloss.Color("205"))
	m.prog = progress.New(progress.WithDefaultGradient())
	m.started = time.Now()
	m.lowRPS = -1
	m.results = make(chan web.Result, 256)
	m.stats = make(chan web.Stats, 64)

	ctx, cancel := context.WithCancel(context.Background())
	go func() {
		defer cancel()
		urlsMap, _ := fsCollect(m.rootPath, m.globs)
		var urls []string
		for u := range urlsMap {
			urls = append(urls, u)
		}
		web.CheckURLs(ctx, urls, urlsMap, m.results, m.stats, m.cfg)
	}()

	return tea.Batch(m.spin.Tick, m.waitForEvent(), tickCmd())
}

func tickCmd() tea.Cmd {
	return tea.Tick(time.Second, func(t time.Time) tea.Msg { return tickMsg{t: t} })
}

func (m *model) waitForEvent() tea.Cmd {
	return func() tea.Msg {
		if m.results == nil {
			return crawlDoneMsg{}
		}
		select {
		case res, ok := <-m.results:
			if ok {
				return linkResultMsg{res: res}
			}
			return crawlDoneMsg{}
		case s := <-m.stats:
			return statsMsg{s: s}
		}
	}
}

func (m *model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
	switch msg := msg.(type) {
	case tea.KeyMsg:
		switch msg.String() {
		case "q", "ctrl+c":
			return m, tea.Quit
		case "f":
			m.showFail = !m.showFail
			m.refreshViewport()
			return m, nil
		}
	case tea.WindowSizeMsg:
		// Reserve space for header (1), stats (1), progress (1), spacer (1), footer (1)
		reserved := 5
		if m.vp.Width == 0 {
			m.vp = viewport.Model{Width: msg.Width, Height: max(msg.Height-reserved, 3)}
		} else {
			m.vp.Width = msg.Width
			m.vp.Height = max(msg.Height-reserved, 3)
		}
		m.prog.Width = max(msg.Width-4, 10)
		m.refreshViewport()
		return m, nil
	case linkResultMsg:
		// Show every event in the log
		prefix := statusEmoji(msg.res.OK, msg.res.Err)
		if msg.res.CacheHit {
			prefix = "🗃"
		}
		line := fmt.Sprintf("%s %3d %s", prefix, msg.res.Status, msg.res.URL)
		m.lines = append(m.lines, line)
		// Only count non-cache-hits in totals and the JSON export
		if !msg.res.CacheHit {
			m.total++
			if msg.res.OK && msg.res.Err == nil {
				m.ok++
			} else {
				m.fail++
			}
			m.allResults = append(m.allResults, msg.res)
		}
		m.refreshViewport()
		return m, m.waitForEvent()
	case statsMsg:
		m.pending = msg.s.Pending
		m.processed = msg.s.Processed
		return m, m.waitForEvent()
	case tickMsg:
		// Compute requests/sec over the last tick
		delta := m.processed - m.lastProcessed
		m.lastProcessed = m.processed
		m.rps = float64(delta)
		if m.rps > m.peakRPS {
			m.peakRPS = m.rps
		}
		if m.lowRPS < 0 || m.rps < m.lowRPS {
			m.lowRPS = m.rps
		}
		return m, tickCmd()
	case crawlDoneMsg:
		m.done = true
		m.finishedAt = time.Now()
		m.results = nil
		m.writeJSON()
		m.writeMarkdown()
		return m, tea.Quit
	}

	var cmd tea.Cmd
	m.spin, cmd = m.spin.Update(msg)
	return m, cmd
}

func (m *model) refreshViewport() {
	var filtered []string
	if m.showFail {
		for _, l := range m.lines {
			if strings.HasPrefix(l, "❌") {
				filtered = append(filtered, l)
			}
		}
	} else {
		filtered = m.lines
	}
	m.vp.SetContent(strings.Join(filtered, "\n"))
	m.vp.GotoBottom()
}

func (m *model) writeJSON() {
	path := m.jsonOut
	if strings.TrimSpace(path) == "" {
		base := filepath.Base(m.rootPath)
		if strings.TrimSpace(base) == "" || base == "." || base == string(filepath.Separator) {
			base = "results"
		}
		re := regexp.MustCompile(`[^a-zA-Z0-9.-]+`)
		safe := re.ReplaceAllString(strings.ToLower(base), "_")
		path = fmt.Sprintf("%s.json", safe)
	}
	f, err := os.Create(path)
	if err != nil {
		return
	}
	defer f.Close()
	// Only write failing results
	var fails []web.Result
	for _, r := range m.allResults {
		if !(r.OK && r.Err == nil) {
			fails = append(fails, r)
		}
	}
	enc := json.NewEncoder(f)
	enc.SetIndent("", "  ")
	_ = enc.Encode(fails)
	m.jsonPath = path
}

func (m *model) writeMarkdown() {
	// Compute average RPS over the entire run
	dur := m.finishedAt.Sub(m.started)
	avg := 0.0
	if dur.Seconds() > 0 {
		avg = float64(m.processed) / dur.Seconds()
	}
	s := report.Summary{
		RootPath:        m.rootPath,
		StartedAt:       m.started,
		FinishedAt:      m.finishedAt,
		Processed:       m.processed,
		OK:              m.ok,
		Fail:            m.fail,
		AvgRPS:          avg,
		PeakRPS:         m.peakRPS,
		LowRPS:          m.lowRPS,
		JSONPath:        m.jsonPath,
		RepoBlobBaseURL: os.Getenv("SLINKY_REPO_BLOB_BASE_URL"),
	}
	// Only include failing results in the Markdown report
	var failsMD []web.Result
	for _, r := range m.allResults {
		if !(r.OK && r.Err == nil) {
			failsMD = append(failsMD, r)
		}
	}
	p, err := report.WriteMarkdown(m.mdOut, failsMD, s)
	if err == nil {
		m.mdPath = p
	}
}

func (m *model) View() string {
	header := lipgloss.NewStyle().Bold(true).Render(fmt.Sprintf(" Scanning %s ", m.rootPath))
	if m.done {
		dur := time.Since(m.started)
		if !m.finishedAt.IsZero() {
			dur = m.finishedAt.Sub(m.started)
		}
		avg := 0.0
		if dur.Seconds() > 0 {
			avg = float64(m.processed) / dur.Seconds()
		}
		summary := []string{
			fmt.Sprintf("Duration: %s", dur.Truncate(time.Millisecond)),
			fmt.Sprintf("Processed: %d OK:%d Fail:%d", m.processed, m.ok, m.fail),
			fmt.Sprintf("Rates: avg %.1f/s peak %.1f/s low %.1f/s", avg, m.peakRPS, m.lowRPS),
		}
		if m.jsonPath != "" {
			summary = append(summary, fmt.Sprintf("JSON: %s", m.jsonPath))
		}
		if m.mdPath != "" {
			summary = append(summary, fmt.Sprintf("Markdown: %s", m.mdPath))
		}
		footer := lipgloss.NewStyle().Faint(true).Render("Controls: [q] quit  [f] toggle fails")
		container := lipgloss.NewStyle().Padding(1)
		return container.Render(strings.Join(append([]string{header}, append(summary, footer)...), "\n"))
	}
	percent := 0.0
	totalWork := m.processed + m.pending
	if totalWork > 0 {
		percent = float64(m.processed) / float64(totalWork)
	}
	progressLine := m.prog.ViewAs(percent)
	stats := fmt.Sprintf("%s total:%d ok:%d fail:%d pending:%d processed:%d rps:%.1f/s", m.spin.View(), m.total, m.ok, m.fail, m.pending, m.processed, m.rps)
	body := m.vp.View()
	footer := lipgloss.NewStyle().Faint(true).Render("Controls: [q] quit  [f] toggle fails")
	container := lipgloss.NewStyle().Padding(1)
	return container.Render(strings.Join([]string{header, stats, progressLine, "", body, footer}, "\n"))
}

func statusEmoji(ok bool, err error) string {
	if ok && err == nil {
		return "✅"
	}
	return "❌"
}

func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}
internal/web/checker.go (new file, 103 lines)
@@ -0,0 +1,103 @@
|
||||
package web
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net"
|
||||
"net/http"
|
||||
"sort"
|
||||
"time"
|
||||
)
|
||||
|
||||
// CheckURLs performs concurrent GET requests for each URL and emits Result events.
|
||||
// sources maps URL -> list of file paths where it was found.
|
||||
func CheckURLs(ctx context.Context, urls []string, sources map[string][]string, out chan<- Result, stats chan<- Stats, cfg Config) {
|
||||
defer close(out)
|
||||
|
||||
// Build HTTP client similar to crawler
|
||||
transport := &http.Transport{
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{Timeout: 2 * time.Second, KeepAlive: 30 * time.Second}).DialContext,
|
||||
TLSHandshakeTimeout: 5 * time.Second,
|
||||
ExpectContinueTimeout: 1 * time.Second,
|
||||
MaxIdleConns: cfg.MaxConcurrency * 2,
|
||||
MaxIdleConnsPerHost: cfg.MaxConcurrency,
|
||||
MaxConnsPerHost: cfg.MaxConcurrency,
|
||||
IdleConnTimeout: 30 * time.Second,
|
||||
ResponseHeaderTimeout: cfg.RequestTimeout,
|
||||
}
|
||||
client := &http.Client{Timeout: cfg.RequestTimeout, Transport: transport}
|
||||
|
||||
type job struct{ url string }
|
||||
jobs := make(chan job, len(urls))
|
||||
done := make(chan struct{})
|
||||
|
||||
// Seed jobs
|
||||
unique := make(map[string]struct{}, len(urls))
|
||||
for _, u := range urls {
|
||||
if u == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := unique[u]; ok {
|
||||
continue
|
||||
}
|
||||
unique[u] = struct{}{}
|
||||
jobs <- job{url: u}
|
||||
}
|
||||
close(jobs)
|
||||
|
||||
concurrency := cfg.MaxConcurrency
|
||||
if concurrency <= 0 {
|
||||
concurrency = 8
|
||||
}
|
||||
processed := 0
|
||||
pending := len(unique)
|
||||
|
||||
	worker := func() {
		// Signal completion via defer so an early return on cancellation
		// cannot leave the collector below blocked on done.
		defer func() { done <- struct{}{} }()
		for j := range jobs {
			select {
			case <-ctx.Done():
				return
			default:
			}
			ok, status, resp, err := fetchWithMethod(ctx, client, http.MethodGet, j.url)
			if resp != nil && resp.Body != nil {
				resp.Body.Close()
			}
			// Treat 401/403 as valid links (exist but require authorization)
			if status == http.StatusUnauthorized || status == http.StatusForbidden {
				ok = true
				err = nil
			}
			var srcs []string
			if sources != nil {
				srcs = sources[j.url]
			}
			out <- Result{URL: j.url, OK: ok, Status: status, Err: err, ErrMsg: errString(err), Method: http.MethodGet, Sources: cloneAndSort(srcs)}
			p := atomic.AddInt64(&processed, 1)
			rem := atomic.AddInt64(&pending, -1)
			if stats != nil {
				// Non-blocking send: drop a stats update if the UI is busy.
				select {
				case stats <- Stats{Pending: int(rem), Processed: int(p)}:
				default:
				}
			}
		}
	}

	for i := 0; i < concurrency; i++ {
		go worker()
	}
	for i := 0; i < concurrency; i++ {
		<-done
	}
}

func cloneAndSort(in []string) []string {
	if len(in) == 0 {
		return nil
	}
	out := append([]string(nil), in...)
	sort.Strings(out)
	return out
}
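For reference, a minimal sketch of driving CheckURLs from a caller (it mirrors the test below; the fmt output line is illustrative, not part of the package):

	out := make(chan Result, 16)
	cfg := Config{MaxConcurrency: 8, RequestTimeout: 5 * time.Second}
	go CheckURLs(context.Background(), []string{"https://example.com"}, nil, out, nil, cfg)
	for r := range out { // out is closed by CheckURLs, so this loop terminates
		fmt.Printf("%s status=%d ok=%v\n", r.URL, r.Status, r.OK)
	}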
53
internal/web/checker_test.go
Normal file
53
internal/web/checker_test.go
Normal file
@@ -0,0 +1,53 @@
package web

import (
	"context"
	"testing"
	"time"
)

// This test exercises CheckURLs with a mix of known-good and invalid URLs.
// It does real network calls; keep timeouts short to avoid long CI runs.
func TestCheckURLs_Basic(t *testing.T) {
	urls := []string{
		"https://example.com",                              // should be OK
		"https://en.wikipedia.org/wiki/Main_Page",          // should be OK
		"http://example..com",                              // invalid hostname
		"https://this-domain-does-not-exist-123456789.com", // NXDOMAIN/nonexistent
	}

	sources := map[string][]string{
		"https://example.com":                               {"test files/test2.txt"},
		"https://en.wikipedia.org/wiki/Main_Page":           {"test files/test5.html"},
		"http://example..com":                               {"test files/test5.html"},
		"https://this-domain-does-not-exist-123456789.com":  {"test files/test5.html"},
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	out := make(chan Result, 16)
	cfg := Config{MaxConcurrency: 8, RequestTimeout: 5 * time.Second}

	go CheckURLs(ctx, urls, sources, out, nil, cfg)

	seen := 0
	var okCount, failCount int
	for r := range out {
		seen++
		if r.OK {
			okCount++
		} else {
			failCount++
		}
	}

	if seen != len(urls) {
		t.Fatalf("expected %d results, got %d", len(urls), seen)
	}
	if okCount == 0 {
		t.Fatalf("expected at least one OK result")
	}
	if failCount == 0 {
		t.Fatalf("expected at least one failure result")
	}
}
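If real network calls ever make this test flaky, a hermetic variant could point the checker at a local server instead; a sketch (not part of this change) assuming net/http and net/http/httptest are imported:

	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK) // every URL under srv.URL checks out as OK
	}))
	defer srv.Close()
	go CheckURLs(ctx, []string{srv.URL}, nil, out, nil, cfg)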
68
internal/web/http.go
Normal file
68
internal/web/http.go
Normal file
@@ -0,0 +1,68 @@
package web

import (
	"context"
	"errors"
	"net"
	"net/http"
	"strings"
)

// browserUA is sent with every request so checks present as a regular desktop browser.
const browserUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0 Safari/537.36"

// fetchWithMethod issues a single request and reports whether the link looks
// healthy (status 2xx/3xx). Transport-level failures are normalized to
// synthetic status codes: DNS failure -> 404, timeout -> 408, connection
// refused -> 503. A non-nil response body is the caller's to close.
func fetchWithMethod(ctx context.Context, client *http.Client, method string, raw string) (bool, int, *http.Response, error) {
	req, err := http.NewRequestWithContext(ctx, method, raw, nil)
	if err != nil {
		return false, 0, nil, err
	}
	req.Header.Set("User-Agent", browserUA)
	req.Header.Set("Accept", "*/*")
	resp, err := client.Do(req)
	if err != nil {
		if isDNSError(err) {
			return false, 404, nil, simpleError("host not found")
		}
		if isTimeout(err) {
			return false, 408, nil, simpleError("request timeout")
		}
		if isRefused(err) {
			return false, 503, nil, simpleError("connection refused")
		}
		return false, 0, nil, err
	}
	return resp.StatusCode >= 200 && resp.StatusCode < 400, resp.StatusCode, resp, nil
}

func errString(e error) string {
	if e == nil {
		return ""
	}
	return e.Error()
}

func isTimeout(err error) bool {
	if err == nil {
		return false
	}
	if errors.Is(err, context.DeadlineExceeded) {
		return true
	}
	if ne, ok := err.(net.Error); ok && ne.Timeout() {
		return true
	}
	return false
}

// isDNSError matches on error text because the net package wraps DNS failures
// in several different types; "server misbehaving" covers flaky resolvers.
func isDNSError(err error) bool {
	msg := strings.ToLower(err.Error())
	return strings.Contains(msg, "no such host") || strings.Contains(msg, "server misbehaving")
}

func isRefused(err error) bool {
	msg := strings.ToLower(err.Error())
	return strings.Contains(msg, "connection refused")
}

// simpleError is a tiny string-backed error type for the synthetic failures above.
type simpleError string

func (e simpleError) Error() string { return string(e) }
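Because fetchWithMethod folds transport failures into synthetic status codes, callers can branch on status alone; an illustrative sketch (ctx and client as in checker.go, and port 1 on localhost is assumed to have no listener):

	ok, status, resp, err := fetchWithMethod(ctx, client, http.MethodGet, "http://127.0.0.1:1/")
	if resp != nil && resp.Body != nil {
		resp.Body.Close()
	}
	// Typically: ok == false, status == 503, err.Error() == "connection refused"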
29
internal/web/types.go
Normal file
29
internal/web/types.go
Normal file
@@ -0,0 +1,29 @@
package web

import "time"

// Result describes the outcome of checking a single URL.
type Result struct {
	URL         string
	OK          bool
	Status      int
	Err         error
	ErrMsg      string // Err rendered as a string, for serialization
	Depth       int
	CacheHit    bool
	Method      string
	ContentType string
	Sources     []string // file paths in which the URL was found
}

// Stats is a lightweight progress snapshot emitted while checking.
type Stats struct {
	Pending   int
	Processed int
}

// Config controls crawling and checking behavior.
type Config struct {
	MaxDepth       int
	MaxConcurrency int
	RequestTimeout time.Duration
	MaxRetries429  int
	Exclude        []string
}
9
main.go
Normal file
9
main.go
Normal file
@@ -0,0 +1,9 @@
package main

import "slinky/cmd"

func main() {
	cmd.Execute()
}
102
test files/test nesting/test more nesting/test1.md
Normal file
102
test files/test nesting/test more nesting/test1.md
Normal file
@@ -0,0 +1,102 @@

# Invalid URL Test Cases

Here are some invalid URLs using various Markdown link and image syntaxes:

- [Broken Protocol](htp://invalid-url.com)
  *Reason: Misspelled protocol ("htp" instead of "http")*

- [No Domain](http://)
  *Reason: Missing domain*

- [Missing Name Before TLD](http://.com)
  *Reason: Missing domain name before TLD*

- [Underscore in Domain](http://invalid_domain)
  *Reason: Underscore in domain, not allowed in DNS hostnames*

- [Domain Starts with Hyphen](http://-example.com)
  *Reason: Domain cannot start with a hyphen*

- [Double Dot in Domain](http://example..com)
  *Reason: Double dot in domain*

- [Non-numeric Port](http://example.com:abc)
  *Reason: Invalid port (non-numeric)*

- [Unsupported Protocol](ftp://example.com)
  *Reason: Unsupported protocol (should be http/https)*

- [Space in Domain](http://example .com)
  *Reason: Space in domain*

- [Extra Slash in Protocol](http:///example.com)
  *Reason: Extra slash in protocol separator*

- ![Missing Domain Image](http://)
  *Reason: Image with missing domain*

- ![Bad Protocol Image](htp://invalid-url.com/image.png)
  *Reason: Image with misspelled protocol*

- ![Double Dot Image](http://example..com/pic.jpg)
  *Reason: Image with double dot in domain*

- [](htp://invalid-url.com)
  *Reason: Image and link both with invalid URLs*

---

# Correctly Formatted but Nonexistent URLs

These URLs are syntactically correct but do not point to real sites:

- [Nonexistent Domain](https://this-domain-does-not-exist-123456789.com)

- [Fake Subdomain](https://foo.bar.baz.nonexistent-tld)

- [Unused TLD](https://example.madeuptld)

- [Long Random String](https://abcdefg1234567890.example.com)

- [Fake Image](https://notarealwebsite.com/image.png)

- ![Fake Image](https://notarealwebsite.com/image.png)

- [](https://notarealwebsite.com/page)

- [Unregistered Domain](https://unregistered-website-xyz.com)

- [Fake Path](https://example.com/this/path/does/not/exist)

- [Nonexistent Page](https://example.com/404notfound)

---

# Valid URLs

These URLs are well-formed and point to known good sites:

- [Example Domain](https://example.com)

- [Wikipedia](https://en.wikipedia.org/wiki/Main_Page)

- [GitHub](https://github.com)

- [Google](https://www.google.com)

- [Mozilla Developer Network](https://developer.mozilla.org)

- [Go Documentation](https://go.dev/doc/)

- ![Valid Image](https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png)

- [](https://github.com)

- [Svelte](https://svelte.dev)

- [OpenAI](https://openai.com)

11
test files/test nesting/test2.txt
Normal file
11
test files/test nesting/test2.txt
Normal file
@@ -0,0 +1,11 @@
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed euismod, urna eu tincidunt consectetur, nisi nisl aliquam enim, eget facilisis quam felis id mauris.
Check out this website: https://example.com for more information.
Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Mauris non tempor quam.

Phasellus euismod, justo at dictum placerat, sapien erat ultricies eros, ac porta sem ex ac nisi.
For documentation, visit https://docs.something.org or refer to https://github.com/example/repo for the source code.
Nullam ac urna eu felis dapibus condimentum sit amet a augue.

Curabitur non nulla sit amet nisl tempus convallis quis ac lectus.
You might also find https://news.ycombinator.com interesting for tech news.
Vivamus magna justo, lacinia eget consectetur sed, convallis at tellus.
11
test files/test10.log
Normal file
11
test files/test10.log
Normal file
@@ -0,0 +1,11 @@
2025-09-10 10:00:00 INFO Fetching https://example.com
2025-09-10 10:00:01 INFO Fetching https://github.com/example/repo
2025-09-10 10:00:02 WARN Retrying htp://bad-protocol.com
2025-09-10 10:00:03 ERROR Failed to fetch http:///example.com
2025-09-10 10:00:04 ERROR DNS error for https://this-domain-does-not-exist-123456789.com
2025-09-10 10:00:05 INFO Fetching http://example.com:8080/api/status
2025-09-10 10:00:06 ERROR Invalid host http://example..com
2025-09-10 10:00:07 ERROR Missing domain https://
2025-09-10 10:00:08 INFO Fetching https://en.wikipedia.org/wiki/Main_Page
10
test files/test11.go
Normal file
10
test files/test11.go
Normal file
@@ -0,0 +1,10 @@
package testfiles

// Sample Go file with URLs
var url1 = "https://example.com"
var url2 = "https://en.wikipedia.org/wiki/Main_Page"
var urlBad = "http://example..com"
var urlMissing = "https://"
var urlNonexistent = "https://this-domain-does-not-exist-123456789.com"
var urlPlaceholder1 = "https://[tenant].api.identitynow.com"
var urlPlaceholder2 = "https://{tenant}.api.identitynow.com"
11
test files/test12.php
Normal file
11
test files/test12.php
Normal file
@@ -0,0 +1,11 @@
<?php
// Sample PHP file with URLs
$url1 = "https://developer.mozilla.org";
$url2 = "https://github.com/example/repo";
$urlBad = "http://example..com";
$urlMissing = "https://";
$urlNonexistent = "https://notarealwebsite.com/image.png";
$urlPlaceholder = "https://{tenant}.api.identitynow.com";
?>
9
test files/test13.ps1
Normal file
9
test files/test13.ps1
Normal file
@@ -0,0 +1,9 @@
# Sample PowerShell file with URLs
$url1 = "https://example.com"
$url2 = "https://news.ycombinator.com"
$urlBad = "http://example..com"
$urlMissing = "https://"
$urlNonexistent = "https://example.madeuptld"
$urlPlaceholder = "https://[tenant].api.identitynow.com"
9
test files/test14.py
Normal file
9
test files/test14.py
Normal file
@@ -0,0 +1,9 @@
# Sample Python file with URLs
url1 = "https://example.com"
url2 = "https://developer.mozilla.org"
url_bad = "http://example..com"
url_missing = "https://"
url_nonexistent = "https://this-image-domain-should-not-exist-xyz.example"
url_placeholder = "https://{tenant}.api.identitynow.com/v3/transforms"
11
test files/test15.java
Normal file
11
test files/test15.java
Normal file
@@ -0,0 +1,11 @@
public class Test15 {
    // Sample Java file with URLs
    String url1 = "https://example.com";
    String url2 = "https://svelte.dev";
    String urlBad = "http://example..com";
    String urlMissing = "https://";
    String urlNonexistent = "https://unregistered-website-xyz.com";
    String urlPlaceholder = "https://[tenant].[domain].com";
}
9
test files/test16.ts
Normal file
9
test files/test16.ts
Normal file
@@ -0,0 +1,9 @@
// Sample TypeScript file with URLs
const url1: string = "https://example.com";
const url2: string = "https://go.dev/doc/";
const urlBad: string = "http://example..com";
const urlMissing: string = "https://";
const urlNonexistent: string = "https://this-domain-does-not-exist-987654321.com";
const urlPlaceholder: string = "https://{tenant}.api.ideidentitynow.com/v3/transforms";
9
test files/test17.rb
Normal file
9
test files/test17.rb
Normal file
@@ -0,0 +1,9 @@
# Sample Ruby file with URLs
url1 = "https://example.com"
url2 = "https://github.com"
url_bad = "http://example..com"
url_missing = "https://"
url_nonexistent = "https://totally-made-up-host-tld-abcdef123.com"
url_placeholder = "https://[tenant].api.identitynow.com"
5
test files/test3.js
Normal file
5
test files/test3.js
Normal file
@@ -0,0 +1,5 @@
const link1 = "https://example.com";
const link2 = "https://docs.something.org";
const link3 = "https://github.com/example/repo";
const link4 = "https://news.ycombinator.com";
const link5 = "http://example.com:8080";
7
test files/test4.xml
Normal file
7
test files/test4.xml
Normal file
@@ -0,0 +1,7 @@
<links>
  <link>https://example.com</link>
  <link>https://docs.something.org</link>
  <link>https://github.com/example/repo</link>
  <link>https://news.ycombinator.com</link>
  <link>http://example.com:8080</link>
</links>
25
test files/test5.html
Normal file
25
test files/test5.html
Normal file
@@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>URL Test HTML</title>
</head>
<body>
  <h1>URL Test Links</h1>
  <a href="https://example.com">Example</a>
  <a href="http://example.com:8080/path?query=1">With Port</a>
  <a href="htp://bad-protocol.com">Bad Protocol</a>
  <a href="https://">Missing Domain</a>
  <a href="http:///example.com">Extra Slash</a>
  <a href="http://example..com">Double Dot</a>
  <a href="https://this-domain-does-not-exist-123456789.com">Nonexistent Domain</a>
  <a href="https://en.wikipedia.org/wiki/Main_Page">Wikipedia</a>

  <h2>Images</h2>
  <img src="https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png" alt="Valid Image">
  <img src="http://example..com/pic.jpg" alt="Double Dot Image">
  <img src="htp://invalid-url.com/image.png" alt="Bad Protocol Image">
</body>
</html>
21
test files/test6.json
Normal file
21
test files/test6.json
Normal file
@@ -0,0 +1,21 @@
{
  "valid": [
    "https://example.com",
    "https://github.com",
    "http://example.com:8080/path",
    "https://en.wikipedia.org/wiki/Main_Page"
  ],
  "invalid": [
    "htp://invalid-url.com",
    "http:///example.com",
    "http://example..com",
    "https://"
  ],
  "nonexistent": [
    "https://this-domain-does-not-exist-123456789.com",
    "https://notarealwebsite.com/image.png",
    "https://example.madeuptld"
  ]
}
15
test files/test7.yaml
Normal file
15
test files/test7.yaml
Normal file
@@ -0,0 +1,15 @@
valid:
  - https://example.com
  - https://docs.something.org
  - http://example.com:8080/path
invalid:
  - htp://bad-protocol.com
  - http:///too-many-slashes.com
  - http://example..com
  - https://
nonexistent:
  - https://notarealwebsite.com/page
  - https://unregistered-website-xyz.com
  - https://example.madeuptld
13
test files/test8.csv
Normal file
13
test files/test8.csv
Normal file
@@ -0,0 +1,13 @@
id,name,url
1,Example,https://example.com
2,Docs,https://docs.something.org
3,Repo,https://github.com/example/repo
4,HN,https://news.ycombinator.com
5,WithPort,http://example.com:8080
6,BadProtocol,htp://invalid-url.com
7,ExtraSlash,http:///example.com
8,DoubleDot,http://example..com
9,MissingDomain,https://
10,Nonexistent,https://this-domain-does-not-exist-123456789.com
12
test files/test9.ini
Normal file
12
test files/test9.ini
Normal file
@@ -0,0 +1,12 @@
[links]
good1 = https://example.com
good2 = https://en.wikipedia.org/wiki/Main_Page
good3 = http://example.com:8080/path
bad1 = htp://bad-protocol.com
bad2 = http:///example.com
bad3 = http://example..com
missing = https://
nonexistent1 = https://notarealwebsite.com/image.png
nonexistent2 = https://example.madeuptld