diff --git a/.github/actions/slinky/Dockerfile b/.github/actions/slinky/Dockerfile new file mode 100644 index 0000000..b1bb0a8 --- /dev/null +++ b/.github/actions/slinky/Dockerfile @@ -0,0 +1,19 @@
+FROM golang:1.24 AS build
+WORKDIR /app
+# Expect the repository root as build context when building this image
+COPY go.mod go.sum ./
+RUN go mod download
+COPY . .
+RUN CGO_ENABLED=0 go build -o /usr/local/bin/slinky ./
+
+FROM alpine:3.20
+# Do not set USER: GitHub Docker actions need to write to GITHUB_WORKSPACE
+RUN apk add --no-cache curl jq ca-certificates
+COPY --from=build /usr/local/bin/slinky /usr/local/bin/slinky
+COPY .github/actions/slinky/entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+ENTRYPOINT ["/entrypoint.sh"]
+
+
diff --git a/.github/actions/slinky/action.yml b/.github/actions/slinky/action.yml new file mode 100644 index 0000000..a3a263e --- /dev/null +++ b/.github/actions/slinky/action.yml @@ -0,0 +1,72 @@
+name: "Slinky Link Checker"
+description: "Slink through your repository looking for dead links"
+author: "LukeHagar"
+branding:
+  icon: "link"
+  color: "blue"
+
+inputs:
+  path:
+    description: "Root path to scan"
+    required: false
+    default: "."
+  patterns:
+    description: "Comma-separated doublestar patterns. Ex: docs/**/*.md,**/*.go; default **/*"
+    required: false
+    default: "**/*"
+  concurrency:
+    description: "Maximum concurrent requests"
+    required: false
+    default: "16"
+  timeout:
+    description: "HTTP timeout seconds"
+    required: false
+    default: "10"
+  json-out:
+    description: "Optional path to write JSON results"
+    required: false
+    default: "results.json"
+  md-out:
+    description: "Optional path to write Markdown report for PR comment"
+    required: false
+    default: "results.md"
+  repo-blob-base:
+    description: "Override GitHub blob base URL (https://github.com/<owner>/<repo>/blob/<ref>)"
+    required: false
+    default: ""
+  fail-on-failures:
+    description: "Fail the job if any links fail"
+    required: false
+    default: "true"
+  comment-pr:
+    description: "If running on a PR, post a comment with the report"
+    required: false
+    default: "true"
+  step-summary:
+    description: "Append the report to the GitHub Step Summary"
+    required: false
+    default: "true"
+
+runs:
+  using: "docker"
+  image: "Dockerfile"
+  args: []
+  env:
+    INPUT_PATH: ${{ inputs.path }}
+    INPUT_PATTERNS: ${{ inputs.patterns }}
+    INPUT_CONCURRENCY: ${{ inputs.concurrency }}
+    INPUT_TIMEOUT: ${{ inputs.timeout }}
+    INPUT_JSON_OUT: ${{ inputs['json-out'] }}
+    INPUT_MD_OUT: ${{ inputs['md-out'] }}
+    INPUT_REPO_BLOB_BASE: ${{ inputs['repo-blob-base'] }}
+    INPUT_FAIL_ON_FAILURES: ${{ inputs['fail-on-failures'] }}
+    INPUT_COMMENT_PR: ${{ inputs['comment-pr'] }}
+    INPUT_STEP_SUMMARY: ${{ inputs['step-summary'] }}
+
+outputs:
+  json-path:
+    description: "Path to JSON results file"
+  md-path:
+    description: "Path to Markdown report file"
+
+
diff --git a/.github/actions/slinky/entrypoint.sh b/.github/actions/slinky/entrypoint.sh new file mode 100644 index 0000000..6058e06 --- /dev/null +++ b/.github/actions/slinky/entrypoint.sh @@ -0,0 +1,91 @@
+#!/bin/sh
+set -eu
+
+PATH_ARG="${INPUT_PATH:-.}"
+PATTERNS_ARG="${INPUT_PATTERNS:-**/*}"
+CONCURRENCY_ARG="${INPUT_CONCURRENCY:-16}"
+TIMEOUT_ARG="${INPUT_TIMEOUT:-10}"
+JSON_OUT_ARG="${INPUT_JSON_OUT:-results.json}"
+MD_OUT_ARG="${INPUT_MD_OUT:-results.md}"
+REPO_BLOB_BASE_ARG="${INPUT_REPO_BLOB_BASE:-}"
+FAIL_ON_FAILURES_ARG="${INPUT_FAIL_ON_FAILURES:-true}"
+COMMENT_PR_ARG="${INPUT_COMMENT_PR:-true}"
+STEP_SUMMARY_ARG="${INPUT_STEP_SUMMARY:-true}"
+
+ARGS="check \"${PATH_ARG}\" --concurrency ${CONCURRENCY_ARG} --timeout ${TIMEOUT_ARG}"
+# pflag boolean flags require the --flag=value form; a separate word is parsed as a positional arg
+if [ "${FAIL_ON_FAILURES_ARG}" = "true" ]; then
+  ARGS="$ARGS --fail-on-failures=true"
+else
+  ARGS="$ARGS --fail-on-failures=false"
+fi
+if [ -n "${PATTERNS_ARG}" ]; then
+  # normalize by removing spaces around commas
+  NORM_PATTERNS=$(printf "%s" "${PATTERNS_ARG}" | sed 's/,[[:space:]]*/,/g')
+  # disable pathname expansion so patterns like **/* are not globbed while splitting
+  set -f
+  IFS=','
+  set -- $NORM_PATTERNS
+  unset IFS
+  set +f
+  for pat in "$@"; do
+    ARGS="$ARGS --patterns \"$pat\""
+  done
+fi
+if [ -n "${JSON_OUT_ARG}" ]; then
+  ARGS="$ARGS --json-out \"${JSON_OUT_ARG}\""
+fi
+if [ -n "${MD_OUT_ARG}" ]; then
+  ARGS="$ARGS --md-out \"${MD_OUT_ARG}\""
+fi
+
+# Compute GitHub blob base URL for file links used in the Markdown report
+if [ -n "${REPO_BLOB_BASE_ARG}" ]; then
+  export SLINKY_REPO_BLOB_BASE_URL="${REPO_BLOB_BASE_ARG}"
+elif [ -n "${GITHUB_REPOSITORY:-}" ]; then
+  COMMIT_SHA="${GITHUB_SHA:-}"
+  if [ -n "${GITHUB_EVENT_PATH:-}" ] && command -v jq >/dev/null 2>&1; then
+    PR_HEAD_SHA="$(jq -r '.pull_request.head.sha // empty' "$GITHUB_EVENT_PATH" || true)"
+    if [ -n "$PR_HEAD_SHA" ]; then
+      COMMIT_SHA="$PR_HEAD_SHA"
+    fi
+  fi
+  if [ -n "$COMMIT_SHA" ]; then
+    export SLINKY_REPO_BLOB_BASE_URL="https://github.com/${GITHUB_REPOSITORY}/blob/${COMMIT_SHA}"
+  fi
+fi
+
+# Run the check but defer exiting so outputs, the summary, and the PR comment still happen
+set +e
+eval slinky ${ARGS}
+SLINKY_EXIT=$?
+set -e
+
+# Expose outputs
+if [ -n "${GITHUB_OUTPUT:-}" ]; then
+  if [ -n "${JSON_OUT_ARG}" ]; then
+    echo "json-path=${JSON_OUT_ARG}" >> "$GITHUB_OUTPUT"
+  fi
+  if [ -n "${MD_OUT_ARG}" ]; then
+    echo "md-path=${MD_OUT_ARG}" >> "$GITHUB_OUTPUT"
+  fi
+fi
+
+# Append report to job summary if requested
+if [ "${STEP_SUMMARY_ARG}" = "true" ] && [ -n "${GITHUB_STEP_SUMMARY:-}" ] && [ -n "${MD_OUT_ARG}" ] && [ -f "${MD_OUT_ARG}" ]; then
+  cat "${MD_OUT_ARG}" >> "$GITHUB_STEP_SUMMARY"
+fi
+
+# Post PR comment if this is a PR and requested
+if [ "${COMMENT_PR_ARG}" = "true" ] && [ -n "${MD_OUT_ARG}" ] && [ -f "${MD_OUT_ARG}" ]; then
+  PR_NUMBER=""
+  if [ -n "${GITHUB_EVENT_PATH:-}" ] && command -v jq >/dev/null 2>&1; then
+    PR_NUMBER="$(jq -r '.pull_request.number // empty' "$GITHUB_EVENT_PATH" || true)"
+  fi
+  if [ -n "${PR_NUMBER}" ] && [ -n "${GITHUB_REPOSITORY:-}" ] && [ -n "${GITHUB_TOKEN:-}" ]; then
+    BODY_CONTENT="$(cat "${MD_OUT_ARG}")"
+    # Post the comment
+    curl -sS -H "Authorization: Bearer ${GITHUB_TOKEN}" \
+      -H "Accept: application/vnd.github+json" \
+      -H "X-GitHub-Api-Version: 2022-11-28" \
+      -X POST "https://api.github.com/repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \
+      -d "$(printf '{"body": %s}' "$(printf '%s' "${BODY_CONTENT}" | jq -Rs .)")" >/dev/null || true
+  fi
+fi
+
+exit "${SLINKY_EXIT}"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..ce4a8bd --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,70 @@
+name: CI
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.24.x'
+
+      - name: Cache go build
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
+          restore-keys: |
+            ${{ runner.os }}-go-
+
+      - name: Build
+        run: go build ./...
+
+      - name: Run unit tests
+        run: go test ./...
+
+  action-self-test:
+    runs-on: ubuntu-latest
+    needs: test
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Build action image
+        run: |
+          docker build -t slinky-action -f .github/actions/slinky/Dockerfile .
+
+      - name: Run action container (expect nonzero if failures)
+        id: run_action
+        run: |
+          # The image sets ENTRYPOINT /entrypoint.sh, so pass inputs as env vars
+          # instead of a shell command; the test files contain dead links on purpose,
+          # so tolerate the expected nonzero exit.
+          docker run --rm -v "$PWD:/repo" -w /repo \
+            -e GITHUB_STEP_SUMMARY="/tmp/summary.md" \
+            -e INPUT_PATH=. -e INPUT_PATTERNS="test files/**" \
+            -e INPUT_CONCURRENCY=8 -e INPUT_TIMEOUT=5 \
+            -e INPUT_JSON_OUT=results.json -e INPUT_MD_OUT=results.md \
+            -e INPUT_FAIL_ON_FAILURES=true -e INPUT_COMMENT_PR=false -e INPUT_STEP_SUMMARY=true \
+            slinky-action || echo "slinky exited non-zero (expected when test links fail)"
+
+      - name: Upload results.json
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: results
+          path: results.json
+      - name: Upload results.md
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: results-md
+          path: results.md
+
+
diff --git a/.github/workflows/example-slinky.yml b/.github/workflows/example-slinky.yml new file mode 100644 index 0000000..36e5672 --- /dev/null +++ b/.github/workflows/example-slinky.yml @@ -0,0 +1,40 @@
+name: Slinky Example
+
+on:
+  pull_request:
+    branches: [ main ]
+  workflow_dispatch: {}
+
+jobs:
+  slinky:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Run Slinky link checker
+        uses: ./.github/actions/slinky
+        env:
+          # Required by the entrypoint to post the PR comment
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          path: .
+          patterns: "**/*"
+          concurrency: "16"
+          timeout: "10"
+          json-out: results.json
+          md-out: results.md
+          fail-on-failures: "true"
+          comment-pr: "true"
+          step-summary: "true"
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: slinky-results
+          path: |
+            results.json
+            results.md
+
+
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7f15d1d --- /dev/null +++ b/Makefile @@ -0,0 +1,33 @@
+GO ?= go
+PKG := ./...
+BIN_DIR ?= bin
+BINARY ?= slinky
+BIN := $(BIN_DIR)/$(BINARY)
+
+.PHONY: build test clean check action-image action-run
+
+build: $(BIN)
+
+$(BIN):
+	@mkdir -p $(BIN_DIR)
+	CGO_ENABLED=0 $(GO) build -o $(BIN) ./
+
+test:
+	$(GO) test -v $(PKG)
+
+# Convenience: run the headless check against local test files
+check: build
+	./$(BIN) check . --glob "test files/**" --json-out results.json --fail-on-failures=true
+
+# Build the Docker-based GitHub Action locally
+action-image:
+	docker build -t slinky-action -f .github/actions/slinky/Dockerfile .
+
+# Run the Action container against the current repo (inputs passed as env vars,
+# since the image's entrypoint is /entrypoint.sh)
+action-run: action-image
+	docker run --rm -v "$(PWD):/repo" -w /repo -e GITHUB_STEP_SUMMARY="/tmp/summary.md" -e INPUT_PATH=. -e INPUT_PATTERNS="test files/**" -e INPUT_CONCURRENCY=8 -e INPUT_TIMEOUT=5 -e INPUT_JSON_OUT=results.json -e INPUT_MD_OUT=results.md -e INPUT_FAIL_ON_FAILURES=true -e INPUT_COMMENT_PR=false -e INPUT_STEP_SUMMARY=true slinky-action
+
+clean:
+	rm -rf $(BIN_DIR) results.json results.md
+
+
diff --git a/cmd/check.go b/cmd/check.go new file mode 100644 index 0000000..842a9ac --- /dev/null +++ b/cmd/check.go @@ -0,0 +1,165 @@
+package cmd
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/spf13/cobra"
+
+	"slinky/internal/fsurls"
+	"slinky/internal/report"
+	"slinky/internal/web"
+)
+
+// SerializableResult mirrors web.Result but omits the error field for JSON.
+type SerializableResult struct {
+	URL         string   `json:"url"`
+	OK          bool     `json:"ok"`
+	Status      int      `json:"status"`
+	ErrMsg      string   `json:"error"`
+	Method      string   `json:"method"`
+	ContentType string   `json:"contentType"`
+	Sources     []string `json:"sources"`
+}
+
+func init() {
+	checkCmd := &cobra.Command{
+		Use:   "check [path]",
+		Short: "Scan a directory for URLs and validate them (headless)",
+		Args:  cobra.MaximumNArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			path := "."
+			if len(args) == 1 {
+				path = args[0]
+			}
+
+			var gl []string
+			if len(patterns) > 0 {
+				gl = append(gl, patterns...)
+			} else if globPat != "" {
+				gl = strings.Split(globPat, ",")
+			} else {
+				gl = []string{"**/*"}
+			}
+
+			timeout := time.Duration(timeoutSeconds) * time.Second
+			cfg := web.Config{MaxConcurrency: maxConcurrency, RequestTimeout: timeout}
+
+			// Collect URLs
+			urlToFiles, err := fsurls.CollectURLs(path, gl)
+			if err != nil {
+				return err
+			}
+			var urls []string
+			for u := range urlToFiles {
+				urls = append(urls, u)
+			}
+			sort.Strings(urls)
+
+			// If no URLs found, exit early
+			if len(urls) == 0 {
+				fmt.Println("No URLs found.")
+				return nil
+			}
+
+			// Run checks in a goroutine; draining the channel here avoids a deadlock
+			// when more URLs are queued than the channel buffer can hold
+			startedAt := time.Now()
+			ctx, cancel := context.WithCancel(context.Background())
+			defer cancel()
+			results := make(chan web.Result, 256)
+			go web.CheckURLs(ctx, urls, urlToFiles, results, nil, cfg)
+
+			var total, okCount, failCount int
+			failures := make([]SerializableResult, 0)
+			var failedResults []web.Result
+
+			for r := range results {
+				total++
+				if r.OK {
+					okCount++
+				} else {
+					failCount++
+				}
+				if jsonOut != "" && !r.OK {
+					failures = append(failures, SerializableResult{
+						URL:         r.URL,
+						OK:          r.OK,
+						Status:      r.Status,
+						ErrMsg:      r.ErrMsg,
+						Method:      r.Method,
+						ContentType: r.ContentType,
+						Sources:     r.Sources,
+					})
+				}
+				if !r.OK {
+					failedResults = append(failedResults, r)
+				}
+			}
+
+			// Write JSON if requested (failures only)
+			if jsonOut != "" {
+				f, ferr := os.Create(jsonOut)
+				if ferr != nil {
+					return ferr
+				}
+				enc := json.NewEncoder(f)
+				enc.SetIndent("", "  ")
+				if err := enc.Encode(failures); err != nil {
+					_ = f.Close()
+					return err
+				}
+				_ = f.Close()
+			}
+
+			// Optionally write Markdown report for PR comment consumption
+			if mdOut != "" {
+				base := repoBlobBase
+				if strings.TrimSpace(base) == "" {
+					base = os.Getenv("SLINKY_REPO_BLOB_BASE_URL")
+				}
+				summary := report.Summary{
+					RootPath:        path,
+					StartedAt:       startedAt,
+					FinishedAt:      time.Now(),
+					Processed:       total,
+					OK:              okCount,
+					Fail:            failCount,
+					JSONPath:        jsonOut,
+					RepoBlobBaseURL: base,
+				}
+				if _, err := report.WriteMarkdown(mdOut, failedResults, summary); err != nil {
+					return err
+				}
+			}
+
+			fmt.Printf("Checked %d URLs: %d OK, %d failed\n", total, okCount, failCount)
+			if failOnFailures && failCount > 0 {
+				return fmt.Errorf("%d links failed", failCount)
+			}
+			return nil
+		},
+	}
+
+	checkCmd.Flags().StringVar(&globPat, "glob", "", "comma-separated glob patterns for files (doublestar); empty = all files")
+	checkCmd.Flags().StringSliceVar(&patterns, "patterns", nil, "file match patterns (doublestar). Examples: docs/**/*.md,**/*.go; defaults to **/*")
+	checkCmd.Flags().IntVar(&maxConcurrency, "concurrency", 16, "maximum concurrent requests")
+	checkCmd.Flags().StringVar(&jsonOut, "json-out", "", "path to write JSON results for failing links (array)")
+	checkCmd.Flags().StringVar(&mdOut, "md-out", "", "path to write Markdown report for PR comment")
+	checkCmd.Flags().StringVar(&repoBlobBase, "repo-blob-base", "", "override GitHub blob base URL (e.g.
https://github.com/owner/repo/blob/)") + checkCmd.Flags().IntVar(&timeoutSeconds, "timeout", 10, "HTTP request timeout in seconds") + checkCmd.Flags().BoolVar(&failOnFailures, "fail-on-failures", true, "exit non-zero if any links fail") + + rootCmd.AddCommand(checkCmd) +} + +var ( + timeoutSeconds int + failOnFailures bool + patterns []string + repoBlobBase string +) diff --git a/cmd/root.go b/cmd/root.go new file mode 100644 index 0000000..153bdc9 --- /dev/null +++ b/cmd/root.go @@ -0,0 +1,23 @@ +package cmd + +import ( + "fmt" + "os" + + "github.com/spf13/cobra" +) + +var rootCmd = &cobra.Command{ + Use: "slinky", + Short: "Link checker for repos/directories and webpages (TUI)", + Long: "Slinky scans a directory/repo for URLs in files or crawls a URL, then validates links concurrently in a TUI.", +} + +func Execute() { + if err := rootCmd.Execute(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} + + diff --git a/cmd/run.go b/cmd/run.go new file mode 100644 index 0000000..6530c38 --- /dev/null +++ b/cmd/run.go @@ -0,0 +1,49 @@ +package cmd + +import ( + "strings" + + "github.com/spf13/cobra" + + "slinky/internal/tui" + "slinky/internal/web" +) + +func init() { + runCmd := &cobra.Command{ + Use: "run [path]", + Short: "Scan a directory/repo for URLs in files and validate them (TUI)", + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + path := "." + if len(args) == 1 { + path = args[0] + } + cfg := web.Config{MaxConcurrency: maxConcurrency} + var gl []string + if len(patterns) > 0 { + gl = append(gl, patterns...) + } else if globPat != "" { + gl = strings.Split(globPat, ",") + } else { + gl = []string{"**/*"} + } + return tui.Run(path, gl, cfg, jsonOut, mdOut) + }, + } + + runCmd.Flags().StringVar(&globPat, "glob", "", "comma-separated glob patterns for files (doublestar); empty = all files") + runCmd.Flags().StringSliceVar(&patterns, "patterns", nil, "file match patterns (doublestar). Examples: docs/**/*.md,**/*.go; defaults to **/*") + runCmd.Flags().IntVar(&maxConcurrency, "concurrency", 16, "maximum concurrent requests") + runCmd.Flags().StringVar(&jsonOut, "json-out", "", "path to write full JSON results (array)") + runCmd.Flags().StringVar(&mdOut, "md-out", "", "path to write Markdown report for PR comment") + runCmd.Flags().StringVar(&repoBlobBase, "repo-blob-base", "", "override GitHub blob base URL (e.g. 
https://github.com/owner/repo/blob/)") + rootCmd.AddCommand(runCmd) +} + +var ( + maxConcurrency int + jsonOut string + globPat string + mdOut string +) diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..9e04e0c --- /dev/null +++ b/go.mod @@ -0,0 +1,37 @@ +module slinky + +go 1.24.0 + +toolchain go1.24.7 + +require ( + github.com/bmatcuk/doublestar/v4 v4.6.1 + github.com/charmbracelet/bubbles v0.21.0 + github.com/charmbracelet/bubbletea v1.3.8 + github.com/charmbracelet/lipgloss v1.1.0 + github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 + github.com/spf13/cobra v1.10.1 +) + +require ( + github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect + github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect + github.com/charmbracelet/harmonica v0.2.0 // indirect + github.com/charmbracelet/x/ansi v0.10.1 // indirect + github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd // indirect + github.com/charmbracelet/x/term v0.2.1 // indirect + github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/lucasb-eyer/go-colorful v1.2.0 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-localereader v0.0.1 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect + github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect + github.com/muesli/cancelreader v0.2.2 // indirect + github.com/muesli/termenv v0.16.0 // indirect + github.com/rivo/uniseg v0.4.7 // indirect + github.com/spf13/pflag v1.0.9 // indirect + github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect + golang.org/x/sys v0.36.0 // indirect + golang.org/x/text v0.24.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..6f3f02a --- /dev/null +++ b/go.sum @@ -0,0 +1,70 @@ +github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= +github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= +github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwNy7PA4I= +github.com/bmatcuk/doublestar/v4 v4.6.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= +github.com/charmbracelet/bubbles v0.21.0 h1:9TdC97SdRVg/1aaXNVWfFH3nnLAwOXr8Fn6u6mfQdFs= +github.com/charmbracelet/bubbles v0.21.0/go.mod h1:HF+v6QUR4HkEpz62dx7ym2xc71/KBHg+zKwJtMw+qtg= +github.com/charmbracelet/bubbletea v1.3.8 h1:DJlh6UUPhobzomqCtnLJRmhBSxwUJoPPi6iCToUDr4g= +github.com/charmbracelet/bubbletea v1.3.8/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4= +github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc h1:4pZI35227imm7yK2bGPcfpFEmuY1gc2YSTShr4iJBfs= +github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc/go.mod h1:X4/0JoqgTIPSFcRA/P6INZzIuyqdFY5rm8tb41s9okk= +github.com/charmbracelet/harmonica v0.2.0 h1:8NxJWRWg/bzKqqEaaeFNipOu77YR5t8aSwG4pgaUBiQ= +github.com/charmbracelet/harmonica v0.2.0/go.mod h1:KSri/1RMQOZLbw7AHqgcBycp8pgJnQMYYT8QZRqZ1Ao= +github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY= +github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30= +github.com/charmbracelet/x/ansi v0.10.1 h1:rL3Koar5XvX0pHGfovN03f5cxLbCF2YvLeyz7D2jVDQ= +github.com/charmbracelet/x/ansi v0.10.1/go.mod h1:3RQDQ6lDnROptfpWuUVIUG64bD2g2BgntdxH0Ya5TeE= +github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd 
h1:vy0GVL4jeHEwG5YOXDmi86oYw2yuYUGqz6a8sLwg0X8= +github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd/go.mod h1:xe0nKWGd3eJgtqZRaN9RjMtK7xUYchjzPr7q6kcvCCs= +github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ= +github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= +github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= +github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4= +github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI= +github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo= +github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= +github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= +github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc= +github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= +github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= +github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= +github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= +github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 
+github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= +github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= +golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E= +golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= +golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= +golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/fsurls/fsurls.go b/internal/fsurls/fsurls.go new file mode 100644 index 0000000..79bdef3 --- /dev/null +++ b/internal/fsurls/fsurls.go @@ -0,0 +1,287 @@ +package fsurls + +import ( + "bufio" + "io" + "net/url" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + + "github.com/bmatcuk/doublestar/v4" + ignore "github.com/sabhiram/go-gitignore" +) + +// URL patterns from various contexts +var bareURLRegex = regexp.MustCompile(`(?i)\bhttps?://[^\s<>()\[\]{}"']+`) +var mdLinkRegex = regexp.MustCompile(`(?is)!?\[[^\]]*\]\((.*?)\)`) // captures (url) +var angleURLRegex = regexp.MustCompile(`(?i)<(https?://[^>\s]+)>`) +var quotedURLRegex = regexp.MustCompile(`(?i)"(https?://[^"\s]+)"|'(https?://[^'\s]+)'`) +var htmlHrefRegex = regexp.MustCompile(`(?i)href\s*=\s*"([^"]+)"|href\s*=\s*'([^']+)'`) +var htmlSrcRegex = regexp.MustCompile(`(?i)src\s*=\s*"([^"]+)"|src\s*=\s*'([^']+)'`) + +// Strict hostname validation: labels 1-63 chars, alnum & hyphen, not start/end hyphen, at least one dot, simple TLD +var hostnameRegex = regexp.MustCompile(`^(?i)([a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)+$`) + +// CollectURLs walks the directory tree rooted at rootPath and collects URLs found in +// text-based files matching any of the provided glob patterns (doublestar ** supported). +// If globs is empty, all files are considered. Respects .gitignore if present. +// Returns a map from URL -> sorted unique list of file paths that contained it. +func CollectURLs(rootPath string, globs []string) (map[string][]string, error) { + if strings.TrimSpace(rootPath) == "" { + rootPath = "." 
+ } + cleanRoot := filepath.Clean(rootPath) + + st, _ := os.Stat(cleanRoot) + isFileRoot := st != nil && !st.IsDir() + var ign *ignore.GitIgnore + if !isFileRoot { + ign = loadGitIgnore(cleanRoot) + } + + var patterns []string + for _, g := range globs { + g = strings.TrimSpace(g) + if g == "" { + continue + } + patterns = append(patterns, g) + } + + shouldInclude := func(rel string) bool { + if len(patterns) == 0 { + return true + } + for _, p := range patterns { + ok, _ := doublestar.PathMatch(p, rel) + if ok { + return true + } + } + return false + } + + urlToFiles := make(map[string]map[string]struct{}) + + // 2 MiB max file size to avoid huge/binary files + const maxSize = 2 * 1024 * 1024 + + // Walk the filesystem + walkFn := func(path string, d os.DirEntry, err error) error { + if err != nil { + return nil + } + rel, rerr := filepath.Rel(cleanRoot, path) + if rerr != nil { + rel = path + } + rel = filepath.ToSlash(rel) + if d.IsDir() { + base := filepath.Base(path) + if base == ".git" { + return filepath.SkipDir + } + if ign != nil && ign.MatchesPath(rel) { + return filepath.SkipDir + } + return nil + } + if ign != nil && ign.MatchesPath(rel) { + return nil + } + info, ierr := d.Info() + if ierr != nil { + return nil + } + if info.Size() > maxSize { + return nil + } + if isFileRoot && rel == "." { + rel = filepath.ToSlash(filepath.Base(path)) + } + if !shouldInclude(rel) { + return nil + } + + f, ferr := os.Open(path) + if ferr != nil { + return nil + } + defer f.Close() + br := bufio.NewReader(f) + // Read up to maxSize bytes + var b strings.Builder + read := int64(0) + for { + chunk, cerr := br.ReadString('\n') + b.WriteString(chunk) + read += int64(len(chunk)) + if cerr == io.EOF || read > maxSize { + break + } + if cerr != nil { + break + } + } + content := b.String() + // Skip if likely binary (NUL present) + if strings.IndexByte(content, '\x00') >= 0 { + return nil + } + + candidates := extractCandidates(content) + if len(candidates) == 0 { + return nil + } + for _, raw := range candidates { + u := sanitizeURLToken(raw) + if u == "" { + continue + } + fileSet, ok := urlToFiles[u] + if !ok { + fileSet = make(map[string]struct{}) + urlToFiles[u] = fileSet + } + fileSet[rel] = struct{}{} + } + return nil + } + + _ = filepath.WalkDir(cleanRoot, walkFn) + + // Convert to sorted slices + result := make(map[string][]string, len(urlToFiles)) + for u, files := range urlToFiles { + var list []string + for fp := range files { + list = append(list, fp) + } + sort.Strings(list) + result[u] = list + } + return result, nil +} + +func sanitizeURLToken(s string) string { + s = strings.TrimSpace(s) + // Strip surrounding angle brackets or quotes + if strings.HasPrefix(s, "<") && strings.HasSuffix(s, ">") { + s = strings.TrimSuffix(strings.TrimPrefix(s, "<"), ">") + } + if (strings.HasPrefix(s, "\"") && strings.HasSuffix(s, "\"")) || (strings.HasPrefix(s, "'") && strings.HasSuffix(s, "'")) { + s = strings.TrimSuffix(strings.TrimPrefix(s, string(s[0])), string(s[0])) + } + // Trim trailing punctuation and balance parentheses + s = trimTrailingDelimiters(s) + low := strings.ToLower(s) + if !(strings.HasPrefix(low, "http://") || strings.HasPrefix(low, "https://")) { + return "" + } + // Parse and validate hostname strictly + u, err := url.Parse(s) + if err != nil || u == nil { + return "" + } + host := u.Hostname() + if host == "" { + return "" + } + // Reject placeholders like [tenant] or {tenant} + if strings.ContainsAny(host, "[]{}") { + return "" + } + // Must match strict hostname rules + if 
!hostnameRegex.MatchString(host) {
+		return ""
+	}
+	return s
+}
+
+func trimTrailingDelimiters(s string) string {
+	for {
+		if s == "" {
+			return s
+		}
+		last := s[len(s)-1]
+		// ')' is handled separately below so balanced parentheses survive
+		if strings.ContainsRune(".,;:!?]'\"}", rune(last)) {
+			s = s[:len(s)-1]
+			continue
+		}
+		if last == ')' {
+			open := strings.Count(s, "(")
+			close := strings.Count(s, ")")
+			if close > open {
+				s = s[:len(s)-1]
+				continue
+			}
+		}
+		return s
+	}
+}
+
+func extractCandidates(content string) []string {
+	var out []string
+	for _, m := range mdLinkRegex.FindAllStringSubmatch(content, -1) {
+		if len(m) > 1 {
+			out = append(out, m[1])
+		}
+	}
+	for _, m := range htmlHrefRegex.FindAllStringSubmatch(content, -1) {
+		if len(m) > 2 {
+			if m[1] != "" {
+				out = append(out, m[1])
+			} else if m[2] != "" {
+				out = append(out, m[2])
+			}
+		}
+	}
+	for _, m := range htmlSrcRegex.FindAllStringSubmatch(content, -1) {
+		if len(m) > 2 {
+			if m[1] != "" {
+				out = append(out, m[1])
+			} else if m[2] != "" {
+				out = append(out, m[2])
+			}
+		}
+	}
+	for _, m := range angleURLRegex.FindAllStringSubmatch(content, -1) {
+		if len(m) > 1 {
+			out = append(out, m[1])
+		}
+	}
+	for _, m := range quotedURLRegex.FindAllStringSubmatch(content, -1) {
+		if len(m) > 2 {
+			if m[1] != "" {
+				out = append(out, m[1])
+			} else if m[2] != "" {
+				out = append(out, m[2])
+			}
+		}
+	}
+	out = append(out, bareURLRegex.FindAllString(content, -1)...)
+	return out
+}
+
+func loadGitIgnore(root string) *ignore.GitIgnore {
+	var lines []string
+	gi := filepath.Join(root, ".gitignore")
+	if b, err := os.ReadFile(gi); err == nil {
+		for _, ln := range strings.Split(string(b), "\n") {
+			lines = append(lines, ln)
+		}
+	}
+	ge := filepath.Join(root, ".git", "info", "exclude")
+	if b, err := os.ReadFile(ge); err == nil {
+		for _, ln := range strings.Split(string(b), "\n") {
+			lines = append(lines, ln)
+		}
+	}
+	if len(lines) == 0 {
+		return nil
+	}
+	return ignore.CompileIgnoreLines(lines...)
+}
diff --git a/internal/fsurls/fsurls_test.go b/internal/fsurls/fsurls_test.go new file mode 100644 index 0000000..cf37ded --- /dev/null +++ b/internal/fsurls/fsurls_test.go @@ -0,0 +1,43 @@
+package fsurls
+
+import (
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestCollectURLs_FromTestFiles(t *testing.T) {
+	root := filepath.Join("..", "..", "test files")
+
+	urls, err := CollectURLs(root, []string{"**/*"})
+	if err != nil {
+		t.Fatalf("CollectURLs error: %v", err)
+	}
+
+	// Spot-check presence of some known URLs. http://example..com is deliberately
+	// absent: its invalid hostname is rejected at collection time.
+	mustContain := []string{
+		"https://example.com",
+		"https://en.wikipedia.org/wiki/Main_Page",
+		"http://example.com:8080",
+		"https://this-domain-does-not-exist-123456789.com",
+	}
+	for _, u := range mustContain {
+		if _, ok := urls[u]; !ok {
+			// Show nearby URLs to aid debugging if it fails.
+ var sample []string + for seen := range urls { + if strings.Contains(seen, "example") { + sample = append(sample, seen) + } + } + t.Fatalf("expected URL %q to be collected; example URLs seen: %v", u, sample) + } + } + + // Ensure sources are recorded for a known URL + srcs := urls["https://example.com"] + if len(srcs) == 0 { + t.Fatalf("expected sources for https://example.com, got none") + } +} diff --git a/internal/fsurls/lang_files_test.go b/internal/fsurls/lang_files_test.go new file mode 100644 index 0000000..6e1d8c0 --- /dev/null +++ b/internal/fsurls/lang_files_test.go @@ -0,0 +1,42 @@ +package fsurls + +import ( + "path/filepath" + "testing" +) + +func TestCollectURLs_FromCodeFiles(t *testing.T) { + root := filepath.Join("..", "..", "test files") + urls, err := CollectURLs(root, []string{"**/*"}) + if err != nil { + t.Fatalf("CollectURLs error: %v", err) + } + + // Valid URLs from various languages should be present (including a known nonexistent-but-well-formed) + valids := []string{ + "https://example.com", + "https://en.wikipedia.org/wiki/Main_Page", + "https://developer.mozilla.org", + "https://svelte.dev", + "https://go.dev/doc/", + "https://this-domain-does-not-exist-123456789.com", + } + for _, u := range valids { + if _, ok := urls[u]; !ok { + t.Fatalf("expected valid URL %q to be collected", u) + } + } + + // Placeholder patterns should be excluded by strict validation + placeholders := []string{ + "https://[tenant].api.identitynow.com", + "https://{tenant}.api.identitynow.com", + "https://[tenant].[domain].com", + "https://{tenant}.api.ideidentitynow.com/v3/transforms", + } + for _, u := range placeholders { + if _, ok := urls[u]; ok { + t.Fatalf("did not expect placeholder URL %q to be collected", u) + } + } +} diff --git a/internal/report/markdown.go b/internal/report/markdown.go new file mode 100644 index 0000000..3250dda --- /dev/null +++ b/internal/report/markdown.go @@ -0,0 +1,168 @@ +package report + +import ( + "bytes" + "fmt" + "html" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "slinky/internal/web" +) + +// Summary captures high-level run details for the report. +type Summary struct { + RootPath string + StartedAt time.Time + FinishedAt time.Time + Processed int + OK int + Fail int + AvgRPS float64 + PeakRPS float64 + LowRPS float64 + JSONPath string + RepoBlobBaseURL string // e.g. https://github.com/owner/repo/blob/ +} + +// WriteMarkdown writes a GitHub-flavored Markdown report to path. If path is empty, +// it derives a safe filename from s.RootPath. +func WriteMarkdown(path string, results []web.Result, s Summary) (string, error) { + if strings.TrimSpace(path) == "" { + base := filepath.Base(s.RootPath) + if strings.TrimSpace(base) == "" || base == "." || base == string(filepath.Separator) { + base = "results" + } + var b strings.Builder + for _, r := range strings.ToLower(base) { + if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_' || r == '.' 
{ + b.WriteRune(r) + } else { + b.WriteByte('_') + } + } + path = fmt.Sprintf("%s.md", b.String()) + } + + var buf bytes.Buffer + // Title and summary + buf.WriteString("## Slinky Test Report\n\n") + buf.WriteString(fmt.Sprintf("- **Root**: %s\n", escapeMD(s.RootPath))) + buf.WriteString(fmt.Sprintf("- **Started**: %s\n", s.StartedAt.Format("2006-01-02 15:04:05 MST"))) + buf.WriteString(fmt.Sprintf("- **Finished**: %s\n", s.FinishedAt.Format("2006-01-02 15:04:05 MST"))) + buf.WriteString(fmt.Sprintf("- **Processed**: %d • **OK**: %d • **Fail**: %d\n", s.Processed, s.OK, s.Fail)) + buf.WriteString(fmt.Sprintf("- **Rates**: avg %.1f/s • peak %.1f/s • low %.1f/s\n", s.AvgRPS, s.PeakRPS, s.LowRPS)) + if s.JSONPath != "" { + base := filepath.Base(s.JSONPath) + buf.WriteString(fmt.Sprintf("- **JSON**: %s\n", escapeMD(base))) + } + buf.WriteString("\n") + + // Failures by URL + buf.WriteString("### Failures by URL\n\n") + + // Gather issues per URL with list of files + type fileRef struct { + Path string + } + type urlIssue struct { + Status int + Method string + ErrMsg string + Files []fileRef + } + byURL := make(map[string]*urlIssue) + for _, r := range results { + ui, ok := byURL[r.URL] + if !ok { + ui = &urlIssue{Status: r.Status, Method: r.Method, ErrMsg: r.ErrMsg} + byURL[r.URL] = ui + } + for _, src := range r.Sources { + ui.Files = append(ui.Files, fileRef{Path: src}) + } + } + + // Sort URLs + var urls []string + for u := range byURL { + urls = append(urls, u) + } + sort.Strings(urls) + + for _, u := range urls { + ui := byURL[u] + // Header line for URL + if ui.Status > 0 { + buf.WriteString(fmt.Sprintf("- %d %s `%s` — %s\n", ui.Status, escapeMD(ui.Method), escapeMD(u), escapeMD(ui.ErrMsg))) + } else { + buf.WriteString(fmt.Sprintf("- %s `%s` — %s\n", escapeMD(ui.Method), escapeMD(u), escapeMD(ui.ErrMsg))) + } + // Files list (collapsible) + buf.WriteString("
<details><summary>files</summary>\n\n")
+		// Deduplicate and sort file paths
+		seen := make(map[string]struct{})
+		var files []string
+		for _, fr := range ui.Files {
+			if _, ok := seen[fr.Path]; ok {
+				continue
+			}
+			seen[fr.Path] = struct{}{}
+			files = append(files, fr.Path)
+		}
+		sort.Strings(files)
+		for _, fn := range files {
+			if strings.TrimSpace(s.RepoBlobBaseURL) != "" {
+				buf.WriteString(fmt.Sprintf(" - [%s](%s/%s)\n", escapeMD(fn), strings.TrimRight(s.RepoBlobBaseURL, "/"), escapeLinkPath(fn)))
+			} else {
+				buf.WriteString(fmt.Sprintf(" - [%s](./%s)\n", escapeMD(fn), escapeLinkPath(fn)))
+			}
+		}
+		buf.WriteString("\n</details>
\n\n") + } + + f, err := os.Create(path) + if err != nil { + return "", err + } + defer f.Close() + if _, err := f.Write(buf.Bytes()); err != nil { + return "", err + } + return path, nil +} + +func escapeMD(s string) string { + // Basic HTML escape to be safe in GitHub Markdown table cells + return html.EscapeString(s) +} + +// formatSourcesList renders a list of file paths as an HTML unordered list suitable +// for inclusion in a Markdown table cell. Individual entries are escaped. +func formatSourcesList(srcs []string) string { + if len(srcs) == 0 { + return "" + } + var b strings.Builder + b.WriteString("
<ul>\n")
+	for _, s := range srcs {
+		b.WriteString("<li>")
+		b.WriteString(escapeMD(s))
+		b.WriteString("</li>\n")
+	}
+	b.WriteString("</ul>
") + return b.String() +} + +// escapeLinkPath escapes a relative path for inclusion in a Markdown link URL. +// We keep it simple and only escape parentheses and spaces. +func escapeLinkPath(p string) string { + // Replace spaces with %20 and parentheses with encoded forms + p = strings.ReplaceAll(p, " ", "%20") + p = strings.ReplaceAll(p, "(", "%28") + p = strings.ReplaceAll(p, ")", "%29") + return p +} diff --git a/internal/tui/fs_bridge.go b/internal/tui/fs_bridge.go new file mode 100644 index 0000000..861e759 --- /dev/null +++ b/internal/tui/fs_bridge.go @@ -0,0 +1,10 @@ +package tui + +import ( + "slinky/internal/fsurls" +) + +// fsCollect is a tiny bridge to avoid importing fsurls directly in tui.go +func fsCollect(root string, globs []string) (map[string][]string, error) { + return fsurls.CollectURLs(root, globs) +} diff --git a/internal/tui/tui.go b/internal/tui/tui.go new file mode 100644 index 0000000..8b35e0c --- /dev/null +++ b/internal/tui/tui.go @@ -0,0 +1,319 @@ +package tui + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + "time" + + "github.com/charmbracelet/bubbles/progress" + "github.com/charmbracelet/bubbles/spinner" + "github.com/charmbracelet/bubbles/viewport" + tea "github.com/charmbracelet/bubbletea" + "github.com/charmbracelet/lipgloss" + + "slinky/internal/report" + "slinky/internal/web" +) + +type linkResultMsg struct{ res web.Result } +type crawlDoneMsg struct{} +type statsMsg struct{ s web.Stats } +type tickMsg struct{ t time.Time } + +type model struct { + rootPath string + cfg web.Config + jsonOut string + mdOut string + globs []string + + results chan web.Result + stats chan web.Stats + started time.Time + finishedAt time.Time + done bool + + spin spinner.Model + prog progress.Model + vp viewport.Model + + lines []string + + total int + ok int + fail int + + pending int + processed int + + lastProcessed int + rps float64 + peakRPS float64 + lowRPS float64 + + allResults []web.Result + jsonPath string + mdPath string + + showFail bool +} + +// Run scans files under rootPath matching globs, extracts URLs, and checks them. 
+func Run(rootPath string, globs []string, cfg web.Config, jsonOut string, mdOut string) error { + m := &model{rootPath: rootPath, cfg: cfg, jsonOut: jsonOut, mdOut: mdOut, globs: globs} + p := tea.NewProgram(m, tea.WithAltScreen()) + return p.Start() +} + +func (m *model) Init() tea.Cmd { + m.spin = spinner.New() + m.spin.Spinner = spinner.Dot + m.spin.Style = lipgloss.NewStyle().Foreground(lipgloss.Color("205")) + m.prog = progress.New(progress.WithDefaultGradient()) + m.started = time.Now() + m.lowRPS = -1 + m.results = make(chan web.Result, 256) + m.stats = make(chan web.Stats, 64) + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + defer cancel() + urlsMap, _ := fsCollect(m.rootPath, m.globs) + var urls []string + for u := range urlsMap { + urls = append(urls, u) + } + web.CheckURLs(ctx, urls, urlsMap, m.results, m.stats, m.cfg) + }() + + return tea.Batch(m.spin.Tick, m.waitForEvent(), tickCmd()) +} + +func tickCmd() tea.Cmd { + return tea.Tick(time.Second, func(t time.Time) tea.Msg { return tickMsg{t: t} }) +} + +func (m *model) waitForEvent() tea.Cmd { + return func() tea.Msg { + if m.results == nil { + return crawlDoneMsg{} + } + select { + case res, ok := <-m.results: + if ok { + return linkResultMsg{res: res} + } + return crawlDoneMsg{} + case s := <-m.stats: + return statsMsg{s: s} + } + } +} + +func (m *model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { + switch msg := msg.(type) { + case tea.KeyMsg: + switch msg.String() { + case "q", "ctrl+c": + return m, tea.Quit + case "f": + m.showFail = !m.showFail + m.refreshViewport() + return m, nil + } + case tea.WindowSizeMsg: + // Reserve space for header (1), stats (1), progress (1), spacer (1), footer (1) + reserved := 5 + if m.vp.Width == 0 { + m.vp = viewport.Model{Width: msg.Width, Height: max(msg.Height-reserved, 3)} + } else { + m.vp.Width = msg.Width + m.vp.Height = max(msg.Height-reserved, 3) + } + m.prog.Width = max(msg.Width-4, 10) + m.refreshViewport() + return m, nil + case linkResultMsg: + // Show every event in the log + prefix := statusEmoji(msg.res.OK, msg.res.Err) + if msg.res.CacheHit { + prefix = "🗃" + } + line := fmt.Sprintf("%s %3d %s", prefix, msg.res.Status, msg.res.URL) + m.lines = append(m.lines, line) + // Only count non-cache-hit in totals and JSON export + if !msg.res.CacheHit { + m.total++ + if msg.res.OK && msg.res.Err == nil { + m.ok++ + } else { + m.fail++ + } + m.allResults = append(m.allResults, msg.res) + } + m.refreshViewport() + return m, m.waitForEvent() + case statsMsg: + m.pending = msg.s.Pending + m.processed = msg.s.Processed + return m, m.waitForEvent() + case tickMsg: + // compute requests/sec over the last tick + delta := m.processed - m.lastProcessed + m.lastProcessed = m.processed + m.rps = float64(delta) + if m.rps > m.peakRPS { + m.peakRPS = m.rps + } + if m.lowRPS < 0 || m.rps < m.lowRPS { + m.lowRPS = m.rps + } + return m, tickCmd() + case crawlDoneMsg: + m.done = true + m.finishedAt = time.Now() + m.results = nil + m.writeJSON() + m.writeMarkdown() + return m, tea.Quit + } + + var cmd tea.Cmd + m.spin, cmd = m.spin.Update(msg) + return m, cmd +} + +func (m *model) refreshViewport() { + var filtered []string + if m.showFail { + for _, l := range m.lines { + if strings.HasPrefix(l, "❌") { + filtered = append(filtered, l) + } + } + } else { + filtered = m.lines + } + m.vp.SetContent(strings.Join(filtered, "\n")) + m.vp.GotoBottom() +} + +func (m *model) writeJSON() { + path := m.jsonOut + if strings.TrimSpace(path) == "" { + base := filepath.Base(m.rootPath) + 
if strings.TrimSpace(base) == "" || base == "." || base == string(filepath.Separator) { + base = "results" + } + re := regexp.MustCompile(`[^a-zA-Z0-9.-]+`) + safe := re.ReplaceAllString(strings.ToLower(base), "_") + path = fmt.Sprintf("%s.json", safe) + } + f, err := os.Create(path) + if err != nil { + return + } + defer f.Close() + // Only write failing results + var fails []web.Result + for _, r := range m.allResults { + if !(r.OK && r.Err == nil) { + fails = append(fails, r) + } + } + enc := json.NewEncoder(f) + enc.SetIndent("", " ") + _ = enc.Encode(fails) + m.jsonPath = path +} + +func (m *model) writeMarkdown() { + // Compute average RPS over entire run + dur := m.finishedAt.Sub(m.started) + avg := 0.0 + if dur.Seconds() > 0 { + avg = float64(m.processed) / dur.Seconds() + } + s := report.Summary{ + RootPath: m.rootPath, + StartedAt: m.started, + FinishedAt: m.finishedAt, + Processed: m.processed, + OK: m.ok, + Fail: m.fail, + AvgRPS: avg, + PeakRPS: m.peakRPS, + LowRPS: m.lowRPS, + JSONPath: m.jsonPath, + RepoBlobBaseURL: os.Getenv("SLINKY_REPO_BLOB_BASE_URL"), + } + // Only include failing results in the markdown report + var failsMD []web.Result + for _, r := range m.allResults { + if !(r.OK && r.Err == nil) { + failsMD = append(failsMD, r) + } + } + p, err := report.WriteMarkdown(m.mdOut, failsMD, s) + if err == nil { + m.mdPath = p + } +} + +func (m *model) View() string { + header := lipgloss.NewStyle().Bold(true).Render(fmt.Sprintf(" Scanning %s ", m.rootPath)) + if m.done { + dur := time.Since(m.started) + if !m.finishedAt.IsZero() { + dur = m.finishedAt.Sub(m.started) + } + avg := 0.0 + if dur.Seconds() > 0 { + avg = float64(m.processed) / dur.Seconds() + } + summary := []string{ + fmt.Sprintf("Duration: %s", dur.Truncate(time.Millisecond)), + fmt.Sprintf("Processed: %d OK:%d Fail:%d", m.processed, m.ok, m.fail), + fmt.Sprintf("Rates: avg %.1f/s peak %.1f/s low %.1f/s", avg, m.peakRPS, m.lowRPS), + } + if m.jsonPath != "" { + summary = append(summary, fmt.Sprintf("JSON: %s", m.jsonPath)) + } + if m.mdPath != "" { + summary = append(summary, fmt.Sprintf("Markdown: %s", m.mdPath)) + } + footer := lipgloss.NewStyle().Faint(true).Render("Controls: [q] quit [f] toggle fails") + container := lipgloss.NewStyle().Padding(1) + return container.Render(strings.Join(append([]string{header}, append(summary, footer)...), "\n")) + } + percent := 0.0 + totalWork := m.processed + m.pending + if totalWork > 0 { + percent = float64(m.processed) / float64(totalWork) + } + progressLine := m.prog.ViewAs(percent) + stats := fmt.Sprintf("%s total:%d ok:%d fail:%d pending:%d processed:%d rps:%.1f/s", m.spin.View(), m.total, m.ok, m.fail, m.pending, m.processed, m.rps) + body := m.vp.View() + footer := lipgloss.NewStyle().Faint(true).Render("Controls: [q] quit [f] toggle fails") + container := lipgloss.NewStyle().Padding(1) + return container.Render(strings.Join([]string{header, stats, progressLine, "", body, footer}, "\n")) +} + +func statusEmoji(ok bool, err error) string { + if ok && err == nil { + return "✅" + } + return "❌" +} + +func max(a, b int) int { + if a > b { + return a + } + return b +} diff --git a/internal/web/checker.go b/internal/web/checker.go new file mode 100644 index 0000000..1ec711c --- /dev/null +++ b/internal/web/checker.go @@ -0,0 +1,103 @@ +package web + +import ( + "context" + "net" + "net/http" + "sort" + "time" +) + +// CheckURLs performs concurrent GET requests for each URL and emits Result events. +// sources maps URL -> list of file paths where it was found. 
+func CheckURLs(ctx context.Context, urls []string, sources map[string][]string, out chan<- Result, stats chan<- Stats, cfg Config) {
+	defer close(out)
+
+	concurrency := cfg.MaxConcurrency
+	if concurrency <= 0 {
+		concurrency = 8
+	}
+
+	// Build HTTP client similar to crawler
+	transport := &http.Transport{
+		Proxy:                 http.ProxyFromEnvironment,
+		DialContext:           (&net.Dialer{Timeout: 2 * time.Second, KeepAlive: 30 * time.Second}).DialContext,
+		TLSHandshakeTimeout:   5 * time.Second,
+		ExpectContinueTimeout: 1 * time.Second,
+		MaxIdleConns:          concurrency * 2,
+		MaxIdleConnsPerHost:   concurrency,
+		MaxConnsPerHost:       concurrency,
+		IdleConnTimeout:       30 * time.Second,
+		ResponseHeaderTimeout: cfg.RequestTimeout,
+	}
+	client := &http.Client{Timeout: cfg.RequestTimeout, Transport: transport}
+
+	type job struct{ url string }
+	jobs := make(chan job, len(urls))
+	done := make(chan struct{})
+
+	// Seed jobs
+	unique := make(map[string]struct{}, len(urls))
+	for _, u := range urls {
+		if u == "" {
+			continue
+		}
+		if _, ok := unique[u]; ok {
+			continue
+		}
+		unique[u] = struct{}{}
+		jobs <- job{url: u}
+	}
+	close(jobs)
+
+	// A single goroutine owns the progress counters so workers do not race on them
+	progress := make(chan struct{}, len(unique))
+	go func() {
+		processed := 0
+		pending := len(unique)
+		for range progress {
+			processed++
+			pending--
+			if stats != nil {
+				select {
+				case stats <- Stats{Pending: pending, Processed: processed}:
+				default:
+				}
+			}
+		}
+	}()
+
+	worker := func() {
+		defer func() { done <- struct{}{} }()
+		for j := range jobs {
+			select {
+			case <-ctx.Done():
+				return
+			default:
+			}
+			ok, status, resp, err := fetchWithMethod(ctx, client, http.MethodGet, j.url)
+			if resp != nil && resp.Body != nil {
+				resp.Body.Close()
+			}
+			// Treat 401/403 as valid links (exist but require authorization)
+			if status == http.StatusUnauthorized || status == http.StatusForbidden {
+				ok = true
+				err = nil
+			}
+			var srcs []string
+			if sources != nil {
+				srcs = sources[j.url]
+			}
+			out <- Result{URL: j.url, OK: ok, Status: status, Err: err, ErrMsg: errString(err), Method: http.MethodGet, Sources: cloneAndSort(srcs)}
+			progress <- struct{}{}
+		}
+	}
+
+	for i := 0; i < concurrency; i++ {
+		go worker()
+	}
+	for i := 0; i < concurrency; i++ {
+		<-done
+	}
+	close(progress)
+}
+
+func cloneAndSort(in []string) []string {
+	if len(in) == 0 {
+		return nil
+	}
+	out := append([]string(nil), in...)
+	sort.Strings(out)
+	return out
+}
diff --git a/internal/web/checker_test.go b/internal/web/checker_test.go new file mode 100644 index 0000000..b645c66 --- /dev/null +++ b/internal/web/checker_test.go @@ -0,0 +1,53 @@
+package web
+
+import (
+	"context"
+	"testing"
+	"time"
+)
+
+// This test exercises CheckURLs with a mix of known-good and invalid URLs.
+// It does real network calls; keep timeouts short to avoid long CI runs.
+func TestCheckURLs_Basic(t *testing.T) { + urls := []string{ + "https://example.com", // should be OK + "https://en.wikipedia.org/wiki/Main_Page", // should be OK + "http://example..com", // invalid hostname + "https://this-domain-does-not-exist-123456789.com", // NXDOMAIN/nonexistent + } + + sources := map[string][]string{ + "https://example.com": {"test files/test2.txt"}, + "https://en.wikipedia.org/wiki/Main_Page": {"test files/test5.html"}, + "http://example..com": {"test files/test5.html"}, + "https://this-domain-does-not-exist-123456789.com": {"test files/test5.html"}, + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + out := make(chan Result, 16) + cfg := Config{MaxConcurrency: 8, RequestTimeout: 5 * time.Second} + + go CheckURLs(ctx, urls, sources, out, nil, cfg) + + seen := 0 + var okCount, failCount int + for r := range out { + seen++ + if r.OK { + okCount++ + } else { + failCount++ + } + } + + if seen != len(urls) { + t.Fatalf("expected %d results, got %d", len(urls), seen) + } + if okCount == 0 { + t.Fatalf("expected at least one OK result") + } + if failCount == 0 { + t.Fatalf("expected at least one failure result") + } +} diff --git a/internal/web/http.go b/internal/web/http.go new file mode 100644 index 0000000..e020a0b --- /dev/null +++ b/internal/web/http.go @@ -0,0 +1,68 @@ +package web + +import ( + "context" + "errors" + "net" + "net/http" + "strings" +) + +const browserUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0 Safari/537.36" + +func fetchWithMethod(ctx context.Context, client *http.Client, method string, raw string) (bool, int, *http.Response, error) { + req, err := http.NewRequestWithContext(ctx, method, raw, nil) + if err != nil { + return false, 0, nil, err + } + req.Header.Set("User-Agent", browserUA) + req.Header.Set("Accept", "*/*") + resp, err := client.Do(req) + if err != nil { + if isDNSError(err) { + return false, 404, nil, simpleError("host not found") + } + if isTimeout(err) { + return false, 408, nil, simpleError("request timeout") + } + if isRefused(err) { + return false, 503, nil, simpleError("connection refused") + } + return false, 0, nil, err + } + return resp.StatusCode >= 200 && resp.StatusCode < 400, resp.StatusCode, resp, nil +} + +func errString(e error) string { + if e == nil { + return "" + } + return e.Error() +} + +func isTimeout(err error) bool { + if err == nil { + return false + } + if errors.Is(err, context.DeadlineExceeded) { + return true + } + if ne, ok := err.(net.Error); ok && ne.Timeout() { + return true + } + return false +} + +func isDNSError(err error) bool { + msg := strings.ToLower(err.Error()) + return strings.Contains(msg, "no such host") || strings.Contains(msg, "server misbehaving") +} + +func isRefused(err error) bool { + msg := strings.ToLower(err.Error()) + return strings.Contains(msg, "connection refused") +} + +type simpleError string + +func (e simpleError) Error() string { return string(e) } diff --git a/internal/web/types.go b/internal/web/types.go new file mode 100644 index 0000000..f5bbdc1 --- /dev/null +++ b/internal/web/types.go @@ -0,0 +1,29 @@ +package web + +import "time" + +type Result struct { + URL string + OK bool + Status int + Err error + ErrMsg string + Depth int + CacheHit bool + Method string + ContentType string + Sources []string +} + +type Stats struct { + Pending int + Processed int +} + +type Config struct { + MaxDepth int + MaxConcurrency int + RequestTimeout time.Duration + MaxRetries429 int + Exclude []string +} 
diff --git a/main.go b/main.go new file mode 100644 index 0000000..6d4c0e4 --- /dev/null +++ b/main.go @@ -0,0 +1,9 @@ +package main + +import "slinky/cmd" + +func main() { + cmd.Execute() +} + + diff --git a/test files/test nesting/test more nesting/test1.md b/test files/test nesting/test more nesting/test1.md new file mode 100644 index 0000000..df0df1e --- /dev/null +++ b/test files/test nesting/test more nesting/test1.md @@ -0,0 +1,102 @@ + +# Invalid URL Test Cases + +Here are some invalid URLs using various Markdown link and image syntaxes: + +- [Broken Protocol](htp://invalid-url.com) + *Reason: Misspelled protocol ("htp" instead of "http")* + +- [No Domain](http://) + *Reason: Missing domain* + +- [Missing Name Before TLD](http://.com) + *Reason: Missing domain name before TLD* + +- [Underscore in Domain](http://invalid_domain) + *Reason: Underscore in domain, not allowed in DNS hostnames* + +- [Domain Starts with Hyphen](http://-example.com) + *Reason: Domain cannot start with a hyphen* + +- [Double Dot in Domain](http://example..com) + *Reason: Double dot in domain* + +- [Non-numeric Port](http://example.com:abc) + *Reason: Invalid port (non-numeric)* + +- [Unsupported Protocol](ftp://example.com) + *Reason: Unsupported protocol (should be http/https)* + +- [Space in Domain](http://example .com) + *Reason: Space in domain* + +- [Extra Slash in Protocol](http:///example.com) + *Reason: Extra slash in protocol separator* + +- ![Broken Image Link](http://) + *Reason: Image with missing domain* + +- ![Invalid Protocol Image](htp://invalid-url.com/image.png) + *Reason: Image with misspelled protocol* + +- ![Double Dot Image](http://example..com/pic.jpg) + *Reason: Image with double dot in domain* + +- [![Image with Bad Link](http://)](htp://invalid-url.com) + *Reason: Image and link both with invalid URLs* + +--- + +# Correctly Formatted but Nonexistent URLs + +These URLs are syntactically correct but do not point to real sites: + +- [Nonexistent Domain](https://this-domain-does-not-exist-123456789.com) + +- [Fake Subdomain](https://foo.bar.baz.nonexistent-tld) + +- [Unused TLD](https://example.madeuptld) + +- [Long Random String](https://abcdefg1234567890.example.com) + +- [Fake Image](https://notarealwebsite.com/image.png) + +- ![Nonexistent Image](https://this-image-does-not-exist.com/pic.jpg) + +- [![Fake Image Link](https://notarealwebsite.com/fake.png)](https://notarealwebsite.com/page) + +- [Unregistered Domain](https://unregistered-website-xyz.com) + +- [Fake Path](https://example.com/this/path/does/not/exist) + +- [Nonexistent Page](https://example.com/404notfound) + +--- + +# Valid URLs + +These URLs are well-formed and point to known good sites: + +- [Example Domain](https://example.com) + +- [Wikipedia](https://en.wikipedia.org/wiki/Main_Page) + +- [GitHub](https://github.com) + +- [Google](https://www.google.com) + +- [Mozilla Developer Network](https://developer.mozilla.org) + +- [Go Documentation](https://go.dev/doc/) + +- ![Valid Image](https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png) + +- [![GitHub Logo](https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png)](https://github.com) + +- [Svelte](https://svelte.dev) + +- [OpenAI](https://openai.com) + + + + diff --git a/test files/test nesting/test2.txt b/test files/test nesting/test2.txt new file mode 100644 index 0000000..eb470ff --- /dev/null +++ b/test files/test nesting/test2.txt @@ -0,0 +1,11 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
+Check out this website: https://example.com for more information.
+Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Mauris non tempor quam.
+
+Phasellus euismod, justo at dictum placerat, sapien erat ultricies eros, ac porta sem ex ac nisi.
+For documentation, visit https://docs.something.org or refer to https://github.com/example/repo for the source code.
+Nullam ac urna eu felis dapibus condimentum sit amet a augue.
+
+Curabitur non nulla sit amet nisl tempus convallis quis ac lectus.
+You might also find https://news.ycombinator.com interesting for tech news.
+Vivamus magna justo, lacinia eget consectetur sed, convallis at tellus.
diff --git a/test files/test10.log b/test files/test10.log
new file mode 100644
index 0000000..415514f
--- /dev/null
+++ b/test files/test10.log
@@ -0,0 +1,11 @@
+2025-09-10 10:00:00 INFO Fetching https://example.com
+2025-09-10 10:00:01 INFO Fetching https://github.com/example/repo
+2025-09-10 10:00:02 WARN Retrying htp://bad-protocol.com
+2025-09-10 10:00:03 ERROR Failed to fetch http:///example.com
+2025-09-10 10:00:04 ERROR DNS error for https://this-domain-does-not-exist-123456789.com
+2025-09-10 10:00:05 INFO Fetching http://example.com:8080/api/status
+2025-09-10 10:00:06 ERROR Invalid host http://example..com
+2025-09-10 10:00:07 ERROR Missing domain https://
+2025-09-10 10:00:08 INFO Fetching https://en.wikipedia.org/wiki/Main_Page
+
+
diff --git a/test files/test11.go b/test files/test11.go
new file mode 100644
index 0000000..b1a2e05
--- /dev/null
+++ b/test files/test11.go
@@ -0,0 +1,10 @@
+package testfiles
+
+// Sample Go file with URLs
+var url1 = "https://example.com"
+var url2 = "https://en.wikipedia.org/wiki/Main_Page"
+var urlBad = "http://example..com"
+var urlMissing = "https://"
+var urlNonexistent = "https://this-domain-does-not-exist-123456789.com"
+var urlPlaceholder1 = "https://[tenant].api.identitynow.com"
+var urlPlaceholder2 = "https://{tenant}.api.identitynow.com"
diff --git a/test files/test12.php b/test files/test12.php
new file mode 100644
index 0000000..5d4db75
--- /dev/null
+++ b/test files/test12.php
@@ -0,0 +1,11 @@
+
+
+
diff --git a/test files/test13.ps1 b/test files/test13.ps1
new file mode 100644
index 0000000..6464693
--- /dev/null
+++ b/test files/test13.ps1
@@ -0,0 +1,9 @@
+# Sample PowerShell file with URLs
+$url1 = "https://example.com"
+$url2 = "https://news.ycombinator.com"
+$urlBad = "http://example..com"
+$urlMissing = "https://"
+$urlNonexistent = "https://example.madeuptld"
+$urlPlaceholder = "https://[tenant].api.identitynow.com"
+
+
diff --git a/test files/test14.py b/test files/test14.py
new file mode 100644
index 0000000..6b434b3
--- /dev/null
+++ b/test files/test14.py
@@ -0,0 +1,9 @@
+# Sample Python file with URLs
+url1 = "https://example.com"
+url2 = "https://developer.mozilla.org"
+url_bad = "http://example..com"
+url_missing = "https://"
+url_nonexistent = "https://this-image-domain-should-not-exist-xyz.example"
+url_placeholder = "https://{tenant}.api.identitynow.com/v3/transforms"
+
+
diff --git a/test files/test15.java b/test files/test15.java
new file mode 100644
index 0000000..f74c137
--- /dev/null
+++ b/test files/test15.java
@@ -0,0 +1,11 @@
+public class Test15 {
+    // Sample Java file with URLs
+    String url1 = "https://example.com";
+    String url2 = "https://svelte.dev";
+    String urlBad = "http://example..com";
+    String urlMissing = "https://";
+    String urlNonexistent = "https://unregistered-website-xyz.com";
+    String urlPlaceholder = "https://[tenant].[domain].com";
+}
+
+
diff --git a/test files/test16.ts b/test files/test16.ts
new file mode 100644
index 0000000..16d2324
--- /dev/null
+++ b/test files/test16.ts
@@ -0,0 +1,9 @@
+// Sample TypeScript file with URLs
+const url1: string = "https://example.com";
+const url2: string = "https://go.dev/doc/";
+const urlBad: string = "http://example..com";
+const urlMissing: string = "https://";
+const urlNonexistent: string = "https://this-domain-does-not-exist-987654321.com";
+const urlPlaceholder: string = "https://{tenant}.api.identitynow.com/v3/transforms";
+
+
diff --git a/test files/test17.rb b/test files/test17.rb
new file mode 100644
index 0000000..a454c44
--- /dev/null
+++ b/test files/test17.rb
@@ -0,0 +1,9 @@
+# Sample Ruby file with URLs
+url1 = "https://example.com"
+url2 = "https://github.com"
+url_bad = "http://example..com"
+url_missing = "https://"
+url_nonexistent = "https://totally-made-up-host-tld-abcdef123.com"
+url_placeholder = "https://[tenant].api.identitynow.com"
+
+
diff --git a/test files/test3.js b/test files/test3.js
new file mode 100644
index 0000000..c2b64c8
--- /dev/null
+++ b/test files/test3.js
@@ -0,0 +1,5 @@
+const link1 = "https://example.com";
+const link2 = "https://docs.something.org";
+const link3 = "https://github.com/example/repo";
+const link4 = "https://news.ycombinator.com";
+const link5 = "http://example.com:8080";
diff --git a/test files/test4.xml b/test files/test4.xml
new file mode 100644
index 0000000..3f4b277
--- /dev/null
+++ b/test files/test4.xml
@@ -0,0 +1,7 @@
+<links>
+  <url>https://example.com</url>
+  <url>https://docs.something.org</url>
+  <url>https://github.com/example/repo</url>
+  <url>https://news.ycombinator.com</url>
+  <url>http://example.com:8080</url>
+</links>
diff --git a/test files/test5.html b/test files/test5.html
new file mode 100644
index 0000000..c081d9f
--- /dev/null
+++ b/test files/test5.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <title>URL Test HTML</title>
+</head>
+<body>
+  <h1>URL Test Links</h1>
+
+  <a href="https://example.com">Example</a>
+  <a href="http://example.com:8080">With Port</a>
+  <a href="htp://invalid-url.com">Bad Protocol</a>
+  <a href="https://">Missing Domain</a>
+  <a href="http:///example.com">Extra Slash</a>
+  <a href="http://example..com">Double Dot</a>
+  <a href="https://this-domain-does-not-exist-123456789.com">Nonexistent Domain</a>
+  <a href="https://en.wikipedia.org/wiki/Main_Page">Wikipedia</a>
+
+  <h1>Images</h1>
+
+  <img src="https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png" alt="Valid Image">
+  <img src="http://example..com/pic.jpg" alt="Double Dot Image">
+  <img src="htp://invalid-url.com/image.png" alt="Bad Protocol Image">
+</body>
+</html>
diff --git a/test files/test6.json b/test files/test6.json
new file mode 100644
index 0000000..d768c41
--- /dev/null
+++ b/test files/test6.json
@@ -0,0 +1,21 @@
+{
+  "valid": [
+    "https://example.com",
+    "https://github.com",
+    "http://example.com:8080/path",
+    "https://en.wikipedia.org/wiki/Main_Page"
+  ],
+  "invalid": [
+    "htp://invalid-url.com",
+    "http:///example.com",
+    "http://example..com",
+    "https://"
+  ],
+  "nonexistent": [
+    "https://this-domain-does-not-exist-123456789.com",
+    "https://notarealwebsite.com/image.png",
+    "https://example.madeuptld"
+  ]
+}
+
+
diff --git a/test files/test7.yaml b/test files/test7.yaml
new file mode 100644
index 0000000..5f7bcb1
--- /dev/null
+++ b/test files/test7.yaml
@@ -0,0 +1,15 @@
+valid:
+  - https://example.com
+  - https://docs.something.org
+  - http://example.com:8080/path
+invalid:
+  - htp://bad-protocol.com
+  - http:///too-many-slashes.com
+  - http://example..com
+  - https://
+nonexistent:
+  - https://notarealwebsite.com/page
+  - https://unregistered-website-xyz.com
+  - https://example.madeuptld
+
+
diff --git a/test files/test8.csv b/test files/test8.csv
new file mode 100644
index 0000000..42b07b9
--- /dev/null
+++ b/test files/test8.csv
@@ -0,0 +1,13 @@
+id,name,url
+1,Example,https://example.com
+2,Docs,https://docs.something.org
+3,Repo,https://github.com/example/repo
+4,HN,https://news.ycombinator.com
+5,WithPort,http://example.com:8080
+6,BadProtocol,htp://invalid-url.com
+7,ExtraSlash,http:///example.com
+8,DoubleDot,http://example..com
+9,MissingDomain,https://
+10,Nonexistent,https://this-domain-does-not-exist-123456789.com
+
+
diff --git a/test files/test9.ini b/test files/test9.ini
new file mode 100644
index 0000000..9784d05
--- /dev/null
+++ b/test files/test9.ini
@@ -0,0 +1,12 @@
+[links]
+good1 = https://example.com
+good2 = https://en.wikipedia.org/wiki/Main_Page
+good3 = http://example.com:8080/path
+bad1 = htp://bad-protocol.com
+bad2 = http:///example.com
+bad3 = http://example..com
+missing = https://
+nonexistent1 = https://notarealwebsite.com/image.png
+nonexistent2 = https://example.madeuptld
+
+