This update introduces a watch mode that monitors file changes and automatically re-scans for broken links. The feature enhances the user experience during development by providing real-time updates and ensuring links remain valid as files are modified. Additionally, the README has been updated to include usage instructions and details about the new watch mode capabilities.
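As a rough sketch of how such a watch loop could be wired up, here is a minimal example assuming the fsnotify library (github.com/fsnotify/fsnotify); the repository's actual implementation may differ, and rescan is a hypothetical callback standing in for whatever re-runs the link check.

package watch

import (
	"context"
	"time"

	"github.com/fsnotify/fsnotify"
)

// Watch re-runs rescan (debounced) whenever files under dir change.
// Sketch only: fsnotify and the rescan callback are assumptions, not
// necessarily what the repository uses.
func Watch(ctx context.Context, dir string, rescan func()) error {
	w, err := fsnotify.NewWatcher()
	if err != nil {
		return err
	}
	defer w.Close()
	if err := w.Add(dir); err != nil {
		return err
	}

	// Debounce bursts of events: editors often write a file several times per save.
	var timer *time.Timer
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case ev := <-w.Events:
			if ev.Op&(fsnotify.Write|fsnotify.Create|fsnotify.Remove|fsnotify.Rename) != 0 {
				if timer != nil {
					timer.Stop()
				}
				timer = time.AfterFunc(200*time.Millisecond, rescan)
			}
		case err := <-w.Errors:
			if err != nil {
				return err
			}
		}
	}
}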
package web

import (
	"context"
	"net"
	"net/http"
	"sort"
	"sync"
	"sync/atomic"
	"time"
)

// CheckURLs performs concurrent GET requests for each URL and emits Result events.
// sources maps URL -> list of file paths where it was found.
func CheckURLs(ctx context.Context, urls []string, sources map[string][]string, out chan<- Result, stats chan<- Stats, cfg Config) {
	defer close(out)

	// Build HTTP client similar to crawler
	transport := &http.Transport{
		Proxy:                 http.ProxyFromEnvironment,
		DialContext:           (&net.Dialer{Timeout: 2 * time.Second, KeepAlive: 30 * time.Second}).DialContext,
		TLSHandshakeTimeout:   5 * time.Second,
		ExpectContinueTimeout: 1 * time.Second,
		MaxIdleConns:          cfg.MaxConcurrency * 2,
		MaxIdleConnsPerHost:   cfg.MaxConcurrency,
		MaxConnsPerHost:       cfg.MaxConcurrency,
		IdleConnTimeout:       30 * time.Second,
		ResponseHeaderTimeout: cfg.RequestTimeout,
	}
	client := &http.Client{Timeout: cfg.RequestTimeout, Transport: transport}

	type job struct{ url string }
	jobs := make(chan job, len(urls))

	// Seed jobs, skipping empty and duplicate URLs
	unique := make(map[string]struct{}, len(urls))
	for _, u := range urls {
		if u == "" {
			continue
		}
		if _, ok := unique[u]; ok {
			continue
		}
		unique[u] = struct{}{}
		jobs <- job{url: u}
	}
	close(jobs)

	concurrency := cfg.MaxConcurrency
	if concurrency <= 0 {
		concurrency = 8
	}

	// Counters are shared across workers, so update them atomically.
	var processed int64
	pending := int64(len(unique))

	// A WaitGroup ensures every worker has exited (including early returns on
	// context cancellation) before out is closed.
	var wg sync.WaitGroup
	worker := func() {
		defer wg.Done()
		for j := range jobs {
			select {
			case <-ctx.Done():
				return
			default:
			}
			ok, status, resp, err := fetchWithMethod(ctx, client, http.MethodGet, j.url)
			if resp != nil && resp.Body != nil {
				resp.Body.Close()
			}
			// Treat 401/403/408/429 as valid links
			if status == http.StatusUnauthorized || status == http.StatusForbidden || status == http.StatusRequestTimeout || status == http.StatusTooManyRequests {
				ok = true
				err = nil
			}

			var srcs []string
			if sources != nil {
				srcs = sources[j.url]
			}

			// Send result with context check
			select {
			case out <- Result{URL: j.url, OK: ok, Status: status, Err: err, ErrMsg: errString(err), Method: http.MethodGet, Sources: cloneAndSort(srcs)}:
			case <-ctx.Done():
				return
			}

			p := atomic.AddInt64(&processed, 1)
			rem := atomic.AddInt64(&pending, -1)
			if stats != nil {
				// Stats updates are best-effort: drop them if the receiver is busy.
				select {
				case stats <- Stats{Pending: int(rem), Processed: int(p)}:
				default:
				}
			}
		}
	}

	wg.Add(concurrency)
	for i := 0; i < concurrency; i++ {
		go worker()
	}
	wg.Wait()
}

// cloneAndSort returns a sorted copy of in, or nil when in is empty.
func cloneAndSort(in []string) []string {
	if len(in) == 0 {
		return nil
	}
	out := append([]string(nil), in...)
	sort.Strings(out)
	return out
}
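A hypothetical caller might look like the following; the import path is an assumption, and only the Config and Result fields visible above are used.

package main

import (
	"context"
	"fmt"
	"time"

	"example.com/slinky/web" // hypothetical import path
)

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()

	urls := []string{"https://example.com", "https://example.com/missing"}
	out := make(chan web.Result, len(urls))

	// A nil stats channel is allowed; CheckURLs simply skips stats updates.
	go web.CheckURLs(ctx, urls, nil, out, nil, web.Config{
		MaxConcurrency: 8,
		RequestTimeout: 5 * time.Second,
	})

	// CheckURLs closes out once all workers have finished.
	for r := range out {
		fmt.Printf("ok=%v status=%d %s\n", r.OK, r.Status, r.URL)
	}
}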