// Copyright 2023 Princess B33f Heavy Industries / Dave Shanley
// SPDX-License-Identifier: MIT

package index

import (
	"errors"
	"fmt"
	"gopkg.in/yaml.v3"
	"io"
	"io/fs"
	"log/slog"
	"math"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"
)

// CanBeIndexed is implemented by any file that can produce a SpecIndex for itself
// when handed an index configuration.
type CanBeIndexed interface {
	// Index builds (or returns) the index for the file using the supplied configuration.
	Index(config *SpecIndexConfig) (*SpecIndex, error)
}

// RolodexFile is an interface that represents a file in the rolodex. It pairs rolodex-specific
// accessors (content, extension, full path, errors, parsed YAML and index) with the method set
// of `fs.FileInfo`, so the same struct can be used for everything.
type RolodexFile interface {
	GetContent() string
	GetFileExtension() FileExtension
	GetFullPath() string
	GetErrors() []error
	GetContentAsYAMLNode() (*yaml.Node, error)
	GetIndex() *SpecIndex

	// The remaining methods match the method set of fs.FileInfo.
	Name() string
	ModTime() time.Time
	IsDir() bool
	Sys() any
	Size() int64
	Mode() os.FileMode
}

// RolodexFS is an interface that represents a rolodex file system. It has the same Open signature
// as `fs.FS`, and additionally exposes GetFiles() so that every file in the FS can be extracted.
type RolodexFS interface {
	Open(name string) (fs.File, error)
	// GetFiles returns the files held by the file system as a map of string keys to files.
	GetFiles() map[string]RolodexFile
}

// Rolodex is a file system abstraction that allows for the indexing of multiple file systems
// and the ability to resolve references across those file systems. It is used to hold references to external
// files, and the indexes they hold. The rolodex is the master lookup for all references.
type Rolodex struct { localFS map[string]fs.FS remoteFS map[string]fs.FS indexed bool built bool manualBuilt bool resolved bool circChecked bool indexConfig *SpecIndexConfig indexingDuration time.Duration indexes []*SpecIndex indexMap map[string]*SpecIndex indexLock sync.Mutex rootIndex *SpecIndex rootNode *yaml.Node caughtErrors []error safeCircularReferences []*CircularReferenceResult infiniteCircularReferences []*CircularReferenceResult ignoredCircularReferences []*CircularReferenceResult logger *slog.Logger } // NewRolodex creates a new rolodex with the provided index configuration. func NewRolodex(indexConfig *SpecIndexConfig) *Rolodex { logger := indexConfig.Logger if logger == nil { logger = slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ Level: slog.LevelError, })) } r := &Rolodex{ indexConfig: indexConfig, localFS: make(map[string]fs.FS), remoteFS: make(map[string]fs.FS), logger: logger, indexMap: make(map[string]*SpecIndex), } indexConfig.Rolodex = r return r } // GetIgnoredCircularReferences returns a list of circular references that were ignored during the indexing process. // These can be array or polymorphic references. func (r *Rolodex) GetIgnoredCircularReferences() []*CircularReferenceResult { debounced := make(map[string]*CircularReferenceResult) for _, c := range r.ignoredCircularReferences { if _, ok := debounced[c.LoopPoint.FullDefinition]; !ok { debounced[c.LoopPoint.FullDefinition] = c } } var debouncedResults []*CircularReferenceResult for _, v := range debounced { debouncedResults = append(debouncedResults, v) } return debouncedResults } // GetIndexingDuration returns the duration it took to index the rolodex. 
func (r *Rolodex) GetIndexingDuration() time.Duration {
	return r.indexingDuration
}

// GetRootIndex returns the root index of the rolodex (the entry point, the main document)
func (r *Rolodex) GetRootIndex() *SpecIndex {
	return r.rootIndex
}

// GetRootNode returns the root YAML node of the rolodex (the entry point, the main document)
func (r *Rolodex) GetRootNode() *yaml.Node {
	return r.rootNode
}

// GetIndexes returns all the indexes in the rolodex.
func (r *Rolodex) GetIndexes() []*SpecIndex {
	return r.indexes
}

// GetCaughtErrors returns all the errors that were caught during the indexing process.
func (r *Rolodex) GetCaughtErrors() []error {
	return r.caughtErrors
}

// AddLocalFS adds a local file system to the rolodex, keyed by the absolute form of baseDir.
// When the file system is a *LocalFS, it is wired up with this rolodex and its logger.
func (r *Rolodex) AddLocalFS(baseDir string, fileSystem fs.FS) {
	absBaseDir, err := filepath.Abs(baseDir)
	if err != nil {
		// filepath.Abs returns "" on failure; registering under an empty key would make the
		// file system unreachable by base directory, so fall back to the cleaned input.
		absBaseDir = filepath.Clean(baseDir)
	}
	if f, ok := fileSystem.(*LocalFS); ok {
		f.rolodex = r
		f.logger = r.logger
	}
	r.localFS[absBaseDir] = fileSystem
}

// SetRootNode sets the root node of the rolodex (the entry point, the main document)
func (r *Rolodex) SetRootNode(node *yaml.Node) {
	r.rootNode = node
}

// AddExternalIndex registers idx under location, unless a non-nil index is already registered
// there (the first registration wins). Access to the index map is serialized by indexLock.
func (r *Rolodex) AddExternalIndex(idx *SpecIndex, location string) {
	r.indexLock.Lock()
	defer r.indexLock.Unlock()
	if r.indexMap[location] == nil {
		r.indexMap[location] = idx
	}
}

// AddIndex appends idx to the list of indexes held by the rolodex and, when idx is non-nil,
// also registers it under its absolute spec path via AddExternalIndex.
// Note that a nil idx is still appended to the list.
func (r *Rolodex) AddIndex(idx *SpecIndex) {
	r.indexes = append(r.indexes, idx)
	if idx == nil {
		return
	}
	r.AddExternalIndex(idx, idx.specAbsolutePath)
}

// AddRemoteFS adds a remote file system to the rolodex, keyed by baseURL.
// When the file system is a *RemoteFS, it is wired up with this rolodex and its logger.
func (r *Rolodex) AddRemoteFS(baseURL string, fileSystem fs.FS) {
	if f, ok := fileSystem.(*RemoteFS); ok {
		f.rolodex = r
		f.logger = r.logger
	}
	r.remoteFS[baseURL] = fileSystem
}

// IndexTheRolodex indexes the rolodex, building out the indexes for each file, and then building the root index.
func (r *Rolodex) IndexTheRolodex() error { if r.indexed { return nil } var caughtErrors []error var indexBuildQueue []*SpecIndex indexRolodexFile := func( location string, fs fs.FS, doneChan chan bool, errChan chan error, indexChan chan *SpecIndex) { var wg sync.WaitGroup indexFileFunc := func(idxFile CanBeIndexed, fullPath string) { defer wg.Done() // copy config and set the copiedConfig := *r.indexConfig copiedConfig.SpecAbsolutePath = fullPath copiedConfig.AvoidBuildIndex = true // we will build out everything in two steps. idx, err := idxFile.Index(&copiedConfig) if err != nil { errChan <- err } if err == nil { // for each index, we need a resolver resolver := NewResolver(idx) // check if the config has been set to ignore circular references in arrays and polymorphic schemas if copiedConfig.IgnoreArrayCircularReferences { resolver.IgnoreArrayCircularReferences() } if copiedConfig.IgnorePolymorphicCircularReferences { resolver.IgnorePolymorphicCircularReferences() } indexChan <- idx } } if lfs, ok := fs.(RolodexFS); ok { wait := false for _, f := range lfs.GetFiles() { if idxFile, ko := f.(CanBeIndexed); ko { wg.Add(1) wait = true go indexFileFunc(idxFile, f.GetFullPath()) } } if wait { wg.Wait() } doneChan <- true return } else { errChan <- errors.New("rolodex file system is not a RolodexFS") doneChan <- true } } indexingCompleted := 0 totalToIndex := len(r.localFS) + len(r.remoteFS) doneChan := make(chan bool) errChan := make(chan error) indexChan := make(chan *SpecIndex) // run through every file system and index every file, fan out as many goroutines as possible. 
started := time.Now() for k, v := range r.localFS { go indexRolodexFile(k, v, doneChan, errChan, indexChan) } for k, v := range r.remoteFS { go indexRolodexFile(k, v, doneChan, errChan, indexChan) } for indexingCompleted < totalToIndex { select { case <-doneChan: indexingCompleted++ case err := <-errChan: indexingCompleted++ caughtErrors = append(caughtErrors, err) case idx := <-indexChan: indexBuildQueue = append(indexBuildQueue, idx) } } // now that we have indexed all the files, we can build the index. r.indexes = indexBuildQueue sort.Slice(indexBuildQueue, func(i, j int) bool { return indexBuildQueue[i].specAbsolutePath < indexBuildQueue[j].specAbsolutePath }) for _, idx := range indexBuildQueue { idx.BuildIndex() if r.indexConfig.AvoidCircularReferenceCheck { continue } errs := idx.resolver.CheckForCircularReferences() for e := range errs { caughtErrors = append(caughtErrors, errs[e]) } if len(idx.resolver.GetIgnoredCircularPolyReferences()) > 0 { r.ignoredCircularReferences = append(r.ignoredCircularReferences, idx.resolver.GetIgnoredCircularPolyReferences()...) } if len(idx.resolver.GetIgnoredCircularArrayReferences()) > 0 { r.ignoredCircularReferences = append(r.ignoredCircularReferences, idx.resolver.GetIgnoredCircularArrayReferences()...) } } // indexed and built every supporting file, we can build the root index (our entry point) if r.rootNode != nil { // if there is a base path, then we need to set the root spec config to point to a theoretical root.yaml // which does not exist, but is used to formulate the absolute path to root references correctly. 
if r.indexConfig.BasePath != "" && r.indexConfig.BaseURL == nil { basePath := r.indexConfig.BasePath if !filepath.IsAbs(basePath) { basePath, _ = filepath.Abs(basePath) } if len(r.localFS) > 0 || len(r.remoteFS) > 0 { r.indexConfig.SpecAbsolutePath = filepath.Join(basePath, "root.yaml") } } index := NewSpecIndexWithConfig(r.rootNode, r.indexConfig) resolver := NewResolver(index) if r.indexConfig.IgnoreArrayCircularReferences { resolver.IgnoreArrayCircularReferences() } if r.indexConfig.IgnorePolymorphicCircularReferences { resolver.IgnorePolymorphicCircularReferences() } r.rootIndex = index r.logger.Debug("[rolodex] starting root index build") index.BuildIndex() r.logger.Debug("[rolodex] root index build completed") if !r.indexConfig.AvoidCircularReferenceCheck { resolvingErrors := resolver.CheckForCircularReferences() r.circChecked = true for e := range resolvingErrors { caughtErrors = append(caughtErrors, resolvingErrors[e]) } if len(resolver.GetIgnoredCircularPolyReferences()) > 0 { r.ignoredCircularReferences = append(r.ignoredCircularReferences, resolver.GetIgnoredCircularPolyReferences()...) } if len(resolver.GetIgnoredCircularArrayReferences()) > 0 { r.ignoredCircularReferences = append(r.ignoredCircularReferences, resolver.GetIgnoredCircularArrayReferences()...) } } if len(index.refErrors) > 0 { caughtErrors = append(caughtErrors, index.refErrors...) } } r.indexingDuration = time.Since(started) r.indexed = true r.caughtErrors = caughtErrors r.built = true return errors.Join(caughtErrors...) } // CheckForCircularReferences checks for circular references in the rolodex. 
func (r *Rolodex) CheckForCircularReferences() { if !r.circChecked { if r.rootIndex != nil && r.rootIndex.resolver != nil { resolvingErrors := r.rootIndex.resolver.CheckForCircularReferences() for e := range resolvingErrors { r.caughtErrors = append(r.caughtErrors, resolvingErrors[e]) } if len(r.rootIndex.resolver.ignoredPolyReferences) > 0 { r.ignoredCircularReferences = append(r.ignoredCircularReferences, r.rootIndex.resolver.ignoredPolyReferences...) } if len(r.rootIndex.resolver.ignoredArrayReferences) > 0 { r.ignoredCircularReferences = append(r.ignoredCircularReferences, r.rootIndex.resolver.ignoredArrayReferences...) } r.safeCircularReferences = append(r.safeCircularReferences, r.rootIndex.resolver.GetSafeCircularReferences()...) r.infiniteCircularReferences = append(r.infiniteCircularReferences, r.rootIndex.resolver.GetInfiniteCircularReferences()...) } r.circChecked = true } } // Resolve resolves references in the rolodex. func (r *Rolodex) Resolve() { var resolvers []*Resolver if r.rootIndex != nil && r.rootIndex.resolver != nil { resolvers = append(resolvers, r.rootIndex.resolver) } for _, idx := range r.indexes { if idx.resolver != nil { resolvers = append(resolvers, idx.resolver) } } for _, res := range resolvers { resolvingErrors := res.Resolve() for e := range resolvingErrors { r.caughtErrors = append(r.caughtErrors, resolvingErrors[e]) } if r.rootIndex != nil && len(r.rootIndex.resolver.ignoredPolyReferences) > 0 { r.ignoredCircularReferences = append(r.ignoredCircularReferences, res.ignoredPolyReferences...) } if r.rootIndex != nil && len(r.rootIndex.resolver.ignoredArrayReferences) > 0 { r.ignoredCircularReferences = append(r.ignoredCircularReferences, res.ignoredArrayReferences...) } r.safeCircularReferences = append(r.safeCircularReferences, res.GetSafeCircularReferences()...) r.infiniteCircularReferences = append(r.infiniteCircularReferences, res.GetInfiniteCircularReferences()...) 
} // resolve pending nodes for _, res := range resolvers { res.ResolvePendingNodes() } r.resolved = true } // BuildIndexes builds the indexes in the rolodex, this is generally not required unless manually building a rolodex. func (r *Rolodex) BuildIndexes() { if r.manualBuilt { return } for _, idx := range r.indexes { idx.BuildIndex() } if r.rootIndex != nil { r.rootIndex.BuildIndex() } r.manualBuilt = true } // Open opens a file in the rolodex, and returns a RolodexFile. func (r *Rolodex) Open(location string) (RolodexFile, error) { if r == nil { return nil, fmt.Errorf("rolodex has not been initialized, cannot open file '%s'", location) } if len(r.localFS) <= 0 && len(r.remoteFS) <= 0 { return nil, fmt.Errorf("rolodex has no file systems configured, cannot open '%s'. Add a BaseURL or BasePath to your configuration so the rolodex knows how to resolve references", location) } var errorStack []error var localFile *LocalFile var remoteFile *RemoteFile fileLookup := location isUrl := false if strings.HasPrefix(location, "http") { isUrl = true } if !isUrl { if len(r.localFS) <= 0 { r.logger.Warn("[rolodex] no local file systems configured, cannot open local file", "location", location) return nil, fmt.Errorf("the rolodex has no local file systems configured, cannot open local file '%s'", location) } for k, v := range r.localFS { // check if this is a URL or an abs/rel reference. if !filepath.IsAbs(location) { fileLookup, _ = filepath.Abs(filepath.Join(k, location)) } f, err := v.Open(fileLookup) if err != nil { // try a lookup that is not absolute, but relative f, err = v.Open(location) if err != nil { errorStack = append(errorStack, err) continue } } // check if this is a native rolodex FS, then the work is done. if lf, ko := interface{}(f).(*LocalFile); ko { localFile = lf break } else { // not a native FS, so we need to read the file and create a local file. 
bytes, rErr := io.ReadAll(f) if rErr != nil { errorStack = append(errorStack, rErr) continue } s, sErr := f.Stat() if sErr != nil { errorStack = append(errorStack, sErr) continue } if len(bytes) > 0 { localFile = &LocalFile{ filename: filepath.Base(fileLookup), name: filepath.Base(fileLookup), extension: ExtractFileType(fileLookup), data: bytes, fullPath: fileLookup, lastModified: s.ModTime(), index: r.rootIndex, } break } } } } else { if !r.indexConfig.AllowRemoteLookup { return nil, fmt.Errorf("remote lookup for '%s' not allowed, please set the index configuration to "+ "AllowRemoteLookup to true", fileLookup) } for _, v := range r.remoteFS { f, err := v.Open(fileLookup) if err != nil { r.logger.Warn("[rolodex] errors opening remote file", "location", fileLookup, "error", err) } if f != nil { if rf, ok := interface{}(f).(*RemoteFile); ok { remoteFile = rf break } else { bytes, rErr := io.ReadAll(f) if rErr != nil { errorStack = append(errorStack, rErr) continue } s, sErr := f.Stat() if sErr != nil { errorStack = append(errorStack, sErr) continue } if len(bytes) > 0 { remoteFile = &RemoteFile{ filename: filepath.Base(fileLookup), name: filepath.Base(fileLookup), extension: ExtractFileType(fileLookup), data: bytes, fullPath: fileLookup, lastModified: s.ModTime(), index: r.rootIndex, } break } } } } } if localFile != nil { return &rolodexFile{ rolodex: r, location: localFile.fullPath, localFile: localFile, }, errors.Join(errorStack...) } if remoteFile != nil { return &rolodexFile{ rolodex: r, location: remoteFile.fullPath, remoteFile: remoteFile, }, errors.Join(errorStack...) } return nil, errors.Join(errorStack...) 
} var suffixes = []string{"B", "KB", "MB", "GB", "TB"} func Round(val float64, roundOn float64, places int) (newVal float64) { var round float64 pow := math.Pow(10, float64(places)) digit := pow * val _, div := math.Modf(digit) if div >= roundOn { round = math.Ceil(digit) } else { round = math.Floor(digit) } newVal = round / pow return } func HumanFileSize(size float64) string { base := math.Log(size) / math.Log(1024) getSize := Round(math.Pow(1024, base-math.Floor(base)), .5, 2) getSuffix := suffixes[int(math.Floor(base))] return strconv.FormatFloat(getSize, 'f', -1, 64) + " " + string(getSuffix) } func (r *Rolodex) RolodexFileSizeAsString() string { size := r.RolodexFileSize() return HumanFileSize(float64(size)) } func (r *Rolodex) RolodexTotalFiles() int { // look through each file system and count the files var total int for _, v := range r.localFS { if lfs, ok := v.(RolodexFS); ok { total += len(lfs.GetFiles()) } } for _, v := range r.remoteFS { if lfs, ok := v.(RolodexFS); ok { total += len(lfs.GetFiles()) } } return total } func (r *Rolodex) RolodexFileSize() int64 { var size int64 for _, v := range r.localFS { if lfs, ok := v.(RolodexFS); ok { for _, f := range lfs.GetFiles() { size += f.Size() } } } for _, v := range r.remoteFS { if lfs, ok := v.(RolodexFS); ok { for _, f := range lfs.GetFiles() { size += f.Size() } } } return size }