diff --git a/index/rolodex_remote_loader.go b/index/rolodex_remote_loader.go index b548d8f..e3505d7 100644 --- a/index/rolodex_remote_loader.go +++ b/index/rolodex_remote_loader.go @@ -4,26 +4,26 @@ package index import ( - "errors" - "fmt" - "github.com/pb33f/libopenapi/datamodel" - "github.com/pb33f/libopenapi/utils" - "golang.org/x/sync/syncmap" - "gopkg.in/yaml.v3" - "io" - "io/fs" - "log/slog" - "net/http" - "net/url" - "os" - "path/filepath" - "time" + "errors" + "fmt" + "github.com/pb33f/libopenapi/datamodel" + "github.com/pb33f/libopenapi/utils" + "golang.org/x/sync/syncmap" + "gopkg.in/yaml.v3" + "io" + "io/fs" + "log/slog" + "net/http" + "net/url" + "os" + "path/filepath" + "time" ) const ( - YAML FileExtension = iota - JSON - UNSUPPORTED + YAML FileExtension = iota + JSON + UNSUPPORTED ) // FileExtension is the type of file extension. @@ -32,415 +32,415 @@ type FileExtension int // RemoteFS is a file system that indexes remote files. It implements the fs.FS interface. Files are located remotely // and served via HTTP. type RemoteFS struct { - indexConfig *SpecIndexConfig - rootURL string - rootURLParsed *url.URL - RemoteHandlerFunc utils.RemoteURLHandler - Files syncmap.Map - ProcessingFiles syncmap.Map - FetchTime int64 - FetchChannel chan *RemoteFile - remoteErrors []error - logger *slog.Logger - extractedFiles map[string]RolodexFile - rolodex *Rolodex + indexConfig *SpecIndexConfig + rootURL string + rootURLParsed *url.URL + RemoteHandlerFunc utils.RemoteURLHandler + Files syncmap.Map + ProcessingFiles syncmap.Map + FetchTime int64 + FetchChannel chan *RemoteFile + remoteErrors []error + logger *slog.Logger + extractedFiles map[string]RolodexFile + rolodex *Rolodex } // RemoteFile is a file that has been indexed by the RemoteFS. It implements the RolodexFile interface. type RemoteFile struct { - filename string - name string - extension FileExtension - data []byte - fullPath string - URL *url.URL - lastModified time.Time - seekingErrors []error - index *SpecIndex - parsed *yaml.Node - offset int64 + filename string + name string + extension FileExtension + data []byte + fullPath string + URL *url.URL + lastModified time.Time + seekingErrors []error + index *SpecIndex + parsed *yaml.Node + offset int64 } // GetFileName returns the name of the file. func (f *RemoteFile) GetFileName() string { - return f.filename + return f.filename } // GetContent returns the content of the file as a string. func (f *RemoteFile) GetContent() string { - return string(f.data) + return string(f.data) } // GetContentAsYAMLNode returns the content of the file as a yaml.Node. func (f *RemoteFile) GetContentAsYAMLNode() (*yaml.Node, error) { - if f.parsed != nil { - return f.parsed, nil - } - if f.index != nil && f.index.root != nil { - return f.index.root, nil - } - if f.data == nil { - return nil, fmt.Errorf("no data to parse for file: %s", f.fullPath) - } - var root yaml.Node - err := yaml.Unmarshal(f.data, &root) - if err != nil { - return nil, err - } - if f.index != nil && f.index.root == nil { - f.index.root = &root - } - f.parsed = &root - return &root, nil + if f.parsed != nil { + return f.parsed, nil + } + if f.index != nil && f.index.root != nil { + return f.index.root, nil + } + if f.data == nil { + return nil, fmt.Errorf("no data to parse for file: %s", f.fullPath) + } + var root yaml.Node + err := yaml.Unmarshal(f.data, &root) + if err != nil { + return nil, err + } + if f.index != nil && f.index.root == nil { + f.index.root = &root + } + f.parsed = &root + return &root, nil } // GetFileExtension returns the file extension of the file. func (f *RemoteFile) GetFileExtension() FileExtension { - return f.extension + return f.extension } // GetLastModified returns the last modified time of the file. func (f *RemoteFile) GetLastModified() time.Time { - return f.lastModified + return f.lastModified } // GetErrors returns any errors that occurred while reading the file. func (f *RemoteFile) GetErrors() []error { - return f.seekingErrors + return f.seekingErrors } // GetFullPath returns the full path of the file. func (f *RemoteFile) GetFullPath() string { - return f.fullPath + return f.fullPath } // fs.FileInfo interfaces // Name returns the name of the file. func (f *RemoteFile) Name() string { - return f.name + return f.name } // Size returns the size of the file. func (f *RemoteFile) Size() int64 { - return int64(len(f.data)) + return int64(len(f.data)) } // Mode returns the file mode bits for the file. func (f *RemoteFile) Mode() fs.FileMode { - return fs.FileMode(0) + return fs.FileMode(0) } // ModTime returns the modification time of the file. func (f *RemoteFile) ModTime() time.Time { - return f.lastModified + return f.lastModified } // IsDir returns true if the file is a directory. func (f *RemoteFile) IsDir() bool { - return false + return false } // fs.File interfaces // Sys returns the underlying data source (always returns nil) func (f *RemoteFile) Sys() interface{} { - return nil + return nil } // Close closes the file (doesn't do anything, returns no error) func (f *RemoteFile) Close() error { - return nil + return nil } // Stat returns the FileInfo for the file. func (f *RemoteFile) Stat() (fs.FileInfo, error) { - return f, nil + return f, nil } // Read reads the file. Makes it compatible with io.Reader. func (f *RemoteFile) Read(b []byte) (int, error) { - if f.offset >= int64(len(f.data)) { - return 0, io.EOF - } - if f.offset < 0 { - return 0, &fs.PathError{Op: "read", Path: f.name, Err: fs.ErrInvalid} - } - n := copy(b, f.data[f.offset:]) - f.offset += int64(n) - return n, nil + if f.offset >= int64(len(f.data)) { + return 0, io.EOF + } + if f.offset < 0 { + return 0, &fs.PathError{Op: "read", Path: f.name, Err: fs.ErrInvalid} + } + n := copy(b, f.data[f.offset:]) + f.offset += int64(n) + return n, nil } // Index indexes the file and returns a *SpecIndex, any errors are returned as well. func (f *RemoteFile) Index(config *SpecIndexConfig) (*SpecIndex, error) { - if f.index != nil { - return f.index, nil - } - content := f.data + if f.index != nil { + return f.index, nil + } + content := f.data - // first, we must parse the content of the file - info, err := datamodel.ExtractSpecInfoWithDocumentCheck(content, true) - if err != nil { - return nil, err - } + // first, we must parse the content of the file + info, err := datamodel.ExtractSpecInfoWithDocumentCheck(content, true) + if err != nil { + return nil, err + } - index := NewSpecIndexWithConfig(info.RootNode, config) - index.specAbsolutePath = config.SpecAbsolutePath - f.index = index - return index, nil + index := NewSpecIndexWithConfig(info.RootNode, config) + index.specAbsolutePath = config.SpecAbsolutePath + f.index = index + return index, nil } // GetIndex returns the index for the file. func (f *RemoteFile) GetIndex() *SpecIndex { - return f.index + return f.index } // NewRemoteFSWithConfig creates a new RemoteFS using the supplied SpecIndexConfig. func NewRemoteFSWithConfig(specIndexConfig *SpecIndexConfig) (*RemoteFS, error) { - if specIndexConfig == nil { - return nil, errors.New("no spec index config provided") - } - remoteRootURL := specIndexConfig.BaseURL - log := specIndexConfig.Logger - if log == nil { - log = slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ - Level: slog.LevelError, - })) - } + if specIndexConfig == nil { + return nil, errors.New("no spec index config provided") + } + remoteRootURL := specIndexConfig.BaseURL + log := specIndexConfig.Logger + if log == nil { + log = slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelError, + })) + } - rfs := &RemoteFS{ - indexConfig: specIndexConfig, - logger: log, - rootURLParsed: remoteRootURL, - FetchChannel: make(chan *RemoteFile), - } - if remoteRootURL != nil { - rfs.rootURL = remoteRootURL.String() - } - if specIndexConfig.RemoteURLHandler != nil { - rfs.RemoteHandlerFunc = specIndexConfig.RemoteURLHandler - } else { - // default http client - client := &http.Client{ - Timeout: time.Second * 120, - } - rfs.RemoteHandlerFunc = func(url string) (*http.Response, error) { - return client.Get(url) - } - } - return rfs, nil + rfs := &RemoteFS{ + indexConfig: specIndexConfig, + logger: log, + rootURLParsed: remoteRootURL, + FetchChannel: make(chan *RemoteFile), + } + if remoteRootURL != nil { + rfs.rootURL = remoteRootURL.String() + } + if specIndexConfig.RemoteURLHandler != nil { + rfs.RemoteHandlerFunc = specIndexConfig.RemoteURLHandler + } else { + // default http client + client := &http.Client{ + Timeout: time.Second * 120, + } + rfs.RemoteHandlerFunc = func(url string) (*http.Response, error) { + return client.Get(url) + } + } + return rfs, nil } // NewRemoteFSWithRootURL creates a new RemoteFS using the supplied root URL. func NewRemoteFSWithRootURL(rootURL string) (*RemoteFS, error) { - remoteRootURL, err := url.Parse(rootURL) - if err != nil { - return nil, err - } - config := CreateOpenAPIIndexConfig() - config.BaseURL = remoteRootURL - return NewRemoteFSWithConfig(config) + remoteRootURL, err := url.Parse(rootURL) + if err != nil { + return nil, err + } + config := CreateOpenAPIIndexConfig() + config.BaseURL = remoteRootURL + return NewRemoteFSWithConfig(config) } // SetRemoteHandlerFunc sets the remote handler function. func (i *RemoteFS) SetRemoteHandlerFunc(handlerFunc utils.RemoteURLHandler) { - i.RemoteHandlerFunc = handlerFunc + i.RemoteHandlerFunc = handlerFunc } // SetIndexConfig sets the index configuration. func (i *RemoteFS) SetIndexConfig(config *SpecIndexConfig) { - i.indexConfig = config + i.indexConfig = config } // GetFiles returns the files that have been indexed. func (i *RemoteFS) GetFiles() map[string]RolodexFile { - files := make(map[string]RolodexFile) - i.Files.Range(func(key, value interface{}) bool { - files[key.(string)] = value.(*RemoteFile) - return true - }) - i.extractedFiles = files - return files + files := make(map[string]RolodexFile) + i.Files.Range(func(key, value interface{}) bool { + files[key.(string)] = value.(*RemoteFile) + return true + }) + i.extractedFiles = files + return files } // GetErrors returns any errors that occurred during the indexing process. func (i *RemoteFS) GetErrors() []error { - return i.remoteErrors + return i.remoteErrors } type waiterRemote struct { - f string - done bool - file *RemoteFile - listeners int + f string + done bool + file *RemoteFile + listeners int } // Open opens a file, returning it or an error. If the file is not found, the error is of type *PathError. func (i *RemoteFS) Open(remoteURL string) (fs.File, error) { - if i.indexConfig != nil && !i.indexConfig.AllowRemoteLookup { - return nil, fmt.Errorf("remote lookup for '%s' is not allowed, please set "+ - "AllowRemoteLookup to true as part of the index configuration", remoteURL) - } + if i.indexConfig != nil && !i.indexConfig.AllowRemoteLookup { + return nil, fmt.Errorf("remote lookup for '%s' is not allowed, please set "+ + "AllowRemoteLookup to true as part of the index configuration", remoteURL) + } - remoteParsedURL, err := url.Parse(remoteURL) - if err != nil { - return nil, err - } - remoteParsedURLOriginal, _ := url.Parse(remoteURL) + remoteParsedURL, err := url.Parse(remoteURL) + if err != nil { + return nil, err + } + remoteParsedURLOriginal, _ := url.Parse(remoteURL) - // try path first - if r, ok := i.Files.Load(remoteParsedURL.Path); ok { - return r.(*RemoteFile), nil - } + // try path first + if r, ok := i.Files.Load(remoteParsedURL.Path); ok { + return r.(*RemoteFile), nil + } - // if we're processing, we need to block and wait for the file to be processed - // try path first - if r, ok := i.ProcessingFiles.Load(remoteParsedURL.Path); ok { + // if we're processing, we need to block and wait for the file to be processed + // try path first + if r, ok := i.ProcessingFiles.Load(remoteParsedURL.Path); ok { - wait := r.(*waiterRemote) - wait.listeners++ + wait := r.(*waiterRemote) + wait.listeners++ - i.logger.Debug("[rolodex remote loader] waiting for existing fetch to complete", "file", remoteURL, - "remoteURL", remoteParsedURL.String()) + i.logger.Debug("[rolodex remote loader] waiting for existing fetch to complete", "file", remoteURL, + "remoteURL", remoteParsedURL.String()) - for !wait.done { - time.Sleep(200 * time.Nanosecond) // breathe for a few nanoseconds. - } + for !wait.done { + time.Sleep(500 * time.Nanosecond) // breathe for a few nanoseconds. + } - wait.listeners-- - i.logger.Debug("[rolodex remote loader]: waiting done, remote completed, returning file", "file", - remoteParsedURL.String(), "listeners", wait.listeners) - return wait.file, nil - } - - processingWaiter := &waiterRemote{f: remoteParsedURL.Path} + wait.listeners-- + i.logger.Debug("[rolodex remote loader]: waiting done, remote completed, returning file", "file", + remoteParsedURL.String(), "listeners", wait.listeners) + return wait.file, nil + } - // add to processing - i.ProcessingFiles.Store(remoteParsedURL.Path, processingWaiter) + processingWaiter := &waiterRemote{f: remoteParsedURL.Path} - fileExt := ExtractFileType(remoteParsedURL.Path) + // add to processing + i.ProcessingFiles.Store(remoteParsedURL.Path, processingWaiter) - if fileExt == UNSUPPORTED { - return nil, &fs.PathError{Op: "open", Path: remoteURL, Err: fs.ErrInvalid} - } + fileExt := ExtractFileType(remoteParsedURL.Path) - // if the remote URL is absolute (http:// or https://), and we have a rootURL defined, we need to override - // the host being defined by this URL, and use the rootURL instead, but keep the path. - if i.rootURLParsed != nil { - remoteParsedURL.Host = i.rootURLParsed.Host - remoteParsedURL.Scheme = i.rootURLParsed.Scheme - if !filepath.IsAbs(remoteParsedURL.Path) { - remoteParsedURL.Path = filepath.Join(i.rootURLParsed.Path, remoteParsedURL.Path) - } - } + if fileExt == UNSUPPORTED { + return nil, &fs.PathError{Op: "open", Path: remoteURL, Err: fs.ErrInvalid} + } - if remoteParsedURL.Scheme == "" { - i.ProcessingFiles.Delete(remoteParsedURL.Path) - return nil, nil // not a remote file, nothing wrong with that - just we can't keep looking here partner. - } + // if the remote URL is absolute (http:// or https://), and we have a rootURL defined, we need to override + // the host being defined by this URL, and use the rootURL instead, but keep the path. + if i.rootURLParsed != nil { + remoteParsedURL.Host = i.rootURLParsed.Host + remoteParsedURL.Scheme = i.rootURLParsed.Scheme + if !filepath.IsAbs(remoteParsedURL.Path) { + remoteParsedURL.Path = filepath.Join(i.rootURLParsed.Path, remoteParsedURL.Path) + } + } - i.logger.Debug("loading remote file", "file", remoteURL, "remoteURL", remoteParsedURL.String()) + if remoteParsedURL.Scheme == "" { + i.ProcessingFiles.Delete(remoteParsedURL.Path) + return nil, nil // not a remote file, nothing wrong with that - just we can't keep looking here partner. + } - response, clientErr := i.RemoteHandlerFunc(remoteParsedURL.String()) - if clientErr != nil { + i.logger.Debug("loading remote file", "file", remoteURL, "remoteURL", remoteParsedURL.String()) - i.remoteErrors = append(i.remoteErrors, clientErr) - // remove from processing - i.ProcessingFiles.Delete(remoteParsedURL.Path) - if response != nil { - i.logger.Error("client error", "error", clientErr, "status", response.StatusCode) - } else { - i.logger.Error("client error", "error", clientErr.Error()) - } - return nil, clientErr - } - if response == nil { - // remove from processing - i.ProcessingFiles.Delete(remoteParsedURL.Path) + response, clientErr := i.RemoteHandlerFunc(remoteParsedURL.String()) + if clientErr != nil { - return nil, fmt.Errorf("empty response from remote URL: %s", remoteParsedURL.String()) - } - responseBytes, readError := io.ReadAll(response.Body) - if readError != nil { + i.remoteErrors = append(i.remoteErrors, clientErr) + // remove from processing + i.ProcessingFiles.Delete(remoteParsedURL.Path) + if response != nil { + i.logger.Error("client error", "error", clientErr, "status", response.StatusCode) + } else { + i.logger.Error("client error", "error", clientErr.Error()) + } + return nil, clientErr + } + if response == nil { + // remove from processing + i.ProcessingFiles.Delete(remoteParsedURL.Path) - // remove from processing - i.ProcessingFiles.Delete(remoteParsedURL.Path) + return nil, fmt.Errorf("empty response from remote URL: %s", remoteParsedURL.String()) + } + responseBytes, readError := io.ReadAll(response.Body) + if readError != nil { - return nil, fmt.Errorf("error reading bytes from remote file '%s': [%s]", - remoteParsedURL.String(), readError.Error()) - } + // remove from processing + i.ProcessingFiles.Delete(remoteParsedURL.Path) - if response.StatusCode >= 400 { + return nil, fmt.Errorf("error reading bytes from remote file '%s': [%s]", + remoteParsedURL.String(), readError.Error()) + } - // remove from processing - i.ProcessingFiles.Delete(remoteParsedURL.Path) + if response.StatusCode >= 400 { - i.logger.Error("unable to fetch remote document", - "file", remoteParsedURL.Path, "status", response.StatusCode, "resp", string(responseBytes)) - return nil, fmt.Errorf("unable to fetch remote document: %s", string(responseBytes)) - } + // remove from processing + i.ProcessingFiles.Delete(remoteParsedURL.Path) - absolutePath, _ := filepath.Abs(remoteParsedURL.Path) + i.logger.Error("unable to fetch remote document", + "file", remoteParsedURL.Path, "status", response.StatusCode, "resp", string(responseBytes)) + return nil, fmt.Errorf("unable to fetch remote document: %s", string(responseBytes)) + } - // extract last modified from response - lastModified := response.Header.Get("Last-Modified") + absolutePath, _ := filepath.Abs(remoteParsedURL.Path) - // parse the last modified date into a time object - lastModifiedTime, parseErr := time.Parse(time.RFC1123, lastModified) + // extract last modified from response + lastModified := response.Header.Get("Last-Modified") - if parseErr != nil { - // can't extract last modified, so use now - lastModifiedTime = time.Now() - } + // parse the last modified date into a time object + lastModifiedTime, parseErr := time.Parse(time.RFC1123, lastModified) - filename := filepath.Base(remoteParsedURL.Path) + if parseErr != nil { + // can't extract last modified, so use now + lastModifiedTime = time.Now() + } - remoteFile := &RemoteFile{ - filename: filename, - name: remoteParsedURL.Path, - extension: fileExt, - data: responseBytes, - fullPath: absolutePath, - URL: remoteParsedURL, - lastModified: lastModifiedTime, - } + filename := filepath.Base(remoteParsedURL.Path) - copiedCfg := *i.indexConfig + remoteFile := &RemoteFile{ + filename: filename, + name: remoteParsedURL.Path, + extension: fileExt, + data: responseBytes, + fullPath: absolutePath, + URL: remoteParsedURL, + lastModified: lastModifiedTime, + } - newBase := fmt.Sprintf("%s://%s%s", remoteParsedURLOriginal.Scheme, remoteParsedURLOriginal.Host, - filepath.Dir(remoteParsedURL.Path)) - newBaseURL, _ := url.Parse(newBase) + copiedCfg := *i.indexConfig - if newBaseURL != nil { - copiedCfg.BaseURL = newBaseURL - } - copiedCfg.SpecAbsolutePath = remoteParsedURL.String() + newBase := fmt.Sprintf("%s://%s%s", remoteParsedURLOriginal.Scheme, remoteParsedURLOriginal.Host, + filepath.Dir(remoteParsedURL.Path)) + newBaseURL, _ := url.Parse(newBase) - if len(remoteFile.data) > 0 { - i.logger.Debug("successfully loaded file", "file", absolutePath) - } + if newBaseURL != nil { + copiedCfg.BaseURL = newBaseURL + } + copiedCfg.SpecAbsolutePath = remoteParsedURL.String() - processingWaiter.file = remoteFile - processingWaiter.done = true + if len(remoteFile.data) > 0 { + i.logger.Debug("successfully loaded file", "file", absolutePath) + } - // remove from processing - i.ProcessingFiles.Delete(remoteParsedURL.Path) - i.Files.Store(absolutePath, remoteFile) + processingWaiter.file = remoteFile + processingWaiter.done = true - idx, idxError := remoteFile.Index(&copiedCfg) + // remove from processing + i.ProcessingFiles.Delete(remoteParsedURL.Path) + i.Files.Store(absolutePath, remoteFile) - if idxError != nil && idx == nil { - i.remoteErrors = append(i.remoteErrors, idxError) - } else { + idx, idxError := remoteFile.Index(&copiedCfg) - // for each index, we need a resolver - resolver := NewResolver(idx) - idx.resolver = resolver - idx.BuildIndex() - if i.rolodex != nil { - i.rolodex.AddExternalIndex(idx, remoteParsedURL.String()) - } - } - return remoteFile, errors.Join(i.remoteErrors...) + if idxError != nil && idx == nil { + i.remoteErrors = append(i.remoteErrors, idxError) + } else { + + // for each index, we need a resolver + resolver := NewResolver(idx) + idx.resolver = resolver + idx.BuildIndex() + if i.rolodex != nil { + i.rolodex.AddExternalIndex(idx, remoteParsedURL.String()) + } + } + return remoteFile, errors.Join(i.remoteErrors...) } diff --git a/index/spec_index_test.go b/index/spec_index_test.go index 68d047c..3e1980d 100644 --- a/index/spec_index_test.go +++ b/index/spec_index_test.go @@ -260,6 +260,10 @@ func TestSpecIndex_DigitalOcean_FullCheckoutLocalResolve(t *testing.T) { assert.Len(t, rolo.GetCaughtErrors(), 0) assert.Len(t, rolo.GetIgnoredCircularReferences(), 0) + assert.Equal(t, int64(1328224), rolo.RolodexFileSize()) + assert.Equal(t, "1.27 MB", rolo.RolodexFileSizeAsString()) + assert.Equal(t, 1691, rolo.RolodexTotalFiles()) + } func TestSpecIndex_DigitalOcean_FullCheckoutLocalResolve_RecursiveLookup(t *testing.T) { @@ -330,6 +334,10 @@ func TestSpecIndex_DigitalOcean_FullCheckoutLocalResolve_RecursiveLookup(t *test assert.Len(t, rolo.GetCaughtErrors(), 0) assert.Len(t, rolo.GetIgnoredCircularReferences(), 0) + assert.Equal(t, int64(1266728), rolo.RolodexFileSize()) + assert.Equal(t, "1.21 MB", rolo.RolodexFileSizeAsString()) + assert.Equal(t, 1677, rolo.RolodexTotalFiles()) + } func TestSpecIndex_DigitalOcean_LookupsNotAllowed(t *testing.T) { @@ -783,6 +791,21 @@ func TestSpecIndex_BurgerShopMixedRef(t *testing.T) { assert.Equal(t, 1, index.GetInlineUniqueParamCount()) assert.Len(t, index.refErrors, 0) assert.Len(t, index.GetCircularReferences(), 0) + + // get the size of the rolodex. + assert.Equal(t, int64(60232), rolo.RolodexFileSize()+int64(len(yml))) + assert.Equal(t, "50.48 KB", rolo.RolodexFileSizeAsString()) + assert.Equal(t, 3, rolo.RolodexTotalFiles()) + +} + +func TestCalcSizeAsString(t *testing.T) { + assert.Equal(t, "345 B", HumanFileSize(345)) + assert.Equal(t, "1 KB", HumanFileSize(1024)) + assert.Equal(t, "1 KB", HumanFileSize(1025)) + assert.Equal(t, "1.98 KB", HumanFileSize(2025)) + assert.Equal(t, "1 MB", HumanFileSize(1025*1024)) + assert.Equal(t, "1 GB", HumanFileSize(1025*1025*1025)) } func TestSpecIndex_TestEmptyBrokenReferences(t *testing.T) {