added tests for file size and total files.

Signed-off-by: quobix <dave@quobix.com>
This commit is contained in:
quobix
2023-11-25 17:06:11 -05:00
parent 14f992cb93
commit af1ee6c620
2 changed files with 300 additions and 277 deletions

View File

@@ -4,26 +4,26 @@
package index
import (
"errors"
"fmt"
"github.com/pb33f/libopenapi/datamodel"
"github.com/pb33f/libopenapi/utils"
"golang.org/x/sync/syncmap"
"gopkg.in/yaml.v3"
"io"
"io/fs"
"log/slog"
"net/http"
"net/url"
"os"
"path/filepath"
"time"
"errors"
"fmt"
"github.com/pb33f/libopenapi/datamodel"
"github.com/pb33f/libopenapi/utils"
"golang.org/x/sync/syncmap"
"gopkg.in/yaml.v3"
"io"
"io/fs"
"log/slog"
"net/http"
"net/url"
"os"
"path/filepath"
"time"
)
const (
YAML FileExtension = iota
JSON
UNSUPPORTED
YAML FileExtension = iota
JSON
UNSUPPORTED
)
// FileExtension is the type of file extension.
@@ -32,415 +32,415 @@ type FileExtension int
// RemoteFS is an fs.FS implementation for files that live on a remote host. Files are retrieved through
// RemoteHandlerFunc (an HTTP GET by default), kept in memory and indexed.
type RemoteFS struct {
	// indexConfig is consulted by Open (AllowRemoteLookup) and copied for every file that gets indexed.
	indexConfig *SpecIndexConfig
	// rootURL is the string form of rootURLParsed; it stays empty when no base URL was configured.
	rootURL string
	// rootURLParsed is the configured base URL. When set, Open substitutes its scheme and host into requests.
	rootURLParsed *url.URL
	// RemoteHandlerFunc performs the actual fetch for a URL.
	RemoteHandlerFunc utils.RemoteURLHandler
	// Files holds the fetched files; values are *RemoteFile.
	Files syncmap.Map
	// ProcessingFiles holds fetches that are in flight; values are *waiterRemote.
	ProcessingFiles syncmap.Map
	// FetchTime is not written by any code visible in this file. NOTE(review): confirm whether it is still used.
	FetchTime int64
	// FetchChannel is created unbuffered by NewRemoteFSWithConfig.
	FetchChannel chan *RemoteFile
	// remoteErrors accumulates the errors recorded by Open; GetErrors returns it.
	remoteErrors []error
	// logger receives debug and error output from Open.
	logger *slog.Logger
	// extractedFiles is the snapshot built by the most recent GetFiles call.
	extractedFiles map[string]RolodexFile
	// rolodex, when non-nil, is handed the index built for each fetched file.
	rolodex *Rolodex
}
// RemoteFile is a single document fetched by RemoteFS. It implements the RolodexFile interface and also
// provides the fs.File and fs.FileInfo method sets.
type RemoteFile struct {
	// filename is the base name of the URL path the file was fetched from.
	filename string
	// name is the full URL path the file was fetched from.
	name string
	// extension is the file type detected from the requested path.
	extension FileExtension
	// data is the raw response body.
	data []byte
	// fullPath is the result of filepath.Abs applied to the URL path.
	fullPath string
	// URL is the location the file was fetched from.
	URL *url.URL
	// lastModified comes from the Last-Modified response header, or the fetch time if that header is unusable.
	lastModified time.Time
	// seekingErrors is returned by GetErrors.
	seekingErrors []error
	// index is the SpecIndex created by Index; nil until Index has succeeded.
	index *SpecIndex
	// parsed caches the YAML node produced by GetContentAsYAMLNode.
	parsed *yaml.Node
	// offset is the read cursor advanced by Read.
	offset int64
}
// GetFileName reports the base name of the file (the last element of its URL path).
func (f *RemoteFile) GetFileName() string {
	return f.filename
}
// GetContent converts the raw bytes of the file to a string and returns it.
func (f *RemoteFile) GetContent() string {
	content := string(f.data)
	return content
}
// GetContentAsYAMLNode returns the file content as a *yaml.Node.
//
// A previously parsed node is returned if there is one, then the root node of the file's index if that exists.
// Otherwise the raw bytes are unmarshalled, the result is cached on the file (and on the index when the index
// has no root yet) and returned. An error is returned when the file has no data or the data cannot be parsed.
func (f *RemoteFile) GetContentAsYAMLNode() (*yaml.Node, error) {
	switch {
	case f.parsed != nil:
		return f.parsed, nil
	case f.index != nil && f.index.root != nil:
		return f.index.root, nil
	case f.data == nil:
		return nil, fmt.Errorf("no data to parse for file: %s", f.fullPath)
	}

	node := new(yaml.Node)
	if unmarshalErr := yaml.Unmarshal(f.data, node); unmarshalErr != nil {
		return nil, unmarshalErr
	}

	// share the freshly parsed tree with the index if it does not have a root yet.
	if f.index != nil && f.index.root == nil {
		f.index.root = node
	}
	f.parsed = node
	return node, nil
}
// GetFileExtension reports the FileExtension recorded for the file.
func (f *RemoteFile) GetFileExtension() FileExtension {
	return f.extension
}
// GetLastModified reports the modification time recorded for the file.
func (f *RemoteFile) GetLastModified() time.Time {
	return f.lastModified
}
// GetErrors returns the errors recorded against this file.
func (f *RemoteFile) GetErrors() []error {
	return f.seekingErrors
}
// GetFullPath reports the absolute path recorded for the file.
func (f *RemoteFile) GetFullPath() string {
	return f.fullPath
}
// fs.FileInfo interfaces
// Name reports the name of the file, which is the URL path it was fetched from (fs.FileInfo).
func (f *RemoteFile) Name() string {
	return f.name
}
// Size reports the number of bytes held for the file (fs.FileInfo).
func (f *RemoteFile) Size() int64 {
	byteCount := len(f.data)
	return int64(byteCount)
}
// Mode reports the file mode bits, which are always zero for a remote file (fs.FileInfo).
func (f *RemoteFile) Mode() fs.FileMode {
	return 0
}
// ModTime reports the modification time recorded for the file (fs.FileInfo).
func (f *RemoteFile) ModTime() time.Time {
	return f.lastModified
}
// IsDir always reports false: a RemoteFile is never a directory (fs.FileInfo).
func (f *RemoteFile) IsDir() bool {
	return false
}
// fs.File interfaces (Sys, directly below, is the last fs.FileInfo method; Close, Stat and Read belong to fs.File)
// Sys reports the underlying data source, which is always nil for a remote file (fs.FileInfo).
func (f *RemoteFile) Sys() interface{} {
	return nil
}
// Close is a no-op that always returns nil; it exists to satisfy fs.File.
func (f *RemoteFile) Close() error {
	return nil
}
// Stat returns the file itself as its fs.FileInfo; it never fails.
func (f *RemoteFile) Stat() (fs.FileInfo, error) {
	return f, nil
}
// Read copies bytes from the current offset into b and advances the offset by the number of bytes copied,
// which makes the file usable as an io.Reader.
//
// It returns io.EOF once the offset has reached the end of the data, and an *fs.PathError wrapping
// fs.ErrInvalid when the offset is negative.
func (f *RemoteFile) Read(b []byte) (int, error) {
	switch total := int64(len(f.data)); {
	case f.offset >= total:
		return 0, io.EOF
	case f.offset < 0:
		return 0, &fs.PathError{Op: "read", Path: f.name, Err: fs.ErrInvalid}
	}

	copied := copy(b, f.data[f.offset:])
	f.offset += int64(copied)
	return copied, nil
}
// Index builds a *SpecIndex for the file using config and caches it on the file.
//
// If the file has already been indexed, the cached index is returned and config is ignored. Otherwise the
// file content is parsed, a new index is created from the resulting root node, and the index's absolute spec
// path is taken from config.SpecAbsolutePath.
//
// An error is returned when config is nil or when the content cannot be parsed.
func (f *RemoteFile) Index(config *SpecIndexConfig) (*SpecIndex, error) {
	if f.index != nil {
		return f.index, nil
	}

	// config is dereferenced below; fail with an error instead of a nil pointer panic.
	if config == nil {
		return nil, errors.New("no spec index config provided")
	}

	// first, we must parse the content of the file
	info, err := datamodel.ExtractSpecInfoWithDocumentCheck(f.data, true)
	if err != nil {
		return nil, err
	}

	index := NewSpecIndexWithConfig(info.RootNode, config)
	index.specAbsolutePath = config.SpecAbsolutePath
	f.index = index
	return index, nil
}
// GetIndex returns the index cached on the file, or nil when the file has not been indexed.
func (f *RemoteFile) GetIndex() *SpecIndex {
	return f.index
}
// NewRemoteFSWithConfig builds a RemoteFS from specIndexConfig.
//
// The config's BaseURL becomes the root URL and its Logger is used for output; when no logger is configured,
// a JSON logger writing errors to os.Stdout is created. Fetching is delegated to the config's
// RemoteURLHandler, or to an http.Client with a 120 second timeout when no handler is configured.
//
// An error is returned when specIndexConfig is nil.
func NewRemoteFSWithConfig(specIndexConfig *SpecIndexConfig) (*RemoteFS, error) {
	if specIndexConfig == nil {
		return nil, errors.New("no spec index config provided")
	}

	logger := specIndexConfig.Logger
	if logger == nil {
		handlerOpts := &slog.HandlerOptions{Level: slog.LevelError}
		logger = slog.New(slog.NewJSONHandler(os.Stdout, handlerOpts))
	}

	base := specIndexConfig.BaseURL
	remote := &RemoteFS{
		indexConfig:   specIndexConfig,
		logger:        logger,
		rootURLParsed: base,
		FetchChannel:  make(chan *RemoteFile),
	}
	if base != nil {
		remote.rootURL = base.String()
	}

	if custom := specIndexConfig.RemoteURLHandler; custom != nil {
		remote.RemoteHandlerFunc = custom
		return remote, nil
	}

	// no handler supplied: fall back to a default http client
	httpClient := &http.Client{Timeout: 120 * time.Second}
	remote.RemoteHandlerFunc = func(target string) (*http.Response, error) {
		return httpClient.Get(target)
	}
	return remote, nil
}
// NewRemoteFSWithRootURL builds a RemoteFS whose base URL is rootURL. The remaining settings come from
// CreateOpenAPIIndexConfig. An error is returned when rootURL cannot be parsed.
func NewRemoteFSWithRootURL(rootURL string) (*RemoteFS, error) {
	parsedRoot, parseErr := url.Parse(rootURL)
	if parseErr != nil {
		return nil, parseErr
	}

	cfg := CreateOpenAPIIndexConfig()
	cfg.BaseURL = parsedRoot
	return NewRemoteFSWithConfig(cfg)
}
// SetRemoteHandlerFunc replaces the function used to fetch remote URLs.
func (i *RemoteFS) SetRemoteHandlerFunc(handlerFunc utils.RemoteURLHandler) {
	i.RemoteHandlerFunc = handlerFunc
}
// SetIndexConfig replaces the index configuration used by this file system.
func (i *RemoteFS) SetIndexConfig(config *SpecIndexConfig) {
	i.indexConfig = config
}
// GetFiles returns a fresh map of every file currently stored, keyed by the key it was stored under.
// The same map is also retained on the RemoteFS as its latest snapshot.
func (i *RemoteFS) GetFiles() map[string]RolodexFile {
	snapshot := map[string]RolodexFile{}
	collect := func(k, v interface{}) bool {
		snapshot[k.(string)] = v.(*RemoteFile)
		return true // keep iterating
	}
	i.Files.Range(collect)
	i.extractedFiles = snapshot
	return snapshot
}
// GetErrors returns the errors accumulated by this file system while fetching and indexing files.
func (i *RemoteFS) GetErrors() []error {
	return i.remoteErrors
}
// waiterRemote tracks a fetch that is in flight, so that concurrent calls to Open for the same path can wait
// for its result instead of fetching the file again.
type waiterRemote struct {
	// f is the URL path the fetch was registered under.
	f string
	// done is set to true once the fetch has finished, whether or not it produced a file.
	done bool
	// file is the fetched file; it stays nil when the fetch did not produce one.
	file *RemoteFile
	// listeners counts the callers currently waiting on this fetch. It is only used for debug logging.
	listeners int
	// err is the error handed to waiting callers when file is nil.
	err error
	// ready is closed when the fetch has finished; waiting callers block on it.
	ready chan struct{}
}

// Open fetches the document at remoteURL, stores it, indexes it and returns it.
//
// When a root URL is configured, its scheme and host replace those of remoteURL, and a relative path is
// joined onto the root path. A file that has already been fetched is returned from memory. If another call is
// already fetching the same path, Open blocks until that fetch has finished and returns its outcome.
//
// Returns:
//   - (file, nil) on success. NOTE: the error value is the join of every error accumulated by this RemoteFS,
//     so it can be non-nil alongside a valid file.
//   - (nil, nil) when the resolved URL has no scheme, i.e. it is not a remote file.
//   - (nil, *fs.PathError) wrapping fs.ErrInvalid when the file type is unsupported.
//   - (nil, error) when remote lookups are not allowed, the URL cannot be parsed, the fetch fails, the
//     response is empty or unreadable, or the status code is 400 or above.
func (i *RemoteFS) Open(remoteURL string) (fs.File, error) {
	if i.indexConfig != nil && !i.indexConfig.AllowRemoteLookup {
		return nil, fmt.Errorf("remote lookup for '%s' is not allowed, please set "+
			"AllowRemoteLookup to true as part of the index configuration", remoteURL)
	}

	remoteParsedURL, err := url.Parse(remoteURL)
	if err != nil {
		return nil, err
	}

	// remoteParsedURL is rewritten further down, so capture what was requested now. The requested path is
	// the key used for every ProcessingFiles operation, so an entry is always removed under the same key it
	// was stored under.
	originalScheme, originalHost := remoteParsedURL.Scheme, remoteParsedURL.Host
	processingKey := remoteParsedURL.Path

	// try path first
	if r, ok := i.Files.Load(processingKey); ok {
		return r.(*RemoteFile), nil
	}

	// reject unsupported types before anything is registered as in flight, so that no stale entry is left
	// behind for later callers to wait on.
	fileExt := ExtractFileType(processingKey)
	if fileExt == UNSUPPORTED {
		return nil, &fs.PathError{Op: "open", Path: remoteURL, Err: fs.ErrInvalid}
	}

	// register this fetch, or join the one that is already running. LoadOrStore makes the check and the
	// registration a single atomic step, so two callers cannot both start fetching the same path.
	processingWaiter := &waiterRemote{f: processingKey, ready: make(chan struct{})}
	if existing, loaded := i.ProcessingFiles.LoadOrStore(processingKey, processingWaiter); loaded {
		wait := existing.(*waiterRemote)

		// NOTE(review): listeners is updated without synchronisation; it is diagnostic only.
		wait.listeners++

		i.logger.Debug("[rolodex remote loader] waiting for existing fetch to complete", "file", remoteURL,
			"remoteURL", remoteParsedURL.String())

		// block until the fetching call reports its outcome.
		<-wait.ready

		wait.listeners--
		i.logger.Debug("[rolodex remote loader]: waiting done, remote completed, returning file", "file",
			remoteParsedURL.String(), "listeners", wait.listeners)

		// return an untyped nil on failure, never a nil *RemoteFile wrapped in a non-nil fs.File.
		if wait.file == nil {
			return nil, wait.err
		}
		return wait.file, nil
	}

	// finish publishes the outcome of this fetch and releases every waiting caller. It must be called exactly
	// once on every path out of this function from here on.
	finish := func(file *RemoteFile, fetchErr error) {
		processingWaiter.file = file
		processingWaiter.err = fetchErr
		processingWaiter.done = true
		i.ProcessingFiles.Delete(processingKey)
		close(processingWaiter.ready)
	}

	// if the remote URL is absolute (http:// or https://), and we have a rootURL defined, we need to override
	// the host being defined by this URL, and use the rootURL instead, but keep the path.
	// NOTE(review): filepath is OS specific; package path may be the better fit for URL paths. Confirm.
	if i.rootURLParsed != nil {
		remoteParsedURL.Host = i.rootURLParsed.Host
		remoteParsedURL.Scheme = i.rootURLParsed.Scheme
		if !filepath.IsAbs(remoteParsedURL.Path) {
			remoteParsedURL.Path = filepath.Join(i.rootURLParsed.Path, remoteParsedURL.Path)
		}
	}

	if remoteParsedURL.Scheme == "" {
		finish(nil, nil)
		return nil, nil // not a remote file, nothing wrong with that - just we can't keep looking here partner.
	}

	// the URL is not modified past this point, so its string form can be computed once.
	fetchURL := remoteParsedURL.String()
	i.logger.Debug("loading remote file", "file", remoteURL, "remoteURL", fetchURL)

	response, clientErr := i.RemoteHandlerFunc(fetchURL)
	if clientErr != nil {
		i.remoteErrors = append(i.remoteErrors, clientErr)
		finish(nil, clientErr)
		if response != nil {
			i.logger.Error("client error", "error", clientErr, "status", response.StatusCode)
		} else {
			i.logger.Error("client error", "error", clientErr.Error())
		}
		return nil, clientErr
	}
	// a response without a body cannot be read, so treat it like a missing response.
	if response == nil || response.Body == nil {
		emptyErr := fmt.Errorf("empty response from remote URL: %s", fetchURL)
		finish(nil, emptyErr)
		return nil, emptyErr
	}
	// the body must be closed, otherwise the underlying connection is leaked.
	defer response.Body.Close()

	responseBytes, readError := io.ReadAll(response.Body)
	if readError != nil {
		// %w keeps the message text unchanged while letting callers unwrap the cause.
		wrapped := fmt.Errorf("error reading bytes from remote file '%s': [%w]", fetchURL, readError)
		finish(nil, wrapped)
		return nil, wrapped
	}

	if response.StatusCode >= 400 {
		i.logger.Error("unable to fetch remote document",
			"file", remoteParsedURL.Path, "status", response.StatusCode, "resp", string(responseBytes))
		statusErr := fmt.Errorf("unable to fetch remote document: %s", string(responseBytes))
		finish(nil, statusErr)
		return nil, statusErr
	}

	// the error is deliberately ignored: on failure the path is simply left empty.
	absolutePath, _ := filepath.Abs(remoteParsedURL.Path)

	// extract last modified from the response and parse it into a time object
	lastModifiedTime, parseErr := time.Parse(time.RFC1123, response.Header.Get("Last-Modified"))
	if parseErr != nil {
		// can't extract last modified, so use now
		lastModifiedTime = time.Now()
	}

	remoteFile := &RemoteFile{
		filename:     filepath.Base(remoteParsedURL.Path),
		name:         remoteParsedURL.Path,
		extension:    fileExt,
		data:         responseBytes,
		fullPath:     absolutePath,
		URL:          remoteParsedURL,
		lastModified: lastModifiedTime,
	}

	// every file is indexed with its own copy of the configuration; tolerate a missing configuration in the
	// same way the lookup check at the top of this function does.
	var copiedCfg SpecIndexConfig
	if i.indexConfig != nil {
		copiedCfg = *i.indexConfig
	}

	// the base for references inside this file is the directory of the file, on the originally requested host.
	newBase := fmt.Sprintf("%s://%s%s", originalScheme, originalHost, filepath.Dir(remoteParsedURL.Path))
	if newBaseURL, _ := url.Parse(newBase); newBaseURL != nil {
		copiedCfg.BaseURL = newBaseURL
	}
	copiedCfg.SpecAbsolutePath = fetchURL

	if len(remoteFile.data) > 0 {
		i.logger.Debug("successfully loaded file", "file", absolutePath)
	}

	// publish the file and release waiting callers BEFORE indexing: building the index can call Open again.
	i.Files.Store(absolutePath, remoteFile)
	finish(remoteFile, nil)

	idx, idxError := remoteFile.Index(&copiedCfg)
	if idxError != nil && idx == nil {
		i.remoteErrors = append(i.remoteErrors, idxError)
	} else {
		// for each index, we need a resolver
		resolver := NewResolver(idx)
		idx.resolver = resolver
		idx.BuildIndex()
		if i.rolodex != nil {
			i.rolodex.AddExternalIndex(idx, fetchURL)
		}
	}
	return remoteFile, errors.Join(i.remoteErrors...)
}

View File

@@ -260,6 +260,10 @@ func TestSpecIndex_DigitalOcean_FullCheckoutLocalResolve(t *testing.T) {
assert.Len(t, rolo.GetCaughtErrors(), 0)
assert.Len(t, rolo.GetIgnoredCircularReferences(), 0)
assert.Equal(t, int64(1328224), rolo.RolodexFileSize())
assert.Equal(t, "1.27 MB", rolo.RolodexFileSizeAsString())
assert.Equal(t, 1691, rolo.RolodexTotalFiles())
}
func TestSpecIndex_DigitalOcean_FullCheckoutLocalResolve_RecursiveLookup(t *testing.T) {
@@ -330,6 +334,10 @@ func TestSpecIndex_DigitalOcean_FullCheckoutLocalResolve_RecursiveLookup(t *test
assert.Len(t, rolo.GetCaughtErrors(), 0)
assert.Len(t, rolo.GetIgnoredCircularReferences(), 0)
assert.Equal(t, int64(1266728), rolo.RolodexFileSize())
assert.Equal(t, "1.21 MB", rolo.RolodexFileSizeAsString())
assert.Equal(t, 1677, rolo.RolodexTotalFiles())
}
func TestSpecIndex_DigitalOcean_LookupsNotAllowed(t *testing.T) {
@@ -783,6 +791,21 @@ func TestSpecIndex_BurgerShopMixedRef(t *testing.T) {
assert.Equal(t, 1, index.GetInlineUniqueParamCount())
assert.Len(t, index.refErrors, 0)
assert.Len(t, index.GetCircularReferences(), 0)
// get the size of the rolodex.
assert.Equal(t, int64(60232), rolo.RolodexFileSize()+int64(len(yml)))
assert.Equal(t, "50.48 KB", rolo.RolodexFileSizeAsString())
assert.Equal(t, 3, rolo.RolodexTotalFiles())
}
// TestCalcSizeAsString pins the strings HumanFileSize produces for byte, KB, MB and GB sized inputs.
func TestCalcSizeAsString(t *testing.T) {
	check := assert.New(t)

	// below one kilobyte the value is reported in bytes
	check.Equal("345 B", HumanFileSize(345))

	// exactly one kilobyte, and one byte over it, both read as "1 KB"
	check.Equal("1 KB", HumanFileSize(1024))
	check.Equal("1 KB", HumanFileSize(1025))

	// fractional kilobytes are shown with two decimals
	check.Equal("1.98 KB", HumanFileSize(2025))

	// larger units
	check.Equal("1 MB", HumanFileSize(1025*1024))
	check.Equal("1 GB", HumanFileSize(1025*1025*1025))
}
func TestSpecIndex_TestEmptyBrokenReferences(t *testing.T) {