From f56cdeae9e942b43f03fdf8f422b6043fa9f396a Mon Sep 17 00:00:00 2001 From: quobix Date: Wed, 22 Nov 2023 12:37:25 -0500 Subject: [PATCH] Tuned up local file handling and cleaned things up Signed-off-by: quobix --- datamodel/high/base/schema_proxy.go | 2 - datamodel/high/v3/document_test.go | 8 ++ datamodel/low/v3/create_document_test.go | 4 + index/map_index_nodes.go | 8 +- index/map_index_nodes_test.go | 1 - index/rolodex.go | 17 +++- index/rolodex_file_loader.go | 83 +++++++++++++++---- index/rolodex_file_loader_test.go | 19 ++++- index/rolodex_remote_loader.go | 8 +- index/rolodex_test.go | 3 + index/rolodex_test_data/dir1/utils/utils.yaml | 4 +- index/rolodex_test_data/dir2/components.yaml | 6 ++ .../dir2/subdir2/shared.yaml | 6 +- index/rolodex_test_data/dir2/utils/utils.yaml | 9 +- index/rolodex_test_data/doc2.yaml | 67 ++++++++------- index/search_rolodex.go | 56 +++++-------- index/search_rolodex_test.go | 18 ++++ 17 files changed, 208 insertions(+), 111 deletions(-) diff --git a/datamodel/high/base/schema_proxy.go b/datamodel/high/base/schema_proxy.go index 538fe10..78a7c9c 100644 --- a/datamodel/high/base/schema_proxy.go +++ b/datamodel/high/base/schema_proxy.go @@ -4,7 +4,6 @@ package base import ( - "fmt" "github.com/pb33f/libopenapi/datamodel/high" "github.com/pb33f/libopenapi/datamodel/low" "github.com/pb33f/libopenapi/datamodel/low/base" @@ -122,7 +121,6 @@ func (sp *SchemaProxy) GetReferenceOrigin() *index.NodeOrigin { if sp.schema != nil { return sp.schema.Value.GetSchemaReferenceLocation() } - fmt.Print("fuck man") return nil } diff --git a/datamodel/high/v3/document_test.go b/datamodel/high/v3/document_test.go index 9d39aab..ca7d57a 100644 --- a/datamodel/high/v3/document_test.go +++ b/datamodel/high/v3/document_test.go @@ -13,6 +13,7 @@ import ( "github.com/stretchr/testify/assert" "gopkg.in/yaml.v3" "log" + "log/slog" "net/http" "net/url" "os" @@ -445,6 +446,9 @@ func TestDigitalOceanAsDocViaCheckout(t *testing.T) { AllowFileReferences: true, AllowRemoteReferences: true, BasePath: basePath, + Logger: slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })), } lowDoc, err = lowv3.CreateDocumentFromConfig(info, &config) @@ -501,6 +505,10 @@ func TestDigitalOceanAsDocFromMain(t *testing.T) { BaseURL: baseURL, } + config.Logger = slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })) + if os.Getenv("GH_PAT") != "" { client := &http.Client{ Timeout: time.Second * 60, diff --git a/datamodel/low/v3/create_document_test.go b/datamodel/low/v3/create_document_test.go index 81c73bb..2d41e4e 100644 --- a/datamodel/low/v3/create_document_test.go +++ b/datamodel/low/v3/create_document_test.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/pb33f/libopenapi/index" "github.com/pb33f/libopenapi/utils" + "log/slog" "net/http" "net/url" "os" @@ -123,6 +124,9 @@ func TestRolodexRemoteFileSystem(t *testing.T) { info, _ := datamodel.ExtractSpecInfo(data) cf := datamodel.NewDocumentConfiguration() + cf.Logger = slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })) baseUrl := "https://raw.githubusercontent.com/pb33f/libopenapi/main/test_specs" u, _ := url.Parse(baseUrl) diff --git a/index/map_index_nodes.go b/index/map_index_nodes.go index e4fb665..dda0656 100644 --- a/index/map_index_nodes.go +++ b/index/map_index_nodes.go @@ -78,13 +78,7 @@ func (index *SpecIndex) FindNodeOrigin(node *yaml.Node) *NodeOrigin { if index.nodeMap[node.Line][node.Column] != nil { foundNode := index.nodeMap[node.Line][node.Column] match := true - if foundNode.Value != node.Value { - match = false - } - if foundNode.Kind != node.Kind { - match = false - } - if foundNode.Tag != node.Tag { + if foundNode.Value != node.Value || foundNode.Kind != node.Kind || foundNode.Tag != node.Tag { match = false } if len(foundNode.Content) == len(node.Content) { diff --git a/index/map_index_nodes_test.go b/index/map_index_nodes_test.go index c541235..62f74fb 100644 --- a/index/map_index_nodes_test.go +++ b/index/map_index_nodes_test.go @@ -53,7 +53,6 @@ func TestSpecIndex_MapNodes(t *testing.T) { mappedKeyNode, ok = index.GetNode(15, 999) assert.False(t, ok) assert.Nil(t, mappedKeyNode) - } func BenchmarkSpecIndex_MapNodes(b *testing.B) { diff --git a/index/rolodex.go b/index/rolodex.go index 03b379c..995d2f8 100644 --- a/index/rolodex.go +++ b/index/rolodex.go @@ -61,6 +61,7 @@ type Rolodex struct { indexConfig *SpecIndexConfig indexingDuration time.Duration indexes []*SpecIndex + indexMap map[string]*SpecIndex indexLock sync.Mutex rootIndex *SpecIndex rootNode *yaml.Node @@ -69,6 +70,7 @@ type Rolodex struct { infiniteCircularReferences []*CircularReferenceResult ignoredCircularReferences []*CircularReferenceResult logger *slog.Logger + rolodex *Rolodex } // NewRolodex creates a new rolodex with the provided index configuration. @@ -86,6 +88,7 @@ func NewRolodex(indexConfig *SpecIndexConfig) *Rolodex { localFS: make(map[string]fs.FS), remoteFS: make(map[string]fs.FS), logger: logger, + indexMap: make(map[string]*SpecIndex), } indexConfig.Rolodex = r return r @@ -147,12 +150,22 @@ func (r *Rolodex) SetRootNode(node *yaml.Node) { r.rootNode = node } -func (r *Rolodex) AddIndex(idx *SpecIndex) { +func (r *Rolodex) AddExternalIndex(idx *SpecIndex, location string) { r.indexLock.Lock() - r.indexes = append(r.indexes, idx) + if r.indexMap[location] == nil { + r.indexMap[location] = idx + } r.indexLock.Unlock() } +func (r *Rolodex) AddIndex(idx *SpecIndex) { + r.indexes = append(r.indexes, idx) + if idx != nil { + p := idx.specAbsolutePath + r.AddExternalIndex(idx, p) + } +} + // AddRemoteFS adds a remote file system to the rolodex. func (r *Rolodex) AddRemoteFS(baseURL string, fileSystem fs.FS) { if f, ok := fileSystem.(*RemoteFS); ok { diff --git a/index/rolodex_file_loader.go b/index/rolodex_file_loader.go index 99167ca..67611c8 100644 --- a/index/rolodex_file_loader.go +++ b/index/rolodex_file_loader.go @@ -30,6 +30,9 @@ type LocalFS struct { logger *slog.Logger fileLock sync.Mutex readingErrors []error + rolodex *Rolodex + processingFiles syncmap.Map + fileListeners int } // GetFiles returns the files that have been indexed. A map of RolodexFile objects keyed by the full path of the file. @@ -48,6 +51,12 @@ func (l *LocalFS) GetErrors() []error { return l.readingErrors } +type waiterLocal struct { + f string + c chan *LocalFile + listeners int +} + // Open opens a file, returning it or an error. If the file is not found, the error is of type *PathError. func (l *LocalFS) Open(name string) (fs.File, error) { @@ -66,11 +75,40 @@ func (l *LocalFS) Open(name string) (fs.File, error) { } else { if l.fsConfig != nil && l.fsConfig.DirFS == nil { + + // create a complete channel + c := make(chan *LocalFile) + + // if we're processing, we need to block and wait for the file to be processed + // try path first + if r, ko := l.processingFiles.Load(name); ko { + + wait := r.(*waiterLocal) + wait.listeners++ + + l.logger.Debug("[rolodex file loader]: waiting for existing OS load to complete", "file", name, "listeners", wait.listeners) + + select { + case v := <-wait.c: + wait.listeners-- + l.logger.Debug("[rolodex file loader]: waiting done, OS load completed, returning file", "file", name, "listeners", wait.listeners) + return v, nil + } + } + + processingWaiter := &waiterLocal{f: name, c: c} + + // add to processing + l.processingFiles.Store(name, processingWaiter) + var extractedFile *LocalFile var extErr error // attempt to open the file from the local filesystem + l.logger.Debug("[rolodex file loader]: extracting file from OS", "file", name) extractedFile, extErr = l.extractFile(name) if extErr != nil { + close(c) + l.processingFiles.Delete(name) return nil, extErr } if extractedFile != nil { @@ -83,6 +121,10 @@ func (l *LocalFS) Open(name string) (fs.File, error) { idx, idxError := extractedFile.Index(&copiedCfg) + if idx != nil && l.rolodex != nil { + idx.rolodex = l.rolodex + } + if idxError != nil && idx == nil { extractedFile.readingErrors = append(l.readingErrors, idxError) } else { @@ -94,8 +136,22 @@ func (l *LocalFS) Open(name string) (fs.File, error) { } if len(extractedFile.data) > 0 { - l.logger.Debug("successfully loaded and indexed file", "file", name) + l.logger.Debug("[rolodex file loader]: successfully loaded and indexed file", "file", name) } + + // add index to rolodex indexes + if l.rolodex != nil { + l.rolodex.AddIndex(idx) + } + if processingWaiter.listeners > 0 { + l.logger.Debug("[rolodex file loader]: alerting file subscribers", "file", name, "subs", processingWaiter.listeners) + } + for x := 0; x < processingWaiter.listeners; x++ { + c <- extractedFile + } + close(c) + + l.processingFiles.Delete(name) return extractedFile, nil } } @@ -344,7 +400,15 @@ func (l *LocalFS) extractFile(p string) (*LocalFile, error) { var file fs.File var fileError error if config != nil && config.DirFS != nil { + l.logger.Debug("[rolodex file loader]: collecting JSON/YAML file from dirFS", "file", abs) file, _ = config.DirFS.Open(p) + } else { + l.logger.Debug("[rolodex file loader]: reading local file from OS", "file", abs) + file, fileError = os.Open(abs) + } + + if config != nil && config.DirFS != nil { + } else { file, fileError = os.Open(abs) } @@ -361,11 +425,7 @@ func (l *LocalFS) extractFile(p string) (*LocalFile, error) { modTime = stat.ModTime() } fileData, _ = io.ReadAll(file) - if config != nil && config.DirFS != nil { - l.logger.Debug("collecting JSON/YAML file", "file", abs) - } else { - l.logger.Debug("parsing file", "file", abs) - } + lf := &LocalFile{ filename: p, name: filepath.Base(p), @@ -379,17 +439,8 @@ func (l *LocalFS) extractFile(p string) (*LocalFile, error) { return lf, nil case UNSUPPORTED: if config != nil && config.DirFS != nil { - l.logger.Debug("skipping non JSON/YAML file", "file", abs) + l.logger.Debug("[rolodex file loader]: skipping non JSON/YAML file", "file", abs) } } return nil, nil } - -// NewLocalFS creates a new LocalFS with the supplied base directory. -func NewLocalFS(baseDir string, dirFS fs.FS) (*LocalFS, error) { - config := &LocalFSConfig{ - BaseDirectory: baseDir, - DirFS: dirFS, - } - return NewLocalFSWithConfig(config) -} diff --git a/index/rolodex_file_loader_test.go b/index/rolodex_file_loader_test.go index dcdf5dd..a36acb5 100644 --- a/index/rolodex_file_loader_test.go +++ b/index/rolodex_file_loader_test.go @@ -8,6 +8,7 @@ import ( "gopkg.in/yaml.v3" "io" "io/fs" + "log/slog" "os" "path/filepath" "testing" @@ -25,7 +26,14 @@ func TestRolodexLoadsFilesCorrectly_NoErrors(t *testing.T) { "subfolder2/hello.jpg": {Data: []byte("shop"), ModTime: time.Now()}, } - fileFS, err := NewLocalFS(".", testFS) + fileFS, err := NewLocalFSWithConfig(&LocalFSConfig{ + BaseDirectory: ".", + Logger: slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })), + DirFS: testFS, + }) + if err != nil { t.Fatal(err) } @@ -150,7 +158,14 @@ func TestRolodexLocalFile_IndexSingleFile(t *testing.T) { "i-am-a-dir": {Mode: fs.FileMode(fs.ModeDir), ModTime: time.Now()}, } - fileFS, _ := NewLocalFS("spec.yaml", testFS) + fileFS, _ := NewLocalFSWithConfig(&LocalFSConfig{ + BaseDirectory: "spec.yaml", + Logger: slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })), + DirFS: testFS, + }) + files := fileFS.GetFiles() assert.Len(t, files, 1) diff --git a/index/rolodex_remote_loader.go b/index/rolodex_remote_loader.go index d82d612..0811a28 100644 --- a/index/rolodex_remote_loader.go +++ b/index/rolodex_remote_loader.go @@ -44,6 +44,7 @@ type RemoteFS struct { remoteErrors []error logger *slog.Logger extractedFiles map[string]RolodexFile + rolodex *Rolodex } // RemoteFile is a file that has been indexed by the RemoteFS. It implements the RolodexFile interface. @@ -293,8 +294,8 @@ func (i *RemoteFS) Open(remoteURL string) (fs.File, error) { i.logger.Debug("waiting for existing fetch to complete", "file", remoteURL, "remoteURL", remoteParsedURL.String()) - // Create a context with a timeout of 50ms - ctxTimeout, cancel := context.WithTimeout(context.Background(), time.Millisecond*50) + // Create a context with a timeout of 100 milliseconds. + ctxTimeout, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) defer cancel() f := make(chan *RemoteFile) fwait := func(path string, c chan *RemoteFile) { @@ -431,6 +432,9 @@ func (i *RemoteFS) Open(remoteURL string) (fs.File, error) { resolver := NewResolver(idx) idx.resolver = resolver idx.BuildIndex() + if i.rolodex != nil { + i.rolodex.AddExternalIndex(idx, remoteParsedURL.String()) + } } return remoteFile, errors.Join(i.remoteErrors...) } diff --git a/index/rolodex_test.go b/index/rolodex_test.go index 7ee8698..caaf026 100644 --- a/index/rolodex_test.go +++ b/index/rolodex_test.go @@ -1366,6 +1366,9 @@ func TestRolodex_SimpleTest_OneDoc(t *testing.T) { assert.Equal(t, fs.FileMode(0), f.Mode()) assert.Len(t, f.GetErrors(), 0) + // check the index has a rolodex reference + assert.NotNil(t, idx.GetRolodex()) + // re-run the index should be a no-op assert.NoError(t, rolo.IndexTheRolodex()) rolo.CheckForCircularReferences() diff --git a/index/rolodex_test_data/dir1/utils/utils.yaml b/index/rolodex_test_data/dir1/utils/utils.yaml index 8cb1080..2fa63ac 100644 --- a/index/rolodex_test_data/dir1/utils/utils.yaml +++ b/index/rolodex_test_data/dir1/utils/utils.yaml @@ -2,10 +2,8 @@ type: object description: I am a utility for dir1 properties: message: - type: string + type: object description: I am pointless dir1. properties: - link: - $ref: "../components.yaml#/components/schemas/GlobalComponent" shared: $ref: '../subdir1/shared.yaml#/components/schemas/SharedComponent' \ No newline at end of file diff --git a/index/rolodex_test_data/dir2/components.yaml b/index/rolodex_test_data/dir2/components.yaml index f41d4aa..1d25203 100644 --- a/index/rolodex_test_data/dir2/components.yaml +++ b/index/rolodex_test_data/dir2/components.yaml @@ -11,5 +11,11 @@ components: message: type: string description: I am pointless, but I am global dir2. + AnotherComponent: + type: object + description: Dir2 Another Component + properties: + message: + $ref: "subdir2/shared.yaml#/components/schemas/SharedComponent" SomeUtil: $ref: "utils/utils.yaml" \ No newline at end of file diff --git a/index/rolodex_test_data/dir2/subdir2/shared.yaml b/index/rolodex_test_data/dir2/subdir2/shared.yaml index 3c33657..ef913dc 100644 --- a/index/rolodex_test_data/dir2/subdir2/shared.yaml +++ b/index/rolodex_test_data/dir2/subdir2/shared.yaml @@ -8,8 +8,8 @@ components: type: object description: Dir2 Shared Component properties: + utilMessage: + $ref: "../utils/utils.yaml" message: type: string - description: I am pointless, but I am shared dir2. - SomeUtil: - $ref: "../utils/utils.yaml" \ No newline at end of file + description: I am pointless, but I am shared dir2. \ No newline at end of file diff --git a/index/rolodex_test_data/dir2/utils/utils.yaml b/index/rolodex_test_data/dir2/utils/utils.yaml index d36a123..494bf4e 100644 --- a/index/rolodex_test_data/dir2/utils/utils.yaml +++ b/index/rolodex_test_data/dir2/utils/utils.yaml @@ -2,10 +2,5 @@ type: object description: I am a utility for dir2 properties: message: - type: string - description: I am pointless dir2. - properties: - link: - $ref: "../components.yaml#/components/schemas/GlobalComponent" - shared: - $ref: '../subdir2/shared.yaml#/components/schemas/SharedComponent' \ No newline at end of file + type: object + description: I am pointless dir2 utility, I am multiple levels deep. \ No newline at end of file diff --git a/index/rolodex_test_data/doc2.yaml b/index/rolodex_test_data/doc2.yaml index 10586ec..c0fef50 100644 --- a/index/rolodex_test_data/doc2.yaml +++ b/index/rolodex_test_data/doc2.yaml @@ -3,7 +3,43 @@ info: title: Rolodex Test Data version: 1.0.0 paths: - /one/local: +# /one/local: +# get: +# responses: +# '200': +# description: OK +# content: +# application/json: +# schema: +# $ref: '#/components/schemas/Thing' +# /one/file: +# get: +# responses: +# '200': +# description: OK +# content: +# application/json: +# schema: +# $ref: 'components.yaml#/components/schemas/Ding' +# /nested/files1: +# get: +# responses: +# '200': +# description: OK +# content: +# application/json: +# schema: +# $ref: 'dir1/components.yaml#/components/schemas/GlobalComponent' +# /nested/files2: +# get: +# responses: +# '200': +# description: OK +# content: +# application/json: +# schema: +# $ref: 'dir2/components.yaml#/components/schemas/GlobalComponent' + /nested/files3: get: responses: '200': @@ -11,34 +47,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/Thing' - /one/file: - get: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: 'components.yaml#/components/schemas/Ding' - /nested/files1: - get: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: 'dir1/components.yaml#/components/schemas/GlobalComponent' - /nested/files2: - get: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: 'dir2/components.yaml#/components/schemas/GlobalComponent' + $ref: 'dir2/components.yaml#/components/schemas/AnotherComponent' components: schemas: Thing: diff --git a/index/search_rolodex.go b/index/search_rolodex.go index e780387..a59bf84 100644 --- a/index/search_rolodex.go +++ b/index/search_rolodex.go @@ -4,51 +4,33 @@ package index import ( - "fmt" "gopkg.in/yaml.v3" ) +// FindNodeOrigin searches all indexes for the origin of a node. If the node is found, a NodeOrigin +// is returned, otherwise nil is returned. func (r *Rolodex) FindNodeOrigin(node *yaml.Node) *NodeOrigin { - //f := make(chan *NodeOrigin) - //d := make(chan bool) - //findNode := func(i int, node *yaml.Node) { - // n := r.indexes[i].FindNodeOrigin(node) - // if n != nil { - // f <- n - // return - // } - // d <- true - //} - //for i, _ := range r.indexes { - // go findNode(i, node) - //} - //searched := 0 - //for searched < len(r.indexes) { - // select { - // case n := <-f: - // return n - // case <-d: - // searched++ - // } - //} - //return nil - - if len(r.indexes) == 0 { - fmt.Println("NO FUCKING WAY MAN") - } else { - //fmt.Printf("searching %d files\n", len(r.indexes)) - } - for i := range r.indexes { + f := make(chan *NodeOrigin) + d := make(chan bool) + findNode := func(i int, node *yaml.Node) { n := r.indexes[i].FindNodeOrigin(node) if n != nil { + f <- n + return + } + d <- true + } + for i, _ := range r.indexes { + go findNode(i, node) + } + searched := 0 + for searched < len(r.indexes) { + select { + case n := <-f: return n + case <-d: + searched++ } } - // if n != nil { - // f <- n - // return - // } - fmt.Println("my FUCKING ARSE") return nil - } diff --git a/index/search_rolodex_test.go b/index/search_rolodex_test.go index a029b2b..69449f3 100644 --- a/index/search_rolodex_test.go +++ b/index/search_rolodex_test.go @@ -6,6 +6,7 @@ package index import ( "github.com/stretchr/testify/assert" "github.com/vmware-labs/yaml-jsonpath/pkg/yamlpath" + "gopkg.in/yaml.v3" "strings" "testing" ) @@ -65,4 +66,21 @@ func TestRolodex_FindNodeOrigin(t *testing.T) { origin = rolo.FindNodeOrigin(nil) assert.Nil(t, origin) + // modify the node and try again + m := *results[0] + m.Value = "I am a new message" + origin = rolo.FindNodeOrigin(&m) + assert.Nil(t, origin) + + // copy, modify, and try again + o := *results[0] + o.Content = []*yaml.Node{ + {Value: "beer"}, + } + results[0].Content = []*yaml.Node{ + {Value: "wine"}, + } + origin = rolo.FindNodeOrigin(&o) + assert.Nil(t, origin) + }