diff --git a/datamodel/high/base/schema_proxy.go b/datamodel/high/base/schema_proxy.go
index 49b7e0f..538fe10 100644
--- a/datamodel/high/base/schema_proxy.go
+++ b/datamodel/high/base/schema_proxy.go
@@ -4,9 +4,10 @@
 package base
 
 import (
 	"github.com/pb33f/libopenapi/datamodel/high"
 	"github.com/pb33f/libopenapi/datamodel/low"
 	"github.com/pb33f/libopenapi/datamodel/low/base"
+	"github.com/pb33f/libopenapi/index"
 	"github.com/pb33f/libopenapi/utils"
 	"gopkg.in/yaml.v3"
 	"sync"
@@ -114,6 +115,15 @@ func (sp *SchemaProxy) GetReference() string {
 	return sp.schema.Value.GetSchemaReference()
 }
 
+// GetReferenceOrigin returns a pointer to the index.NodeOrigin of the $ref if this SchemaProxy is a reference to
+// another Schema. It returns nil if the origin cannot be found (which means there is a bug that needs fixing).
+func (sp *SchemaProxy) GetReferenceOrigin() *index.NodeOrigin {
+	if sp.schema == nil {
+		return nil
+	}
+	return sp.schema.Value.GetSchemaReferenceLocation()
+}
+
 // BuildSchema operates the same way as Schema, except it will return any error along with the *Schema
 func (sp *SchemaProxy) BuildSchema() (*Schema, error) {
 	if sp.rendered != nil {
diff --git a/datamodel/low/base/schema_proxy.go b/datamodel/low/base/schema_proxy.go
index 36d77fd..373b178 100644
--- a/datamodel/low/base/schema_proxy.go
+++ b/datamodel/low/base/schema_proxy.go
@@ -132,6 +132,21 @@ func (sp *SchemaProxy) GetSchemaReference() string {
 	return sp.referenceLookup
 }
 
+// GetSchemaReferenceLocation returns the index.NodeOrigin of the value node behind this SchemaProxy. The proxy's
+// own index is searched first, then its rolodex (if it has one). It returns nil if the origin cannot be found.
+func (sp *SchemaProxy) GetSchemaReferenceLocation() *index.NodeOrigin {
+	if sp.idx == nil {
+		return nil
+	}
+	if origin := sp.idx.FindNodeOrigin(sp.vn); origin != nil {
+		return origin
+	}
+	if rolo := sp.idx.GetRolodex(); rolo != nil {
+		return rolo.FindNodeOrigin(sp.vn)
+	}
+	return nil
+}
+
 // GetKeyNode will return the yaml.Node pointer that is a key for value node.
 func (sp *SchemaProxy) GetKeyNode() *yaml.Node {
 	return sp.kn
diff --git a/index/index_model.go b/index/index_model.go
index 469826b..c10cad8 100644
--- a/index/index_model.go
+++ b/index/index_model.go
@@ -274,6 +274,8 @@ type SpecIndex struct {
 	built bool
 	uri []string
 	logger *slog.Logger
+	nodeMap map[int]map[int]*yaml.Node
+	nodeMapCompleted chan bool
 }
 
 // GetResolver returns the resolver for this index.
diff --git a/index/map_index_nodes.go b/index/map_index_nodes.go
new file mode 100644
index 0000000..e4fb665
--- /dev/null
+++ b/index/map_index_nodes.go
@@ -0,0 +1,113 @@
+// Copyright 2023 Princess B33f Heavy Industries / Dave Shanley
+// SPDX-License-Identifier: MIT
+
+package index
+
+import (
+	"gopkg.in/yaml.v3"
+)
+
+// nodeMap pairs a node with the line and column it was found at, for handing from the walker to MapNodes.
+type nodeMap struct {
+	line   int
+	column int
+	node   *yaml.Node
+}
+
+// NodeOrigin represents where a node has come from within a specification. This is not useful for single file specs,
+// but becomes very, very important when dealing with exploded specifications, and we need to know where in the mass
+// of files a node has come from.
+type NodeOrigin struct {
+	// Node is the node in question
+	Node *yaml.Node
+
+	// Line is the original line of where the node was found in the original file
+	Line int
+
+	// Column is the original column of where the node was found in the original file
+	Column int
+
+	// AbsoluteLocation is the absolute path of the location the node was found in.
+	// This can either be an absolute path to a file, or a URL.
+	AbsoluteLocation string
+
+	// Index is the index in which the node was located.
+	Index *SpecIndex
+}
+
+// GetNode returns a node from the spec based on a line and column. The second return var bool is true
+// if the node was found, false if not.
+func (index *SpecIndex) GetNode(line int, column int) (*yaml.Node, bool) {
+	// indexing a missing (nil) line map is safe in Go and simply yields a nil node.
+	node := index.nodeMap[line][column]
+	return node, node != nil
+}
+
+// MapNodes maps all nodes in the document to a map of line/column to node. It blocks until the whole tree has
+// been walked and mapped, then sends on (and closes) nodeMapCompleted.
+func (index *SpecIndex) MapNodes(rootNode *yaml.Node) {
+	cruising := make(chan bool)
+	nodeChan := make(chan *nodeMap)
+	go func(nodeChan chan *nodeMap) {
+		// collect nodes until the walker closes the channel.
+		for node := range nodeChan {
+			if index.nodeMap[node.line] == nil {
+				index.nodeMap[node.line] = make(map[int]*yaml.Node)
+			}
+			index.nodeMap[node.line][node.column] = node.node
+		}
+		cruising <- true
+	}(nodeChan)
+	go enjoyALuxuryCruise(rootNode, nodeChan, true)
+	<-cruising
+	close(cruising)
+	index.nodeMapCompleted <- true
+	close(index.nodeMapCompleted)
+}
+
+// FindNodeOrigin looks in this index only for a node mapped at the same line and column as the supplied node.
+// A NodeOrigin is returned when the mapped node has the same value, kind and tag, otherwise nil is returned.
+func (index *SpecIndex) FindNodeOrigin(node *yaml.Node) *NodeOrigin {
+	if node == nil {
+		return nil
+	}
+	foundNode := index.nodeMap[node.Line][node.Column]
+	if foundNode == nil {
+		return nil
+	}
+	if foundNode.Value != node.Value || foundNode.Kind != node.Kind || foundNode.Tag != node.Tag {
+		return nil
+	}
+	// NOTE: child values are only compared when both nodes have the same number of children.
+	if len(foundNode.Content) == len(node.Content) {
+		for i := range foundNode.Content {
+			if foundNode.Content[i].Value != node.Content[i].Value {
+				return nil
+			}
+		}
+	}
+	return &NodeOrigin{
+		Node:             foundNode,
+		Line:             node.Line,
+		Column:           node.Column,
+		AbsoluteLocation: index.specAbsolutePath,
+		Index:            index,
+	}
+}
+
+// enjoyALuxuryCruise walks the node tree depth first and sends every node to nodeChan exactly once. Children are
+// sent before their parent, so when several nodes share a line and column the outermost one is received last.
+// The root call closes nodeChan once the walk is complete.
+func enjoyALuxuryCruise(node *yaml.Node, nodeChan chan *nodeMap, root bool) {
+	for _, child := range node.Content {
+		enjoyALuxuryCruise(child, nodeChan, false)
+	}
+	nodeChan <- &nodeMap{
+		line:   node.Line,
+		column: node.Column,
+		node:   node,
+	}
+	if root {
+		close(nodeChan)
+	}
+}
diff --git a/index/map_index_nodes_test.go b/index/map_index_nodes_test.go
new file mode 100644
index 0000000..c541235
--- /dev/null
+++ b/index/map_index_nodes_test.go
@@ -0,0 +1,87 @@
+// Copyright 2023 Princess B33f Heavy Industries / Dave Shanley
+// SPDX-License-Identifier: MIT
+
+package index
+
+import (
+	"github.com/pb33f/libopenapi/utils"
+	"github.com/stretchr/testify/assert"
+	"github.com/vmware-labs/yaml-jsonpath/pkg/yamlpath"
+	"gopkg.in/yaml.v3"
+	"os"
+	"reflect"
+	"testing"
+)
+
+func TestSpecIndex_MapNodes(t *testing.T) {
+
+	petstore, _ := os.ReadFile("../test_specs/petstorev3.json")
+	var rootNode yaml.Node
+	_ = yaml.Unmarshal(petstore, &rootNode)
+
+	index := NewSpecIndexWithConfig(&rootNode, CreateOpenAPIIndexConfig())
+
+	<-index.nodeMapCompleted
+
+	// look up a node and make sure they match exactly (same pointer)
+	path, _ := yamlpath.NewPath("$.paths./pet.put")
+	nodes, _ := path.Find(&rootNode)
+
+	keyNode, valueNode := utils.FindKeyNodeTop("operationId", nodes[0].Content)
+	mappedKeyNode, _ := index.GetNode(keyNode.Line, keyNode.Column)
+	mappedValueNode, _ := index.GetNode(valueNode.Line, valueNode.Column)
+
+	assert.Equal(t, keyNode, mappedKeyNode)
+	assert.Equal(t, valueNode, mappedValueNode)
+
+	// make sure the pointers are the same
+	p1 := reflect.ValueOf(keyNode).Pointer()
+	p2 := reflect.ValueOf(mappedKeyNode).Pointer()
+	assert.Equal(t, p1, p2)
+
+	// check missing line
+	var ok bool
+	mappedKeyNode, ok = index.GetNode(999, 999)
+	assert.False(t, ok)
+	assert.Nil(t, mappedKeyNode)
+
+	mappedKeyNode, ok = index.GetNode(12, 999)
+	assert.False(t, ok)
+	assert.Nil(t, mappedKeyNode)
+
+	index.nodeMap[15] = nil
+	mappedKeyNode, ok = index.GetNode(15, 999)
+	assert.False(t, ok)
+	assert.Nil(t, mappedKeyNode)
+
+}
+
+func BenchmarkSpecIndex_MapNodes(b *testing.B) {
+
+	petstore, _ := os.ReadFile("../test_specs/petstorev3.json")
+	var rootNode yaml.Node
+	_ = yaml.Unmarshal(petstore, &rootNode)
+	path, _ := yamlpath.NewPath("$.paths./pet.put")
+
+	for i := 0; i < b.N; i++ {
+
+		index := NewSpecIndexWithConfig(&rootNode, CreateOpenAPIIndexConfig())
+
+		<-index.nodeMapCompleted
+
+		// look up a node and make sure they match exactly (same pointer)
+		nodes, _ := path.Find(&rootNode)
+
+		keyNode, valueNode := utils.FindKeyNodeTop("operationId", nodes[0].Content)
+		mappedKeyNode, _ := index.GetNode(keyNode.Line, keyNode.Column)
+		mappedValueNode, _ := index.GetNode(valueNode.Line, valueNode.Column)
+
+		assert.Equal(b, keyNode, mappedKeyNode)
+		assert.Equal(b, valueNode, mappedValueNode)
+
+		// make sure the pointers are the same
+		p1 := reflect.ValueOf(keyNode).Pointer()
+		p2 := reflect.ValueOf(mappedKeyNode).Pointer()
+		assert.Equal(b, p1, p2)
+	}
+}
diff --git a/index/rolodex.go b/index/rolodex.go
index c854079..03b379c 100644
--- a/index/rolodex.go
+++ b/index/rolodex.go
@@ -9,6 +9,7 @@ import (
 	"gopkg.in/yaml.v3"
 	"io"
 	"io/fs"
+	"log/slog"
 	"net/url"
 	"os"
 	"path/filepath"
@@ -60,20 +61,31 @@ type Rolodex struct {
 	indexConfig *SpecIndexConfig
 	indexingDuration time.Duration
 	indexes []*SpecIndex
+	indexLock sync.Mutex
 	rootIndex *SpecIndex
 	rootNode *yaml.Node
 	caughtErrors []error
 	safeCircularReferences []*CircularReferenceResult
 	infiniteCircularReferences []*CircularReferenceResult
 	ignoredCircularReferences []*CircularReferenceResult
+	logger *slog.Logger
 }
 
 // NewRolodex creates a new rolodex with the provided index configuration.
 func NewRolodex(indexConfig *SpecIndexConfig) *Rolodex {
+
+	logger := indexConfig.Logger
+	if logger == nil {
+		logger = slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
+			Level: slog.LevelError,
+		}))
+	}
+
 	r := &Rolodex{
 		indexConfig: indexConfig,
 		localFS: make(map[string]fs.FS),
 		remoteFS: make(map[string]fs.FS),
+		logger: logger,
 	}
 	indexConfig.Rolodex = r
 	return r
@@ -123,6 +135,10 @@ func (r *Rolodex) GetCaughtErrors() []error {
 // AddLocalFS adds a local file system to the rolodex.
 func (r *Rolodex) AddLocalFS(baseDir string, fileSystem fs.FS) {
 	absBaseDir, _ := filepath.Abs(baseDir)
+	if f, ok := fileSystem.(*LocalFS); ok {
+		f.rolodex = r
+		f.logger = r.logger
+	}
 	r.localFS[absBaseDir] = fileSystem
 }
 
@@ -131,8 +147,19 @@ func (r *Rolodex) SetRootNode(node *yaml.Node) {
 	r.rootNode = node
 }
 
+// AddIndex appends the supplied index to the rolodex's list of indexes. The append is made while holding indexLock.
+func (r *Rolodex) AddIndex(idx *SpecIndex) {
+	r.indexLock.Lock()
+	r.indexes = append(r.indexes, idx)
+	r.indexLock.Unlock()
+}
+
 // AddRemoteFS adds a remote file system to the rolodex.
 func (r *Rolodex) AddRemoteFS(baseURL string, fileSystem fs.FS) {
+	if f, ok := fileSystem.(*RemoteFS); ok {
+		f.rolodex = r
+		f.logger = r.logger
+	}
 	r.remoteFS[baseURL] = fileSystem
 }
 
@@ -281,7 +308,9 @@ func (r *Rolodex) IndexTheRolodex() error {
 			resolver.IgnorePolymorphicCircularReferences()
 		}
 
+		r.logger.Debug("[rolodex] starting root index build")
 		index.BuildIndex()
+		r.logger.Debug("[rolodex] root index build completed")
 
 		if !r.indexConfig.AvoidCircularReferenceCheck {
 			resolvingErrors := resolver.CheckForCircularReferences()
@@ -347,10 +376,10 @@ func (r *Rolodex) Resolve() {
 		for e := range resolvingErrors {
 			r.caughtErrors = append(r.caughtErrors, resolvingErrors[e])
 		}
-		if len(r.rootIndex.resolver.ignoredPolyReferences) > 0 {
+		if r.rootIndex != nil && len(r.rootIndex.resolver.ignoredPolyReferences) > 0 {
 			r.ignoredCircularReferences = append(r.ignoredCircularReferences, res.ignoredPolyReferences...)
}
-		if len(r.rootIndex.resolver.ignoredArrayReferences) > 0 {
+		if r.rootIndex != nil && len(r.rootIndex.resolver.ignoredArrayReferences) > 0 {
 			r.ignoredCircularReferences = append(r.ignoredCircularReferences, res.ignoredArrayReferences...)
 		}
 		r.safeCircularReferences = append(r.safeCircularReferences, res.GetSafeCircularReferences()...)
diff --git a/index/rolodex_test.go b/index/rolodex_test.go
index 132f80a..7ee8698 100644
--- a/index/rolodex_test.go
+++ b/index/rolodex_test.go
@@ -8,6 +8,7 @@ import (
 	"gopkg.in/yaml.v3"
 	"io"
 	"io/fs"
+	"log/slog"
 	"net/http"
 	"net/http/httptest"
 	"net/url"
@@ -53,7 +54,13 @@ func TestRolodex_LocalNativeFS(t *testing.T) {
 
 	baseDir := "/tmp"
 
-	fileFS, err := NewLocalFS(baseDir, testFS)
+	fileFS, err := NewLocalFSWithConfig(&LocalFSConfig{
+		BaseDirectory: baseDir,
+		Logger: slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
+			Level: slog.LevelDebug,
+		})),
+		DirFS: testFS,
+	})
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -1313,7 +1320,14 @@ func TestRolodex_SimpleTest_OneDoc(t *testing.T) {
 
 	baseDir := "rolodex_test_data"
 
-	fileFS, err := NewLocalFS(baseDir, os.DirFS(baseDir))
+	fileFS, err := NewLocalFSWithConfig(&LocalFSConfig{
+		BaseDirectory: baseDir,
+		Logger: slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
+			Level: slog.LevelDebug,
+		})),
+		DirFS: os.DirFS(baseDir),
+	})
+
 	if err != nil {
 		t.Fatal(err)
 	}
diff --git a/index/search_rolodex.go b/index/search_rolodex.go
new file mode 100644
index 0000000..e780387
--- /dev/null
+++ b/index/search_rolodex.go
@@ -0,0 +1,19 @@
+// Copyright 2023 Princess B33f Heavy Industries / Dave Shanley
+// SPDX-License-Identifier: MIT
+
+package index
+
+import (
+	"gopkg.in/yaml.v3"
+)
+
+// FindNodeOrigin asks each index held by the rolodex, in order, for the origin of the supplied node and returns
+// the first NodeOrigin found. It returns nil if there are no indexes, or none of them can locate the node.
+func (r *Rolodex) FindNodeOrigin(node *yaml.Node) *NodeOrigin {
+	for i := range r.indexes {
+		if n := r.indexes[i].FindNodeOrigin(node); n != nil {
+			return n
+		}
+	}
+	return nil
+}
diff --git a/index/search_rolodex_test.go b/index/search_rolodex_test.go
new file mode 100644
index 0000000..a029b2b
--- /dev/null
+++ b/index/search_rolodex_test.go
@@ -0,0 +1,68 @@
+// Copyright 2023 Princess B33f Heavy Industries / Dave Shanley
+// SPDX-License-Identifier: MIT
+
+package index
+
+import (
+	"github.com/stretchr/testify/assert"
+	"github.com/vmware-labs/yaml-jsonpath/pkg/yamlpath"
+	"strings"
+	"testing"
+)
+
+func TestRolodex_FindNodeOrigin(t *testing.T) {
+
+	baseDir := "rolodex_test_data"
+
+	cf := CreateOpenAPIIndexConfig()
+	cf.BasePath = baseDir
+	cf.AvoidCircularReferenceCheck = true
+
+	fileFS, err := NewLocalFSWithConfig(&LocalFSConfig{
+		BaseDirectory: baseDir,
+		IndexConfig: cf,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	rolo := NewRolodex(cf)
+	rolo.AddLocalFS(baseDir, fileFS)
+
+	// open doc2
+	f, rerr := rolo.Open("doc2.yaml")
+	assert.Nil(t, rerr)
+	assert.NotNil(t, f)
+
+	node, _ := f.GetContentAsYAMLNode()
+
+	rolo.SetRootNode(node)
+
+	err = rolo.IndexTheRolodex()
+	rolo.Resolve()
+
+	assert.Len(t, rolo.indexes, 4)
+
+	// extract something that can only exist after resolution
+	path := "$.paths./nested/files3.get.responses.200.content.application/json.schema.properties.message.properties.utilMessage.properties.message.description"
+	yp, _ := yamlpath.NewPath(path)
+	results, _ := yp.Find(node)
+
+	assert.NotNil(t, results)
+	assert.Len(t, results, 1)
+	assert.Equal(t, "I am pointless dir2 utility, I am multiple levels deep.", results[0].Value)
+
+	// now for the truth, where did this come from?
+	origin := rolo.FindNodeOrigin(results[0])
+
+	assert.NotNil(t, origin)
+	assert.True(t, strings.HasSuffix(origin.AbsoluteLocation, "index/rolodex_test_data/dir2/utils/utils.yaml"))
+
+	// should be identical to the original node
+	assert.Equal(t, results[0], origin.Node)
+
+	// look for something that cannot exist
+	origin = rolo.FindNodeOrigin(nil)
+	assert.Nil(t, origin)
+
+}
diff --git a/index/spec_index.go b/index/spec_index.go
index 1b95f1e..b4d7bc8 100644
--- a/index/spec_index.go
+++ b/index/spec_index.go
@@ -66,6 +66,9 @@ func createNewIndex(rootNode *yaml.Node, index *SpecIndex, avoidBuildOut bool) *
 	if rootNode == nil {
 		return index
 	}
+	index.nodeMapCompleted = make(chan bool)
+	index.nodeMap = make(map[int]map[int]*yaml.Node)
+	go index.MapNodes(rootNode) // this can run async.
 
 	index.cache = new(syncmap.Map)
 
@@ -91,7 +94,7 @@ func createNewIndex(rootNode *yaml.Node, index *SpecIndex, avoidBuildOut bool) *
 	if !avoidBuildOut {
 		index.BuildIndex()
 	}
-
+	<-index.nodeMapCompleted
 	return index
 }
 
@@ -147,6 +150,11 @@ func (index *SpecIndex) GetRootNode() *yaml.Node {
 	return index.root
 }
 
+// GetRolodex returns the rolodex held by this index.
+func (index *SpecIndex) GetRolodex() *Rolodex {
+	return index.rolodex
+}
+
 // GetGlobalTagsNode returns document root tags node.
func (index *SpecIndex) GetGlobalTagsNode() *yaml.Node {
 	return index.tagsNode
diff --git a/index/spec_index_test.go b/index/spec_index_test.go
index 47859c5..4b16b8c 100644
--- a/index/spec_index_test.go
+++ b/index/spec_index_test.go
@@ -142,7 +142,7 @@ func TestSpecIndex_DigitalOcean(t *testing.T) {
 	cf.AllowRemoteLookup = true
 	cf.AvoidCircularReferenceCheck = true
 	cf.Logger = slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
-		Level: slog.LevelInfo,
+		Level: slog.LevelDebug,
 	}))
 
 	// setting this baseURL will override the base
@@ -166,7 +166,7 @@ func TestSpecIndex_DigitalOcean(t *testing.T) {
 		}
 		remoteFS.SetRemoteHandlerFunc(func(url string) (*http.Response, error) {
 			request, _ := http.NewRequest(http.MethodGet, url, nil)
-			request.Header.Set("Authorization", fmt.Sprintf("Bearer %s", os.Getenv("GITHUB_TOKEN")))
+			request.Header.Set("Authorization", fmt.Sprintf("Bearer %s", os.Getenv("GH_PAT")))
 			return client.Do(request)
 		})
 	}
@@ -178,6 +178,7 @@ func TestSpecIndex_DigitalOcean(t *testing.T) {
 	indexedErr := rolo.IndexTheRolodex()
 	assert.NoError(t, indexedErr)
 
+	// get all the files held by the remote file system, so they can be counted.
 	files := remoteFS.GetFiles()
 	fileLen := len(files)
 	assert.Equal(t, 1646, fileLen)