libopenapi/index/extract_refs.go

// Copyright 2023 Princess B33f Heavy Industries / Dave Shanley
// SPDX-License-Identifier: MIT

package index

import (
	"errors"
	"fmt"
	"net/url"
	"path/filepath"
	"strings"

	"github.com/pb33f/libopenapi/utils"
	"golang.org/x/exp/slices"
	"gopkg.in/yaml.v3"
)

// ExtractRefs walks node and every map/array nested beneath it, records what it finds on the index and
// returns the `$ref` references that were collected during the walk.
//
// While walking, the following index collections are populated:
//   - allRefSchemaDefinitions / allInlineSchemaDefinitions / allInlineSchemaObjectDefinitions for values found
//     under schema-bearing keywords (schema, items, properties, allOf, ...).
//   - rawSequencedRefs, refsByLine, linesWithRefs and refsWithSiblings for every string-valued `$ref`.
//   - polymorphicRefs (plus the anyOf/allOf/oneOf slices) for `$ref` values located beneath a polymorphic keyword;
//     these are never added to allRefs or to the returned slice.
//   - allDescriptions, allSummaries, securityRequirementRefs, allEnums and allObjectsWithProperties.
//   - refErrors when a `$ref` has an empty value.
//
// Parameters:
//   - node: the node whose Content is scanned. A nil node yields nil.
//   - parent: the node that contained node in the previous recursion step; stored as ParentNode on enum and
//     object references.
//   - seenPath: the key segments collected so far; used to build definition paths and the `$.a.b.c` style paths.
//   - level: nesting depth counter, incremented for each nested map/array and handed to the recursive call.
//   - poly: true once the walk has passed beneath a key that checkPolymorphicNode reports as polymorphic.
//   - pName: the polymorphic keyword currently in effect (used to sort refs into anyOf/allOf/oneOf buckets).
//
// The total number of refs will generally be much higher than the number returned; those can be extracted from
// GetRawReferenceCount().
//
// NOTE(review): the duplicate check looks up index.allRefs by the raw `$ref` value, while entries are stored
// under the full definition path. Whether the returned slice is truly deduplicated therefore depends on those
// two strings coinciding - confirm the intended key before relying on it.
func (index *SpecIndex) ExtractRefs(node, parent *yaml.Node, seenPath []string, level int, poly bool, pName string) []*Reference {
	if node == nil {
		return nil
	}
	var found []*Reference
	if len(node.Content) > 0 {

		// keywords whose value is a single schema.
		schemaContainingNodes := []string{"schema", "items", "additionalProperties", "contains", "not", "unevaluatedItems", "unevaluatedProperties"}
		// keywords whose value is a map of name -> schema.
		mapOfSchemaContainingNodes := []string{"properties", "patternProperties"}
		// keywords whose value is an array of schemas.
		arrayOfSchemaContainingNodes := []string{"allOf", "anyOf", "oneOf", "prefixItems"}

		// prev holds the most recent key that was appended to seenPath; polyName is what gets passed down as pName.
		var prev, polyName string
		for i, n := range node.Content {
			if utils.IsNodeMap(n) || utils.IsNodeArray(n) {
				level++
				// check if we're using polymorphic values. These tend to create rabbit warrens of circular
				// references if every single link is followed. We don't resolve polymorphic values.
				isPoly, _ := index.checkPolymorphicNode(prev)
				polyName = pName
				if isPoly {
					poly = true
					if prev != "" {
						polyName = prev
					}
				}
				found = append(found, index.ExtractRefs(n, node, seenPath, level, poly, polyName)...)
			}

			// check if we're dealing with an inline schema definition, that isn't part of an array
			// (which means it's being used as a value in an array, and it's not a label)
			// https://github.com/pb33f/libopenapi/issues/76
			if i%2 == 0 && slices.Contains(schemaContainingNodes, n.Value) && !utils.IsNodeArray(node) && (i+1 < len(node.Content)) {

				var jsonPath, definitionPath, fullDefinitionPath string

				if len(seenPath) > 0 || n.Value != "" {
					loc := append(seenPath, n.Value)
					// create definition and full definition paths
					definitionPath = fmt.Sprintf("#/%s", strings.Join(loc, "/"))
					fullDefinitionPath = fmt.Sprintf("%s#/%s", index.specAbsolutePath, strings.Join(loc, "/"))
					_, jsonPath = utils.ConvertComponentIdIntoFriendlyPathSearch(definitionPath)
				}
				ref := &Reference{
					FullDefinition: fullDefinitionPath,
					Definition:     definitionPath,
					Node:           node.Content[i+1],
					Path:           jsonPath,
					Index:          index,
				}

				isRef, _, _ := utils.IsNodeRefValue(node.Content[i+1])
				if isRef {
					// record this reference
					index.allRefSchemaDefinitions = append(index.allRefSchemaDefinitions, ref)
					continue
				}

				// a boolean additionalProperties / unevaluatedProperties is a flag, not a schema; don't record it.
				if n.Value == "additionalProperties" || n.Value == "unevaluatedProperties" {
					if utils.IsNodeBoolValue(node.Content[i+1]) {
						continue
					}
				}

				index.allInlineSchemaDefinitions = append(index.allInlineSchemaDefinitions, ref)

				// check if the schema is an object or an array,
				// and if so, add it to the list of inline schema object definitions.
				k, v := utils.FindKeyNodeTop("type", node.Content[i+1].Content)
				if k != nil && v != nil {
					if v.Value == "object" || v.Value == "array" {
						index.allInlineSchemaObjectDefinitions = append(index.allInlineSchemaObjectDefinitions, ref)
					}
				}
			}

			// Perform the same check for all maps of schemas like properties and patternProperties
			// https://github.com/pb33f/libopenapi/issues/76
			if i%2 == 0 && slices.Contains(mapOfSchemaContainingNodes, n.Value) && !utils.IsNodeArray(node) && (i+1 < len(node.Content)) {
				// for each property add it to our schema definitions
				label := ""
				for h, prop := range node.Content[i+1].Content {

					// even positions are the property names, odd positions are the schemas they label.
					if h%2 == 0 {
						label = prop.Value
						continue
					}
					var jsonPath, definitionPath, fullDefinitionPath string
					if len(seenPath) > 0 || n.Value != "" && label != "" {
						loc := append(seenPath, n.Value, label)
						definitionPath = fmt.Sprintf("#/%s", strings.Join(loc, "/"))
						fullDefinitionPath = fmt.Sprintf("%s#/%s", index.specAbsolutePath, strings.Join(loc, "/"))
						_, jsonPath = utils.ConvertComponentIdIntoFriendlyPathSearch(definitionPath)
					}
					ref := &Reference{
						FullDefinition: fullDefinitionPath,
						Definition:     definitionPath,
						Node:           prop,
						Path:           jsonPath,
						Index:          index,
					}

					isRef, _, _ := utils.IsNodeRefValue(prop)
					if isRef {
						// record this reference
						index.allRefSchemaDefinitions = append(index.allRefSchemaDefinitions, ref)
						continue
					}

					index.allInlineSchemaDefinitions = append(index.allInlineSchemaDefinitions, ref)

					// check if the schema is an object or an array,
					// and if so, add it to the list of inline schema object definitions.
					k, v := utils.FindKeyNodeTop("type", prop.Content)
					if k != nil && v != nil {
						if v.Value == "object" || v.Value == "array" {
							index.allInlineSchemaObjectDefinitions = append(index.allInlineSchemaObjectDefinitions, ref)
						}
					}
				}
			}

			// Perform the same check for all arrays of schemas like allOf, anyOf, oneOf
			if i%2 == 0 && slices.Contains(arrayOfSchemaContainingNodes, n.Value) && !utils.IsNodeArray(node) && (i+1 < len(node.Content)) {
				// for each element in the array, add it to our schema definitions
				for h, element := range node.Content[i+1].Content {

					var jsonPath, definitionPath, fullDefinitionPath string
					if len(seenPath) > 0 {
						loc := append(seenPath, n.Value, fmt.Sprintf("%d", h))
						definitionPath = fmt.Sprintf("#/%s", strings.Join(loc, "/"))
						fullDefinitionPath = fmt.Sprintf("%s#/%s", index.specAbsolutePath, strings.Join(loc, "/"))
						_, jsonPath = utils.ConvertComponentIdIntoFriendlyPathSearch(definitionPath)
					} else {
						// no path collected yet: the definition is addressed by the keyword alone (no element index).
						definitionPath = fmt.Sprintf("#/%s", n.Value)
						fullDefinitionPath = fmt.Sprintf("%s#/%s", index.specAbsolutePath, n.Value)
						_, jsonPath = utils.ConvertComponentIdIntoFriendlyPathSearch(definitionPath)
					}

					ref := &Reference{
						FullDefinition: fullDefinitionPath,
						Definition:     definitionPath,
						Node:           element,
						Path:           jsonPath,
						Index:          index,
					}

					isRef, _, _ := utils.IsNodeRefValue(element)
					if isRef { // record this reference
						index.allRefSchemaDefinitions = append(index.allRefSchemaDefinitions, ref)
						continue
					}
					index.allInlineSchemaDefinitions = append(index.allInlineSchemaDefinitions, ref)

					// check if the schema is an object or an array,
					// and if so, add it to the list of inline schema object definitions.
					k, v := utils.FindKeyNodeTop("type", element.Content)
					if k != nil && v != nil {
						if v.Value == "object" || v.Value == "array" {
							index.allInlineSchemaObjectDefinitions = append(index.allInlineSchemaObjectDefinitions, ref)
						}
					}
				}
			}

			if i%2 == 0 && n.Value == "$ref" {

				// a '$ref' scalar that is the final element of a sequence (e.g. `enum: [$ref]`) has no value
				// node following it. There is nothing to index, and reading node.Content[i+1] would panic.
				if i+1 >= len(node.Content) {
					continue
				}

				// only look at scalar values, not maps (looking at you k8s)
				if !utils.IsNodeStringValue(node.Content[i+1]) {
					continue
				}

				index.linesWithRefs[n.Line] = true

				// snapshot of the path so far, used only when reporting an empty reference below.
				fp := make([]string, len(seenPath))
				copy(fp, seenPath)

				value := node.Content[i+1].Value
				segs := strings.Split(value, "/")
				name := segs[len(segs)-1]
				// uri[0] is the document part (may be empty), uri[1] the fragment after '#/' when present.
				uri := strings.Split(value, "#/")

				// determine absolute path to this definition
				var defRoot string
				if strings.HasPrefix(index.specAbsolutePath, "http") {
					defRoot = index.specAbsolutePath
				} else {
					defRoot = filepath.Dir(index.specAbsolutePath)
				}

				var componentName string
				var fullDefinitionPath string
				if len(uri) == 2 {
					// the reference carries a '#/' fragment.
					if uri[0] == "" {
						// local reference into this document.
						fullDefinitionPath = fmt.Sprintf("%s#/%s", index.specAbsolutePath, uri[1])
						componentName = value
					} else {

						if strings.HasPrefix(uri[0], "http") {
							// remote document, already absolute.
							fullDefinitionPath = value
							componentName = fmt.Sprintf("#/%s", uri[1])
						} else {
							if filepath.IsAbs(uri[0]) {
								// absolute file path, use as-is.
								fullDefinitionPath = value
								componentName = fmt.Sprintf("#/%s", uri[1])
							} else {

								// if the index has a base path, use that to resolve the path
								if index.config.BasePath != "" && index.config.BaseURL == nil {
									abs, _ := filepath.Abs(filepath.Join(index.config.BasePath, uri[0]))
									if abs != defRoot {
										abs, _ = filepath.Abs(filepath.Join(defRoot, uri[0]))
									}
									fullDefinitionPath = fmt.Sprintf("%s#/%s", abs, uri[1])
									componentName = fmt.Sprintf("#/%s", uri[1])
								} else {
									// if the index has a base URL, use that to resolve the path.
									if index.config.BaseURL != nil && !filepath.IsAbs(defRoot) {
										// work on a copy so the configured BaseURL is not modified.
										u := *index.config.BaseURL
										abs, _ := filepath.Abs(filepath.Join(u.Path, uri[0]))
										u.Path = abs
										fullDefinitionPath = fmt.Sprintf("%s#/%s", u.String(), uri[1])
										componentName = fmt.Sprintf("#/%s", uri[1])

									} else {

										abs, _ := filepath.Abs(filepath.Join(defRoot, uri[0]))
										fullDefinitionPath = fmt.Sprintf("%s#/%s", abs, uri[1])
										componentName = fmt.Sprintf("#/%s", uri[1])
									}
								}
							}
						}
					}

				} else {
					// no '#/' fragment: the reference points at a whole document.
					if strings.HasPrefix(uri[0], "http") {
						fullDefinitionPath = value
					} else {
						// is it a relative file include?
						if !strings.Contains(uri[0], "#") {

							if strings.HasPrefix(defRoot, "http") {
								if !filepath.IsAbs(uri[0]) {
									// resolve against the directory of the remote spec's URL path.
									u, _ := url.Parse(defRoot)
									pathDir := filepath.Dir(u.Path)
									pathAbs, _ := filepath.Abs(filepath.Join(pathDir, uri[0]))
									u.Path = pathAbs
									fullDefinitionPath = u.String()
								}
							} else {
								if !filepath.IsAbs(uri[0]) {
									// if the index has a base path, use that to resolve the path
									if index.config.BasePath != "" {
										abs, _ := filepath.Abs(filepath.Join(index.config.BasePath, uri[0]))
										if abs != defRoot {
											abs, _ = filepath.Abs(filepath.Join(defRoot, uri[0]))
										}
										fullDefinitionPath = abs
										componentName = uri[0]
									} else {
										// if the index has a base URL, use that to resolve the path.
										if index.config.BaseURL != nil {

											// work on a copy so the configured BaseURL is not modified.
											u := *index.config.BaseURL
											abs := filepath.Join(u.Path, uri[0])
											u.Path = abs
											fullDefinitionPath = u.String()
											componentName = uri[0]
										} else {
											abs, _ := filepath.Abs(filepath.Join(defRoot, uri[0]))
											fullDefinitionPath = abs
											componentName = uri[0]
										}
									}
								}
							}
						}
					}
				}

				_, p := utils.ConvertComponentIdIntoFriendlyPathSearch(componentName)

				ref := &Reference{
					FullDefinition: fullDefinitionPath,
					Definition:     componentName,
					Name:           name,
					Node:           node,
					Path:           p,
					Index:          index,
				}

				// add to raw sequenced refs
				index.rawSequencedRefs = append(index.rawSequencedRefs, ref)

				// add ref by line number
				refNameIndex := strings.LastIndex(value, "/")
				refName := value[refNameIndex+1:]
				if len(index.refsByLine[refName]) > 0 {
					index.refsByLine[refName][n.Line] = true
				} else {
					v := make(map[int]bool)
					v[n.Line] = true
					index.refsByLine[refName] = v
				}

				// if this ref value has any siblings (node.Content is larger than two elements)
				// then add to refs with siblings
				if len(node.Content) > 2 {
					copiedNode := *node
					copied := Reference{
						FullDefinition: fullDefinitionPath,
						Definition:     ref.Definition,
						Name:           ref.Name,
						Node:           &copiedNode,
						Path:           p,
						Index:          index,
					}
					// protect this data using a copy, prevent the resolver from destroying things.
					index.refsWithSiblings[value] = copied
				}

				// if this is a polymorphic reference, we're going to leave it out
				// allRefs. We don't ever want these resolved, so instead of polluting
				// the timeline, we will keep each poly ref in its own collection for later
				// analysis.
				if poly {
					index.polymorphicRefs[value] = ref

					// index each type
					switch pName {
					case "anyOf":
						index.polymorphicAnyOfRefs = append(index.polymorphicAnyOfRefs, ref)
					case "allOf":
						index.polymorphicAllOfRefs = append(index.polymorphicAllOfRefs, ref)
					case "oneOf":
						index.polymorphicOneOfRefs = append(index.polymorphicOneOfRefs, ref)
					}
					continue
				}

				// check if this is a dupe, if so, skip it, we don't care now.
				if index.allRefs[value] != nil { // seen before, skip.
					continue
				}

				if value == "" {

					completedPath := fmt.Sprintf("$.%s", strings.Join(fp, "."))

					indexError := &IndexingError{
						Err:  errors.New("schema reference is empty and cannot be processed"),
						Node: node.Content[i+1],
						Path: completedPath,
					}

					index.refErrors = append(index.refErrors, indexError)

					continue
				}

				index.allRefs[fullDefinitionPath] = ref
				found = append(found, ref)
			}

			if i%2 == 0 && n.Value != "$ref" && n.Value != "" {

				nodePath := fmt.Sprintf("$.%s", strings.Join(seenPath, "."))

				// capture descriptions and summaries
				if n.Value == "description" {

					// if the parent is a sequence, ignore.
					if utils.IsNodeArray(node) {
						continue
					}

					ref := &DescriptionReference{
						Content:   node.Content[i+1].Value,
						Path:      nodePath,
						Node:      node.Content[i+1],
						IsSummary: false,
					}

					// a map-valued 'description' is not recorded as a description.
					if !utils.IsNodeMap(ref.Node) {
						index.allDescriptions = append(index.allDescriptions, ref)
						index.descriptionCount++
					}
				}

				if n.Value == "summary" {

					// fall back to the key node itself when there is no node after it.
					var b *yaml.Node
					if len(node.Content) == i+1 {
						b = node.Content[i]
					} else {
						b = node.Content[i+1]
					}
					ref := &DescriptionReference{
						Content:   b.Value,
						Path:      nodePath,
						Node:      b,
						IsSummary: true,
					}

					index.allSummaries = append(index.allSummaries, ref)
					index.summaryCount++
				}

				// capture security requirement references (these are not traditional references, but they
				// are used as a look-up. This is the only exception to the design.
				if n.Value == "security" {
					// fall back to the key node itself when there is no node after it.
					var b *yaml.Node
					if len(node.Content) == i+1 {
						b = node.Content[i]
					} else {
						b = node.Content[i+1]
					}
					if utils.IsNodeArray(b) {
						var secKey string
						// k: index of the requirement object, g: position inside it, r: index of the listed value.
						for k := range b.Content {
							if utils.IsNodeMap(b.Content[k]) {
								for g := range b.Content[k].Content {
									if g%2 == 0 {
										secKey = b.Content[k].Content[g].Value
										continue
									}
									if utils.IsNodeArray(b.Content[k].Content[g]) {
										var refMap map[string][]*Reference
										if index.securityRequirementRefs[secKey] == nil {
											index.securityRequirementRefs[secKey] = make(map[string][]*Reference)
											refMap = index.securityRequirementRefs[secKey]
										} else {
											refMap = index.securityRequirementRefs[secKey]
										}
										for r := range b.Content[k].Content[g].Content {
											var refs []*Reference
											if refMap[b.Content[k].Content[g].Content[r].Value] != nil {
												refs = refMap[b.Content[k].Content[g].Content[r].Value]
											}

											refs = append(refs, &Reference{
												Definition: b.Content[k].Content[g].Content[r].Value,
												Path:       fmt.Sprintf("%s.security[%d].%s[%d]", nodePath, k, secKey, r),
												Node:       b.Content[k].Content[g].Content[r],
											})

											index.securityRequirementRefs[secKey][b.Content[k].Content[g].Content[r].Value] = refs
										}
									}
								}
							}
						}
					}
				}
				// capture enums
				if n.Value == "enum" {

					// directly beneath 'properties', "enum" is a property name rather than the keyword:
					// extend the path and move on without recording an enum.
					if len(seenPath) > 0 {
						lastItem := seenPath[len(seenPath)-1]
						if lastItem == "properties" {
							seenPath = append(seenPath, strings.ReplaceAll(n.Value, "/", "~1"))
							prev = n.Value
							continue
						}
					}

					// all enums need to have a type, extract the type from the node where the enum was found.
					_, enumKeyValueNode := utils.FindKeyNodeTop("type", node.Content)

					// the bounds check guards against an 'enum' entry with no node after it.
					if enumKeyValueNode != nil && i+1 < len(node.Content) {
						ref := &EnumReference{
							Path:       nodePath,
							Node:       node.Content[i+1],
							Type:       enumKeyValueNode,
							SchemaNode: node,
							ParentNode: parent,
						}

						index.allEnums = append(index.allEnums, ref)
						index.enumCount++
					}
				}
				// capture all objects with properties
				if n.Value == "properties" {
					_, typeKeyValueNode := utils.FindKeyNodeTop("type", node.Content)

					if typeKeyValueNode != nil {
						isObject := false

						if typeKeyValueNode.Value == "object" {
							isObject = true
						}

						// 'type' may also be a list of types; any "object" entry qualifies.
						for _, v := range typeKeyValueNode.Content {
							if v.Value == "object" {
								isObject = true
							}
						}

						if isObject {
							index.allObjectsWithProperties = append(index.allObjectsWithProperties, &ObjectReference{
								Path:       nodePath,
								Node:       node,
								ParentNode: parent,
							})
						}
					}
				}

				// extend the path with this key, escaping '/' as '~1'.
				seenPath = append(seenPath, strings.ReplaceAll(n.Value, "/", "~1"))
				//seenPath = append(seenPath, n.Value)
				prev = n.Value
			}

			// if next node is map, don't add segment.
			if i < len(node.Content)-1 {
				next := node.Content[i+1]

				// at an odd position followed by a non-container node: drop the last path segment.
				if i%2 != 0 && next != nil && !utils.IsNodeArray(next) && !utils.IsNodeMap(next) && len(seenPath) > 0 {
					seenPath = seenPath[:len(seenPath)-1]
				}
			}
		}
	}

	index.refCount = len(index.allRefs)

	return found
}

// ExtractComponentsFromRefs looks up the component behind each supplied reference via FindComponent and
// returns the located components that were not already present in allMappedRefs. The returned nodes can be
// used for resolving as they contain the actual object properties.
//
// References whose Definition contains a backslash are rejected up front and reported in refErrors, as are
// references for which no component can be located. Every successful lookup is also appended to
// allMappedRefsSequenced, in the same order as the incoming references.
func (index *SpecIndex) ExtractComponentsFromRefs(refs []*Reference) []*Reference {
	var found []*Reference

	// lookups run concurrently because when things get recursive, it can take a while.
	// each one reports on this channel once it has finished.
	done := make(chan bool)

	locate := func(ref *Reference, refIndex int, sequence []*ReferenceMapped) {
		located := index.FindComponent(ref.FullDefinition)
		if located == nil {
			// nothing found: record the failure and report completion.
			_, friendlyPath := utils.ConvertComponentIdIntoFriendlyPathSearch(ref.Definition)
			missing := &IndexingError{
				Err:  fmt.Errorf("component '%s' does not exist in the specification", ref.Definition),
				Node: ref.Node,
				Path: friendlyPath,
			}
			index.errorLock.Lock()
			index.refErrors = append(index.refErrors, missing)
			index.errorLock.Unlock()
			done <- true
			return
		}

		index.refLock.Lock()
		// have we already mapped this?
		if index.allMappedRefs[ref.FullDefinition] == nil {
			found = append(found, located)
			index.allMappedRefs[located.FullDefinition] = located
		}
		// slot the mapping into its original position so ordering survives the concurrency.
		sequence[refIndex] = &ReferenceMapped{
			OriginalReference: ref,
			Reference:         located,
			Definition:        located.Definition,
			FullDefinition:    located.FullDefinition,
		}
		index.refLock.Unlock()
		done <- true
	}

	var refsToCheck []*Reference
	for _, candidate := range refs {
		if !strings.Contains(candidate.Definition, "\\") {
			refsToCheck = append(refsToCheck, candidate)
			continue
		}

		// references containing backslashes have been seen in the wild (blazemeter.com); they are not valid.
		_, friendlyPath := utils.ConvertComponentIdIntoFriendlyPathSearch(candidate.Definition)
		index.refErrors = append(index.refErrors, &IndexingError{
			Err:  fmt.Errorf("component '%s' contains a backslash '\\'. It's not valid", candidate.Definition),
			Node: candidate.Node,
			Path: friendlyPath,
		})
	}
	mappedRefsInSequence := make([]*ReferenceMapped, len(refsToCheck))

	// expand our index of all mapped refs, one goroutine per reference.
	// (swap 'go locate' for a plain 'locate' call when testing synchronously.)
	for position, candidate := range refsToCheck {
		go locate(candidate, position, mappedRefsInSequence)
	}

	// wait for exactly one completion signal per launched lookup.
	for range refsToCheck {
		<-done
	}

	// failed lookups leave a nil slot behind; skip those.
	for _, mapped := range mappedRefsInSequence {
		if mapped == nil {
			continue
		}
		index.allMappedRefsSequenced = append(index.allMappedRefsSequenced, mapped)
	}
	return found
}