mirror of
https://github.com/LukeHagar/arbiter.git
synced 2025-12-10 04:19:26 +00:00
Saving all progress
This commit is contained in:
717
node_modules/micromark/dev/lib/create-tokenizer.js
generated
vendored
Normal file
717
node_modules/micromark/dev/lib/create-tokenizer.js
generated
vendored
Normal file
@@ -0,0 +1,717 @@
|
||||
/**
 * @import {
 *   Chunk,
 *   Code,
 *   ConstructRecord,
 *   Construct,
 *   Effects,
 *   InitialConstruct,
 *   ParseContext,
 *   Point,
 *   State,
 *   TokenizeContext,
 *   Token
 * } from 'micromark-util-types'
 */
|
||||
|
||||
/**
 * @callback Restore
 *   Restore the state.
 * @returns {undefined}
 *   Nothing.
 *
 * @typedef Info
 *   Info.
 * @property {Restore} restore
 *   Restore.
 * @property {number} from
 *   From.
 *
 * @callback ReturnHandle
 *   Handle a successful run.
 * @param {Construct} construct
 *   Construct.
 * @param {Info} info
 *   Info.
 * @returns {undefined}
 *   Nothing.
 */
|
||||
|
||||
import createDebug from 'debug'
|
||||
import {ok as assert} from 'devlop'
|
||||
import {markdownLineEnding} from 'micromark-util-character'
|
||||
import {push, splice} from 'micromark-util-chunked'
|
||||
import {resolveAll} from 'micromark-util-resolve-all'
|
||||
import {codes, values} from 'micromark-util-symbol'
|
||||
|
||||
// Logger created with the `micromark` label; used below to trace the
// tokenizer (codes passed to states, enter/exit, position changes).
const debug = createDebug('micromark')
|
||||
|
||||
/**
 * Create a tokenizer.
 * Tokenizers deal with one type of data (e.g., containers, flow, text).
 * The parser is the object dealing with it all.
 * `initialize` works like other constructs, except that its `tokenize`
 * function is called with `effects` only: it doesn’t receive an `ok` or
 * `nok`.
 * Its `resolveAll`, `resolve`, and `resolveTo` fields (when present) are
 * applied once the final (EOF) chunk has been written.
 * `from` can be given to set the point before the first character, although
 * when further lines are indented, they must be set with `defineSkip`.
 *
 * @param {ParseContext} parser
 *   Parser.
 * @param {InitialConstruct} initialize
 *   Construct.
 * @param {Omit<Point, '_bufferIndex' | '_index'> | undefined} [from]
 *   Point (optional).
 * @returns {TokenizeContext}
 *   Context.
 */
export function createTokenizer(parser, initialize, from) {
  // Current position; `_index` is the index into `chunks` and `_bufferIndex`
  // the index into a string chunk (`-1` when not inside a string chunk).
  /** @type {Point} */
  let point = {
    _bufferIndex: -1,
    _index: 0,
    line: (from && from.line) || 1,
    column: (from && from.column) || 1,
    offset: (from && from.offset) || 0
  }
  // Map of line number to the column at which that line starts (see
  // `defineSkip` and `accountForPotentialSkip`).
  /** @type {Record<string, number>} */
  const columnStart = {}
  // Constructs with a `resolveAll` that were used; passed to `resolveAll`
  // when the stream is done.
  /** @type {Array<Construct>} */
  const resolveAllConstructs = []
  /** @type {Array<Chunk>} */
  let chunks = []
  // Currently open tokens.
  /** @type {Array<Token>} */
  let stack = []
  // Whether the code handed to the current state was consumed (or the state
  // finished through `ok` / `nok`); checked in `go` to catch bugs.
  /** @type {boolean | undefined} */
  let consumed = true

  /**
   * Tools used for tokenizing.
   *
   * @type {Effects}
   */
  const effects = {
    attempt: constructFactory(onsuccessfulconstruct),
    check: constructFactory(onsuccessfulcheck),
    consume,
    enter,
    exit,
    interrupt: constructFactory(onsuccessfulcheck, {interrupt: true})
  }

  /**
   * State and tools for resolving and serializing.
   *
   * @type {TokenizeContext}
   */
  const context = {
    code: codes.eof,
    containerState: {},
    defineSkip,
    events: [],
    now,
    parser,
    previous: codes.eof,
    sliceSerialize,
    sliceStream,
    write
  }

  /**
   * The state function.
   *
   * @type {State | undefined}
   */
  let state = initialize.tokenize.call(context, effects)

  /**
   * Track which character we expect to be consumed, to catch bugs.
   *
   * @type {Code}
   */
  let expectedCode

  if (initialize.resolveAll) {
    resolveAllConstructs.push(initialize)
  }

  return context

  /**
   * Add chunks and run the state machine over them.
   * Returns an empty list until the last chunk is EOF; then returns the
   * resolved events.
   *
   * @type {TokenizeContext['write']}
   */
  function write(slice) {
    chunks = push(chunks, slice)

    main()

    // Exit if we’re not done, resolve might change stuff.
    if (chunks[chunks.length - 1] !== codes.eof) {
      return []
    }

    addResult(initialize, 0)

    // Otherwise, resolve, and exit.
    context.events = resolveAll(resolveAllConstructs, context.events, context)

    return context.events
  }

  //
  // Tools.
  //

  /**
   * Serialize the chunks spanned by `token` to a string.
   *
   * @type {TokenizeContext['sliceSerialize']}
   */
  function sliceSerialize(token, expandTabs) {
    return serializeChunks(sliceStream(token), expandTabs)
  }

  /**
   * Get the chunks spanned by `token`.
   *
   * @type {TokenizeContext['sliceStream']}
   */
  function sliceStream(token) {
    return sliceChunks(chunks, token)
  }

  /**
   * Get a copy of the current point.
   *
   * @type {TokenizeContext['now']}
   */
  function now() {
    // This is a hot path, so we clone manually instead of `Object.assign({}, point)`
    const {_bufferIndex, _index, line, column, offset} = point
    return {_bufferIndex, _index, line, column, offset}
  }

  /**
   * Record the column at which a line starts, and apply it right away if the
   * current point is at the start of that line.
   *
   * @type {TokenizeContext['defineSkip']}
   */
  function defineSkip(value) {
    columnStart[value.line] = value.column
    accountForPotentialSkip()
    debug('position: define skip: `%j`', point)
  }

  //
  // State management.
  //

  /**
   * Main loop (note that `_index` and `_bufferIndex` in `point` are modified by
   * `consume`).
   * Here is where we walk through the chunks, which either include strings of
   * several characters, or numerical character codes.
   * The reason to do this in a loop instead of a call is so the stack can
   * drain.
   *
   * @returns {undefined}
   *   Nothing.
   */
  function main() {
    /** @type {number} */
    let chunkIndex

    while (point._index < chunks.length) {
      const chunk = chunks[point._index]

      // If we’re in a buffer chunk, loop through it.
      if (typeof chunk === 'string') {
        chunkIndex = point._index

        if (point._bufferIndex < 0) {
          point._bufferIndex = 0
        }

        // `consume` (or a restore) may move the point to another chunk, so
        // the chunk index is rechecked on every iteration.
        while (
          point._index === chunkIndex &&
          point._bufferIndex < chunk.length
        ) {
          go(chunk.charCodeAt(point._bufferIndex))
        }
      } else {
        go(chunk)
      }
    }
  }

  /**
   * Deal with one code: pass it to the current state and store the state
   * that is returned.
   *
   * @param {Code} code
   *   Code.
   * @returns {undefined}
   *   Nothing.
   */
  function go(code) {
    assert(consumed === true, 'expected character to be consumed')
    consumed = undefined
    debug('main: passing `%s` to %s', code, state && state.name)
    expectedCode = code
    assert(typeof state === 'function', 'expected state')
    state = state(code)
  }

  /**
   * Move the point past `code`: update line, column, and offset, advance the
   * chunk indices, and expose `code` as `context.previous`.
   *
   * @type {Effects['consume']}
   */
  function consume(code) {
    assert(code === expectedCode, 'expected given code to equal expected code')

    debug('consume: `%s`', code)

    assert(
      consumed === undefined,
      'expected code to not have been consumed: this might be because `return x(code)` instead of `return x` was used'
    )
    assert(
      code === null
        ? context.events.length === 0 ||
            context.events[context.events.length - 1][0] === 'exit'
        : context.events[context.events.length - 1][0] === 'enter',
      'expected last token to be open'
    )

    if (markdownLineEnding(code)) {
      point.line++
      point.column = 1
      // A combined CR+LF code stands for two characters.
      point.offset += code === codes.carriageReturnLineFeed ? 2 : 1
      accountForPotentialSkip()
      debug('position: after eol: `%j`', point)
    } else if (code !== codes.virtualSpace) {
      // Virtual spaces do not move the column or offset.
      point.column++
      point.offset++
    }

    // Not in a string chunk.
    if (point._bufferIndex < 0) {
      point._index++
    } else {
      point._bufferIndex++

      // At end of string chunk.
      if (
        point._bufferIndex ===
        // Points w/ non-negative `_bufferIndex` reference
        // strings.
        /** @type {string} */ (chunks[point._index]).length
      ) {
        point._bufferIndex = -1
        point._index++
      }
    }

    // Expose the previous character.
    context.previous = code

    // Mark as consumed.
    consumed = true
  }

  /**
   * Open a token of `type` at the current point; `fields` (when given) is
   * patched in place and used as the token.
   *
   * @type {Effects['enter']}
   */
  function enter(type, fields) {
    /** @type {Token} */
    // @ts-expect-error Patch instead of assign required fields to help GC.
    const token = fields || {}
    token.type = type
    token.start = now()

    assert(typeof type === 'string', 'expected string type')
    assert(type.length > 0, 'expected non-empty string')
    debug('enter: `%s`', type)

    context.events.push(['enter', token, context])

    stack.push(token)

    return token
  }

  /**
   * Close the most recently opened token, which must be of `type`, at the
   * current point.
   *
   * @type {Effects['exit']}
   */
  function exit(type) {
    assert(typeof type === 'string', 'expected string type')
    assert(type.length > 0, 'expected non-empty string')

    const token = stack.pop()
    assert(token, 'cannot close w/o open tokens')
    token.end = now()

    assert(type === token.type, 'expected exit token to match current token')

    assert(
      !(
        token.start._index === token.end._index &&
        token.start._bufferIndex === token.end._bufferIndex
      ),
      'expected non-empty token (`' + type + '`)'
    )

    debug('exit: `%s`', token.type)
    context.events.push(['exit', token, context])

    return token
  }

  /**
   * Use results.
   *
   * @type {ReturnHandle}
   */
  function onsuccessfulconstruct(construct, info) {
    addResult(construct, info.from)
  }

  /**
   * Discard results.
   *
   * @type {ReturnHandle}
   */
  function onsuccessfulcheck(_, info) {
    info.restore()
  }

  /**
   * Factory to attempt/check/interrupt.
   *
   * @param {ReturnHandle} onreturn
   *   Callback called when a construct succeeds.
   * @param {{interrupt?: boolean | undefined} | undefined} [fields]
   *   Fields to expose (on top of `context`) to the construct’s tokenizer.
   */
  function constructFactory(onreturn, fields) {
    return hook

    /**
     * Handle either an object mapping codes to constructs, a list of
     * constructs, or a single construct.
     *
     * @param {Array<Construct> | ConstructRecord | Construct} constructs
     *   Constructs.
     * @param {State} returnState
     *   State to continue with when a construct succeeds.
     * @param {State | undefined} [bogusState]
     *   State to continue with when no construct succeeds.
     * @returns {State}
     *   State.
     */
    function hook(constructs, returnState, bogusState) {
      /** @type {ReadonlyArray<Construct>} */
      let listOfConstructs
      /** @type {number} */
      let constructIndex
      /** @type {Construct} */
      let currentConstruct
      /** @type {Info} */
      let info

      return Array.isArray(constructs)
        ? /* c8 ignore next 1 */
          handleListOfConstructs(constructs)
        : 'tokenize' in constructs
          ? // Looks like a construct.
            handleListOfConstructs([/** @type {Construct} */ (constructs)])
          : handleMapOfConstructs(constructs)

      /**
       * Handle a map of constructs: the constructs to try are looked up by
       * code when the first code arrives.
       *
       * @param {ConstructRecord} map
       *   Constructs.
       * @returns {State}
       *   State.
       */
      function handleMapOfConstructs(map) {
        return start

        /** @type {State} */
        function start(code) {
          // Constructs registered for this code, followed by those
          // registered under `null`; nothing is looked up at EOF.
          const left = code !== null && map[code]
          const all = code !== null && map.null
          const list = [
            // To do: add more extension tests.
            /* c8 ignore next 2 */
            ...(Array.isArray(left) ? left : left ? [left] : []),
            ...(Array.isArray(all) ? all : all ? [all] : [])
          ]

          return handleListOfConstructs(list)(code)
        }
      }

      /**
       * Handle a list of constructs: try them in order, starting at the
       * first.
       *
       * @param {ReadonlyArray<Construct>} list
       *   Constructs.
       * @returns {State}
       *   State.
       */
      function handleListOfConstructs(list) {
        listOfConstructs = list
        constructIndex = 0

        if (list.length === 0) {
          assert(bogusState, 'expected `bogusState` to be given')
          return bogusState
        }

        return handleConstruct(list[constructIndex])
      }

      /**
       * Handle a single construct.
       *
       * @param {Construct} construct
       *   Construct.
       * @returns {State}
       *   State.
       */
      function handleConstruct(construct) {
        return start

        /** @type {State} */
        function start(code) {
          // To do: not needed to store if there is no bogus state, probably?
          // Currently doesn’t work because `inspect` in document does a check
          // w/o a bogus, which doesn’t make sense. But it does seem to help perf
          // by not storing.
          info = store()
          currentConstruct = construct

          if (!construct.partial) {
            context.currentConstruct = construct
          }

          // Always populated by defaults.
          assert(
            context.parser.constructs.disable.null,
            'expected `disable.null` to be populated'
          )

          // Disabled constructs fail without being tokenized.
          if (
            construct.name &&
            context.parser.constructs.disable.null.includes(construct.name)
          ) {
            return nok(code)
          }

          return construct.tokenize.call(
            // If we do have fields, create an object w/ `context` as its
            // prototype.
            // This allows a “live binding”, which is needed for `interrupt`.
            fields ? Object.assign(Object.create(context), fields) : context,
            effects,
            ok,
            nok
          )(code)
        }
      }

      /**
       * The current construct succeeded: hand it to `onreturn` and continue
       * with `returnState`.
       *
       * @type {State}
       */
      function ok(code) {
        assert(code === expectedCode, 'expected code')
        consumed = true
        onreturn(currentConstruct, info)
        return returnState
      }

      /**
       * The current construct failed: restore the stored state, then try the
       * next construct, or continue with `bogusState` when none are left.
       *
       * @type {State}
       */
      function nok(code) {
        assert(code === expectedCode, 'expected code')
        consumed = true
        info.restore()

        if (++constructIndex < listOfConstructs.length) {
          return handleConstruct(listOfConstructs[constructIndex])
        }

        return bogusState
      }
    }
  }

  /**
   * Register a construct’s `resolveAll` and apply its `resolve` (on the
   * events from `from` on) and `resolveTo` (on all events).
   *
   * @param {Construct} construct
   *   Construct.
   * @param {number} from
   *   Index in `context.events` where the construct’s events start.
   * @returns {undefined}
   *   Nothing.
   */
  function addResult(construct, from) {
    if (construct.resolveAll && !resolveAllConstructs.includes(construct)) {
      resolveAllConstructs.push(construct)
    }

    if (construct.resolve) {
      splice(
        context.events,
        from,
        context.events.length - from,
        construct.resolve(context.events.slice(from), context)
      )
    }

    if (construct.resolveTo) {
      context.events = construct.resolveTo(context.events, context)
    }

    assert(
      construct.partial ||
        context.events.length === 0 ||
        context.events[context.events.length - 1][0] === 'exit',
      'expected last token to end'
    )
  }

  /**
   * Store state: snapshot the point, previous code, current construct,
   * number of events, and open tokens.
   *
   * @returns {Info}
   *   Info.
   */
  function store() {
    const startPoint = now()
    const startPrevious = context.previous
    const startCurrentConstruct = context.currentConstruct
    const startEventsIndex = context.events.length
    const startStack = Array.from(stack)

    return {from: startEventsIndex, restore}

    /**
     * Restore state: go back to the snapshot and drop the events added
     * since.
     *
     * @returns {undefined}
     *   Nothing.
     */
    function restore() {
      point = startPoint
      context.previous = startPrevious
      context.currentConstruct = startCurrentConstruct
      context.events.length = startEventsIndex
      stack = startStack
      accountForPotentialSkip()
      debug('position: restore: `%j`', point)
    }
  }

  /**
   * Move the current point a bit forward in the line when it’s on a column
   * skip.
   *
   * @returns {undefined}
   *   Nothing.
   */
  function accountForPotentialSkip() {
    // Only applies at the very start of a line (column 1) that has a
    // starting column recorded through `defineSkip`.
    if (point.line in columnStart && point.column < 2) {
      point.column = columnStart[point.line]
      point.offset += columnStart[point.line] - 1
    }
  }
}
|
||||
|
||||
/**
 * Get the chunks from a slice of chunks in the range of a token.
 *
 * The token’s `start` and `end` points carry `_index` (index into `chunks`)
 * and `_bufferIndex` (index into a string chunk).
 * `chunks` itself is not modified: a new list is returned.
 *
 * @param {ReadonlyArray<Chunk>} chunks
 *   Chunks.
 * @param {Pick<Token, 'end' | 'start'>} token
 *   Token.
 * @returns {Array<Chunk>}
 *   Chunks.
 */
function sliceChunks(chunks, token) {
  const startIndex = token.start._index
  const startBufferIndex = token.start._bufferIndex
  const endIndex = token.end._index
  const endBufferIndex = token.end._bufferIndex
  /** @type {Array<Chunk>} */
  let view

  if (startIndex === endIndex) {
    // Start and end are in the same chunk: take the part in between.
    assert(endBufferIndex > -1, 'expected non-negative end buffer index')
    assert(startBufferIndex > -1, 'expected non-negative start buffer index')
    // @ts-expect-error `_bufferIndex` is used on string chunks.
    view = [chunks[startIndex].slice(startBufferIndex, endBufferIndex)]
  } else {
    // Whole chunks from the start chunk up to (excluding) the end chunk.
    view = chunks.slice(startIndex, endIndex)

    // Trim what comes before the start point off the first chunk.
    if (startBufferIndex > -1) {
      const head = view[0]
      if (typeof head === 'string') {
        view[0] = head.slice(startBufferIndex)
        /* c8 ignore next 4 -- used to be used, no longer */
      } else {
        assert(startBufferIndex === 0, 'expected `startBufferIndex` to be `0`')
        view.shift()
      }
    }

    // Add the part of the end chunk that comes before the end point.
    if (endBufferIndex > 0) {
      // @ts-expect-error `_bufferIndex` is used on string chunks.
      view.push(chunks[endIndex].slice(0, endBufferIndex))
    }
  }

  return view
}
|
||||
|
||||
/**
 * Get the string value of a slice of chunks.
 *
 * String chunks are used as-is; numeric chunks (character codes) are turned
 * into the text they stand for.
 * A horizontal tab code becomes a tab, or a space when `expandTabs` is on.
 * Virtual space codes become spaces, except that those following a tab are
 * dropped when `expandTabs` is off.
 *
 * @param {ReadonlyArray<Chunk>} chunks
 *   Chunks.
 * @param {boolean | undefined} [expandTabs=false]
 *   Whether to expand tabs (default: `false`).
 * @returns {string}
 *   Result.
 */
function serializeChunks(chunks, expandTabs) {
  let index = -1
  /** @type {Array<string>} */
  const result = []
  /** @type {boolean | undefined} */
  let atTab

  while (++index < chunks.length) {
    const chunk = chunks[index]
    /** @type {string} */
    let value

    if (typeof chunk === 'string') {
      value = chunk
    } else {
      switch (chunk) {
        case codes.carriageReturn: {
          value = values.cr

          break
        }

        case codes.lineFeed: {
          value = values.lf

          break
        }

        case codes.carriageReturnLineFeed: {
          value = values.cr + values.lf

          break
        }

        case codes.horizontalTab: {
          value = expandTabs ? values.space : values.ht

          break
        }

        case codes.virtualSpace: {
          // Skipping leaves `atTab` as is, so every virtual space in the
          // run after a tab is dropped.
          if (!expandTabs && atTab) continue
          value = values.space

          break
        }

        default: {
          assert(typeof chunk === 'number', 'expected number')
          // Currently only replacement character.
          value = String.fromCharCode(chunk)
        }
      }
    }

    atTab = chunk === codes.horizontalTab
    result.push(value)
  }

  return result.join('')
}
|
||||
Reference in New Issue
Block a user