// RTFM: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
var Uni = require('./unicode')
|
|
|
|
/**
 * Check whether `x` is a hexadecimal digit (0-9, A-F, a-f).
 *
 * The parser calls this with single characters taken from the input, e.g.
 * `input[position]`, which is `undefined` past the end of the input; every
 * comparison is then false, so `undefined` is reported as "not a digit".
 *
 * @param {string|undefined} x - character to test
 * @returns {boolean} true when `x` falls into one of the three ranges
 */
function isHexDigit(x) {
  return (x >= '0' && x <= '9')
      || (x >= 'A' && x <= 'F')
      || (x >= 'a' && x <= 'f')
}
|
|
|
|
/**
 * Check whether `x` is an octal digit (0-7).
 *
 * `undefined` (reading past the end of the input) compares false on both
 * sides and is therefore reported as "not a digit".
 *
 * @param {string|undefined} x - character to test
 * @returns {boolean} true when `x` is between '0' and '7' inclusive
 */
function isOctDigit(x) {
  return x >= '0' && x <= '7'
}
|
|
|
|
/**
 * Check whether `x` is a decimal digit (0-9).
 *
 * `undefined` (reading past the end of the input) compares false on both
 * sides and is therefore reported as "not a digit".
 *
 * @param {string|undefined} x - character to test
 * @returns {boolean} true when `x` is between '0' and '9' inclusive
 */
function isDecDigit(x) {
  return x >= '0' && x <= '9'
}
|
|
|
|
// Single-character escapes recognised after a backslash inside a string
// literal: the key is the character following the backslash, the value is
// the character it stands for. The string parser refuses the `v` and `'`
// entries unless it runs in json5 mode.
var unescapeMap = {
  '\'': '\'',
  '"' : '"',
  '\\': '\\',
  'b' : '\b',
  'f' : '\f',
  'n' : '\n',
  'r' : '\r',
  't' : '\t',
  'v' : '\v',
  '/' : '/',
}
|
|
|
|
/**
 * Build a multi-line error message: the message with a 1-based
 * "line:column" suffix, an excerpt of the offending source line, and a
 * line with a `^` marker under the offending character.
 *
 * @param {string} input - complete text being parsed
 * @param {string} msg - human readable description of the problem
 * @param {number} position - 0-based offset of the offending character
 * @param {number} lineno - 0-based line number of `position`
 * @param {number} column - 0-based column of `position` within its line
 * @param {boolean} json5 - selects `Uni.isLineTerminator` (true) or
 *   `Uni.isLineTerminatorJSON` (false) to find the end of the line
 * @returns {string} `msg at L:C`, the source excerpt and the marker line,
 *   joined with newlines
 */
function formatError(input, msg, position, lineno, column, json5) {
  var result = msg + ' at ' + (lineno + 1) + ':' + (column + 1)
    , tmppos = position - column - 1   // one before the first char of the line
    , srcline = ''
    , underline = ''

  var isLineTerminator = json5 ? Uni.isLineTerminator : Uni.isLineTerminatorJSON

  // output no more than 70 characters before the wrong ones
  if (tmppos < position - 70) {
    tmppos = position - 70
  }

  while (true) {
    var chr = input[++tmppos]

    // `>=` rather than `===`: if the scan starts beyond the end of the input
    // we must stop at once instead of appending "undefined" to the excerpt
    if (isLineTerminator(chr) || tmppos >= input.length) {
      if (position >= tmppos) {
        // ending line error, so show it after the last char
        underline += '^'
      }
      break
    }
    srcline += chr

    if (position === tmppos) {
      underline += '^'
    } else if (position > tmppos) {
      // keep tabs so the marker lines up with the excerpt above it
      underline += input[tmppos] === '\t' ? '\t' : ' '
    }

    // output no more than 78 characters on the string
    if (srcline.length > 78) break
  }

  return result + '\n' + srcline + '\n' + underline
}
|
|
|
|
/**
 * Recursive-descent parser for JSON, CJSON (JSON plus comments) and JSON5.
 *
 * @param {string} input - text to parse
 * @param {object} options - must be an object; the fields read here are:
 *   - `legacy` / `mode`: `legacy` or `mode === 'json'` selects strict JSON,
 *     `mode === 'cjson'` strict JSON plus comments, anything else JSON5
 *   - `reviver`: function called as `reviver(key, value)` for every object
 *     member, array item (key is the index as a string) and finally for the
 *     root value (key `''`); returning `undefined` drops the member/item
 *   - `reserved_keys`: `'replace'` stores keys that exist on a plain object
 *     (e.g. `constructor`), `'throw'` rejects them, any other value silently
 *     skips them
 *   - `null_prototype`: create objects with `Object.create(null)`
 *   - `_tokenize`: internal callback receiving every token as
 *     `{raw, type, stack, value?}`
 * @returns {*} the parsed value
 * @throws {SyntaxError} on malformed input; the error carries 1-based `row`
 *   and `column` properties
 */
function parse(input, options) {
  // parse as a standard JSON mode
  var json5 = false
  var cjson = false

  if (options.legacy || options.mode === 'json') {
    // use json
  } else if (options.mode === 'cjson') {
    cjson = true
  } else if (options.mode === 'json5') {
    json5 = true
  } else {
    // use it by default
    json5 = true
  }

  var isLineTerminator = json5 ? Uni.isLineTerminator : Uni.isLineTerminatorJSON
  var isWhiteSpace     = json5 ? Uni.isWhiteSpace     : Uni.isWhiteSpaceJSON

  var length    = input.length
    , lineno    = 0   // 0-based number of the current line
    , linestart = 0   // offset of the first char of the current line
    , position  = 0   // offset of the next char to read
    , stack     = []  // keys/indices leading to the value being parsed

  // no-op token hooks, replaced below when a tokenizer callback is supplied
  var tokenStart = function() {}
  var tokenEnd   = function(v) {return v}

  /* options._tokenize({
       raw: '...',
       type: 'whitespace'|'comment'|'key'|'literal'|'separator'|'newline',
       value: 'number'|'string'|'whatever',   // only when there is a value
       stack: [...],                          // copy of the current key path
     })
  */
  if (options._tokenize) {
    ;(function() {
      var start = null

      tokenStart = function() {
        if (start !== null) throw Error('internal error, token overlap')
        start = position
      }

      tokenEnd = function(v, type) {
        // empty tokens (nothing consumed since tokenStart) are not reported
        if (start != position) {
          var hash = {
            raw: input.slice(start, position),
            type: type,
            stack: stack.slice(0),
          }
          if (v !== undefined) hash.value = v
          options._tokenize.call(null, hash)
        }
        start = null
        return v
      }
    })()
  }

  // Throw a SyntaxError pointing at the current position. Without `msg` the
  // message names the unexpected character (or the end of input).
  function fail(msg) {
    var column = position - linestart

    if (!msg) {
      if (position < length) {
        // JSON.stringify escapes control chars; then swap the quoting from
        // "..." to '...'
        var token = '\'' +
          JSON
            .stringify(input[position])
            .replace(/^"|"$/g, '')
            .replace(/'/g, "\\'")
            .replace(/\\"/g, '"')
          + '\''

        msg = 'Unexpected token ' + token
      } else {
        msg = 'Unexpected end of input'
      }
    }

    var error = new SyntaxError(formatError(input, msg, position, lineno, column, json5))
    error.row = lineno + 1
    error.column = column + 1
    throw error
  }

  // Register a line break; `chr` is the terminator that was just consumed.
  function newline(chr) {
    // account for <cr><lf>
    if (chr === '\r' && input[position] === '\n') position++
    linestart = position
    lineno++
  }

  // Parse any value. Returns `undefined` (consuming nothing) when the next
  // character cannot start a value.
  function parseGeneric() {
    while (position < length) {
      tokenStart()
      var chr = input[position++]

      if (chr === '"' || (chr === '\'' && json5)) {
        return tokenEnd(parseString(chr), 'literal')

      } else if (chr === '{') {
        tokenEnd(undefined, 'separator')
        return parseObject()

      } else if (chr === '[') {
        tokenEnd(undefined, 'separator')
        return parseArray()

      } else if (chr === '-'
             ||  chr === '.'
             ||  isDecDigit(chr)
                 //           + number       Infinity          NaN
             ||  (json5 && (chr === '+' || chr === 'I' || chr === 'N'))
      ) {
        return tokenEnd(parseNumber(), 'literal')

      } else if (chr === 'n') {
        parseKeyword('null')
        return tokenEnd(null, 'literal')

      } else if (chr === 't') {
        parseKeyword('true')
        return tokenEnd(true, 'literal')

      } else if (chr === 'f') {
        parseKeyword('false')
        return tokenEnd(false, 'literal')

      } else {
        position--
        return tokenEnd(undefined)
      }
    }
  }

  // Parse an object key. Returns `undefined` (consuming nothing) when the
  // next character cannot start a key.
  function parseKey() {
    while (position < length) {
      tokenStart()
      var chr = input[position++]

      if (chr === '"' || (chr === '\'' && json5)) {
        return tokenEnd(parseString(chr), 'key')

      } else if (chr === '{') {
        tokenEnd(undefined, 'separator')
        return parseObject()

      } else if (chr === '[') {
        tokenEnd(undefined, 'separator')
        return parseArray()

      } else if (chr === '.'
             ||  isDecDigit(chr)
      ) {
        return tokenEnd(parseNumber(), 'key')

      } else if (json5
             &&  (Uni.isIdentifierStart(chr) || (chr === '\\' && input[position] === 'u'))) {
        // unicode char or a unicode sequence; the parentheses matter: bare
        // identifiers (escaped or not) are a json5-only feature
        var rollback = position - 1
        var result = parseIdentifier()

        if (result === undefined) {
          position = rollback
          return tokenEnd(undefined)
        } else {
          return tokenEnd(result, 'key')
        }

      } else {
        position--
        return tokenEnd(undefined)
      }
    }
  }

  // Skip whitespace, line breaks and (json5/cjson only) comments, reporting
  // each of them as a separate token.
  function skipWhiteSpace() {
    tokenStart()
    while (position < length) {
      var chr = input[position++]

      if (isLineTerminator(chr)) {
        // close the whitespace token before the terminator, then emit the
        // terminator itself as a 'newline' token
        position--
        tokenEnd(undefined, 'whitespace')
        tokenStart()
        position++
        newline(chr)
        tokenEnd(undefined, 'newline')
        tokenStart()

      } else if (isWhiteSpace(chr)) {
        // nothing

      } else if (chr === '/'
             && (json5 || cjson)
             && (input[position] === '/' || input[position] === '*')
      ) {
        position--
        tokenEnd(undefined, 'whitespace')
        tokenStart()
        position++
        skipComment(input[position++] === '*')
        tokenEnd(undefined, 'comment')
        tokenStart()

      } else {
        position--
        break
      }
    }
    return tokenEnd(undefined, 'whitespace')
  }

  // Consume the rest of a comment; `multi` is true for /* ... */ comments.
  function skipComment(multi) {
    while (position < length) {
      var chr = input[position++]

      if (isLineTerminator(chr)) {
        // LineTerminator is an end of singleline comment
        if (!multi) {
          // let parent function deal with newline
          position--
          return
        }

        newline(chr)

      } else if (chr === '*' && multi) {
        // end of multiline comment
        if (input[position] === '/') {
          position++
          return
        }

      } else {
        // nothing
      }
    }

    if (multi) {
      fail('Unclosed multiline comment')
    }
  }

  // Consume the remaining characters of `keyword` or fail at its first char.
  function parseKeyword(keyword) {
    // keyword[0] is not checked because it should've checked earlier
    var _pos = position
    var len = keyword.length
    for (var i=1; i<len; i++) {
      if (position >= length || keyword[i] != input[position]) {
        position = _pos-1
        fail()
      }
      position++
    }
  }

  // Parse an object body; the opening '{' has already been consumed.
  function parseObject() {
    var result = options.null_prototype ? Object.create(null) : {}
      , empty_object = {}     // used to detect keys like "constructor"
      , is_non_empty = false  // true once any `key: value` pair was read
      , chr

    while (position < length) {
      skipWhiteSpace()
      var item1 = parseKey()
      skipWhiteSpace()
      tokenStart()
      chr = input[position++]
      tokenEnd(undefined, 'separator')

      if (chr === '}' && item1 === undefined) {
        if (!json5 && is_non_empty) {
          position--
          fail('Trailing comma in object')
        }
        return result

      } else if (chr === ':' && item1 !== undefined) {
        skipWhiteSpace()
        stack.push(item1)
        var item2 = parseGeneric()
        stack.pop()

        if (item2 === undefined) fail('No value found for key ' + item1)
        if (typeof(item1) !== 'string') {
          if (!json5 || typeof(item1) !== 'number') {
            fail('Wrong key type: ' + item1)
          }
        }

        // Record the member as soon as it is syntactically complete, so a
        // trailing comma is detected in strict modes even when the member
        // is skipped below (reserved key, or dropped by the reviver).
        is_non_empty = true

        if ((item1 in empty_object || empty_object[item1] != null) && options.reserved_keys !== 'replace') {
          if (options.reserved_keys === 'throw') {
            fail('Reserved key: ' + item1)
          } else {
            // silently ignore it
          }
        } else {
          if (typeof(options.reviver) === 'function') {
            item2 = options.reviver.call(null, item1, item2)
          }

          if (item2 !== undefined) {
            // defineProperty instead of assignment, so that keys such as
            // "__proto__" become plain own data properties
            Object.defineProperty(result, item1, {
              value: item2,
              enumerable: true,
              configurable: true,
              writable: true,
            })
          }
        }

        skipWhiteSpace()

        tokenStart()
        chr = input[position++]
        tokenEnd(undefined, 'separator')

        if (chr === ',') {
          continue

        } else if (chr === '}') {
          return result

        } else {
          fail()
        }

      } else {
        position--
        fail()
      }
    }

    fail()
  }

  // Parse an array body; the opening '[' has already been consumed.
  function parseArray() {
    var result = []

    while (position < length) {
      skipWhiteSpace()
      stack.push(result.length)
      var item = parseGeneric()
      stack.pop()
      skipWhiteSpace()
      tokenStart()
      var chr = input[position++]
      tokenEnd(undefined, 'separator')

      if (item !== undefined) {
        if (typeof(options.reviver) === 'function') {
          item = options.reviver.call(null, String(result.length), item)
        }
        if (item === undefined) {
          // the reviver dropped the item: leave a hole in the array
          result.length++
          item = true // hack for check below, not included into result
        } else {
          result.push(item)
        }
      }

      if (chr === ',') {
        if (item === undefined) {
          fail('Elisions are not supported')
        }

      } else if (chr === ']') {
        if (!json5 && item === undefined && result.length) {
          position--
          fail('Trailing comma in array')
        }
        return result

      } else {
        position--
        fail()
      }
    }

    // input ended before the closing ']' (mirrors parseObject)
    fail()
  }

  // Parse a numeric literal; its first character has already been consumed.
  function parseNumber() {
    // rewind because we don't know first char
    position--

    var start = position
      , chr = input[position++]

    // Convert input[start..position) to a number and validate it.
    var to_num = function(is_octal) {
      var str = input.slice(start, position)
      var result

      if (is_octal) {
        // strip "0" / "0o" / "0O"; the prefix is detected case-insensitively
        // below, so it has to be removed case-insensitively as well
        result = parseInt(str.replace(/^0o?/i, ''), 8)
      } else {
        result = Number(str)
      }

      if (Number.isNaN(result)) {
        position--
        fail('Bad numeric literal - "' + input.slice(start, position + 1) + '"')
      } else if (!json5 && !str.match(/^-?(0|[1-9][0-9]*)(\.[0-9]+)?(e[+-]?[0-9]+)?$/i)) {
        // additional restrictions imposed by json
        position--
        fail('Non-json numeric literal - "' + input.slice(start, position + 1) + '"')
      } else {
        return result
      }
    }

    // ex: -5982475.249875e+29384
    //     ^ skipping this
    if (chr === '-' || (chr === '+' && json5)) chr = input[position++]

    if (chr === 'N' && json5) {
      parseKeyword('NaN')
      return NaN
    }

    if (chr === 'I' && json5) {
      parseKeyword('Infinity')

      // returning +inf or -inf
      return to_num()
    }

    if (chr >= '1' && chr <= '9') {
      // ex: -5982475.249875e+29384
      //      ^^^^^^^ skipping these
      while (position < length && isDecDigit(input[position])) position++
      chr = input[position++]
    }

    // special case for leading zero: 0.123456
    if (chr === '0') {
      chr = input[position++]

      //             new syntax, "0o777"           old syntax, "0777"
      var is_octal = chr === 'o' || chr === 'O' || isOctDigit(chr)
      var is_hex = chr === 'x' || chr === 'X'

      if (json5 && (is_octal || is_hex)) {
        while (position < length
           &&  (is_hex ? isHexDigit : isOctDigit)( input[position] )
        ) position++

        // the sign is applied by hand: the conversion in to_num only sees
        // the unsigned "0x..." / "0o..." part
        var sign = 1
        if (input[start] === '-') {
          sign = -1
          start++
        } else if (input[start] === '+') {
          start++
        }

        return sign * to_num(is_octal)
      }
    }

    if (chr === '.') {
      // ex: -5982475.249875e+29384
      //              ^^^^^^ skipping these
      while (position < length && isDecDigit(input[position])) position++
      chr = input[position++]
    }

    if (chr === 'e' || chr === 'E') {
      chr = input[position++]
      if (chr === '-' || chr === '+') position++
      // ex: -5982475.249875e+29384
      //                      ^^^^^ skipping these
      while (position < length && isDecDigit(input[position])) position++
      chr = input[position++]
    }

    // we have char in the buffer, so count for it
    position--
    return to_num()
  }

  // Parse a bare (unquoted) key; its first character has already been
  // consumed. Returns `undefined` when it does not start an identifier.
  function parseIdentifier() {
    // rewind because we don't know first char
    position--

    var result = ''

    while (position < length) {
      var chr = input[position++]

      if (chr === '\\'
      &&  input[position] === 'u'
      &&  isHexDigit(input[position+1])
      &&  isHexDigit(input[position+2])
      &&  isHexDigit(input[position+3])
      &&  isHexDigit(input[position+4])
      ) {
        // UnicodeEscapeSequence
        chr = String.fromCharCode(parseInt(input.slice(position+1, position+5), 16))
        position += 5
      }

      if (result.length) {
        // identifier started
        if (Uni.isIdentifierPart(chr)) {
          result += chr
        } else {
          position--
          return result
        }

      } else {
        if (Uni.isIdentifierStart(chr)) {
          result += chr
        } else {
          return undefined
        }
      }
    }

    fail()
  }

  // Parse a string literal; the opening quote `endChar` has already been
  // consumed.
  function parseString(endChar) {
    // 7.8.4 of ES262 spec
    var result = ''

    while (position < length) {
      var chr = input[position++]

      if (chr === endChar) {
        return result

      } else if (chr === '\\') {
        if (position >= length) fail()
        chr = input[position++]

        // \v and \' are not valid escapes in strict json
        if (unescapeMap[chr] && (json5 || (chr !== 'v' && chr !== "'"))) {
          result += unescapeMap[chr]

        } else if (json5 && isLineTerminator(chr)) {
          // line continuation
          newline(chr)

        } else if (chr === 'u' || (chr === 'x' && json5)) {
          // unicode/character escape sequence
          var off = chr === 'u' ? 4 : 2

          // validation for \uXXXX
          for (var i=0; i<off; i++) {
            if (position >= length) fail()
            if (!isHexDigit(input[position])) fail('Bad escape sequence')
            position++
          }

          result += String.fromCharCode(parseInt(input.slice(position-off, position), 16))

        } else if (json5 && isOctDigit(chr)) {
          var digits
          if (chr < '4' && isOctDigit(input[position]) && isOctDigit(input[position+1])) {
            // three-digit octal
            digits = 3
          } else if (isOctDigit(input[position])) {
            // two-digit octal
            digits = 2
          } else {
            digits = 1
          }
          // the first digit is already consumed
          position += digits - 1
          result += String.fromCharCode(parseInt(input.slice(position-digits, position), 8))

        } else if (json5) {
          // \X -> x
          result += chr

        } else {
          position--
          fail()
        }

      } else if (isLineTerminator(chr)) {
        fail()

      } else {
        if (!json5 && chr.charCodeAt(0) < 32) {
          position--
          fail('Unexpected control character')
        }

        // SourceCharacter but not one of " or \ or LineTerminator
        result += chr
      }
    }

    fail()
  }

  skipWhiteSpace()
  var return_value = parseGeneric()
  if (return_value !== undefined || position < length) {
    skipWhiteSpace()

    if (position >= length) {
      if (typeof(options.reviver) === 'function') {
        return_value = options.reviver.call(null, '', return_value)
      }
      return return_value
    } else {
      // something is left after the root value
      fail()
    }

  } else {
    if (position) {
      fail('No data, only a whitespace')
    } else {
      fail('No data, empty input')
    }
  }
}
|
|
|
|
/*
|
|
* parse(text, options)
|
|
* or
|
|
* parse(text, reviver)
|
|
*
|
|
* where:
|
|
* text - string
|
|
* options - object
|
|
* reviver - function
|
|
*/
|
|
/**
 * Public entry point: parse `input` and return the resulting value.
 *
 *   parse(text, options)
 *    or
 *   parse(text, reviver)
 *
 * @param {*} input - text to parse; non-strings are converted with
 *   `String()`, `undefined` is returned unchanged
 * @param {object|function} [options] - options object, or a reviver function
 *   (shorthand for `{reviver: fn}`). The object is never modified: defaults
 *   are filled in on a shallow copy.
 * @returns {*} the parsed value
 * @throws {SyntaxError} on malformed input, with `row` and `column` set
 */
module.exports.parse = function parseJSON(input, options) {
  // support legacy functions
  if (typeof(options) === 'function') {
    options = {
      reviver: options
    }
  }

  if (input === undefined) {
    // parse(stringify(x)) should be equal x
    // with JSON functions it is not 'cause of undefined
    // so we're fixing it
    return undefined
  }

  // JSON.parse compat
  if (typeof(input) !== 'string') input = String(input)

  // Fill in defaults on a private copy: writing them onto the caller's
  // object would leak into later calls that reuse it (e.g. `null_prototype`
  // sticking after `reserved_keys` has been changed). A null/undefined
  // `options` simply yields an empty object here.
  options = Object.assign({}, options)
  if (options.reserved_keys == null) options.reserved_keys = 'ignore'

  if (options.reserved_keys === 'throw' || options.reserved_keys === 'ignore') {
    if (options.null_prototype == null) {
      options.null_prototype = true
    }
  }

  try {
    return parse(input, options)
  } catch(err) {
    // jju is a recursive parser, so JSON.parse("{{{{{{{") could blow up the stack
    //
    // this catch is used to skip all those internal calls
    if (err instanceof SyntaxError && err.row != null && err.column != null) {
      var old_err = err
      err = new SyntaxError(old_err.message)
      err.column = old_err.column
      err.row = old_err.row
    }
    throw err
  }
}
|
|
|
|
/**
 * Parse `input` and return the list of tokens that make it up.
 *
 * @param {*} input - text to tokenize, passed to `module.exports.parse`
 * @param {object|function} [options] - same options as for `parse`, or a
 *   reviver function. If `options._addstack` is an array, its items are
 *   prepended to the `stack` of every token. The object is never modified.
 * @returns {Array} array of token objects as delivered to the `_tokenize`
 *   callback, with the parsed value attached as the `data` property
 * @throws {SyntaxError} on malformed input
 */
module.exports.tokenize = function tokenizeJSON(input, options) {
  // accept a bare reviver the same way parse() does; it has to be wrapped
  // here because the token callback below must live on an options object
  if (typeof(options) === 'function') {
    options = {
      reviver: options
    }
  }

  // Install the callback on a private copy, so that the caller's object
  // does not stay in tokenize mode when it is reused for a later parse().
  options = Object.assign({}, options)

  var tokens = []

  options._tokenize = function(smth) {
    if (options._addstack) smth.stack.unshift.apply(smth.stack, options._addstack)
    tokens.push(smth)
  }

  tokens.data = module.exports.parse(input, options)
  return tokens
}
|
|
|