d2parser: Support reading utf16 files
This commit is contained in:
parent
339c0b3d81
commit
b81da1ee62
7 changed files with 212 additions and 41 deletions
|
|
@ -1,7 +1,6 @@
|
||||||
package d2ir
|
package d2ir
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
|
|
@ -99,7 +98,7 @@ func (c *compiler) __import(imp *d2ast.Import) (*Map, bool) {
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
|
|
||||||
ast, err := d2parser.Parse(impPath, bufio.NewReader(f), &d2parser.ParseOptions{
|
ast, err := d2parser.Parse(impPath, f, &d2parser.ParseOptions{
|
||||||
UTF16: c.utf16,
|
UTF16: c.utf16,
|
||||||
ParseError: c.err,
|
ParseError: c.err,
|
||||||
})
|
})
|
||||||
|
|
|
||||||
1
d2parser/.gitignore
vendored
Normal file
1
d2parser/.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
utf16.d2
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package d2parser
|
package d2parser
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"math/big"
|
"math/big"
|
||||||
|
|
@ -9,13 +10,22 @@ import (
|
||||||
"unicode"
|
"unicode"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
"oss.terrastruct.com/util-go/go2"
|
tunicode "golang.org/x/text/encoding/unicode"
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
|
||||||
"oss.terrastruct.com/d2/d2ast"
|
"oss.terrastruct.com/d2/d2ast"
|
||||||
|
"oss.terrastruct.com/util-go/go2"
|
||||||
)
|
)
|
||||||
|
|
||||||
type ParseOptions struct {
|
type ParseOptions struct {
|
||||||
UTF16 bool
|
// UTF16Pos is meant for input received from a browser: the browser sends the text as UTF-8, but
|
||||||
|
// JavaScript keeps strings in memory as UTF-16 and so needs UTF-16 indexes into the text to line up errors correctly.
|
||||||
|
// The input is still read as UTF-8, but positions are recorded in UTF-16 code units as if the input were UTF-16.
|
||||||
|
UTF16Pos bool
|
||||||
|
|
||||||
|
// UTF16Input makes the parser read the input as UTF16 and also sets UTF16Pos.
|
||||||
|
UTF16Input bool
|
||||||
|
|
||||||
ParseError *ParseError
|
ParseError *ParseError
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -27,23 +37,31 @@ type ParseOptions struct {
|
||||||
// The map may be compiled via Compile even if there are errors to keep language tooling
|
// The map may be compiled via Compile even if there are errors to keep language tooling
|
||||||
// operational. Though autoformat should not run.
|
// operational. Though autoformat should not run.
|
||||||
//
|
//
|
||||||
// If UTF16Mode is true, positions will be recorded in UTF-16 codeunits as required by LSP
|
// If UTF16Pos is true, positions will be recorded in UTF-16 codeunits as required by LSP
|
||||||
// and browser clients. See
|
// and browser clients. See
|
||||||
// https://microsoft.github.io/language-server-protocol/specifications/specification-current/#textDocuments
|
// https://microsoft.github.io/language-server-protocol/specifications/specification-current/#textDocuments
|
||||||
// TODO: update godocs
|
// TODO: update godocs
|
||||||
func Parse(path string, r io.RuneReader, opts *ParseOptions) (*d2ast.Map, error) {
|
func Parse(path string, r io.Reader, opts *ParseOptions) (*d2ast.Map, error) {
|
||||||
if opts == nil {
|
if opts == nil {
|
||||||
opts = &ParseOptions{
|
opts = &ParseOptions{
|
||||||
UTF16: false,
|
UTF16Pos: false,
|
||||||
|
UTF16Input: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
p := &parser{
|
p := &parser{
|
||||||
path: path,
|
path: path,
|
||||||
reader: r,
|
|
||||||
|
|
||||||
utf16: opts.UTF16,
|
utf16Input: opts.UTF16Input,
|
||||||
err: opts.ParseError,
|
utf16Pos: opts.UTF16Pos,
|
||||||
|
err: opts.ParseError,
|
||||||
|
}
|
||||||
|
if p.utf16Input {
|
||||||
|
p.utf16Pos = true
|
||||||
|
tr := transform.NewReader(r, tunicode.UTF16(tunicode.LittleEndian, tunicode.UseBOM).NewDecoder())
|
||||||
|
p.reader = bufio.NewReader(tr)
|
||||||
|
} else {
|
||||||
|
p.reader = bufio.NewReader(r)
|
||||||
}
|
}
|
||||||
if p.err == nil {
|
if p.err == nil {
|
||||||
p.err = &ParseError{}
|
p.err = &ParseError{}
|
||||||
|
|
@ -113,9 +131,10 @@ func ParseValue(value string) (d2ast.Value, error) {
|
||||||
//
|
//
|
||||||
// TODO: ast struct that combines map & errors and pass that around
|
// TODO: ast struct that combines map & errors and pass that around
|
||||||
type parser struct {
|
type parser struct {
|
||||||
path string
|
path string
|
||||||
pos d2ast.Position
|
pos d2ast.Position
|
||||||
utf16 bool
|
utf16Pos bool
|
||||||
|
utf16Input bool
|
||||||
|
|
||||||
reader io.RuneReader
|
reader io.RuneReader
|
||||||
readerPos d2ast.Position
|
readerPos d2ast.Position
|
||||||
|
|
@ -193,7 +212,10 @@ func (p *parser) _readRune() (r rune, eof bool) {
|
||||||
|
|
||||||
p.readerPos = p.lookaheadPos
|
p.readerPos = p.lookaheadPos
|
||||||
|
|
||||||
r, _, err := p.reader.ReadRune()
|
r, n, err := p.reader.ReadRune()
|
||||||
|
if p.utf16Input && n > 0 {
|
||||||
|
// TODO:
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
p.ioerr = true
|
p.ioerr = true
|
||||||
if err != io.EOF {
|
if err != io.EOF {
|
||||||
|
|
@ -217,13 +239,13 @@ func (p *parser) read() (r rune, eof bool) {
|
||||||
if eof {
|
if eof {
|
||||||
return 0, true
|
return 0, true
|
||||||
}
|
}
|
||||||
p.pos = p.pos.Advance(r, p.utf16)
|
p.pos = p.pos.Advance(r, p.utf16Pos)
|
||||||
p.lookaheadPos = p.pos
|
p.lookaheadPos = p.pos
|
||||||
return r, false
|
return r, false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *parser) replay(r rune) {
|
func (p *parser) replay(r rune) {
|
||||||
p.pos = p.pos.Subtract(r, p.utf16)
|
p.pos = p.pos.Subtract(r, p.utf16Pos)
|
||||||
|
|
||||||
// This is more complex than it needs to be to allow reusing the buffer underlying
|
// This is more complex than it needs to be to allow reusing the buffer underlying
|
||||||
// p.lookahead.
|
// p.lookahead.
|
||||||
|
|
@ -250,7 +272,7 @@ func (p *parser) peek() (r rune, eof bool) {
|
||||||
}
|
}
|
||||||
|
|
||||||
p.lookahead = append(p.lookahead, r)
|
p.lookahead = append(p.lookahead, r)
|
||||||
p.lookaheadPos = p.lookaheadPos.Advance(r, p.utf16)
|
p.lookaheadPos = p.lookaheadPos.Advance(r, p.utf16Pos)
|
||||||
return r, false
|
return r, false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -364,7 +386,7 @@ func (p *parser) parseMap(isFileMap bool) *d2ast.Map {
|
||||||
defer m.Range.End.From(&p.pos)
|
defer m.Range.End.From(&p.pos)
|
||||||
|
|
||||||
if !isFileMap {
|
if !isFileMap {
|
||||||
m.Range.Start = m.Range.Start.Subtract('{', p.utf16)
|
m.Range.Start = m.Range.Start.Subtract('{', p.utf16Pos)
|
||||||
p.depth++
|
p.depth++
|
||||||
defer dec(&p.depth)
|
defer dec(&p.depth)
|
||||||
}
|
}
|
||||||
|
|
@ -383,7 +405,7 @@ func (p *parser) parseMap(isFileMap bool) *d2ast.Map {
|
||||||
continue
|
continue
|
||||||
case '}':
|
case '}':
|
||||||
if isFileMap {
|
if isFileMap {
|
||||||
p.errorf(p.pos.Subtract(r, p.utf16), p.pos, "unexpected map termination character } in file map")
|
p.errorf(p.pos.Subtract(r, p.utf16Pos), p.pos, "unexpected map termination character } in file map")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
return m
|
return m
|
||||||
|
|
@ -489,7 +511,7 @@ func (p *parser) parseComment() *d2ast.Comment {
|
||||||
c := &d2ast.Comment{
|
c := &d2ast.Comment{
|
||||||
Range: d2ast.Range{
|
Range: d2ast.Range{
|
||||||
Path: p.path,
|
Path: p.path,
|
||||||
Start: p.pos.Subtract('#', p.utf16),
|
Start: p.pos.Subtract('#', p.utf16Pos),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
defer c.Range.End.From(&p.pos)
|
defer c.Range.End.From(&p.pos)
|
||||||
|
|
@ -546,7 +568,7 @@ func (p *parser) parseBlockComment() *d2ast.BlockComment {
|
||||||
bc := &d2ast.BlockComment{
|
bc := &d2ast.BlockComment{
|
||||||
Range: d2ast.Range{
|
Range: d2ast.Range{
|
||||||
Path: p.path,
|
Path: p.path,
|
||||||
Start: p.pos.SubtractString(`"""`, p.utf16),
|
Start: p.pos.SubtractString(`"""`, p.utf16Pos),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
defer bc.Range.End.From(&p.pos)
|
defer bc.Range.End.From(&p.pos)
|
||||||
|
|
@ -714,7 +736,7 @@ func (p *parser) parseMapKeyValue(mk *d2ast.Key) {
|
||||||
}
|
}
|
||||||
mk.Value = p.parseValue()
|
mk.Value = p.parseValue()
|
||||||
if mk.Value.Unbox() == nil {
|
if mk.Value.Unbox() == nil {
|
||||||
p.errorf(p.pos.Subtract(':', p.utf16), p.pos, "missing value after colon")
|
p.errorf(p.pos.Subtract(':', p.utf16Pos), p.pos, "missing value after colon")
|
||||||
}
|
}
|
||||||
|
|
||||||
sb := mk.Value.ScalarBox()
|
sb := mk.Value.ScalarBox()
|
||||||
|
|
@ -788,7 +810,7 @@ func (p *parser) parseEdgeIndex() *d2ast.EdgeIndex {
|
||||||
ei := &d2ast.EdgeIndex{
|
ei := &d2ast.EdgeIndex{
|
||||||
Range: d2ast.Range{
|
Range: d2ast.Range{
|
||||||
Path: p.path,
|
Path: p.path,
|
||||||
Start: p.pos.Subtract('[', p.utf16),
|
Start: p.pos.Subtract('[', p.utf16Pos),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
defer ei.Range.End.From(&p.pos)
|
defer ei.Range.End.From(&p.pos)
|
||||||
|
|
@ -816,7 +838,7 @@ func (p *parser) parseEdgeIndex() *d2ast.EdgeIndex {
|
||||||
}
|
}
|
||||||
p.commit()
|
p.commit()
|
||||||
if !unicode.IsDigit(r) {
|
if !unicode.IsDigit(r) {
|
||||||
p.errorf(p.pos.Subtract(r, p.utf16), p.pos, "unexpected character in edge index")
|
p.errorf(p.pos.Subtract(r, p.utf16Pos), p.pos, "unexpected character in edge index")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
sb.WriteRune(r)
|
sb.WriteRune(r)
|
||||||
|
|
@ -827,7 +849,7 @@ func (p *parser) parseEdgeIndex() *d2ast.EdgeIndex {
|
||||||
p.commit()
|
p.commit()
|
||||||
ei.Glob = true
|
ei.Glob = true
|
||||||
} else {
|
} else {
|
||||||
p.errorf(p.pos.Subtract(r, p.utf16), p.pos, "unexpected character in edge index")
|
p.errorf(p.pos.Subtract(r, p.utf16Pos), p.pos, "unexpected character in edge index")
|
||||||
// TODO: skip to ], maybe add a p.skipTo to skip to certain characters
|
// TODO: skip to ], maybe add a p.skipTo to skip to certain characters
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -870,8 +892,8 @@ func (p *parser) parseEdges(mk *d2ast.Key, src *d2ast.KeyPath) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if src == nil {
|
if src == nil {
|
||||||
p.errorf(p.lookaheadPos.Subtract(r, p.utf16), p.lookaheadPos, "connection missing source")
|
p.errorf(p.lookaheadPos.Subtract(r, p.utf16Pos), p.lookaheadPos, "connection missing source")
|
||||||
e.Range.Start = p.lookaheadPos.Subtract(r, p.utf16)
|
e.Range.Start = p.lookaheadPos.Subtract(r, p.utf16Pos)
|
||||||
}
|
}
|
||||||
p.commit()
|
p.commit()
|
||||||
|
|
||||||
|
|
@ -1056,7 +1078,7 @@ func (p *parser) parseUnquotedString(inKey bool) (s *d2ast.UnquotedString) {
|
||||||
p.rewind()
|
p.rewind()
|
||||||
if !eof {
|
if !eof {
|
||||||
if _s == "...@" {
|
if _s == "...@" {
|
||||||
p.errorf(p.pos, p.pos.AdvanceString("...@", p.utf16), "unquoted strings cannot begin with ...@ as that's import spread syntax")
|
p.errorf(p.pos, p.pos.AdvanceString("...@", p.utf16Pos), "unquoted strings cannot begin with ...@ as that's import spread syntax")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1162,7 +1184,7 @@ func (p *parser) parseUnquotedString(inKey bool) (s *d2ast.UnquotedString) {
|
||||||
|
|
||||||
r2, eof := p.read()
|
r2, eof := p.read()
|
||||||
if eof {
|
if eof {
|
||||||
p.errorf(p.pos.Subtract('\\', p.utf16), p.readerPos, "unfinished escape sequence")
|
p.errorf(p.pos.Subtract('\\', p.utf16Pos), p.readerPos, "unfinished escape sequence")
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1214,7 +1236,7 @@ func (p *parser) parseDoubleQuotedString(inKey bool) *d2ast.DoubleQuotedString {
|
||||||
s := &d2ast.DoubleQuotedString{
|
s := &d2ast.DoubleQuotedString{
|
||||||
Range: d2ast.Range{
|
Range: d2ast.Range{
|
||||||
Path: p.path,
|
Path: p.path,
|
||||||
Start: p.pos.Subtract('"', p.utf16),
|
Start: p.pos.Subtract('"', p.utf16Pos),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
defer s.Range.End.From(&p.pos)
|
defer s.Range.End.From(&p.pos)
|
||||||
|
|
@ -1266,7 +1288,7 @@ func (p *parser) parseDoubleQuotedString(inKey bool) *d2ast.DoubleQuotedString {
|
||||||
|
|
||||||
r2, eof := p.read()
|
r2, eof := p.read()
|
||||||
if eof {
|
if eof {
|
||||||
p.errorf(p.pos.Subtract('\\', p.utf16), p.readerPos, "unfinished escape sequence")
|
p.errorf(p.pos.Subtract('\\', p.utf16Pos), p.readerPos, "unfinished escape sequence")
|
||||||
p.errorf(s.Range.Start, p.readerPos, `double quoted strings must be terminated with "`)
|
p.errorf(s.Range.Start, p.readerPos, `double quoted strings must be terminated with "`)
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
@ -1285,7 +1307,7 @@ func (p *parser) parseSingleQuotedString() *d2ast.SingleQuotedString {
|
||||||
s := &d2ast.SingleQuotedString{
|
s := &d2ast.SingleQuotedString{
|
||||||
Range: d2ast.Range{
|
Range: d2ast.Range{
|
||||||
Path: p.path,
|
Path: p.path,
|
||||||
Start: p.pos.Subtract('\'', p.utf16),
|
Start: p.pos.Subtract('\'', p.utf16Pos),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
defer s.Range.End.From(&p.pos)
|
defer s.Range.End.From(&p.pos)
|
||||||
|
|
@ -1347,7 +1369,7 @@ func (p *parser) parseBlockString() *d2ast.BlockString {
|
||||||
bs := &d2ast.BlockString{
|
bs := &d2ast.BlockString{
|
||||||
Range: d2ast.Range{
|
Range: d2ast.Range{
|
||||||
Path: p.path,
|
Path: p.path,
|
||||||
Start: p.pos.Subtract('|', p.utf16),
|
Start: p.pos.Subtract('|', p.utf16Pos),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
defer bs.Range.End.From(&p.pos)
|
defer bs.Range.End.From(&p.pos)
|
||||||
|
|
@ -1460,7 +1482,7 @@ func (p *parser) parseArray() *d2ast.Array {
|
||||||
a := &d2ast.Array{
|
a := &d2ast.Array{
|
||||||
Range: d2ast.Range{
|
Range: d2ast.Range{
|
||||||
Path: p.path,
|
Path: p.path,
|
||||||
Start: p.pos.Subtract('[', p.utf16),
|
Start: p.pos.Subtract('[', p.utf16Pos),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
defer a.Range.End.From(&p.readerPos)
|
defer a.Range.End.From(&p.readerPos)
|
||||||
|
|
@ -1562,7 +1584,7 @@ func (p *parser) parseArrayNode(r rune) d2ast.ArrayNodeBox {
|
||||||
vbox := p.parseValue()
|
vbox := p.parseValue()
|
||||||
if vbox.UnquotedString != nil && vbox.UnquotedString.ScalarString() == "" &&
|
if vbox.UnquotedString != nil && vbox.UnquotedString.ScalarString() == "" &&
|
||||||
!(len(vbox.UnquotedString.Value) > 0 && vbox.UnquotedString.Value[0].Substitution != nil) {
|
!(len(vbox.UnquotedString.Value) > 0 && vbox.UnquotedString.Value[0].Substitution != nil) {
|
||||||
p.errorf(p.pos, p.pos.Advance(r, p.utf16), "unquoted strings cannot start on %q", r)
|
p.errorf(p.pos, p.pos.Advance(r, p.utf16Pos), "unquoted strings cannot start on %q", r)
|
||||||
}
|
}
|
||||||
box.Null = vbox.Null
|
box.Null = vbox.Null
|
||||||
box.Boolean = vbox.Boolean
|
box.Boolean = vbox.Boolean
|
||||||
|
|
@ -1661,14 +1683,14 @@ func (p *parser) parseSubstitution(spread bool) *d2ast.Substitution {
|
||||||
subst := &d2ast.Substitution{
|
subst := &d2ast.Substitution{
|
||||||
Range: d2ast.Range{
|
Range: d2ast.Range{
|
||||||
Path: p.path,
|
Path: p.path,
|
||||||
Start: p.pos.SubtractString("$", p.utf16),
|
Start: p.pos.SubtractString("$", p.utf16Pos),
|
||||||
},
|
},
|
||||||
Spread: spread,
|
Spread: spread,
|
||||||
}
|
}
|
||||||
defer subst.Range.End.From(&p.pos)
|
defer subst.Range.End.From(&p.pos)
|
||||||
|
|
||||||
if subst.Spread {
|
if subst.Spread {
|
||||||
subst.Range.Start = subst.Range.Start.SubtractString("...", p.utf16)
|
subst.Range.Start = subst.Range.Start.SubtractString("...", p.utf16Pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
r, newlines, eof := p.peekNotSpace()
|
r, newlines, eof := p.peekNotSpace()
|
||||||
|
|
@ -1711,14 +1733,14 @@ func (p *parser) parseImport(spread bool) *d2ast.Import {
|
||||||
imp := &d2ast.Import{
|
imp := &d2ast.Import{
|
||||||
Range: d2ast.Range{
|
Range: d2ast.Range{
|
||||||
Path: p.path,
|
Path: p.path,
|
||||||
Start: p.pos.SubtractString("$", p.utf16),
|
Start: p.pos.SubtractString("$", p.utf16Pos),
|
||||||
},
|
},
|
||||||
Spread: spread,
|
Spread: spread,
|
||||||
}
|
}
|
||||||
defer imp.Range.End.From(&p.pos)
|
defer imp.Range.End.From(&p.pos)
|
||||||
|
|
||||||
if imp.Spread {
|
if imp.Spread {
|
||||||
imp.Range.Start = imp.Range.Start.SubtractString("...", p.utf16)
|
imp.Range.Start = imp.Range.Start.SubtractString("...", p.utf16Pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
var pre strings.Builder
|
var pre strings.Builder
|
||||||
|
|
|
||||||
|
|
@ -10,12 +10,14 @@ import (
|
||||||
"oss.terrastruct.com/util-go/diff"
|
"oss.terrastruct.com/util-go/diff"
|
||||||
|
|
||||||
"oss.terrastruct.com/d2/d2ast"
|
"oss.terrastruct.com/d2/d2ast"
|
||||||
|
"oss.terrastruct.com/d2/d2format"
|
||||||
"oss.terrastruct.com/d2/d2parser"
|
"oss.terrastruct.com/d2/d2parser"
|
||||||
)
|
)
|
||||||
|
|
||||||
type testCase struct {
|
type testCase struct {
|
||||||
name string
|
name string
|
||||||
text string
|
text string
|
||||||
|
utf16 bool
|
||||||
assert func(t testing.TB, ast *d2ast.Map, err error)
|
assert func(t testing.TB, ast *d2ast.Map, err error)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -391,6 +393,22 @@ c-
|
||||||
assert.Equal(t, "1:13", ast.Nodes[0].MapKey.Edges[1].Dst.Range.End.String())
|
assert.Equal(t, "1:13", ast.Nodes[0].MapKey.Edges[1].Dst.Range.End.String())
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "utf16-input",
|
||||||
|
utf16: true,
|
||||||
|
text: "\xff\xfex\x00 \x00-\x00>\x00 \x00y\x00\r\x00\n\x00",
|
||||||
|
assert: func(t testing.TB, ast *d2ast.Map, err error) {
|
||||||
|
assert.Success(t, err)
|
||||||
|
assert.Equal(t, "x -> y\n", d2format.Format(ast))
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "errors/utf16-input",
|
||||||
|
text: "\xff\xfex\x00 \x00-\x00>\x00 \x00y\x00\r\x00\n\x00",
|
||||||
|
assert: func(t testing.TB, ast *d2ast.Map, err error) {
|
||||||
|
assert.ErrorString(t, err, `d2/testdata/d2parser/TestParse/errors/utf16-input.d2:1:13: invalid text beginning unquoted key`)
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Run("import", testImport)
|
t.Run("import", testImport)
|
||||||
|
|
@ -491,7 +509,11 @@ func runa(t *testing.T, tca []testCase) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
d2Path := fmt.Sprintf("d2/testdata/d2parser/%v.d2", t.Name())
|
d2Path := fmt.Sprintf("d2/testdata/d2parser/%v.d2", t.Name())
|
||||||
ast, err := d2parser.Parse(d2Path, strings.NewReader(tc.text), nil)
|
opts := &d2parser.ParseOptions{}
|
||||||
|
if tc.utf16 {
|
||||||
|
opts.UTF16Input = true
|
||||||
|
}
|
||||||
|
ast, err := d2parser.Parse(d2Path, strings.NewReader(tc.text), opts)
|
||||||
|
|
||||||
if tc.assert != nil {
|
if tc.assert != nil {
|
||||||
tc.assert(t, ast, err)
|
tc.assert(t, ast, err)
|
||||||
|
|
|
||||||
35
d2parser/utf16_gen.go
Normal file
35
d2parser/utf16_gen.go
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
//go:build ignore
|
||||||
|
|
||||||
|
// utf16_gen.go is used to create test UTF-16 input for the UTF-16 input test in parse_test.go
|
||||||
|
// After running it, confirm that `file utf16.d2` identifies the output as little-endian UTF-16 text.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"golang.org/x/text/encoding/unicode"
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
// Pretend we're on Windows.
|
||||||
|
s := "x -> y\r\n"
|
||||||
|
|
||||||
|
b := &bytes.Buffer{}
|
||||||
|
t := transform.NewWriter(b, unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewEncoder())
|
||||||
|
_, err := io.WriteString(t, s)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("%q\n", b.String())
|
||||||
|
|
||||||
|
err = os.WriteFile("./utf16.d2", b.Bytes(), 0644)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
38
testdata/d2parser/TestParse/errors/utf16-input.exp.json
generated
vendored
Normal file
38
testdata/d2parser/TestParse/errors/utf16-input.exp.json
generated
vendored
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
{
|
||||||
|
"ast": {
|
||||||
|
"range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,0:0:0-1:1:22",
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"map_key": {
|
||||||
|
"range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,1:0:21-1:1:22",
|
||||||
|
"key": {
|
||||||
|
"range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,1:0:21-1:1:22",
|
||||||
|
"path": [
|
||||||
|
{
|
||||||
|
"unquoted_string": {
|
||||||
|
"range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,1:0:21-1:1:22",
|
||||||
|
"value": [
|
||||||
|
{
|
||||||
|
"string": "\u0000",
|
||||||
|
"raw_string": "\u0000"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"primary": {},
|
||||||
|
"value": {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"err": {
|
||||||
|
"errs": [
|
||||||
|
{
|
||||||
|
"range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,0:12:12-0:20:20",
|
||||||
|
"errmsg": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2:1:13: invalid text beginning unquoted key"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
54
testdata/d2parser/TestParse/utf16-input.exp.json
generated
vendored
Normal file
54
testdata/d2parser/TestParse/utf16-input.exp.json
generated
vendored
Normal file
|
|
@ -0,0 +1,54 @@
|
||||||
|
{
|
||||||
|
"ast": {
|
||||||
|
"range": "d2/testdata/d2parser/TestParse/utf16-input.d2,0:0:0-1:0:8",
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"map_key": {
|
||||||
|
"range": "d2/testdata/d2parser/TestParse/utf16-input.d2,0:0:0-0:7:7",
|
||||||
|
"edges": [
|
||||||
|
{
|
||||||
|
"range": "d2/testdata/d2parser/TestParse/utf16-input.d2,0:0:0-0:6:6",
|
||||||
|
"src": {
|
||||||
|
"range": "d2/testdata/d2parser/TestParse/utf16-input.d2,0:0:0-0:1:1",
|
||||||
|
"path": [
|
||||||
|
{
|
||||||
|
"unquoted_string": {
|
||||||
|
"range": "d2/testdata/d2parser/TestParse/utf16-input.d2,0:0:0-0:1:1",
|
||||||
|
"value": [
|
||||||
|
{
|
||||||
|
"string": "x",
|
||||||
|
"raw_string": "x"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"src_arrow": "",
|
||||||
|
"dst": {
|
||||||
|
"range": "d2/testdata/d2parser/TestParse/utf16-input.d2,0:5:5-0:6:6",
|
||||||
|
"path": [
|
||||||
|
{
|
||||||
|
"unquoted_string": {
|
||||||
|
"range": "d2/testdata/d2parser/TestParse/utf16-input.d2,0:5:5-0:6:6",
|
||||||
|
"value": [
|
||||||
|
{
|
||||||
|
"string": "y",
|
||||||
|
"raw_string": "y"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"dst_arrow": ">"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"primary": {},
|
||||||
|
"value": {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"err": null
|
||||||
|
}
|
||||||
Loading…
Reference in a new issue