From b5d68e60b43fa86b3131ee9d1ec05fd21b4bfbcc Mon Sep 17 00:00:00 2001 From: Anmol Sethi Date: Tue, 1 Aug 2023 12:09:55 -0700 Subject: [PATCH 1/8] Fix daily part 2 Hopefully works this time. --- .github/workflows/daily.yml | 2 +- ci/test.sh | 4 ---- make.sh | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 0f9f15eb3..3883537f4 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -16,7 +16,7 @@ jobs: with: go-version-file: ./go.mod cache: true - - run: DAILY=1 COLOR=1 CI_FORCE=1 ./make.sh all race + - run: COLOR=1 CI_FORCE=1 ./make.sh all race env: GITHUB_TOKEN: ${{ secrets._GITHUB_TOKEN }} DISCORD_WEBHOOK_URL: ${{ secrets.DISCORD_WEBHOOK_URL }} diff --git a/ci/test.sh b/ci/test.sh index f7d92a6de..81580c209 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -6,8 +6,4 @@ if [ "$*" = "" ]; then set ./... fi -if [ "${CI:-}" ]; then - export FORCE_COLOR=1 - npx playwright@1.31.1 install --with-deps chromium -fi go test --timeout=30m "$@" diff --git a/make.sh b/make.sh index a2615001f..9c244eb0d 100755 --- a/make.sh +++ b/make.sh @@ -14,7 +14,7 @@ if ! 
go version | grep -qF '1.20'; then exit 1 fi -if [ "${DAILY-}" ]; then +if [ "${CI:-}" ]; then export FORCE_COLOR=1 npx playwright@1.31.1 install --with-deps chromium fi From b81da1ee621260d5f7c5ba37030b9be60f01d70e Mon Sep 17 00:00:00 2001 From: Anmol Sethi Date: Wed, 2 Aug 2023 09:56:35 -0700 Subject: [PATCH 2/8] d2parser: Support reading utf16 files --- d2ir/import.go | 3 +- d2parser/.gitignore | 1 + d2parser/parse.go | 98 ++++++++++++------- d2parser/parse_test.go | 24 ++++- d2parser/utf16_gen.go | 35 +++++++ .../TestParse/errors/utf16-input.exp.json | 38 +++++++ .../d2parser/TestParse/utf16-input.exp.json | 54 ++++++++++ 7 files changed, 212 insertions(+), 41 deletions(-) create mode 100644 d2parser/.gitignore create mode 100644 d2parser/utf16_gen.go create mode 100644 testdata/d2parser/TestParse/errors/utf16-input.exp.json create mode 100644 testdata/d2parser/TestParse/utf16-input.exp.json diff --git a/d2ir/import.go b/d2ir/import.go index 147130071..383e5c24c 100644 --- a/d2ir/import.go +++ b/d2ir/import.go @@ -1,7 +1,6 @@ package d2ir import ( - "bufio" "io/fs" "os" "path" @@ -99,7 +98,7 @@ func (c *compiler) __import(imp *d2ast.Import) (*Map, bool) { } defer f.Close() - ast, err := d2parser.Parse(impPath, bufio.NewReader(f), &d2parser.ParseOptions{ + ast, err := d2parser.Parse(impPath, f, &d2parser.ParseOptions{ UTF16: c.utf16, ParseError: c.err, }) diff --git a/d2parser/.gitignore b/d2parser/.gitignore new file mode 100644 index 000000000..e3767e21a --- /dev/null +++ b/d2parser/.gitignore @@ -0,0 +1 @@ +utf16.d2 diff --git a/d2parser/parse.go b/d2parser/parse.go index 50f6c91c1..2eeae234a 100644 --- a/d2parser/parse.go +++ b/d2parser/parse.go @@ -1,6 +1,7 @@ package d2parser import ( + "bufio" "fmt" "io" "math/big" @@ -9,13 +10,22 @@ import ( "unicode" "unicode/utf8" - "oss.terrastruct.com/util-go/go2" + tunicode "golang.org/x/text/encoding/unicode" + "golang.org/x/text/transform" "oss.terrastruct.com/d2/d2ast" + "oss.terrastruct.com/util-go/go2" ) type 
ParseOptions struct { - UTF16 bool + // UTF16Pos would be used with input received from a browser where the browser will send the text as UTF-8 but + // JavaScript keeps strings in memory as UTF-16 and so needs UTF-16 indexes into the text to line up errors correctly. + // So you want to read UTF-8 still but adjust the indexes to pretend the input is utf16. + UTF16Pos bool + + // UTF16Input makes the parser read the input as UTF16 and also sets UTF16Pos. + UTF16Input bool + ParseError *ParseError } @@ -27,23 +37,31 @@ type ParseOptions struct { // The map may be compiled via Compile even if there are errors to keep language tooling // operational. Though autoformat should not run. // -// If UTF16Mode is true, positions will be recorded in UTF-16 codeunits as required by LSP +// If UTF16Pos is true, positions will be recorded in UTF-16 codeunits as required by LSP // and browser clients. See // https://microsoft.github.io/language-server-protocol/specifications/specification-current/#textDocuments // TODO: update godocs -func Parse(path string, r io.RuneReader, opts *ParseOptions) (*d2ast.Map, error) { +func Parse(path string, r io.Reader, opts *ParseOptions) (*d2ast.Map, error) { if opts == nil { opts = &ParseOptions{ - UTF16: false, + UTF16Pos: false, + UTF16Input: false, } } p := &parser{ - path: path, - reader: r, + path: path, - utf16: opts.UTF16, - err: opts.ParseError, + utf16Input: opts.UTF16Input, + utf16Pos: opts.UTF16Pos, + err: opts.ParseError, + } + if p.utf16Input { + p.utf16Pos = true + tr := transform.NewReader(r, tunicode.UTF16(tunicode.LittleEndian, tunicode.UseBOM).NewDecoder()) + p.reader = bufio.NewReader(tr) + } else { + p.reader = bufio.NewReader(r) } if p.err == nil { p.err = &ParseError{} @@ -113,9 +131,10 @@ func ParseValue(value string) (d2ast.Value, error) { // // TODO: ast struct that combines map & errors and pass that around type parser struct { - path string - pos d2ast.Position - utf16 bool + path string + pos d2ast.Position + 
utf16Pos bool + utf16Input bool reader io.RuneReader readerPos d2ast.Position @@ -193,7 +212,10 @@ func (p *parser) _readRune() (r rune, eof bool) { p.readerPos = p.lookaheadPos - r, _, err := p.reader.ReadRune() + r, n, err := p.reader.ReadRune() + if p.utf16Input && n > 0 { + // TODO: + } if err != nil { p.ioerr = true if err != io.EOF { @@ -217,13 +239,13 @@ func (p *parser) read() (r rune, eof bool) { if eof { return 0, true } - p.pos = p.pos.Advance(r, p.utf16) + p.pos = p.pos.Advance(r, p.utf16Pos) p.lookaheadPos = p.pos return r, false } func (p *parser) replay(r rune) { - p.pos = p.pos.Subtract(r, p.utf16) + p.pos = p.pos.Subtract(r, p.utf16Pos) // This is more complex than it needs to be to allow reusing the buffer underlying // p.lookahead. @@ -250,7 +272,7 @@ func (p *parser) peek() (r rune, eof bool) { } p.lookahead = append(p.lookahead, r) - p.lookaheadPos = p.lookaheadPos.Advance(r, p.utf16) + p.lookaheadPos = p.lookaheadPos.Advance(r, p.utf16Pos) return r, false } @@ -364,7 +386,7 @@ func (p *parser) parseMap(isFileMap bool) *d2ast.Map { defer m.Range.End.From(&p.pos) if !isFileMap { - m.Range.Start = m.Range.Start.Subtract('{', p.utf16) + m.Range.Start = m.Range.Start.Subtract('{', p.utf16Pos) p.depth++ defer dec(&p.depth) } @@ -383,7 +405,7 @@ func (p *parser) parseMap(isFileMap bool) *d2ast.Map { continue case '}': if isFileMap { - p.errorf(p.pos.Subtract(r, p.utf16), p.pos, "unexpected map termination character } in file map") + p.errorf(p.pos.Subtract(r, p.utf16Pos), p.pos, "unexpected map termination character } in file map") continue } return m @@ -489,7 +511,7 @@ func (p *parser) parseComment() *d2ast.Comment { c := &d2ast.Comment{ Range: d2ast.Range{ Path: p.path, - Start: p.pos.Subtract('#', p.utf16), + Start: p.pos.Subtract('#', p.utf16Pos), }, } defer c.Range.End.From(&p.pos) @@ -546,7 +568,7 @@ func (p *parser) parseBlockComment() *d2ast.BlockComment { bc := &d2ast.BlockComment{ Range: d2ast.Range{ Path: p.path, - Start: 
p.pos.SubtractString(`"""`, p.utf16), + Start: p.pos.SubtractString(`"""`, p.utf16Pos), }, } defer bc.Range.End.From(&p.pos) @@ -714,7 +736,7 @@ func (p *parser) parseMapKeyValue(mk *d2ast.Key) { } mk.Value = p.parseValue() if mk.Value.Unbox() == nil { - p.errorf(p.pos.Subtract(':', p.utf16), p.pos, "missing value after colon") + p.errorf(p.pos.Subtract(':', p.utf16Pos), p.pos, "missing value after colon") } sb := mk.Value.ScalarBox() @@ -788,7 +810,7 @@ func (p *parser) parseEdgeIndex() *d2ast.EdgeIndex { ei := &d2ast.EdgeIndex{ Range: d2ast.Range{ Path: p.path, - Start: p.pos.Subtract('[', p.utf16), + Start: p.pos.Subtract('[', p.utf16Pos), }, } defer ei.Range.End.From(&p.pos) @@ -816,7 +838,7 @@ func (p *parser) parseEdgeIndex() *d2ast.EdgeIndex { } p.commit() if !unicode.IsDigit(r) { - p.errorf(p.pos.Subtract(r, p.utf16), p.pos, "unexpected character in edge index") + p.errorf(p.pos.Subtract(r, p.utf16Pos), p.pos, "unexpected character in edge index") continue } sb.WriteRune(r) @@ -827,7 +849,7 @@ func (p *parser) parseEdgeIndex() *d2ast.EdgeIndex { p.commit() ei.Glob = true } else { - p.errorf(p.pos.Subtract(r, p.utf16), p.pos, "unexpected character in edge index") + p.errorf(p.pos.Subtract(r, p.utf16Pos), p.pos, "unexpected character in edge index") // TODO: skip to ], maybe add a p.skipTo to skip to certain characters } @@ -870,8 +892,8 @@ func (p *parser) parseEdges(mk *d2ast.Key, src *d2ast.KeyPath) { return } if src == nil { - p.errorf(p.lookaheadPos.Subtract(r, p.utf16), p.lookaheadPos, "connection missing source") - e.Range.Start = p.lookaheadPos.Subtract(r, p.utf16) + p.errorf(p.lookaheadPos.Subtract(r, p.utf16Pos), p.lookaheadPos, "connection missing source") + e.Range.Start = p.lookaheadPos.Subtract(r, p.utf16Pos) } p.commit() @@ -1056,7 +1078,7 @@ func (p *parser) parseUnquotedString(inKey bool) (s *d2ast.UnquotedString) { p.rewind() if !eof { if _s == "...@" { - p.errorf(p.pos, p.pos.AdvanceString("...@", p.utf16), "unquoted strings cannot begin 
with ...@ as that's import spread syntax") + p.errorf(p.pos, p.pos.AdvanceString("...@", p.utf16Pos), "unquoted strings cannot begin with ...@ as that's import spread syntax") } } @@ -1162,7 +1184,7 @@ func (p *parser) parseUnquotedString(inKey bool) (s *d2ast.UnquotedString) { r2, eof := p.read() if eof { - p.errorf(p.pos.Subtract('\\', p.utf16), p.readerPos, "unfinished escape sequence") + p.errorf(p.pos.Subtract('\\', p.utf16Pos), p.readerPos, "unfinished escape sequence") return s } @@ -1214,7 +1236,7 @@ func (p *parser) parseDoubleQuotedString(inKey bool) *d2ast.DoubleQuotedString { s := &d2ast.DoubleQuotedString{ Range: d2ast.Range{ Path: p.path, - Start: p.pos.Subtract('"', p.utf16), + Start: p.pos.Subtract('"', p.utf16Pos), }, } defer s.Range.End.From(&p.pos) @@ -1266,7 +1288,7 @@ func (p *parser) parseDoubleQuotedString(inKey bool) *d2ast.DoubleQuotedString { r2, eof := p.read() if eof { - p.errorf(p.pos.Subtract('\\', p.utf16), p.readerPos, "unfinished escape sequence") + p.errorf(p.pos.Subtract('\\', p.utf16Pos), p.readerPos, "unfinished escape sequence") p.errorf(s.Range.Start, p.readerPos, `double quoted strings must be terminated with "`) return s } @@ -1285,7 +1307,7 @@ func (p *parser) parseSingleQuotedString() *d2ast.SingleQuotedString { s := &d2ast.SingleQuotedString{ Range: d2ast.Range{ Path: p.path, - Start: p.pos.Subtract('\'', p.utf16), + Start: p.pos.Subtract('\'', p.utf16Pos), }, } defer s.Range.End.From(&p.pos) @@ -1347,7 +1369,7 @@ func (p *parser) parseBlockString() *d2ast.BlockString { bs := &d2ast.BlockString{ Range: d2ast.Range{ Path: p.path, - Start: p.pos.Subtract('|', p.utf16), + Start: p.pos.Subtract('|', p.utf16Pos), }, } defer bs.Range.End.From(&p.pos) @@ -1460,7 +1482,7 @@ func (p *parser) parseArray() *d2ast.Array { a := &d2ast.Array{ Range: d2ast.Range{ Path: p.path, - Start: p.pos.Subtract('[', p.utf16), + Start: p.pos.Subtract('[', p.utf16Pos), }, } defer a.Range.End.From(&p.readerPos) @@ -1562,7 +1584,7 @@ func (p *parser) 
parseArrayNode(r rune) d2ast.ArrayNodeBox { vbox := p.parseValue() if vbox.UnquotedString != nil && vbox.UnquotedString.ScalarString() == "" && !(len(vbox.UnquotedString.Value) > 0 && vbox.UnquotedString.Value[0].Substitution != nil) { - p.errorf(p.pos, p.pos.Advance(r, p.utf16), "unquoted strings cannot start on %q", r) + p.errorf(p.pos, p.pos.Advance(r, p.utf16Pos), "unquoted strings cannot start on %q", r) } box.Null = vbox.Null box.Boolean = vbox.Boolean @@ -1661,14 +1683,14 @@ func (p *parser) parseSubstitution(spread bool) *d2ast.Substitution { subst := &d2ast.Substitution{ Range: d2ast.Range{ Path: p.path, - Start: p.pos.SubtractString("$", p.utf16), + Start: p.pos.SubtractString("$", p.utf16Pos), }, Spread: spread, } defer subst.Range.End.From(&p.pos) if subst.Spread { - subst.Range.Start = subst.Range.Start.SubtractString("...", p.utf16) + subst.Range.Start = subst.Range.Start.SubtractString("...", p.utf16Pos) } r, newlines, eof := p.peekNotSpace() @@ -1711,14 +1733,14 @@ func (p *parser) parseImport(spread bool) *d2ast.Import { imp := &d2ast.Import{ Range: d2ast.Range{ Path: p.path, - Start: p.pos.SubtractString("$", p.utf16), + Start: p.pos.SubtractString("$", p.utf16Pos), }, Spread: spread, } defer imp.Range.End.From(&p.pos) if imp.Spread { - imp.Range.Start = imp.Range.Start.SubtractString("...", p.utf16) + imp.Range.Start = imp.Range.Start.SubtractString("...", p.utf16Pos) } var pre strings.Builder diff --git a/d2parser/parse_test.go b/d2parser/parse_test.go index 2dc195d02..2b7300ad3 100644 --- a/d2parser/parse_test.go +++ b/d2parser/parse_test.go @@ -10,12 +10,14 @@ import ( "oss.terrastruct.com/util-go/diff" "oss.terrastruct.com/d2/d2ast" + "oss.terrastruct.com/d2/d2format" "oss.terrastruct.com/d2/d2parser" ) type testCase struct { name string text string + utf16 bool assert func(t testing.TB, ast *d2ast.Map, err error) } @@ -391,6 +393,22 @@ c- assert.Equal(t, "1:13", ast.Nodes[0].MapKey.Edges[1].Dst.Range.End.String()) }, }, + { + name: 
"utf16-input", + utf16: true, + text: "\xff\xfex\x00 \x00-\x00>\x00 \x00y\x00\r\x00\n\x00", + assert: func(t testing.TB, ast *d2ast.Map, err error) { + assert.Success(t, err) + assert.Equal(t, "x -> y\n", d2format.Format(ast)) + }, + }, + { + name: "errors/utf16-input", + text: "\xff\xfex\x00 \x00-\x00>\x00 \x00y\x00\r\x00\n\x00", + assert: func(t testing.TB, ast *d2ast.Map, err error) { + assert.ErrorString(t, err, `d2/testdata/d2parser/TestParse/errors/utf16-input.d2:1:13: invalid text beginning unquoted key`) + }, + }, } t.Run("import", testImport) @@ -491,7 +509,11 @@ func runa(t *testing.T, tca []testCase) { t.Parallel() d2Path := fmt.Sprintf("d2/testdata/d2parser/%v.d2", t.Name()) - ast, err := d2parser.Parse(d2Path, strings.NewReader(tc.text), nil) + opts := &d2parser.ParseOptions{} + if tc.utf16 { + opts.UTF16Input = true + } + ast, err := d2parser.Parse(d2Path, strings.NewReader(tc.text), opts) if tc.assert != nil { tc.assert(t, ast, err) diff --git a/d2parser/utf16_gen.go b/d2parser/utf16_gen.go new file mode 100644 index 000000000..090705dfe --- /dev/null +++ b/d2parser/utf16_gen.go @@ -0,0 +1,35 @@ +//go:build ignore + +// utf16_gen.go is used to create test UTF-16 input for the UTF-16 input test in parse_test.go +// Confirm `file utf16.txt` returns +package main + +import ( + "bytes" + "fmt" + "io" + "log" + "os" + + "golang.org/x/text/encoding/unicode" + "golang.org/x/text/transform" +) + +func main() { + // Pretend we're on Windows. 
+ s := "x -> y\r\n" + + b := &bytes.Buffer{} + t := transform.NewWriter(b, unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewEncoder()) + _, err := io.WriteString(t, s) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("%q\n", b.String()) + + err = os.WriteFile("./utf16.d2", b.Bytes(), 0644) + if err != nil { + log.Fatal(err) + } +} diff --git a/testdata/d2parser/TestParse/errors/utf16-input.exp.json b/testdata/d2parser/TestParse/errors/utf16-input.exp.json new file mode 100644 index 000000000..81f075aa7 --- /dev/null +++ b/testdata/d2parser/TestParse/errors/utf16-input.exp.json @@ -0,0 +1,38 @@ +{ + "ast": { + "range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,0:0:0-1:1:22", + "nodes": [ + { + "map_key": { + "range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,1:0:21-1:1:22", + "key": { + "range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,1:0:21-1:1:22", + "path": [ + { + "unquoted_string": { + "range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,1:0:21-1:1:22", + "value": [ + { + "string": "\u0000", + "raw_string": "\u0000" + } + ] + } + } + ] + }, + "primary": {}, + "value": {} + } + } + ] + }, + "err": { + "errs": [ + { + "range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,0:12:12-0:20:20", + "errmsg": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2:1:13: invalid text beginning unquoted key" + } + ] + } +} diff --git a/testdata/d2parser/TestParse/utf16-input.exp.json b/testdata/d2parser/TestParse/utf16-input.exp.json new file mode 100644 index 000000000..24f3479d2 --- /dev/null +++ b/testdata/d2parser/TestParse/utf16-input.exp.json @@ -0,0 +1,54 @@ +{ + "ast": { + "range": "d2/testdata/d2parser/TestParse/utf16-input.d2,0:0:0-1:0:8", + "nodes": [ + { + "map_key": { + "range": "d2/testdata/d2parser/TestParse/utf16-input.d2,0:0:0-0:7:7", + "edges": [ + { + "range": "d2/testdata/d2parser/TestParse/utf16-input.d2,0:0:0-0:6:6", + "src": { + "range": 
"d2/testdata/d2parser/TestParse/utf16-input.d2,0:0:0-0:1:1", + "path": [ + { + "unquoted_string": { + "range": "d2/testdata/d2parser/TestParse/utf16-input.d2,0:0:0-0:1:1", + "value": [ + { + "string": "x", + "raw_string": "x" + } + ] + } + } + ] + }, + "src_arrow": "", + "dst": { + "range": "d2/testdata/d2parser/TestParse/utf16-input.d2,0:5:5-0:6:6", + "path": [ + { + "unquoted_string": { + "range": "d2/testdata/d2parser/TestParse/utf16-input.d2,0:5:5-0:6:6", + "value": [ + { + "string": "y", + "raw_string": "y" + } + ] + } + } + ] + }, + "dst_arrow": ">" + } + ], + "primary": {}, + "value": {} + } + } + ] + }, + "err": null +} From 203953723e17e426b4fd3261e7c9af6b0a98a0fa Mon Sep 17 00:00:00 2001 From: Anmol Sethi Date: Wed, 2 Aug 2023 10:26:45 -0700 Subject: [PATCH 3/8] d2parser: Autodetect UTF-16 based on BOM Turns out I was wrong this is safe. --- d2compiler/compile.go | 10 ++-- d2exporter/export_test.go | 2 +- d2ir/compile.go | 6 +-- d2ir/import.go | 2 +- d2lib/d2.go | 6 +-- d2parser/parse.go | 49 ++++++++++--------- d2parser/parse_test.go | 12 ----- d2parser/utf16_gen.go | 3 ++ e2etests/e2e_test.go | 2 +- .../TestParse/errors/utf16-input.exp.json | 38 -------------- 10 files changed, 44 insertions(+), 86 deletions(-) delete mode 100644 testdata/d2parser/TestParse/errors/utf16-input.exp.json diff --git a/d2compiler/compile.go b/d2compiler/compile.go index 7c9e60ccc..f777ff3dd 100644 --- a/d2compiler/compile.go +++ b/d2compiler/compile.go @@ -21,27 +21,27 @@ import ( ) type CompileOptions struct { - UTF16 bool + UTF16Pos bool // FS is the file system used for resolving imports in the d2 text. // It should correspond to the root path. 
FS fs.FS } -func Compile(p string, r io.RuneReader, opts *CompileOptions) (*d2graph.Graph, *d2target.Config, error) { +func Compile(p string, r io.Reader, opts *CompileOptions) (*d2graph.Graph, *d2target.Config, error) { if opts == nil { opts = &CompileOptions{} } ast, err := d2parser.Parse(p, r, &d2parser.ParseOptions{ - UTF16: opts.UTF16, + UTF16Pos: opts.UTF16Pos, }) if err != nil { return nil, nil, err } ir, err := d2ir.Compile(ast, &d2ir.CompileOptions{ - UTF16: opts.UTF16, - FS: opts.FS, + UTF16Pos: opts.UTF16Pos, + FS: opts.FS, }) if err != nil { return nil, nil, err diff --git a/d2exporter/export_test.go b/d2exporter/export_test.go index 5b02901ae..4eb1974c9 100644 --- a/d2exporter/export_test.go +++ b/d2exporter/export_test.go @@ -223,7 +223,7 @@ func run(t *testing.T, tc testCase) { ctx = log.Leveled(ctx, slog.LevelDebug) g, config, err := d2compiler.Compile("", strings.NewReader(tc.dsl), &d2compiler.CompileOptions{ - UTF16: true, + UTF16Pos: true, }) if err != nil { t.Fatal(err) diff --git a/d2ir/compile.go b/d2ir/compile.go index a0783923e..9bb92b253 100644 --- a/d2ir/compile.go +++ b/d2ir/compile.go @@ -21,13 +21,13 @@ type compiler struct { importStack []string // importCache enables reuse of files imported multiple times. importCache map[string]*Map - utf16 bool + utf16Pos bool globStack []bool } type CompileOptions struct { - UTF16 bool + UTF16Pos bool // Pass nil to disable imports. 
FS fs.FS } @@ -45,7 +45,7 @@ func Compile(ast *d2ast.Map, opts *CompileOptions) (*Map, error) { fs: opts.FS, importCache: make(map[string]*Map), - utf16: opts.UTF16, + utf16Pos: opts.UTF16Pos, } m := &Map{} m.initRoot() diff --git a/d2ir/import.go b/d2ir/import.go index 383e5c24c..44cf509ff 100644 --- a/d2ir/import.go +++ b/d2ir/import.go @@ -99,7 +99,7 @@ func (c *compiler) __import(imp *d2ast.Import) (*Map, bool) { defer f.Close() ast, err := d2parser.Parse(impPath, f, &d2parser.ParseOptions{ - UTF16: c.utf16, + UTF16Pos: c.utf16Pos, ParseError: c.err, }) if err != nil { diff --git a/d2lib/d2.go b/d2lib/d2.go index 18372ee73..c34eb9206 100644 --- a/d2lib/d2.go +++ b/d2lib/d2.go @@ -23,7 +23,7 @@ import ( ) type CompileOptions struct { - UTF16 bool + UTF16Pos bool FS fs.FS MeasuredTexts []*d2target.MText Ruler *textmeasure.Ruler @@ -50,8 +50,8 @@ func Compile(ctx context.Context, input string, compileOpts *CompileOptions, ren } g, config, err := d2compiler.Compile(compileOpts.InputPath, strings.NewReader(input), &d2compiler.CompileOptions{ - UTF16: compileOpts.UTF16, - FS: compileOpts.FS, + UTF16Pos: compileOpts.UTF16Pos, + FS: compileOpts.FS, }) if err != nil { return nil, nil, err diff --git a/d2parser/parse.go b/d2parser/parse.go index 2eeae234a..732038480 100644 --- a/d2parser/parse.go +++ b/d2parser/parse.go @@ -2,6 +2,7 @@ package d2parser import ( "bufio" + "bytes" "fmt" "io" "math/big" @@ -23,9 +24,6 @@ type ParseOptions struct { // So you want to read UTF-8 still but adjust the indexes to pretend the input is utf16. UTF16Pos bool - // UTF16Input makes the parser read the input as UTF16 and also sets UTF16Pos. 
- UTF16Input bool - ParseError *ParseError } @@ -44,25 +42,36 @@ type ParseOptions struct { func Parse(path string, r io.Reader, opts *ParseOptions) (*d2ast.Map, error) { if opts == nil { opts = &ParseOptions{ - UTF16Pos: false, - UTF16Input: false, + UTF16Pos: false, } } p := &parser{ path: path, - utf16Input: opts.UTF16Input, - utf16Pos: opts.UTF16Pos, - err: opts.ParseError, + utf16Pos: opts.UTF16Pos, + err: opts.ParseError, } - if p.utf16Input { - p.utf16Pos = true - tr := transform.NewReader(r, tunicode.UTF16(tunicode.LittleEndian, tunicode.UseBOM).NewDecoder()) - p.reader = bufio.NewReader(tr) - } else { - p.reader = bufio.NewReader(r) + br := bufio.NewReader(r) + p.reader = br + + bom, err := br.Peek(2) + if err == nil { + // 0xFFFE is invalid UTF-8 so this is safe. + // Also a different BOM is used for UTF-8. + // See https://unicode.org/faq/utf_bom.html#bom4 + if bom[0] == 0xFF && bom[1] == 0xFE { + p.utf16Pos = true + + buf := make([]byte, br.Buffered()) + io.ReadFull(br, buf) + + mr := io.MultiReader(bytes.NewBuffer(buf), r) + tr := transform.NewReader(mr, tunicode.UTF16(tunicode.LittleEndian, tunicode.UseBOM).NewDecoder()) + br.Reset(tr) + } } + if p.err == nil { p.err = &ParseError{} } @@ -131,10 +140,9 @@ func ParseValue(value string) (d2ast.Value, error) { // // TODO: ast struct that combines map & errors and pass that around type parser struct { - path string - pos d2ast.Position - utf16Pos bool - utf16Input bool + path string + pos d2ast.Position + utf16Pos bool reader io.RuneReader readerPos d2ast.Position @@ -212,10 +220,7 @@ func (p *parser) _readRune() (r rune, eof bool) { p.readerPos = p.lookaheadPos - r, n, err := p.reader.ReadRune() - if p.utf16Input && n > 0 { - // TODO: - } + r, _, err := p.reader.ReadRune() if err != nil { p.ioerr = true if err != io.EOF { diff --git a/d2parser/parse_test.go b/d2parser/parse_test.go index 2b7300ad3..605be68f7 100644 --- a/d2parser/parse_test.go +++ b/d2parser/parse_test.go @@ -17,7 +17,6 @@ import ( type 
testCase struct { name string text string - utf16 bool assert func(t testing.TB, ast *d2ast.Map, err error) } @@ -395,20 +394,12 @@ c- }, { name: "utf16-input", - utf16: true, text: "\xff\xfex\x00 \x00-\x00>\x00 \x00y\x00\r\x00\n\x00", assert: func(t testing.TB, ast *d2ast.Map, err error) { assert.Success(t, err) assert.Equal(t, "x -> y\n", d2format.Format(ast)) }, }, - { - name: "errors/utf16-input", - text: "\xff\xfex\x00 \x00-\x00>\x00 \x00y\x00\r\x00\n\x00", - assert: func(t testing.TB, ast *d2ast.Map, err error) { - assert.ErrorString(t, err, `d2/testdata/d2parser/TestParse/errors/utf16-input.d2:1:13: invalid text beginning unquoted key`) - }, - }, } t.Run("import", testImport) @@ -510,9 +501,6 @@ func runa(t *testing.T, tca []testCase) { d2Path := fmt.Sprintf("d2/testdata/d2parser/%v.d2", t.Name()) opts := &d2parser.ParseOptions{} - if tc.utf16 { - opts.UTF16Input = true - } ast, err := d2parser.Parse(d2Path, strings.NewReader(tc.text), opts) if tc.assert != nil { diff --git a/d2parser/utf16_gen.go b/d2parser/utf16_gen.go index 090705dfe..2acecc16d 100644 --- a/d2parser/utf16_gen.go +++ b/d2parser/utf16_gen.go @@ -10,6 +10,7 @@ import ( "io" "log" "os" + "unicode/utf8" "golang.org/x/text/encoding/unicode" "golang.org/x/text/transform" @@ -27,6 +28,8 @@ func main() { } fmt.Printf("%q\n", b.String()) + fmt.Println("\xFF\xFE") + fmt.Println(utf8.ValidString("\xFF\xFE")) err = os.WriteFile("./utf16.d2", b.Bytes(), 0644) if err != nil { diff --git a/e2etests/e2e_test.go b/e2etests/e2e_test.go index 53201f8d7..2eaa799af 100644 --- a/e2etests/e2e_test.go +++ b/e2etests/e2e_test.go @@ -111,7 +111,7 @@ func serde(t *testing.T, tc testCase, ruler *textmeasure.Ruler) { ctx := context.Background() ctx = log.WithTB(ctx, t, nil) g, _, err := d2compiler.Compile("", strings.NewReader(tc.script), &d2compiler.CompileOptions{ - UTF16: false, + UTF16Pos: false, }) trequire.Nil(t, err) if len(g.Objects) > 0 { diff --git a/testdata/d2parser/TestParse/errors/utf16-input.exp.json 
b/testdata/d2parser/TestParse/errors/utf16-input.exp.json deleted file mode 100644 index 81f075aa7..000000000 --- a/testdata/d2parser/TestParse/errors/utf16-input.exp.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "ast": { - "range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,0:0:0-1:1:22", - "nodes": [ - { - "map_key": { - "range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,1:0:21-1:1:22", - "key": { - "range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,1:0:21-1:1:22", - "path": [ - { - "unquoted_string": { - "range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,1:0:21-1:1:22", - "value": [ - { - "string": "\u0000", - "raw_string": "\u0000" - } - ] - } - } - ] - }, - "primary": {}, - "value": {} - } - } - ] - }, - "err": { - "errs": [ - { - "range": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2,0:12:12-0:20:20", - "errmsg": "d2/testdata/d2parser/TestParse/errors/utf16-input.d2:1:13: invalid text beginning unquoted key" - } - ] - } -} From 25763df3deeef19bfe91f0a8671957756e6b68b5 Mon Sep 17 00:00:00 2001 From: Alexander Wang Date: Wed, 2 Aug 2023 11:24:13 -0700 Subject: [PATCH 4/8] Update next.md --- ci/release/changelogs/next.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/release/changelogs/next.md b/ci/release/changelogs/next.md index f3c0d2a77..823b1b79a 100644 --- a/ci/release/changelogs/next.md +++ b/ci/release/changelogs/next.md @@ -1,5 +1,7 @@ #### Features 🚀 +- UTF-16 files are automatically detected and supported [#1525](https://github.com/terrastruct/d2/pull/1525) + #### Improvements 🧹 #### Bugfixes ⛑️ From a898e5021164bef9bcabc91848935911b1438279 Mon Sep 17 00:00:00 2001 From: Alexander Wang Date: Wed, 2 Aug 2023 11:38:53 -0700 Subject: [PATCH 5/8] fmt test --- d2cli/fmt.go | 10 ++++++++++ e2etests-cli/main_test.go | 20 +++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/d2cli/fmt.go b/d2cli/fmt.go index 5cb7999cb..d7aa62ca2 100644 --- a/d2cli/fmt.go +++
b/d2cli/fmt.go @@ -3,6 +3,8 @@ package d2cli import ( "bytes" "context" + "os" + "path/filepath" "oss.terrastruct.com/util-go/xdefer" @@ -21,6 +23,14 @@ func fmtCmd(ctx context.Context, ms *xmain.State) (err error) { } for _, inputPath := range ms.Opts.Args { + if inputPath != "-" { + inputPath = ms.AbsPath(inputPath) + d, err := os.Stat(inputPath) + if err == nil && d.IsDir() { + inputPath = filepath.Join(inputPath, "index.d2") + } + } + input, err := ms.ReadPath(inputPath) if err != nil { return err diff --git a/e2etests-cli/main_test.go b/e2etests-cli/main_test.go index d955e351f..bd0385bd5 100644 --- a/e2etests-cli/main_test.go +++ b/e2etests-cli/main_test.go @@ -521,6 +521,16 @@ i used to read assert.Testdata(t, ".svg", svg) }, }, + { + name: "basic-fmt", + run: func(t *testing.T, ctx context.Context, dir string, env *xos.Env) { + writeFile(t, dir, "hello-world.d2", `x ---> y`) + err := runTestMainPersist(t, ctx, dir, env, "fmt", "hello-world.d2") + assert.Success(t, err) + got := readFile(t, dir, "hello-world.d2") + assert.Equal(t, "x -> y\n", string(got)) + }, + }, } ctx := context.Background() @@ -561,6 +571,15 @@ func testMain(dir string, env *xos.Env, args ...string) *xmain.TestState { } func runTestMain(tb testing.TB, ctx context.Context, dir string, env *xos.Env, args ...string) error { + err := runTestMainPersist(tb, ctx, dir, env, args...) + if err != nil { + return err + } + removeD2Files(tb, dir) + return nil +} + +func runTestMainPersist(tb testing.TB, ctx context.Context, dir string, env *xos.Env, args ...string) error { tms := testMain(dir, env, args...) 
tms.Start(tb, ctx) defer tms.Cleanup(tb) @@ -568,7 +587,6 @@ func runTestMain(tb testing.TB, ctx context.Context, dir string, env *xos.Env, a if err != nil { return err } - removeD2Files(tb, dir) return nil } From b560e3b2638a9b886e5eaa0f401bd58a79f4efc2 Mon Sep 17 00:00:00 2001 From: Maxime Brunet Date: Tue, 1 Aug 2023 18:31:38 -0700 Subject: [PATCH 6/8] fix(cli): do not exit after 1st formatted file --- ci/release/changelogs/next.md | 2 ++ d2cli/fmt.go | 4 +++- e2etests-cli/main_test.go | 13 +++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/ci/release/changelogs/next.md b/ci/release/changelogs/next.md index 823b1b79a..62628fb1c 100644 --- a/ci/release/changelogs/next.md +++ b/ci/release/changelogs/next.md @@ -5,3 +5,5 @@ #### Improvements 🧹 #### Bugfixes ⛑️ + +- Fixes `d2 fmt` to format all files passed as arguments rather than first non-formatted only [#1523](https://github.com/terrastruct/d2/issues/1523) diff --git a/d2cli/fmt.go b/d2cli/fmt.go index d7aa62ca2..39f29bfd6 100644 --- a/d2cli/fmt.go +++ b/d2cli/fmt.go @@ -43,7 +43,9 @@ func fmtCmd(ctx context.Context, ms *xmain.State) (err error) { output := []byte(d2format.Format(m)) if !bytes.Equal(output, input) { - return ms.WritePath(inputPath, output) + if err := ms.WritePath(inputPath, output); err != nil { + return err + } } } return nil diff --git a/e2etests-cli/main_test.go b/e2etests-cli/main_test.go index bd0385bd5..29c92ec5c 100644 --- a/e2etests-cli/main_test.go +++ b/e2etests-cli/main_test.go @@ -531,6 +531,19 @@ i used to read assert.Equal(t, "x -> y\n", string(got)) }, }, + { + name: "fmt-multiple-files", + run: func(t *testing.T, ctx context.Context, dir string, env *xos.Env) { + writeFile(t, dir, "foo.d2", `a ---> b`) + writeFile(t, dir, "bar.d2", `x ---> y`) + err := runTestMainPersist(t, ctx, dir, env, "fmt", "foo.d2", "bar.d2") + assert.Success(t, err) + gotFoo := readFile(t, dir, "foo.d2") + gotBar := readFile(t, dir, "bar.d2") + assert.Equal(t, "a -> b\n",
string(gotFoo)) + assert.Equal(t, "x -> y\n", string(gotBar)) + }, + }, } ctx := context.Background() From a5b867ba5beba35aec9b65cd11a1c9850af1acd3 Mon Sep 17 00:00:00 2001 From: Alexander Wang Date: Wed, 2 Aug 2023 19:44:36 -0700 Subject: [PATCH 7/8] fix race --- d2renderers/d2fonts/d2fonts.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/d2renderers/d2fonts/d2fonts.go b/d2renderers/d2fonts/d2fonts.go index 1a552dc32..51704db23 100644 --- a/d2renderers/d2fonts/d2fonts.go +++ b/d2renderers/d2fonts/d2fonts.go @@ -9,6 +9,7 @@ import ( "encoding/base64" "fmt" "strings" + "sync" "oss.terrastruct.com/d2/lib/font" fontlib "oss.terrastruct.com/d2/lib/font" @@ -41,6 +42,8 @@ func (f Font) GetEncodedSubset(corpus string) string { } } + FontFamiliesMu.Lock() + defer FontFamiliesMu.Unlock() fontBuf := make([]byte, len(FontFaces[f])) copy(fontBuf, FontFaces[f]) fontBuf = font.UTF8CutFont(fontBuf, uniqueChars) @@ -96,6 +99,8 @@ var FontFamilies = []FontFamily{ HandDrawn, } +var FontFamiliesMu sync.Mutex + //go:embed encoded/SourceSansPro-Regular.txt var sourceSansProRegularBase64 string @@ -309,6 +314,8 @@ func AddFontStyle(font Font, style FontStyle, ttf []byte) error { } func AddFontFamily(name string, regularTTF, italicTTF, boldTTF, semiboldTTF []byte) (*FontFamily, error) { + FontFamiliesMu.Lock() + defer FontFamiliesMu.Unlock() customFontFamily := FontFamily(name) regularFont := Font{ From 76cc72326bcd80f643ff0a9aaf06756a1a46b131 Mon Sep 17 00:00:00 2001 From: Maxime Brunet Date: Wed, 2 Aug 2023 18:58:17 -0700 Subject: [PATCH 8/8] feat: add d2-fmt pre-commit hook --- .pre-commit-hooks.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .pre-commit-hooks.yaml diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml new file mode 100644 index 000000000..1a626eae2 --- /dev/null +++ b/.pre-commit-hooks.yaml @@ -0,0 +1,6 @@ +- id: d2-fmt + name: d2 fmt + description: Format d2 files + entry: d2 fmt + language: golang + files: \.d2$