d2/lib/imgbundler/imgbundler.go

165 lines
3.5 KiB
Go
Raw Normal View History

2022-11-26 23:26:14 +00:00
package imgbundler
import (
"bytes"
2022-11-26 23:26:14 +00:00
"context"
"encoding/base64"
"fmt"
"io/ioutil"
"net/http"
2022-11-28 21:25:40 +00:00
"net/url"
2022-11-27 02:14:41 +00:00
"os"
2022-11-26 23:26:14 +00:00
"regexp"
"strings"
"sync"
"time"
"go.uber.org/multierr"
"oss.terrastruct.com/xdefer"
2022-11-29 21:19:03 +00:00
"oss.terrastruct.com/d2/lib/xmain"
2022-11-26 23:26:14 +00:00
)
2022-11-29 21:30:22 +00:00
// max_img_size is the upper bound, in bytes, on a remote image body read by
// fetch: 32 MiB (32 * 1024 * 1024 = 33,554,432).
var max_img_size int64 = 32 << 20
2022-11-28 21:25:40 +00:00
// imageRe matches the start of an <image> tag whose href attribute follows the
// tag name directly and is double-quoted. Capture group 1 is the href value.
var imageRe = regexp.MustCompile(`<image href="([^"]+)"`)
2022-11-26 23:26:14 +00:00
// resp is what a worker reports back for a single matched <image> tag.
type resp struct {
	// srctxt is the exact text matched in the SVG that should be replaced;
	// data is the data URI to put in its place (empty when err is set).
	srctxt, data string
	// err is the failure from loading this image, if any.
	err error
}
2022-11-29 21:19:03 +00:00
func InlineLocal(ms *xmain.State, in []byte) ([]byte, error) {
return inline(ms, in, false)
2022-11-27 02:14:41 +00:00
}
2022-11-29 21:19:03 +00:00
func InlineRemote(ms *xmain.State, in []byte) ([]byte, error) {
return inline(ms, in, true)
2022-11-27 02:14:41 +00:00
}
2022-11-29 21:19:03 +00:00
func inline(ms *xmain.State, svg []byte, isRemote bool) (_ []byte, err error) {
defer xdefer.Errorf(&err, "failed to bundle images")
2022-11-29 19:01:51 +00:00
imgs := imageRe.FindAllSubmatch(svg, -1)
2022-11-28 21:25:40 +00:00
var filtered [][][]byte
2022-11-28 21:25:40 +00:00
for _, img := range imgs {
2022-11-29 19:01:51 +00:00
u, err := url.Parse(string(img[1]))
2022-11-28 21:25:40 +00:00
isRemoteImg := err == nil && strings.HasPrefix(u.Scheme, "http")
if isRemoteImg == isRemote {
filtered = append(filtered, img)
2022-11-28 21:25:40 +00:00
}
}
2022-11-26 23:26:14 +00:00
var wg sync.WaitGroup
respChan := make(chan resp)
// Limits the number of workers to 16.
sema := make(chan struct{}, 16)
2022-11-26 23:26:14 +00:00
ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5)
2022-11-26 23:26:14 +00:00
defer cancel()
wg.Add(len(filtered))
// Start workers as the sema allows.
2022-11-26 23:26:14 +00:00
go func() {
for _, img := range filtered {
sema <- struct{}{}
go func(src, href string) {
defer func() {
wg.Done()
<-sema
}()
var data string
var err error
if isRemote {
data, err = fetch(ctx, href)
2022-11-26 23:26:14 +00:00
} else {
data, err = read(href)
2022-11-26 23:26:14 +00:00
}
select {
case <-ctx.Done():
case respChan <- resp{
srctxt: src,
data: data,
err: err,
}:
}
}(string(img[0]), string(img[1]))
2022-11-26 23:26:14 +00:00
}
}()
go func() {
wg.Wait()
close(respChan)
}()
2022-11-26 23:26:14 +00:00
for {
select {
case <-ctx.Done():
2022-11-29 21:19:03 +00:00
ms.Log.Debug.Printf("there")
2022-11-29 20:54:30 +00:00
return nil, fmt.Errorf("failed waiting for imgbundler workers: %w", ctx.Err())
2022-11-29 21:19:03 +00:00
case <-time.After(time.Second * 5):
ms.Log.Info.Printf("fetching images...")
case resp, ok := <-respChan:
if !ok {
2022-11-29 21:19:03 +00:00
return svg, err
}
if resp.err != nil {
err = multierr.Combine(err, resp.err)
continue
}
svg = bytes.Replace(svg, []byte(resp.srctxt), []byte(fmt.Sprintf(`<image href="%s"`, resp.data)), 1)
}
}
2022-11-26 23:26:14 +00:00
}
// transport is the RoundTripper fetch uses for every HTTP request.
// NOTE(review): presumably a package-level variable so tests can substitute
// it; confirm against the test file.
var transport = http.DefaultTransport
2022-11-29 18:20:32 +00:00
func fetch(ctx context.Context, href string) (string, error) {
2022-11-29 20:54:30 +00:00
ctx, cancel := context.WithTimeout(ctx, time.Minute)
defer cancel()
2022-11-26 23:26:14 +00:00
req, err := http.NewRequestWithContext(ctx, "GET", href, nil)
if err != nil {
2022-11-29 18:20:32 +00:00
return "", err
2022-11-26 23:26:14 +00:00
}
client := &http.Client{Transport: transport}
imgResp, err := client.Do(req)
if err != nil {
2022-11-29 18:20:32 +00:00
return "", err
2022-11-26 23:26:14 +00:00
}
defer imgResp.Body.Close()
2022-11-29 21:19:03 +00:00
if imgResp.StatusCode != 200 {
return "", fmt.Errorf("img %s returned status code %d", href, imgResp.StatusCode)
}
2022-11-29 21:30:22 +00:00
r := http.MaxBytesReader(nil, imgResp.Body, max_img_size)
data, err := ioutil.ReadAll(r)
2022-11-26 23:26:14 +00:00
if err != nil {
2022-11-29 18:20:32 +00:00
return "", err
2022-11-26 23:26:14 +00:00
}
mimeType := http.DetectContentType(data)
mimeType = strings.Replace(mimeType, "text/xml", "image/svg+xml", 1)
2022-11-29 19:01:51 +00:00
enc := base64.StdEncoding.EncodeToString(data)
2022-11-26 23:26:14 +00:00
2022-11-29 18:20:32 +00:00
return fmt.Sprintf("data:%s;base64,%s", mimeType, enc), nil
2022-11-26 23:26:14 +00:00
}
2022-11-27 02:14:41 +00:00
// read loads the file at path href and returns it as a data URI of the form
// "data:<mime>;base64,<payload>". The MIME type is sniffed from the file
// contents, with a detected "text/xml" reported as "image/svg+xml". The error
// from reading the file is returned unchanged.
func read(href string) (string, error) {
	raw, err := os.ReadFile(href)
	if err != nil {
		return "", err
	}

	contentType := strings.Replace(http.DetectContentType(raw), "text/xml", "image/svg+xml", 1)
	payload := base64.StdEncoding.EncodeToString(raw)
	return fmt.Sprintf("data:%s;base64,%s", contentType, payload), nil
}