130 lines
3.7 KiB
Go
130 lines
3.7 KiB
Go
package rulesfn
|
|
|
|
import (
|
|
"fmt"
|
|
"net"
|
|
"net/url"
|
|
"strings"
|
|
|
|
smithyhttp "github.com/aws/smithy-go/transport/http"
|
|
)
|
|
|
|
// IsValidHostLabel returns if the input is a single valid [RFC 1123] host
|
|
// label. If allowSubDomains is true, will allow validation to include nested
|
|
// host labels. Returns false if the input is not a valid host label. If errors
|
|
// occur they will be added to the provided [ErrorCollector].
|
|
//
|
|
// [RFC 1123]: https://www.ietf.org/rfc/rfc1123.txt
|
|
func IsValidHostLabel(input string, allowSubDomains bool) bool {
|
|
var labels []string
|
|
if allowSubDomains {
|
|
labels = strings.Split(input, ".")
|
|
} else {
|
|
labels = []string{input}
|
|
}
|
|
|
|
for _, label := range labels {
|
|
if !smithyhttp.ValidHostLabel(label) {
|
|
return false
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// ParseURL returns a [URL] if the provided string could be parsed. Returns nil
|
|
// if the string could not be parsed. Any parsing error will be added to the
|
|
// [ErrorCollector].
|
|
//
|
|
// If the input URL string contains an IP6 address with a zone index. The
|
|
// returned [builtin.URL.Authority] value will contain the percent escaped (%)
|
|
// zone index separator.
|
|
func ParseURL(input string) *URL {
|
|
u, err := url.Parse(input)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
if u.RawQuery != "" {
|
|
return nil
|
|
}
|
|
|
|
if u.Scheme != "http" && u.Scheme != "https" {
|
|
return nil
|
|
}
|
|
|
|
normalizedPath := u.Path
|
|
if !strings.HasPrefix(normalizedPath, "/") {
|
|
normalizedPath = "/" + normalizedPath
|
|
}
|
|
if !strings.HasSuffix(normalizedPath, "/") {
|
|
normalizedPath = normalizedPath + "/"
|
|
}
|
|
|
|
// IP6 hosts may have zone indexes that need to be escaped to be valid in a
|
|
// URI. The Go URL parser will unescape the `%25` into `%`. This needs to
|
|
// be reverted since the returned URL will be used in string builders.
|
|
authority := strings.ReplaceAll(u.Host, "%", "%25")
|
|
|
|
return &URL{
|
|
Scheme: u.Scheme,
|
|
Authority: authority,
|
|
Path: u.Path,
|
|
NormalizedPath: normalizedPath,
|
|
IsIp: net.ParseIP(hostnameWithoutZone(u)) != nil,
|
|
}
|
|
}
|
|
|
|
// URL holds the individual parts of a parsed URL as produced by [ParseURL].
type URL struct {
	// Scheme is the URI scheme.
	// See https://www.rfc-editor.org/rfc/rfc3986#section-3.1
	Scheme string

	// Authority is the URI authority component.
	// See https://www.rfc-editor.org/rfc/rfc3986#section-3.2
	Authority string

	// Path is the URI path exactly as parsed.
	// See https://www.rfc-editor.org/rfc/rfc3986#section-3.3
	Path string

	// NormalizedPath is Path adjusted to start and end with "/".
	// See https://www.rfc-editor.org/rfc/rfc3986#section-6.2.3
	NormalizedPath string

	// IsIp is true when the host of the URL is an IP address.
	IsIp bool
}
|
|
|
|
// URIEncode returns a percent-encoded [RFC3986 section 2.1] version of the
// input string.
//
// Bytes accepted by validPercentEncodedChar (ASCII letters, digits, '-', '_',
// '.', '~') are copied through unchanged. Every other byte is written as "%"
// followed by exactly two uppercase hexadecimal digits. The input is handled
// byte by byte, so a multi-byte UTF-8 sequence produces one escape per byte.
//
// [RFC3986 section 2.1]: https://www.rfc-editor.org/rfc/rfc3986#section-2.1
func URIEncode(input string) string {
	var output strings.Builder
	// Each input byte yields at least one output byte.
	output.Grow(len(input))

	for _, c := range []byte(input) {
		if validPercentEncodedChar(c) {
			output.WriteByte(c)
			continue
		}

		// %02X zero-pads to two digits. A bare %X would emit only one digit
		// for bytes below 0x10 (for example "%A" instead of "%0A" for '\n'),
		// which is not a valid percent-encoded triplet.
		fmt.Fprintf(&output, "%%%02X", c)
	}

	return output.String()
}

// validPercentEncodedChar reports whether c can be emitted without escaping:
// an ASCII letter, an ASCII digit, or one of '-', '_', '.', '~'.
func validPercentEncodedChar(c byte) bool {
	return (c >= 'a' && c <= 'z') ||
		(c >= 'A' && c <= 'Z') ||
		(c >= '0' && c <= '9') ||
		c == '-' || c == '_' || c == '.' || c == '~'
}
|
|
|
|
// hostnameWithoutZone returns u.Hostname() with everything from the last "%"
// onward removed. This drops an IPv6 zone ID (if present) so that net.ParseIP
// can still recognize IPv6 addresses that were written with a zone.
//
// FUTURE(10/2023): netip.ParseAddr handles zones natively, but that package
// cannot be used yet because of our minimum Go version (1.15; netip starts in
// 1.18). Once we align with the Go runtime deprecation policy in 10/2023 this
// helper can be removed.
func hostnameWithoutZone(u *url.URL) string {
	host := u.Hostname()

	// Roughly mirrors the unexported splitHostZone in net/, except the zone
	// itself is discarded since nothing here needs it.
	zoneAt := strings.LastIndexByte(host, '%')
	if zoneAt < 0 {
		return host
	}
	return host[:zoneAt]
}
|