pin tools like staticcheck and misspell in tools.go, update Makefile

Shawn Smith committed on 2021-04-24 14:30:55 +09:00
commit 5e9737e8b7, parent 5a9b17f852
676 changed files with 124863 additions and 6950 deletions

Makefile

@@ -3,18 +3,28 @@ all: lint build test
 build:
 	go build ./...
 
 install:
 	./scripts/make-install.sh
 
-lint:
-	gometalinter --exclude=vendor --exclude=repos --disable-all --enable=golint --enable=vet --enable=gofmt ./...
-	find . -name '*.go' | xargs gofmt -w -s
+lint: fmt vet misspell
+
+fmt:
+	./scripts/gofmt.sh
+
+vet:
+	go vet ./check ./cmd/... ./download ./handlers ./tools/...
+	go vet ./main.go
+
+staticcheck:
+	@[ -x "$(shell which staticcheck)" ] || go install honnef.co/go/tools/cmd/staticcheck
+	staticcheck ./...
 
 test:
 	go test -cover ./check ./handlers
 
 start:
 	go run main.go
 
 misspell:
-	find . -name '*.go' -not -path './vendor/*' -not -path './_repos/*' | xargs misspell -error
+	@[ -x "$(shell which misspell)" ] || go install ./vendor/github.com/client9/misspell/cmd/misspell
+	find . -name '*.go' -not -path './vendor/*' -not -path './_repos/*' -not -path './download/test_downloads/*' | xargs misspell -error

go.mod

@@ -3,22 +3,25 @@ module github.com/gojp/goreportcard
 go 1.14
 
 require (
-	github.com/DataDog/zstd v1.4.5 // indirect
+	github.com/DataDog/zstd v1.4.8 // indirect
 	github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a // indirect
+	github.com/client9/misspell v0.3.4
 	github.com/dgraph-io/badger/v2 v2.2007.2
+	github.com/dgraph-io/ristretto v0.0.3 // indirect
 	github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13 // indirect
 	github.com/dustin/go-humanize v1.0.0
-	github.com/golang/protobuf v1.4.2 // indirect
-	github.com/golang/snappy v0.0.2 // indirect
+	github.com/golang/protobuf v1.5.2 // indirect
+	github.com/golang/snappy v0.0.3 // indirect
 	github.com/kr/pretty v0.2.1 // indirect
 	github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/prometheus/client_golang v0.9.0-pre1.0.20171201122222-661e31bf844d
 	github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4 // indirect
 	github.com/prometheus/common v0.0.0-20171117163051-2e54d0b93cba // indirect
 	github.com/prometheus/procfs v0.0.0-20171221151313-8f918ac9ab4b // indirect
-	golang.org/x/net v0.0.0-20200927032502-5d4f70055728 // indirect
-	golang.org/x/sys v0.0.0-20200926100807-9d91bd62050c // indirect
-	golang.org/x/tools v0.0.0-20190628034336-212fb13d595e
-	google.golang.org/protobuf v1.25.0 // indirect
+	golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5
+	golang.org/x/net v0.0.0-20210331212208-0fccb6fa2b5c // indirect
+	golang.org/x/sys v0.0.0-20210402192133-700132347e07 // indirect
+	golang.org/x/tools v0.1.0
+	honnef.co/go/tools v0.1.3
 )

go.sum

@@ -1,17 +1,16 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/DataDog/zstd v1.4.1 h1:3oxKN3wbHibqx897utPC2LTQU4J+IHWWJO+glkAkpFM=
github.com/DataDog/zstd v1.4.1/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo=
github.com/DataDog/zstd v1.4.5 h1:EndNeuB0l9syBZhut0wns3gV1hL8zX8LIu6ZiVHWLIQ=
github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo=
github.com/DataDog/zstd v1.4.8 h1:Rpmta4xZ/MgZnriKNd24iZMhGpP5dvUcs/uqfBapKZY=
github.com/DataDog/zstd v1.4.8/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw=
github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a h1:BtpsbiV638WQZwhA98cEZw2BsbnQJrbd0BI7tsy0W1c=
github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/client9/misspell v0.3.4 h1:ta993UF76GwbvJcIo3Y68y/M3WxlpEHPWIGDkJYwzJI=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
@@ -22,44 +21,28 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgraph-io/badger/v2 v2.2007.2 h1:EjjK0KqwaFMlPin1ajhP943VPENHJdEz1KLIegjaI3k=
github.com/dgraph-io/badger/v2 v2.2007.2/go.mod h1:26P/7fbL4kUZVEVKLAKXkBXKOydDmM2p1e+NhhnBCAE=
github.com/dgraph-io/ristretto v0.0.3-0.20200630154024-f66de99634de h1:t0UHb5vdojIDUqktM6+xJAfScFBsVpXZmqC9dsgJmeA=
github.com/dgraph-io/ristretto v0.0.3-0.20200630154024-f66de99634de/go.mod h1:KPxhHT9ZxKefz+PCeOGsrHpl1qZ7i70dGTu2u+Ahh6E=
github.com/dgraph-io/ristretto v0.0.3 h1:jh22xisGBjrEVnRZ1DVTpBVQm0Xndu8sMl0CWDzSIBI=
github.com/dgraph-io/ristretto v0.0.3/go.mod h1:KPxhHT9ZxKefz+PCeOGsrHpl1qZ7i70dGTu2u+Ahh6E=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13 h1:fAjc9m62+UWV/WAFKLNi6ZS0675eEUC9y3AlwSbQu1Y=
github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw=
github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA=
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
@@ -101,66 +84,54 @@ github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJy
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5 h1:2M3HP5CCK1Si9FQhwnzYhXdG6DXeebvUHFpre8QvbyI=
golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200927032502-5d4f70055728 h1:5wtQIAulKU5AbLQOkjxl32UufnIOqgBX72pS0AV14H0=
golang.org/x/net v0.0.0-20200927032502-5d4f70055728/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210331212208-0fccb6fa2b5c h1:KHUzaHIpjWVlVVNh65G3hhuj3KB1HnjY6Cq5cTvRQT8=
golang.org/x/net v0.0.0-20210331212208-0fccb6fa2b5c/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb h1:fgwFCsaw9buMuxNd6+DQfAuSFqbNiQZpcgJQAgJsK6k=
golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200926100807-9d91bd62050c h1:38q6VNPWR010vN82/SB121GujZNIfAUb4YttE2rhGuc=
golang.org/x/sys v0.0.0-20200926100807-9d91bd62050c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210402192133-700132347e07 h1:4k6HsQjxj6hVMsI2Vf0yKlzt5lXxZsMW1q0zaq2k8zY=
golang.org/x/sys v0.0.0-20210402192133-700132347e07/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190628034336-212fb13d595e h1:ZlQjfVdpDxeqxRfmO30CdqWWzTvgRCj0MxaUVfxEG1k=
golang.org/x/tools v0.0.0-20190628034336-212fb13d595e/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.1.0 h1:po9/4sTYwZU9lPhi1tOrb4hCv3qrhiQ77LZfGa2OjwY=
golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM=
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.1.3 h1:qTakTkI6ni6LFD5sBwwsdSO+AQqbSIxOauHTTQKZ/7o=
honnef.co/go/tools v0.1.3/go.mod h1:NgwopIslSNH47DimFoV78dnkksY2EFtX0ajyb3K/las=

scripts/gofmt.sh Executable file

@@ -0,0 +1,11 @@
#!/bin/bash

need_gofmt=$(gofmt -s -l `find . -name '*.go' | grep -v vendor | grep -v _repos`)

if [[ -n ${need_gofmt} ]]; then
	echo "These files fail gofmt -s:"
	echo "${need_gofmt}"
	exit 1
fi

tools/tools.go Normal file

@@ -0,0 +1,9 @@
// +build tools

package tools

import (
	_ "github.com/client9/misspell/cmd/misspell"
	_ "golang.org/x/lint/golint"
	_ "honnef.co/go/tools/cmd/staticcheck"
)
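
This is the conventional tools.go pattern: the "tools" build tag is never set, so the package compiles to nothing, but the blank imports make go mod track each linter as a dependency and go mod vendor pin its source at a known version (hence the large vendor/ additions below). Pinning a further tool is one more blank import; a hedged sketch, assuming one also wanted golangci-lint (not part of this commit):

import (
	_ "github.com/client9/misspell/cmd/misspell"
	_ "github.com/golangci/golangci-lint/cmd/golangci-lint" // hypothetical addition, for illustration only
	_ "golang.org/x/lint/golint"
	_ "honnef.co/go/tools/cmd/staticcheck"
)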

vendor/github.com/BurntSushi/toml/.gitignore generated vendored Normal file

@@ -0,0 +1,5 @@
TAGS
tags
.*.swp
tomlcheck/tomlcheck
toml.test

vendor/github.com/BurntSushi/toml/.travis.yml generated vendored Normal file

@@ -0,0 +1,15 @@
language: go
go:
- 1.1
- 1.2
- 1.3
- 1.4
- 1.5
- 1.6
- tip
install:
- go install ./...
- go get github.com/BurntSushi/toml-test
script:
- export PATH="$PATH:$HOME/gopath/bin"
- make test

vendor/github.com/BurntSushi/toml/COMPATIBLE generated vendored Normal file

@@ -0,0 +1,3 @@
Compatible with TOML version
[v0.4.0](https://github.com/toml-lang/toml/blob/v0.4.0/versions/en/toml-v0.4.0.md)

vendor/github.com/BurntSushi/toml/COPYING generated vendored Normal file

@@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2013 TOML authors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

vendor/github.com/BurntSushi/toml/Makefile generated vendored Normal file

@@ -0,0 +1,19 @@
install:
go install ./...
test: install
go test -v
toml-test toml-test-decoder
toml-test -encoder toml-test-encoder
fmt:
gofmt -w *.go */*.go
colcheck *.go */*.go
tags:
find ./ -name '*.go' -print0 | xargs -0 gotags > TAGS
push:
git push origin master
git push github master

vendor/github.com/BurntSushi/toml/README.md generated vendored Normal file

@@ -0,0 +1,218 @@
## TOML parser and encoder for Go with reflection
TOML stands for Tom's Obvious, Minimal Language. This Go package provides a
reflection interface similar to Go's standard library `json` and `xml`
packages. This package also supports the `encoding.TextUnmarshaler` and
`encoding.TextMarshaler` interfaces so that you can define custom data
representations. (There is an example of this below.)
Spec: https://github.com/toml-lang/toml
Compatible with TOML version
[v0.4.0](https://github.com/toml-lang/toml/blob/master/versions/en/toml-v0.4.0.md)
Documentation: https://godoc.org/github.com/BurntSushi/toml
Installation:
```bash
go get github.com/BurntSushi/toml
```
Try the toml validator:
```bash
go get github.com/BurntSushi/toml/cmd/tomlv
tomlv some-toml-file.toml
```
[![Build Status](https://travis-ci.org/BurntSushi/toml.svg?branch=master)](https://travis-ci.org/BurntSushi/toml) [![GoDoc](https://godoc.org/github.com/BurntSushi/toml?status.svg)](https://godoc.org/github.com/BurntSushi/toml)
### Testing
This package passes all tests in
[toml-test](https://github.com/BurntSushi/toml-test) for both the decoder
and the encoder.
### Examples
This package works similarly to how the Go standard library handles `XML`
and `JSON`. Namely, data is loaded into Go values via reflection.
For the simplest example, consider some TOML file as just a list of keys
and values:
```toml
Age = 25
Cats = [ "Cauchy", "Plato" ]
Pi = 3.14
Perfection = [ 6, 28, 496, 8128 ]
DOB = 1987-07-05T05:45:00Z
```
Which could be defined in Go as:
```go
type Config struct {
Age int
Cats []string
Pi float64
Perfection []int
DOB time.Time // requires `import time`
}
```
And then decoded with:
```go
var conf Config
if _, err := toml.Decode(tomlData, &conf); err != nil {
// handle error
}
```
You can also use struct tags if your struct field name doesn't map to a TOML
key value directly:
```toml
some_key_NAME = "wat"
```
```go
type TOML struct {
ObscureKey string `toml:"some_key_NAME"`
}
```
### Using the `encoding.TextUnmarshaler` interface
Here's an example that automatically parses duration strings into
`time.Duration` values:
```toml
[[song]]
name = "Thunder Road"
duration = "4m49s"
[[song]]
name = "Stairway to Heaven"
duration = "8m03s"
```
Which can be decoded with:
```go
type song struct {
Name string
Duration duration
}
type songs struct {
Song []song
}
var favorites songs
if _, err := toml.Decode(blob, &favorites); err != nil {
log.Fatal(err)
}
for _, s := range favorites.Song {
fmt.Printf("%s (%s)\n", s.Name, s.Duration)
}
```
And you'll also need a `duration` type that satisfies the
`encoding.TextUnmarshaler` interface:
```go
type duration struct {
time.Duration
}
func (d *duration) UnmarshalText(text []byte) error {
var err error
d.Duration, err = time.ParseDuration(string(text))
return err
}
```
### More complex usage
Here's an example of how to load the example from the official spec page:
```toml
# This is a TOML document. Boom.
title = "TOML Example"
[owner]
name = "Tom Preston-Werner"
organization = "GitHub"
bio = "GitHub Cofounder & CEO\nLikes tater tots and beer."
dob = 1979-05-27T07:32:00Z # First class dates? Why not?
[database]
server = "192.168.1.1"
ports = [ 8001, 8001, 8002 ]
connection_max = 5000
enabled = true
[servers]
# You can indent as you please. Tabs or spaces. TOML don't care.
[servers.alpha]
ip = "10.0.0.1"
dc = "eqdc10"
[servers.beta]
ip = "10.0.0.2"
dc = "eqdc10"
[clients]
data = [ ["gamma", "delta"], [1, 2] ] # just an update to make sure parsers support it
# Line breaks are OK when inside arrays
hosts = [
"alpha",
"omega"
]
```
And the corresponding Go types are:
```go
type tomlConfig struct {
Title string
Owner ownerInfo
DB database `toml:"database"`
Servers map[string]server
Clients clients
}
type ownerInfo struct {
Name string
Org string `toml:"organization"`
Bio string
DOB time.Time
}
type database struct {
Server string
Ports []int
ConnMax int `toml:"connection_max"`
Enabled bool
}
type server struct {
IP string
DC string
}
type clients struct {
Data [][]interface{}
Hosts []string
}
```
Note that a case insensitive match will be tried if an exact match can't be
found.
A working example of the above can be found in `_examples/example.{go,toml}`.

vendor/github.com/BurntSushi/toml/decode.go generated vendored Normal file

@@ -0,0 +1,509 @@
package toml
import (
"fmt"
"io"
"io/ioutil"
"math"
"reflect"
"strings"
"time"
)
func e(format string, args ...interface{}) error {
return fmt.Errorf("toml: "+format, args...)
}
// Unmarshaler is the interface implemented by objects that can unmarshal a
// TOML description of themselves.
type Unmarshaler interface {
UnmarshalTOML(interface{}) error
}
// Unmarshal decodes the contents of `p` in TOML format into a pointer `v`.
func Unmarshal(p []byte, v interface{}) error {
_, err := Decode(string(p), v)
return err
}
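// A minimal usage sketch for Unmarshal; the struct and input below are
// illustrative, not part of this package:
//
//	var conf struct {
//		Title string
//		Port  int
//	}
//	blob := []byte("Title = \"demo\"\nPort = 8080")
//	if err := toml.Unmarshal(blob, &conf); err != nil {
//		log.Fatal(err)
//	}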
// Primitive is a TOML value that hasn't been decoded into a Go value.
// When using the various `Decode*` functions, the type `Primitive` may
// be given to any value, and its decoding will be delayed.
//
// A `Primitive` value can be decoded using the `PrimitiveDecode` function.
//
// The underlying representation of a `Primitive` value is subject to change.
// Do not rely on it.
//
// N.B. Primitive values are still parsed, so using them will only avoid
// the overhead of reflection. They can be useful when you don't know the
// exact type of TOML data until run time.
type Primitive struct {
undecoded interface{}
context Key
}
// DEPRECATED!
//
// Use MetaData.PrimitiveDecode instead.
func PrimitiveDecode(primValue Primitive, v interface{}) error {
md := MetaData{decoded: make(map[string]bool)}
return md.unify(primValue.undecoded, rvalue(v))
}
// PrimitiveDecode is just like the other `Decode*` functions, except it
// decodes a TOML value that has already been parsed. Valid primitive values
// can *only* be obtained from values filled by the decoder functions,
// including this method. (i.e., `v` may contain more `Primitive`
// values.)
//
// Meta data for primitive values is included in the meta data returned by
// the `Decode*` functions with one exception: keys returned by the Undecoded
// method will only reflect keys that were decoded. Namely, any keys hidden
// behind a Primitive will be considered undecoded. Executing this method will
// update the undecoded keys in the meta data. (See the example.)
func (md *MetaData) PrimitiveDecode(primValue Primitive, v interface{}) error {
md.context = primValue.context
defer func() { md.context = nil }()
return md.unify(primValue.undecoded, rvalue(v))
}
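// A sketch of delayed decoding with Primitive; the field names and the
// "kind" switch are illustrative:
//
//	var raw struct {
//		Kind string
//		Data toml.Primitive
//	}
//	md, err := toml.Decode(blob, &raw)
//	// handle err, then decode Data once Kind is known:
//	if raw.Kind == "server" {
//		var srv struct{ Host string }
//		err = md.PrimitiveDecode(raw.Data, &srv)
//	}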
// Decode will decode the contents of `data` in TOML format into a pointer
// `v`.
//
// TOML hashes correspond to Go structs or maps. (Dealer's choice. They can be
// used interchangeably.)
//
// TOML arrays of tables correspond to either a slice of structs or a slice
// of maps.
//
// TOML datetimes correspond to Go `time.Time` values.
//
// All other TOML types (float, string, int, bool and array) correspond
// to the obvious Go types.
//
// An exception to the above rules is if a type implements the
// encoding.TextUnmarshaler interface. In this case, any primitive TOML value
// (floats, strings, integers, booleans and datetimes) will be converted to
// a byte string and given to the value's UnmarshalText method. See the
// Unmarshaler example for a demonstration with time duration strings.
//
// Key mapping
//
// TOML keys can map to either keys in a Go map or field names in a Go
// struct. The special `toml` struct tag may be used to map TOML keys to
// struct fields that don't match the key name exactly. (See the example.)
// A case insensitive match to struct names will be tried if an exact match
// can't be found.
//
// The mapping between TOML values and Go values is loose. That is, there
// may exist TOML values that cannot be placed into your representation, and
// there may be parts of your representation that do not correspond to
// TOML values. This loose mapping can be made stricter by using the IsDefined
// and/or Undecoded methods on the MetaData returned.
//
// This decoder will not handle cyclic types. If a cyclic type is passed,
// `Decode` will not terminate.
func Decode(data string, v interface{}) (MetaData, error) {
rv := reflect.ValueOf(v)
if rv.Kind() != reflect.Ptr {
return MetaData{}, e("Decode of non-pointer %s", reflect.TypeOf(v))
}
if rv.IsNil() {
return MetaData{}, e("Decode of nil %s", reflect.TypeOf(v))
}
p, err := parse(data)
if err != nil {
return MetaData{}, err
}
md := MetaData{
p.mapping, p.types, p.ordered,
make(map[string]bool, len(p.ordered)), nil,
}
return md, md.unify(p.mapping, indirect(rv))
}
// DecodeFile is just like Decode, except it will automatically read the
// contents of the file at `fpath` and decode it for you.
func DecodeFile(fpath string, v interface{}) (MetaData, error) {
bs, err := ioutil.ReadFile(fpath)
if err != nil {
return MetaData{}, err
}
return Decode(string(bs), v)
}
// DecodeReader is just like Decode, except it will consume all bytes
// from the reader and decode it for you.
func DecodeReader(r io.Reader, v interface{}) (MetaData, error) {
bs, err := ioutil.ReadAll(r)
if err != nil {
return MetaData{}, err
}
return Decode(string(bs), v)
}
// unify performs a sort of type unification based on the structure of `rv`,
// which is the client representation.
//
// Any type mismatch produces an error. Finding a type that we don't know
// how to handle produces an unsupported type error.
func (md *MetaData) unify(data interface{}, rv reflect.Value) error {
// Special case. Look for a `Primitive` value.
if rv.Type() == reflect.TypeOf((*Primitive)(nil)).Elem() {
// Save the undecoded data and the key context into the primitive
// value.
context := make(Key, len(md.context))
copy(context, md.context)
rv.Set(reflect.ValueOf(Primitive{
undecoded: data,
context: context,
}))
return nil
}
// Special case. Unmarshaler Interface support.
if rv.CanAddr() {
if v, ok := rv.Addr().Interface().(Unmarshaler); ok {
return v.UnmarshalTOML(data)
}
}
// Special case. Handle time.Time values specifically.
// TODO: Remove this code when we decide to drop support for Go 1.1.
// This isn't necessary in Go 1.2 because time.Time satisfies the encoding
// interfaces.
if rv.Type().AssignableTo(rvalue(time.Time{}).Type()) {
return md.unifyDatetime(data, rv)
}
// Special case. Look for a value satisfying the TextUnmarshaler interface.
if v, ok := rv.Interface().(TextUnmarshaler); ok {
return md.unifyText(data, v)
}
// BUG(burntsushi)
// The behavior here is incorrect whenever a Go type satisfies the
// encoding.TextUnmarshaler interface but also corresponds to a TOML
// hash or array. In particular, the unmarshaler should only be applied
// to primitive TOML values. But at this point, it will be applied to
// all kinds of values and produce an incorrect error whenever those values
// are hashes or arrays (including arrays of tables).
k := rv.Kind()
// laziness
if k >= reflect.Int && k <= reflect.Uint64 {
return md.unifyInt(data, rv)
}
switch k {
case reflect.Ptr:
elem := reflect.New(rv.Type().Elem())
err := md.unify(data, reflect.Indirect(elem))
if err != nil {
return err
}
rv.Set(elem)
return nil
case reflect.Struct:
return md.unifyStruct(data, rv)
case reflect.Map:
return md.unifyMap(data, rv)
case reflect.Array:
return md.unifyArray(data, rv)
case reflect.Slice:
return md.unifySlice(data, rv)
case reflect.String:
return md.unifyString(data, rv)
case reflect.Bool:
return md.unifyBool(data, rv)
case reflect.Interface:
// we only support empty interfaces.
if rv.NumMethod() > 0 {
return e("unsupported type %s", rv.Type())
}
return md.unifyAnything(data, rv)
case reflect.Float32:
fallthrough
case reflect.Float64:
return md.unifyFloat64(data, rv)
}
return e("unsupported type %s", rv.Kind())
}
func (md *MetaData) unifyStruct(mapping interface{}, rv reflect.Value) error {
tmap, ok := mapping.(map[string]interface{})
if !ok {
if mapping == nil {
return nil
}
return e("type mismatch for %s: expected table but found %T",
rv.Type().String(), mapping)
}
for key, datum := range tmap {
var f *field
fields := cachedTypeFields(rv.Type())
for i := range fields {
ff := &fields[i]
if ff.name == key {
f = ff
break
}
if f == nil && strings.EqualFold(ff.name, key) {
f = ff
}
}
if f != nil {
subv := rv
for _, i := range f.index {
subv = indirect(subv.Field(i))
}
if isUnifiable(subv) {
md.decoded[md.context.add(key).String()] = true
md.context = append(md.context, key)
if err := md.unify(datum, subv); err != nil {
return err
}
md.context = md.context[0 : len(md.context)-1]
} else if f.name != "" {
// Bad user! No soup for you!
return e("cannot write unexported field %s.%s",
rv.Type().String(), f.name)
}
}
}
return nil
}
func (md *MetaData) unifyMap(mapping interface{}, rv reflect.Value) error {
tmap, ok := mapping.(map[string]interface{})
if !ok {
if tmap == nil {
return nil
}
return badtype("map", mapping)
}
if rv.IsNil() {
rv.Set(reflect.MakeMap(rv.Type()))
}
for k, v := range tmap {
md.decoded[md.context.add(k).String()] = true
md.context = append(md.context, k)
rvkey := indirect(reflect.New(rv.Type().Key()))
rvval := reflect.Indirect(reflect.New(rv.Type().Elem()))
if err := md.unify(v, rvval); err != nil {
return err
}
md.context = md.context[0 : len(md.context)-1]
rvkey.SetString(k)
rv.SetMapIndex(rvkey, rvval)
}
return nil
}
func (md *MetaData) unifyArray(data interface{}, rv reflect.Value) error {
datav := reflect.ValueOf(data)
if datav.Kind() != reflect.Slice {
if !datav.IsValid() {
return nil
}
return badtype("slice", data)
}
sliceLen := datav.Len()
if sliceLen != rv.Len() {
return e("expected array length %d; got TOML array of length %d",
rv.Len(), sliceLen)
}
return md.unifySliceArray(datav, rv)
}
func (md *MetaData) unifySlice(data interface{}, rv reflect.Value) error {
datav := reflect.ValueOf(data)
if datav.Kind() != reflect.Slice {
if !datav.IsValid() {
return nil
}
return badtype("slice", data)
}
n := datav.Len()
if rv.IsNil() || rv.Cap() < n {
rv.Set(reflect.MakeSlice(rv.Type(), n, n))
}
rv.SetLen(n)
return md.unifySliceArray(datav, rv)
}
func (md *MetaData) unifySliceArray(data, rv reflect.Value) error {
sliceLen := data.Len()
for i := 0; i < sliceLen; i++ {
v := data.Index(i).Interface()
sliceval := indirect(rv.Index(i))
if err := md.unify(v, sliceval); err != nil {
return err
}
}
return nil
}
func (md *MetaData) unifyDatetime(data interface{}, rv reflect.Value) error {
if _, ok := data.(time.Time); ok {
rv.Set(reflect.ValueOf(data))
return nil
}
return badtype("time.Time", data)
}
func (md *MetaData) unifyString(data interface{}, rv reflect.Value) error {
if s, ok := data.(string); ok {
rv.SetString(s)
return nil
}
return badtype("string", data)
}
func (md *MetaData) unifyFloat64(data interface{}, rv reflect.Value) error {
if num, ok := data.(float64); ok {
switch rv.Kind() {
case reflect.Float32:
fallthrough
case reflect.Float64:
rv.SetFloat(num)
default:
panic("bug")
}
return nil
}
return badtype("float", data)
}
func (md *MetaData) unifyInt(data interface{}, rv reflect.Value) error {
if num, ok := data.(int64); ok {
if rv.Kind() >= reflect.Int && rv.Kind() <= reflect.Int64 {
switch rv.Kind() {
case reflect.Int, reflect.Int64:
// No bounds checking necessary.
case reflect.Int8:
if num < math.MinInt8 || num > math.MaxInt8 {
return e("value %d is out of range for int8", num)
}
case reflect.Int16:
if num < math.MinInt16 || num > math.MaxInt16 {
return e("value %d is out of range for int16", num)
}
case reflect.Int32:
if num < math.MinInt32 || num > math.MaxInt32 {
return e("value %d is out of range for int32", num)
}
}
rv.SetInt(num)
} else if rv.Kind() >= reflect.Uint && rv.Kind() <= reflect.Uint64 {
unum := uint64(num)
switch rv.Kind() {
case reflect.Uint, reflect.Uint64:
// No bounds checking necessary.
case reflect.Uint8:
if num < 0 || unum > math.MaxUint8 {
return e("value %d is out of range for uint8", num)
}
case reflect.Uint16:
if num < 0 || unum > math.MaxUint16 {
return e("value %d is out of range for uint16", num)
}
case reflect.Uint32:
if num < 0 || unum > math.MaxUint32 {
return e("value %d is out of range for uint32", num)
}
}
rv.SetUint(unum)
} else {
panic("unreachable")
}
return nil
}
return badtype("integer", data)
}
func (md *MetaData) unifyBool(data interface{}, rv reflect.Value) error {
if b, ok := data.(bool); ok {
rv.SetBool(b)
return nil
}
return badtype("boolean", data)
}
func (md *MetaData) unifyAnything(data interface{}, rv reflect.Value) error {
rv.Set(reflect.ValueOf(data))
return nil
}
func (md *MetaData) unifyText(data interface{}, v TextUnmarshaler) error {
var s string
switch sdata := data.(type) {
case TextMarshaler:
text, err := sdata.MarshalText()
if err != nil {
return err
}
s = string(text)
case fmt.Stringer:
s = sdata.String()
case string:
s = sdata
case bool:
s = fmt.Sprintf("%v", sdata)
case int64:
s = fmt.Sprintf("%d", sdata)
case float64:
s = fmt.Sprintf("%f", sdata)
default:
return badtype("primitive (string-like)", data)
}
if err := v.UnmarshalText([]byte(s)); err != nil {
return err
}
return nil
}
// rvalue returns a reflect.Value of `v`. All pointers are resolved.
func rvalue(v interface{}) reflect.Value {
return indirect(reflect.ValueOf(v))
}
// indirect returns the value pointed to by a pointer.
// Pointers are followed until the value is not a pointer.
// New values are allocated for each nil pointer.
//
// An exception to this rule is if the value satisfies an interface of
// interest to us (like encoding.TextUnmarshaler).
func indirect(v reflect.Value) reflect.Value {
if v.Kind() != reflect.Ptr {
if v.CanSet() {
pv := v.Addr()
if _, ok := pv.Interface().(TextUnmarshaler); ok {
return pv
}
}
return v
}
if v.IsNil() {
v.Set(reflect.New(v.Type().Elem()))
}
return indirect(reflect.Indirect(v))
}
func isUnifiable(rv reflect.Value) bool {
if rv.CanSet() {
return true
}
if _, ok := rv.Interface().(TextUnmarshaler); ok {
return true
}
return false
}
func badtype(expected string, data interface{}) error {
return e("cannot load TOML value of type %T into a Go %s", data, expected)
}

vendor/github.com/BurntSushi/toml/decode_meta.go generated vendored Normal file

@@ -0,0 +1,121 @@
package toml
import "strings"
// MetaData allows access to meta information about TOML data that may not
// be inferrable via reflection. In particular, whether a key has been defined
// and the TOML type of a key.
type MetaData struct {
mapping map[string]interface{}
types map[string]tomlType
keys []Key
decoded map[string]bool
context Key // Used only during decoding.
}
// IsDefined returns true if the key given exists in the TOML data. The key
// should be specified hierarchically. e.g.,
//
// // access the TOML key 'a.b.c'
// IsDefined("a", "b", "c")
//
// IsDefined will return false if an empty key is given. Keys are case sensitive.
func (md *MetaData) IsDefined(key ...string) bool {
if len(key) == 0 {
return false
}
var hash map[string]interface{}
var ok bool
var hashOrVal interface{} = md.mapping
for _, k := range key {
if hash, ok = hashOrVal.(map[string]interface{}); !ok {
return false
}
if hashOrVal, ok = hash[k]; !ok {
return false
}
}
return true
}
// Type returns a string representation of the type of the key specified.
//
// Type will return the empty string if given an empty key or a key that
// does not exist. Keys are case sensitive.
func (md *MetaData) Type(key ...string) string {
fullkey := strings.Join(key, ".")
if typ, ok := md.types[fullkey]; ok {
return typ.typeString()
}
return ""
}
// Key is the type of any TOML key, including key groups. Use (MetaData).Keys
// to get values of this type.
type Key []string
func (k Key) String() string {
return strings.Join(k, ".")
}
func (k Key) maybeQuotedAll() string {
var ss []string
for i := range k {
ss = append(ss, k.maybeQuoted(i))
}
return strings.Join(ss, ".")
}
func (k Key) maybeQuoted(i int) string {
quote := false
for _, c := range k[i] {
if !isBareKeyChar(c) {
quote = true
break
}
}
if quote {
return "\"" + strings.Replace(k[i], "\"", "\\\"", -1) + "\""
}
return k[i]
}
func (k Key) add(piece string) Key {
newKey := make(Key, len(k)+1)
copy(newKey, k)
newKey[len(k)] = piece
return newKey
}
// Keys returns a slice of every key in the TOML data, including key groups.
// Each key is itself a slice, where the first element is the top of the
// hierarchy and the last is the most specific.
//
// The list will have the same order as the keys appeared in the TOML data.
//
// All keys returned are non-empty.
func (md *MetaData) Keys() []Key {
return md.keys
}
// Undecoded returns all keys that have not been decoded in the order in which
// they appear in the original TOML document.
//
// This includes keys that haven't been decoded because of a Primitive value.
// Once the Primitive value is decoded, the keys will be considered decoded.
//
// Also note that decoding into an empty interface will result in no decoding,
// and so no keys will be considered decoded.
//
// In this sense, the Undecoded keys correspond to keys in the TOML document
// that do not have a concrete type in your representation.
func (md *MetaData) Undecoded() []Key {
undecoded := make([]Key, 0, len(md.keys))
for _, key := range md.keys {
if !md.decoded[key.String()] {
undecoded = append(undecoded, key)
}
}
return undecoded
}
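// A sketch of using Undecoded to warn about unrecognized configuration keys
// (the file name and logger are illustrative):
//
//	md, err := toml.DecodeFile("config.toml", &conf)
//	// handle err ...
//	for _, key := range md.Undecoded() {
//		log.Printf("warning: unknown config key %q", key)
//	}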

vendor/github.com/BurntSushi/toml/doc.go generated vendored Normal file

@@ -0,0 +1,27 @@
/*
Package toml provides facilities for decoding and encoding TOML configuration
files via reflection. There is also support for delaying decoding with
the Primitive type, and querying the set of keys in a TOML document with the
MetaData type.
The specification implemented: https://github.com/toml-lang/toml
The sub-command github.com/BurntSushi/toml/cmd/tomlv can be used to verify
whether a file is a valid TOML document. It can also be used to print the
type of each key in a TOML document.
Testing
There are two important types of tests used for this package. The first is
contained inside '*_test.go' files and uses the standard Go unit testing
framework. These tests are primarily devoted to holistically testing the
decoder and encoder.
The second type of testing is used to verify the implementation's adherence
to the TOML specification. These tests have been factored into their own
project: https://github.com/BurntSushi/toml-test
The reason the tests are in a separate project is so that they can be used by
any implementation of TOML. Namely, it is language agnostic.
*/
package toml

568
vendor/github.com/BurntSushi/toml/encode.go generated vendored Normal file
View File

@@ -0,0 +1,568 @@
package toml
import (
"bufio"
"errors"
"fmt"
"io"
"reflect"
"sort"
"strconv"
"strings"
"time"
)
type tomlEncodeError struct{ error }
var (
errArrayMixedElementTypes = errors.New(
"toml: cannot encode array with mixed element types")
errArrayNilElement = errors.New(
"toml: cannot encode array with nil element")
errNonString = errors.New(
"toml: cannot encode a map with non-string key type")
errAnonNonStruct = errors.New(
"toml: cannot encode an anonymous field that is not a struct")
errArrayNoTable = errors.New(
"toml: TOML array element cannot contain a table")
errNoKey = errors.New(
"toml: top-level values must be Go maps or structs")
errAnything = errors.New("") // used in testing
)
var quotedReplacer = strings.NewReplacer(
"\t", "\\t",
"\n", "\\n",
"\r", "\\r",
"\"", "\\\"",
"\\", "\\\\",
)
// Encoder controls the encoding of Go values to a TOML document to some
// io.Writer.
//
// The indentation level can be controlled with the Indent field.
type Encoder struct {
// A single indentation level. By default it is two spaces.
Indent string
// hasWritten is whether we have written any output to w yet.
hasWritten bool
w *bufio.Writer
}
// NewEncoder returns a TOML encoder that encodes Go values to the io.Writer
// given. By default, a single indentation level is 2 spaces.
func NewEncoder(w io.Writer) *Encoder {
return &Encoder{
w: bufio.NewWriter(w),
Indent: " ",
}
}
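// A minimal encoding sketch, assuming a bytes.Buffer as the destination:
//
//	var buf bytes.Buffer
//	err := toml.NewEncoder(&buf).Encode(map[string]interface{}{
//		"title": "demo",
//		"port":  8080,
//	})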
// Encode writes a TOML representation of the Go value to the underlying
// io.Writer. If the value given cannot be encoded to a valid TOML document,
// then an error is returned.
//
// The mapping between Go values and TOML values should be precisely the same
// as for the Decode* functions. Similarly, the TextMarshaler interface is
// supported by encoding the resulting bytes as strings. (If you want to write
// arbitrary binary data then you will need to use something like base64 since
// TOML does not have any binary types.)
//
// When encoding TOML hashes (i.e., Go maps or structs), keys without any
// sub-hashes are encoded first.
//
// If a Go map is encoded, then its keys are sorted alphabetically for
// deterministic output. More control over this behavior may be provided if
// there is demand for it.
//
// Encoding Go values without a corresponding TOML representation---like map
// types with non-string keys---will cause an error to be returned. Similarly
// for mixed arrays/slices, arrays/slices with nil elements, embedded
// non-struct types and nested slices containing maps or structs.
// (e.g., [][]map[string]string is not allowed but []map[string]string is OK
// and so is []map[string][]string.)
func (enc *Encoder) Encode(v interface{}) error {
rv := eindirect(reflect.ValueOf(v))
if err := enc.safeEncode(Key([]string{}), rv); err != nil {
return err
}
return enc.w.Flush()
}
func (enc *Encoder) safeEncode(key Key, rv reflect.Value) (err error) {
defer func() {
if r := recover(); r != nil {
if terr, ok := r.(tomlEncodeError); ok {
err = terr.error
return
}
panic(r)
}
}()
enc.encode(key, rv)
return nil
}
func (enc *Encoder) encode(key Key, rv reflect.Value) {
// Special case. Time needs to be in ISO8601 format.
// Special case. If we can marshal the type to text, then we use that.
// Basically, this prevents the encoder from handling these types as
// generic structs (or whatever the underlying type of a TextMarshaler is).
switch rv.Interface().(type) {
case time.Time, TextMarshaler:
enc.keyEqElement(key, rv)
return
}
k := rv.Kind()
switch k {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32,
reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32,
reflect.Uint64,
reflect.Float32, reflect.Float64, reflect.String, reflect.Bool:
enc.keyEqElement(key, rv)
case reflect.Array, reflect.Slice:
if typeEqual(tomlArrayHash, tomlTypeOfGo(rv)) {
enc.eArrayOfTables(key, rv)
} else {
enc.keyEqElement(key, rv)
}
case reflect.Interface:
if rv.IsNil() {
return
}
enc.encode(key, rv.Elem())
case reflect.Map:
if rv.IsNil() {
return
}
enc.eTable(key, rv)
case reflect.Ptr:
if rv.IsNil() {
return
}
enc.encode(key, rv.Elem())
case reflect.Struct:
enc.eTable(key, rv)
default:
panic(e("unsupported type for key '%s': %s", key, k))
}
}
// eElement encodes any value that can be an array element (primitives and
// arrays).
func (enc *Encoder) eElement(rv reflect.Value) {
switch v := rv.Interface().(type) {
case time.Time:
// Special case time.Time as a primitive. Has to come before
// TextMarshaler below because time.Time implements
// encoding.TextMarshaler, but we need to always use UTC.
enc.wf(v.UTC().Format("2006-01-02T15:04:05Z"))
return
case TextMarshaler:
// Special case. Use text marshaler if it's available for this value.
if s, err := v.MarshalText(); err != nil {
encPanic(err)
} else {
enc.writeQuoted(string(s))
}
return
}
switch rv.Kind() {
case reflect.Bool:
enc.wf(strconv.FormatBool(rv.Bool()))
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32,
reflect.Int64:
enc.wf(strconv.FormatInt(rv.Int(), 10))
case reflect.Uint, reflect.Uint8, reflect.Uint16,
reflect.Uint32, reflect.Uint64:
enc.wf(strconv.FormatUint(rv.Uint(), 10))
case reflect.Float32:
enc.wf(floatAddDecimal(strconv.FormatFloat(rv.Float(), 'f', -1, 32)))
case reflect.Float64:
enc.wf(floatAddDecimal(strconv.FormatFloat(rv.Float(), 'f', -1, 64)))
case reflect.Array, reflect.Slice:
enc.eArrayOrSliceElement(rv)
case reflect.Interface:
enc.eElement(rv.Elem())
case reflect.String:
enc.writeQuoted(rv.String())
default:
panic(e("unexpected primitive type: %s", rv.Kind()))
}
}
// By the TOML spec, all floats must have a decimal with at least one
// number on either side.
func floatAddDecimal(fstr string) string {
if !strings.Contains(fstr, ".") {
return fstr + ".0"
}
return fstr
}
func (enc *Encoder) writeQuoted(s string) {
enc.wf("\"%s\"", quotedReplacer.Replace(s))
}
func (enc *Encoder) eArrayOrSliceElement(rv reflect.Value) {
length := rv.Len()
enc.wf("[")
for i := 0; i < length; i++ {
elem := rv.Index(i)
enc.eElement(elem)
if i != length-1 {
enc.wf(", ")
}
}
enc.wf("]")
}
func (enc *Encoder) eArrayOfTables(key Key, rv reflect.Value) {
if len(key) == 0 {
encPanic(errNoKey)
}
for i := 0; i < rv.Len(); i++ {
trv := rv.Index(i)
if isNil(trv) {
continue
}
panicIfInvalidKey(key)
enc.newline()
enc.wf("%s[[%s]]", enc.indentStr(key), key.maybeQuotedAll())
enc.newline()
enc.eMapOrStruct(key, trv)
}
}
func (enc *Encoder) eTable(key Key, rv reflect.Value) {
panicIfInvalidKey(key)
if len(key) == 1 {
// Output an extra newline between top-level tables.
// (The newline isn't written if nothing else has been written though.)
enc.newline()
}
if len(key) > 0 {
enc.wf("%s[%s]", enc.indentStr(key), key.maybeQuotedAll())
enc.newline()
}
enc.eMapOrStruct(key, rv)
}
func (enc *Encoder) eMapOrStruct(key Key, rv reflect.Value) {
switch rv := eindirect(rv); rv.Kind() {
case reflect.Map:
enc.eMap(key, rv)
case reflect.Struct:
enc.eStruct(key, rv)
default:
panic("eTable: unhandled reflect.Value Kind: " + rv.Kind().String())
}
}
func (enc *Encoder) eMap(key Key, rv reflect.Value) {
rt := rv.Type()
if rt.Key().Kind() != reflect.String {
encPanic(errNonString)
}
// Sort keys so that we have deterministic output. And write keys directly
// underneath this key first, before writing sub-structs or sub-maps.
var mapKeysDirect, mapKeysSub []string
for _, mapKey := range rv.MapKeys() {
k := mapKey.String()
if typeIsHash(tomlTypeOfGo(rv.MapIndex(mapKey))) {
mapKeysSub = append(mapKeysSub, k)
} else {
mapKeysDirect = append(mapKeysDirect, k)
}
}
var writeMapKeys = func(mapKeys []string) {
sort.Strings(mapKeys)
for _, mapKey := range mapKeys {
mrv := rv.MapIndex(reflect.ValueOf(mapKey))
if isNil(mrv) {
// Don't write anything for nil fields.
continue
}
enc.encode(key.add(mapKey), mrv)
}
}
writeMapKeys(mapKeysDirect)
writeMapKeys(mapKeysSub)
}
func (enc *Encoder) eStruct(key Key, rv reflect.Value) {
// Write keys for fields directly under this key first, because if we write
// a field that creates a new table, then all keys under it will be in that
// table (not the one we're writing here).
rt := rv.Type()
var fieldsDirect, fieldsSub [][]int
var addFields func(rt reflect.Type, rv reflect.Value, start []int)
addFields = func(rt reflect.Type, rv reflect.Value, start []int) {
for i := 0; i < rt.NumField(); i++ {
f := rt.Field(i)
// skip unexported fields
if f.PkgPath != "" && !f.Anonymous {
continue
}
frv := rv.Field(i)
if f.Anonymous {
t := f.Type
switch t.Kind() {
case reflect.Struct:
// Treat anonymous struct fields with
// tag names as though they are not
// anonymous, like encoding/json does.
if getOptions(f.Tag).name == "" {
addFields(t, frv, f.Index)
continue
}
case reflect.Ptr:
if t.Elem().Kind() == reflect.Struct &&
getOptions(f.Tag).name == "" {
if !frv.IsNil() {
addFields(t.Elem(), frv.Elem(), f.Index)
}
continue
}
// Fall through to the normal field encoding logic below
// for non-struct anonymous fields.
}
}
if typeIsHash(tomlTypeOfGo(frv)) {
fieldsSub = append(fieldsSub, append(start, f.Index...))
} else {
fieldsDirect = append(fieldsDirect, append(start, f.Index...))
}
}
}
addFields(rt, rv, nil)
var writeFields = func(fields [][]int) {
for _, fieldIndex := range fields {
sft := rt.FieldByIndex(fieldIndex)
sf := rv.FieldByIndex(fieldIndex)
if isNil(sf) {
// Don't write anything for nil fields.
continue
}
opts := getOptions(sft.Tag)
if opts.skip {
continue
}
keyName := sft.Name
if opts.name != "" {
keyName = opts.name
}
if opts.omitempty && isEmpty(sf) {
continue
}
if opts.omitzero && isZero(sf) {
continue
}
enc.encode(key.add(keyName), sf)
}
}
writeFields(fieldsDirect)
writeFields(fieldsSub)
}
// tomlTypeOfGo returns the TOML type of a Go value. The type may be `nil`,
// which means no concrete TOML type could be found. It is used to determine
// whether the types of array elements are mixed (which is forbidden); if the
// Go value is nil, it is illegal for it to be an array element.
func tomlTypeOfGo(rv reflect.Value) tomlType {
if isNil(rv) || !rv.IsValid() {
return nil
}
switch rv.Kind() {
case reflect.Bool:
return tomlBool
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32,
reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32,
reflect.Uint64:
return tomlInteger
case reflect.Float32, reflect.Float64:
return tomlFloat
case reflect.Array, reflect.Slice:
if typeEqual(tomlHash, tomlArrayType(rv)) {
return tomlArrayHash
}
return tomlArray
case reflect.Ptr, reflect.Interface:
return tomlTypeOfGo(rv.Elem())
case reflect.String:
return tomlString
case reflect.Map:
return tomlHash
case reflect.Struct:
switch rv.Interface().(type) {
case time.Time:
return tomlDatetime
case TextMarshaler:
return tomlString
default:
return tomlHash
}
default:
panic("unexpected reflect.Kind: " + rv.Kind().String())
}
}
// tomlArrayType returns the element type of a TOML array. The type returned
// may be nil if it cannot be determined (e.g., a nil slice or a zero length
// slice). This function may also panic if it finds a type that cannot be
// expressed in TOML (such as nil elements, heterogeneous arrays or directly
// nested arrays of tables).
func tomlArrayType(rv reflect.Value) tomlType {
if isNil(rv) || !rv.IsValid() || rv.Len() == 0 {
return nil
}
firstType := tomlTypeOfGo(rv.Index(0))
if firstType == nil {
encPanic(errArrayNilElement)
}
rvlen := rv.Len()
for i := 1; i < rvlen; i++ {
elem := rv.Index(i)
switch elemType := tomlTypeOfGo(elem); {
case elemType == nil:
encPanic(errArrayNilElement)
case !typeEqual(firstType, elemType):
encPanic(errArrayMixedElementTypes)
}
}
// If we have a nested array, then we must make sure that the nested
// array contains ONLY primitives.
// This checks arbitrarily nested arrays.
if typeEqual(firstType, tomlArray) || typeEqual(firstType, tomlArrayHash) {
nest := tomlArrayType(eindirect(rv.Index(0)))
if typeEqual(nest, tomlHash) || typeEqual(nest, tomlArrayHash) {
encPanic(errArrayNoTable)
}
}
return firstType
}
type tagOptions struct {
skip bool // "-"
name string
omitempty bool
omitzero bool
}
func getOptions(tag reflect.StructTag) tagOptions {
t := tag.Get("toml")
if t == "-" {
return tagOptions{skip: true}
}
var opts tagOptions
parts := strings.Split(t, ",")
opts.name = parts[0]
for _, s := range parts[1:] {
switch s {
case "omitempty":
opts.omitempty = true
case "omitzero":
opts.omitzero = true
}
}
return opts
}
func isZero(rv reflect.Value) bool {
switch rv.Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return rv.Int() == 0
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
return rv.Uint() == 0
case reflect.Float32, reflect.Float64:
return rv.Float() == 0.0
}
return false
}
func isEmpty(rv reflect.Value) bool {
switch rv.Kind() {
case reflect.Array, reflect.Slice, reflect.Map, reflect.String:
return rv.Len() == 0
case reflect.Bool:
return !rv.Bool()
}
return false
}
func (enc *Encoder) newline() {
if enc.hasWritten {
enc.wf("\n")
}
}
func (enc *Encoder) keyEqElement(key Key, val reflect.Value) {
if len(key) == 0 {
encPanic(errNoKey)
}
panicIfInvalidKey(key)
enc.wf("%s%s = ", enc.indentStr(key), key.maybeQuoted(len(key)-1))
enc.eElement(val)
enc.newline()
}
func (enc *Encoder) wf(format string, v ...interface{}) {
if _, err := fmt.Fprintf(enc.w, format, v...); err != nil {
encPanic(err)
}
enc.hasWritten = true
}
func (enc *Encoder) indentStr(key Key) string {
return strings.Repeat(enc.Indent, len(key)-1)
}
func encPanic(err error) {
panic(tomlEncodeError{err})
}
func eindirect(v reflect.Value) reflect.Value {
switch v.Kind() {
case reflect.Ptr, reflect.Interface:
return eindirect(v.Elem())
default:
return v
}
}
func isNil(rv reflect.Value) bool {
switch rv.Kind() {
case reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice:
return rv.IsNil()
default:
return false
}
}
func panicIfInvalidKey(key Key) {
for _, k := range key {
if len(k) == 0 {
encPanic(e("Key '%s' is not a valid table name. Key names "+
"cannot be empty.", key.maybeQuotedAll()))
}
}
}
func isValidKeyName(s string) bool {
return len(s) != 0
}
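The tag options above (`-`, `omitempty`, `omitzero`) decide what the encoder writes. A minimal sketch, assuming the package's public NewEncoder/Encode API from earlier in this diff:

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/BurntSushi/toml"
)

type server struct {
	Host   string   `toml:"host"`
	Port   int      `toml:"port,omitzero"`    // dropped while zero (opts.omitzero)
	Labels []string `toml:"labels,omitempty"` // dropped while empty (opts.omitempty)
	Debug  bool     `toml:"-"`                // opts.skip: never written
	secret string   // unexported: skipped by the field walk
}

func main() {
	var buf bytes.Buffer
	if err := toml.NewEncoder(&buf).Encode(server{Host: "localhost"}); err != nil {
		panic(err)
	}
	fmt.Print(buf.String()) // prints only: host = "localhost"
}
```

Only `host` is emitted: the zero port, nil labels, unexported field, and `toml:"-"` field are all dropped.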

vendor/github.com/BurntSushi/toml/encoding_types.go generated vendored Normal file (19 lines)

@@ -0,0 +1,19 @@
// +build go1.2

package toml
// In order to support Go 1.1, we define our own TextMarshaler and
// TextUnmarshaler types. For Go 1.2+, we just alias them with the
// standard library interfaces.
import (
"encoding"
)
// TextMarshaler is a synonym for encoding.TextMarshaler. It is defined here
// so that Go 1.1 can be supported.
type TextMarshaler encoding.TextMarshaler
// TextUnmarshaler is a synonym for encoding.TextUnmarshaler. It is defined
// here so that Go 1.1 can be supported.
type TextUnmarshaler encoding.TextUnmarshaler
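Because tomlTypeOfGo classifies any TextMarshaler as a TOML string, a struct with a MarshalText method encodes as a quoted value rather than a table. A hedged sketch (the ipAddr type is hypothetical):

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/BurntSushi/toml"
)

// ipAddr implements MarshalText, so tomlTypeOfGo classifies it as
// tomlString and the encoder writes it as a quoted value.
type ipAddr struct{ a, b, c, d byte }

func (ip ipAddr) MarshalText() ([]byte, error) {
	return []byte(fmt.Sprintf("%d.%d.%d.%d", ip.a, ip.b, ip.c, ip.d)), nil
}

func main() {
	var buf bytes.Buffer
	cfg := struct {
		Addr ipAddr `toml:"addr"`
	}{Addr: ipAddr{127, 0, 0, 1}}
	if err := toml.NewEncoder(&buf).Encode(cfg); err != nil {
		panic(err)
	}
	fmt.Print(buf.String()) // addr = "127.0.0.1"
}
```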


@@ -0,0 +1,18 @@
// +build !go1.2

package toml
// These interfaces were introduced in Go 1.2, so we add them manually when
// compiling for Go 1.1.
// TextMarshaler is a synonym for encoding.TextMarshaler. It is defined here
// so that Go 1.1 can be supported.
type TextMarshaler interface {
MarshalText() (text []byte, err error)
}
// TextUnmarshaler is a synonym for encoding.TextUnmarshaler. It is defined
// here so that Go 1.1 can be supported.
type TextUnmarshaler interface {
UnmarshalText(text []byte) error
}

vendor/github.com/BurntSushi/toml/lex.go generated vendored Normal file (953 lines)

@@ -0,0 +1,953 @@
package toml
import (
"fmt"
"strings"
"unicode"
"unicode/utf8"
)
type itemType int
const (
itemError itemType = iota
itemNIL // used in the parser to indicate no type
itemEOF
itemText
itemString
itemRawString
itemMultilineString
itemRawMultilineString
itemBool
itemInteger
itemFloat
itemDatetime
itemArray // the start of an array
itemArrayEnd
itemTableStart
itemTableEnd
itemArrayTableStart
itemArrayTableEnd
itemKeyStart
itemCommentStart
itemInlineTableStart
itemInlineTableEnd
)
const (
eof = 0
comma = ','
tableStart = '['
tableEnd = ']'
arrayTableStart = '['
arrayTableEnd = ']'
tableSep = '.'
keySep = '='
arrayStart = '['
arrayEnd = ']'
commentStart = '#'
stringStart = '"'
stringEnd = '"'
rawStringStart = '\''
rawStringEnd = '\''
inlineTableStart = '{'
inlineTableEnd = '}'
)
type stateFn func(lx *lexer) stateFn
type lexer struct {
input string
start int
pos int
line int
state stateFn
items chan item
// Allow for backing up up to three runes.
// This is necessary because TOML contains 3-rune tokens (""" and ''').
prevWidths [3]int
nprev int // how many of prevWidths are in use
// If we emit an eof, we can still back up, but it is not OK to call
// next again.
atEOF bool
// A stack of state functions used to maintain context.
// The idea is to reuse parts of the state machine in various places.
// For example, values can appear at the top level or within arbitrarily
// nested arrays. The last state on the stack is used after a value has
// been lexed. Similarly for comments.
stack []stateFn
}
type item struct {
typ itemType
val string
line int
}
func (lx *lexer) nextItem() item {
for {
select {
case item := <-lx.items:
return item
default:
lx.state = lx.state(lx)
}
}
}
func lex(input string) *lexer {
lx := &lexer{
input: input,
state: lexTop,
line: 1,
items: make(chan item, 10),
stack: make([]stateFn, 0, 10),
}
return lx
}
func (lx *lexer) push(state stateFn) {
lx.stack = append(lx.stack, state)
}
func (lx *lexer) pop() stateFn {
if len(lx.stack) == 0 {
return lx.errorf("BUG in lexer: no states to pop")
}
last := lx.stack[len(lx.stack)-1]
lx.stack = lx.stack[0 : len(lx.stack)-1]
return last
}
func (lx *lexer) current() string {
return lx.input[lx.start:lx.pos]
}
func (lx *lexer) emit(typ itemType) {
lx.items <- item{typ, lx.current(), lx.line}
lx.start = lx.pos
}
func (lx *lexer) emitTrim(typ itemType) {
lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
lx.start = lx.pos
}
func (lx *lexer) next() (r rune) {
if lx.atEOF {
panic("next called after EOF")
}
if lx.pos >= len(lx.input) {
lx.atEOF = true
return eof
}
if lx.input[lx.pos] == '\n' {
lx.line++
}
lx.prevWidths[2] = lx.prevWidths[1]
lx.prevWidths[1] = lx.prevWidths[0]
if lx.nprev < 3 {
lx.nprev++
}
r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
lx.prevWidths[0] = w
lx.pos += w
return r
}
// ignore skips over the pending input before this point.
func (lx *lexer) ignore() {
lx.start = lx.pos
}
// backup steps back one rune. It can be called at most three times between
// calls to next, since the lexer buffers the widths of the last three runes.
func (lx *lexer) backup() {
if lx.atEOF {
lx.atEOF = false
return
}
if lx.nprev < 1 {
panic("backed up too far")
}
w := lx.prevWidths[0]
lx.prevWidths[0] = lx.prevWidths[1]
lx.prevWidths[1] = lx.prevWidths[2]
lx.nprev--
lx.pos -= w
if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
lx.line--
}
}
// accept consumes the next rune if it's equal to `valid`.
func (lx *lexer) accept(valid rune) bool {
if lx.next() == valid {
return true
}
lx.backup()
return false
}
// peek returns but does not consume the next rune in the input.
func (lx *lexer) peek() rune {
r := lx.next()
lx.backup()
return r
}
// skip ignores all input that matches the given predicate.
func (lx *lexer) skip(pred func(rune) bool) {
for {
r := lx.next()
if pred(r) {
continue
}
lx.backup()
lx.ignore()
return
}
}
// errorf stops all lexing by emitting an error and returning `nil`.
// Note that any value that is a character is escaped if it's a special
// character (newlines, tabs, etc.).
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
lx.items <- item{
itemError,
fmt.Sprintf(format, values...),
lx.line,
}
return nil
}
// lexTop consumes elements at the top level of TOML data.
func lexTop(lx *lexer) stateFn {
r := lx.next()
if isWhitespace(r) || isNL(r) {
return lexSkip(lx, lexTop)
}
switch r {
case commentStart:
lx.push(lexTop)
return lexCommentStart
case tableStart:
return lexTableStart
case eof:
if lx.pos > lx.start {
return lx.errorf("unexpected EOF")
}
lx.emit(itemEOF)
return nil
}
// At this point, the only valid item can be a key, so we back up
// and let the key lexer do the rest.
lx.backup()
lx.push(lexTopEnd)
return lexKeyStart
}
// lexTopEnd is entered whenever a top-level item has been consumed. (A value
// or a table.) It must see only whitespace, and will turn back to lexTop
// upon a newline. If it sees EOF, it will quit the lexer successfully.
func lexTopEnd(lx *lexer) stateFn {
r := lx.next()
switch {
case r == commentStart:
// a comment will read to a newline for us.
lx.push(lexTop)
return lexCommentStart
case isWhitespace(r):
return lexTopEnd
case isNL(r):
lx.ignore()
return lexTop
case r == eof:
lx.emit(itemEOF)
return nil
}
return lx.errorf("expected a top-level item to end with a newline, "+
"comment, or EOF, but got %q instead", r)
}
// lexTable lexes the beginning of a table. Namely, it makes sure that
// it starts with a character other than '.' and ']'.
// It assumes that '[' has already been consumed.
// It also handles the case that this is an item in an array of tables.
// e.g., '[[name]]'.
func lexTableStart(lx *lexer) stateFn {
if lx.peek() == arrayTableStart {
lx.next()
lx.emit(itemArrayTableStart)
lx.push(lexArrayTableEnd)
} else {
lx.emit(itemTableStart)
lx.push(lexTableEnd)
}
return lexTableNameStart
}
func lexTableEnd(lx *lexer) stateFn {
lx.emit(itemTableEnd)
return lexTopEnd
}
func lexArrayTableEnd(lx *lexer) stateFn {
if r := lx.next(); r != arrayTableEnd {
return lx.errorf("expected end of table array name delimiter %q, "+
"but got %q instead", arrayTableEnd, r)
}
lx.emit(itemArrayTableEnd)
return lexTopEnd
}
func lexTableNameStart(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.peek(); {
case r == tableEnd || r == eof:
return lx.errorf("unexpected end of table name " +
"(table names cannot be empty)")
case r == tableSep:
return lx.errorf("unexpected table separator " +
"(table names cannot be empty)")
case r == stringStart || r == rawStringStart:
lx.ignore()
lx.push(lexTableNameEnd)
return lexValue // reuse string lexing
default:
return lexBareTableName
}
}
// lexBareTableName lexes the name of a table. It assumes that at least one
// valid character for the table has already been read.
func lexBareTableName(lx *lexer) stateFn {
r := lx.next()
if isBareKeyChar(r) {
return lexBareTableName
}
lx.backup()
lx.emit(itemText)
return lexTableNameEnd
}
// lexTableNameEnd reads the end of a piece of a table name, optionally
// consuming whitespace.
func lexTableNameEnd(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.next(); {
case isWhitespace(r):
return lexTableNameEnd
case r == tableSep:
lx.ignore()
return lexTableNameStart
case r == tableEnd:
return lx.pop()
default:
return lx.errorf("expected '.' or ']' to end table name, "+
"but got %q instead", r)
}
}
// lexKeyStart scans past any leading whitespace, emits itemKeyStart at the
// first non-whitespace character, and hands off to the appropriate key lexer.
func lexKeyStart(lx *lexer) stateFn {
r := lx.peek()
switch {
case r == keySep:
return lx.errorf("unexpected key separator %q", keySep)
case isWhitespace(r) || isNL(r):
lx.next()
return lexSkip(lx, lexKeyStart)
case r == stringStart || r == rawStringStart:
lx.ignore()
lx.emit(itemKeyStart)
lx.push(lexKeyEnd)
return lexValue // reuse string lexing
default:
lx.ignore()
lx.emit(itemKeyStart)
return lexBareKey
}
}
// lexBareKey consumes the text of a bare key. Assumes that the first character
// (which is not whitespace) has not yet been consumed.
func lexBareKey(lx *lexer) stateFn {
switch r := lx.next(); {
case isBareKeyChar(r):
return lexBareKey
case isWhitespace(r):
lx.backup()
lx.emit(itemText)
return lexKeyEnd
case r == keySep:
lx.backup()
lx.emit(itemText)
return lexKeyEnd
default:
return lx.errorf("bare keys cannot contain %q", r)
}
}
// lexKeyEnd consumes the end of a key and trims whitespace (up to the key
// separator).
func lexKeyEnd(lx *lexer) stateFn {
switch r := lx.next(); {
case r == keySep:
return lexSkip(lx, lexValue)
case isWhitespace(r):
return lexSkip(lx, lexKeyEnd)
default:
return lx.errorf("expected key separator %q, but got %q instead",
keySep, r)
}
}
// lexValue starts the consumption of a value anywhere a value is expected.
// lexValue will ignore whitespace.
// After a value is lexed, the last state on the stack is popped and returned.
func lexValue(lx *lexer) stateFn {
// We allow whitespace to precede a value, but NOT newlines.
// In array syntax, the array states are responsible for ignoring newlines.
r := lx.next()
switch {
case isWhitespace(r):
return lexSkip(lx, lexValue)
case isDigit(r):
lx.backup() // avoid an extra state and use the same as above
return lexNumberOrDateStart
}
switch r {
case arrayStart:
lx.ignore()
lx.emit(itemArray)
return lexArrayValue
case inlineTableStart:
lx.ignore()
lx.emit(itemInlineTableStart)
return lexInlineTableValue
case stringStart:
if lx.accept(stringStart) {
if lx.accept(stringStart) {
lx.ignore() // Ignore """
return lexMultilineString
}
lx.backup()
}
lx.ignore() // ignore the '"'
return lexString
case rawStringStart:
if lx.accept(rawStringStart) {
if lx.accept(rawStringStart) {
lx.ignore() // Ignore '''
return lexMultilineRawString
}
lx.backup()
}
lx.ignore() // ignore the "'"
return lexRawString
case '+', '-':
return lexNumberStart
case '.': // special error case, be kind to users
return lx.errorf("floats must start with a digit, not '.'")
}
if unicode.IsLetter(r) {
// Be permissive here; lexBool will give a nice error if the
// user wrote something like
// x = foo
// (i.e. not 'true' or 'false' but is something else word-like.)
lx.backup()
return lexBool
}
return lx.errorf("expected value but found %q instead", r)
}
// lexArrayValue consumes one value in an array. It assumes that '[' or ','
// have already been consumed. All whitespace and newlines are ignored.
func lexArrayValue(lx *lexer) stateFn {
r := lx.next()
switch {
case isWhitespace(r) || isNL(r):
return lexSkip(lx, lexArrayValue)
case r == commentStart:
lx.push(lexArrayValue)
return lexCommentStart
case r == comma:
return lx.errorf("unexpected comma")
case r == arrayEnd:
// NOTE(caleb): The spec isn't clear about whether you can have
// a trailing comma or not, so we'll allow it.
return lexArrayEnd
}
lx.backup()
lx.push(lexArrayValueEnd)
return lexValue
}
// lexArrayValueEnd consumes everything between the end of an array value and
// the next value (or the end of the array): it ignores whitespace and newlines
// and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
r := lx.next()
switch {
case isWhitespace(r) || isNL(r):
return lexSkip(lx, lexArrayValueEnd)
case r == commentStart:
lx.push(lexArrayValueEnd)
return lexCommentStart
case r == comma:
lx.ignore()
return lexArrayValue // move on to the next value
case r == arrayEnd:
return lexArrayEnd
}
return lx.errorf(
"expected a comma or array terminator %q, but got %q instead",
arrayEnd, r,
)
}
// lexArrayEnd finishes the lexing of an array.
// It assumes that a ']' has just been consumed.
func lexArrayEnd(lx *lexer) stateFn {
lx.ignore()
lx.emit(itemArrayEnd)
return lx.pop()
}
// lexInlineTableValue consumes one key/value pair in an inline table.
// It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
func lexInlineTableValue(lx *lexer) stateFn {
r := lx.next()
switch {
case isWhitespace(r):
return lexSkip(lx, lexInlineTableValue)
case isNL(r):
return lx.errorf("newlines not allowed within inline tables")
case r == commentStart:
lx.push(lexInlineTableValue)
return lexCommentStart
case r == comma:
return lx.errorf("unexpected comma")
case r == inlineTableEnd:
return lexInlineTableEnd
}
lx.backup()
lx.push(lexInlineTableValueEnd)
return lexKeyStart
}
// lexInlineTableValueEnd consumes everything between the end of an inline table
// key/value pair and the next pair (or the end of the table):
// it ignores whitespace and expects either a ',' or a '}'.
func lexInlineTableValueEnd(lx *lexer) stateFn {
r := lx.next()
switch {
case isWhitespace(r):
return lexSkip(lx, lexInlineTableValueEnd)
case isNL(r):
return lx.errorf("newlines not allowed within inline tables")
case r == commentStart:
lx.push(lexInlineTableValueEnd)
return lexCommentStart
case r == comma:
lx.ignore()
return lexInlineTableValue
case r == inlineTableEnd:
return lexInlineTableEnd
}
return lx.errorf("expected a comma or an inline table terminator %q, "+
"but got %q instead", inlineTableEnd, r)
}
// lexInlineTableEnd finishes the lexing of an inline table.
// It assumes that a '}' has just been consumed.
func lexInlineTableEnd(lx *lexer) stateFn {
lx.ignore()
lx.emit(itemInlineTableEnd)
return lx.pop()
}
// lexString consumes the inner contents of a string. It assumes that the
// beginning '"' has already been consumed and ignored.
func lexString(lx *lexer) stateFn {
r := lx.next()
switch {
case r == eof:
return lx.errorf("unexpected EOF")
case isNL(r):
return lx.errorf("strings cannot contain newlines")
case r == '\\':
lx.push(lexString)
return lexStringEscape
case r == stringEnd:
lx.backup()
lx.emit(itemString)
lx.next()
lx.ignore()
return lx.pop()
}
return lexString
}
// lexMultilineString consumes the inner contents of a string. It assumes that
// the beginning '"""' has already been consumed and ignored.
func lexMultilineString(lx *lexer) stateFn {
switch lx.next() {
case eof:
return lx.errorf("unexpected EOF")
case '\\':
return lexMultilineStringEscape
case stringEnd:
if lx.accept(stringEnd) {
if lx.accept(stringEnd) {
lx.backup()
lx.backup()
lx.backup()
lx.emit(itemMultilineString)
lx.next()
lx.next()
lx.next()
lx.ignore()
return lx.pop()
}
lx.backup()
}
}
return lexMultilineString
}
// lexRawString consumes a raw string. Nothing can be escaped in such a string.
// It assumes that the beginning "'" has already been consumed and ignored.
func lexRawString(lx *lexer) stateFn {
r := lx.next()
switch {
case r == eof:
return lx.errorf("unexpected EOF")
case isNL(r):
return lx.errorf("strings cannot contain newlines")
case r == rawStringEnd:
lx.backup()
lx.emit(itemRawString)
lx.next()
lx.ignore()
return lx.pop()
}
return lexRawString
}
// lexMultilineRawString consumes a raw string. Nothing can be escaped in such
// a string. It assumes that the beginning "'''" has already been consumed and
// ignored.
func lexMultilineRawString(lx *lexer) stateFn {
switch lx.next() {
case eof:
return lx.errorf("unexpected EOF")
case rawStringEnd:
if lx.accept(rawStringEnd) {
if lx.accept(rawStringEnd) {
lx.backup()
lx.backup()
lx.backup()
lx.emit(itemRawMultilineString)
lx.next()
lx.next()
lx.next()
lx.ignore()
return lx.pop()
}
lx.backup()
}
}
return lexMultilineRawString
}
// lexMultilineStringEscape consumes an escaped character. It assumes that the
// preceding '\\' has already been consumed.
func lexMultilineStringEscape(lx *lexer) stateFn {
// Handle the special case first:
if isNL(lx.next()) {
return lexMultilineString
}
lx.backup()
lx.push(lexMultilineString)
return lexStringEscape(lx)
}
func lexStringEscape(lx *lexer) stateFn {
r := lx.next()
switch r {
case 'b':
fallthrough
case 't':
fallthrough
case 'n':
fallthrough
case 'f':
fallthrough
case 'r':
fallthrough
case '"':
fallthrough
case '\\':
return lx.pop()
case 'u':
return lexShortUnicodeEscape
case 'U':
return lexLongUnicodeEscape
}
return lx.errorf("invalid escape character %q; only the following "+
"escape characters are allowed: "+
`\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r)
}
func lexShortUnicodeEscape(lx *lexer) stateFn {
var r rune
for i := 0; i < 4; i++ {
r = lx.next()
if !isHexadecimal(r) {
return lx.errorf(`expected four hexadecimal digits after '\u', `+
"but got %q instead", lx.current())
}
}
return lx.pop()
}
func lexLongUnicodeEscape(lx *lexer) stateFn {
var r rune
for i := 0; i < 8; i++ {
r = lx.next()
if !isHexadecimal(r) {
return lx.errorf(`expected eight hexadecimal digits after '\U', `+
"but got %q instead", lx.current())
}
}
return lx.pop()
}
// lexNumberOrDateStart consumes either an integer, a float, or datetime.
func lexNumberOrDateStart(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexNumberOrDate
}
switch r {
case '_':
return lexNumber
case 'e', 'E':
return lexFloat
case '.':
return lx.errorf("floats must start with a digit, not '.'")
}
return lx.errorf("expected a digit but got %q", r)
}
// lexNumberOrDate consumes either an integer, float or datetime.
func lexNumberOrDate(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexNumberOrDate
}
switch r {
case '-':
return lexDatetime
case '_':
return lexNumber
case '.', 'e', 'E':
return lexFloat
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexDatetime consumes a Datetime, to a first approximation.
// The parser validates that it matches one of the accepted formats.
func lexDatetime(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexDatetime
}
switch r {
case '-', 'T', ':', '.', 'Z', '+':
return lexDatetime
}
lx.backup()
lx.emit(itemDatetime)
return lx.pop()
}
// lexNumberStart consumes either an integer or a float. It assumes that a sign
// has already been read, but that *no* digits have been consumed.
// lexNumberStart will move to the appropriate integer or float states.
func lexNumberStart(lx *lexer) stateFn {
// We MUST see a digit. Even floats have to start with a digit.
r := lx.next()
if !isDigit(r) {
if r == '.' {
return lx.errorf("floats must start with a digit, not '.'")
}
return lx.errorf("expected a digit but got %q", r)
}
return lexNumber
}
// lexNumber consumes an integer or a float after seeing the first digit.
func lexNumber(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexNumber
}
switch r {
case '_':
return lexNumber
case '.', 'e', 'E':
return lexFloat
}
lx.backup()
lx.emit(itemInteger)
return lx.pop()
}
// lexFloat consumes the elements of a float. It allows any sequence of
// float-like characters, so floats emitted by the lexer are only a first
// approximation and must be validated by the parser.
func lexFloat(lx *lexer) stateFn {
r := lx.next()
if isDigit(r) {
return lexFloat
}
switch r {
case '_', '.', '-', '+', 'e', 'E':
return lexFloat
}
lx.backup()
lx.emit(itemFloat)
return lx.pop()
}
// lexBool consumes a bool string: 'true' or 'false'.
func lexBool(lx *lexer) stateFn {
var rs []rune
for {
r := lx.next()
if !unicode.IsLetter(r) {
lx.backup()
break
}
rs = append(rs, r)
}
s := string(rs)
switch s {
case "true", "false":
lx.emit(itemBool)
return lx.pop()
}
return lx.errorf("expected value but found %q instead", s)
}
// lexCommentStart begins the lexing of a comment. It will emit
// itemCommentStart and consume no characters, passing control to lexComment.
func lexCommentStart(lx *lexer) stateFn {
lx.ignore()
lx.emit(itemCommentStart)
return lexComment
}
// lexComment lexes an entire comment. It assumes that '#' has been consumed.
// It will consume *up to* the first newline character, and pass control
// back to the last state on the stack.
func lexComment(lx *lexer) stateFn {
r := lx.peek()
if isNL(r) || r == eof {
lx.emit(itemText)
return lx.pop()
}
lx.next()
return lexComment
}
// lexSkip ignores all slurped input and moves on to the next state.
func lexSkip(lx *lexer, nextState stateFn) stateFn {
return func(lx *lexer) stateFn {
lx.ignore()
return nextState
}
}
// isWhitespace returns true if `r` is a whitespace character according
// to the spec.
func isWhitespace(r rune) bool {
return r == '\t' || r == ' '
}
func isNL(r rune) bool {
return r == '\n' || r == '\r'
}
func isDigit(r rune) bool {
return r >= '0' && r <= '9'
}
func isHexadecimal(r rune) bool {
return (r >= '0' && r <= '9') ||
(r >= 'a' && r <= 'f') ||
(r >= 'A' && r <= 'F')
}
func isBareKeyChar(r rune) bool {
return (r >= 'A' && r <= 'Z') ||
(r >= 'a' && r <= 'z') ||
(r >= '0' && r <= '9') ||
r == '_' ||
r == '-'
}
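// For example, "server-1" and "a_b" are valid bare keys, while "a.b" and
// "a b" are not ('.' separates key parts and ' ' ends the key).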
func (itype itemType) String() string {
switch itype {
case itemError:
return "Error"
case itemNIL:
return "NIL"
case itemEOF:
return "EOF"
case itemText:
return "Text"
case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
return "String"
case itemBool:
return "Bool"
case itemInteger:
return "Integer"
case itemFloat:
return "Float"
case itemDatetime:
return "DateTime"
case itemTableStart:
return "TableStart"
case itemTableEnd:
return "TableEnd"
case itemKeyStart:
return "KeyStart"
case itemArray:
return "Array"
case itemArrayEnd:
return "ArrayEnd"
case itemCommentStart:
return "CommentStart"
}
panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
}
func (item item) String() string {
return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
}
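A hedged sketch of how this channel-based lexer is driven: nextItem pumps the state machine until an item is buffered, so a consumer loops until itemEOF or itemError. Since lex and item are unexported, a helper like this would have to live inside package toml (lexWalk is hypothetical, not part of the vendored file):

```go
package toml

import "fmt"

// lexWalk is a hypothetical debugging helper: nextItem runs the state
// machine until an item is available on the channel, so we loop until
// the lexer reports EOF or an error.
func lexWalk(input string) {
	lx := lex(input)
	for {
		it := lx.nextItem()
		fmt.Printf("line %d: %s\n", it.line, it)
		if it.typ == itemEOF || it.typ == itemError {
			return
		}
	}
}

// lexWalk(`a = [1, 2]`) would print, roughly:
//   line 1: (KeyStart, )
//   line 1: (Text, a)
//   line 1: (Array, )
//   line 1: (Integer, 1) ...
```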

vendor/github.com/BurntSushi/toml/parse.go generated vendored Normal file (592 lines)

@@ -0,0 +1,592 @@
package toml
import (
"fmt"
"strconv"
"strings"
"time"
"unicode"
"unicode/utf8"
)
type parser struct {
mapping map[string]interface{}
types map[string]tomlType
lx *lexer
// A list of keys in the order that they appear in the TOML data.
ordered []Key
// the full key for the current hash in scope
context Key
// the base key name for everything except hashes
currentKey string
// rough approximation of line number
approxLine int
// A map of 'key.group.names' to whether they were created implicitly.
implicits map[string]bool
}
type parseError string
func (pe parseError) Error() string {
return string(pe)
}
func parse(data string) (p *parser, err error) {
defer func() {
if r := recover(); r != nil {
var ok bool
if err, ok = r.(parseError); ok {
return
}
panic(r)
}
}()
p = &parser{
mapping: make(map[string]interface{}),
types: make(map[string]tomlType),
lx: lex(data),
ordered: make([]Key, 0),
implicits: make(map[string]bool),
}
for {
item := p.next()
if item.typ == itemEOF {
break
}
p.topLevel(item)
}
return p, nil
}
func (p *parser) panicf(format string, v ...interface{}) {
msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s",
p.approxLine, p.current(), fmt.Sprintf(format, v...))
panic(parseError(msg))
}
func (p *parser) next() item {
it := p.lx.nextItem()
if it.typ == itemError {
p.panicf("%s", it.val)
}
return it
}
func (p *parser) bug(format string, v ...interface{}) {
panic(fmt.Sprintf("BUG: "+format+"\n\n", v...))
}
func (p *parser) expect(typ itemType) item {
it := p.next()
p.assertEqual(typ, it.typ)
return it
}
func (p *parser) assertEqual(expected, got itemType) {
if expected != got {
p.bug("Expected '%s' but got '%s'.", expected, got)
}
}
func (p *parser) topLevel(item item) {
switch item.typ {
case itemCommentStart:
p.approxLine = item.line
p.expect(itemText)
case itemTableStart:
kg := p.next()
p.approxLine = kg.line
var key Key
for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() {
key = append(key, p.keyString(kg))
}
p.assertEqual(itemTableEnd, kg.typ)
p.establishContext(key, false)
p.setType("", tomlHash)
p.ordered = append(p.ordered, key)
case itemArrayTableStart:
kg := p.next()
p.approxLine = kg.line
var key Key
for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() {
key = append(key, p.keyString(kg))
}
p.assertEqual(itemArrayTableEnd, kg.typ)
p.establishContext(key, true)
p.setType("", tomlArrayHash)
p.ordered = append(p.ordered, key)
case itemKeyStart:
kname := p.next()
p.approxLine = kname.line
p.currentKey = p.keyString(kname)
val, typ := p.value(p.next())
p.setValue(p.currentKey, val)
p.setType(p.currentKey, typ)
p.ordered = append(p.ordered, p.context.add(p.currentKey))
p.currentKey = ""
default:
p.bug("Unexpected type at top level: %s", item.typ)
}
}
// keyString returns the string for a key (or part of a key in a table name).
func (p *parser) keyString(it item) string {
switch it.typ {
case itemText:
return it.val
case itemString, itemMultilineString,
itemRawString, itemRawMultilineString:
s, _ := p.value(it)
return s.(string)
default:
p.bug("Unexpected key type: %s", it.typ)
panic("unreachable")
}
}
// value translates an expected value from the lexer into a Go value wrapped
// as an empty interface.
func (p *parser) value(it item) (interface{}, tomlType) {
switch it.typ {
case itemString:
return p.replaceEscapes(it.val), p.typeOfPrimitive(it)
case itemMultilineString:
trimmed := stripFirstNewline(stripEscapedWhitespace(it.val))
return p.replaceEscapes(trimmed), p.typeOfPrimitive(it)
case itemRawString:
return it.val, p.typeOfPrimitive(it)
case itemRawMultilineString:
return stripFirstNewline(it.val), p.typeOfPrimitive(it)
case itemBool:
switch it.val {
case "true":
return true, p.typeOfPrimitive(it)
case "false":
return false, p.typeOfPrimitive(it)
}
p.bug("Expected boolean value, but got '%s'.", it.val)
case itemInteger:
if !numUnderscoresOK(it.val) {
p.panicf("Invalid integer %q: underscores must be surrounded by digits",
it.val)
}
val := strings.Replace(it.val, "_", "", -1)
num, err := strconv.ParseInt(val, 10, 64)
if err != nil {
// Distinguish integer values. Normally, it'd be a bug if the lexer
// provides an invalid integer, but it's possible that the number is
// out of range of valid values (which the lexer cannot determine).
// So mark the former as a bug but the latter as a legitimate user
// error.
if e, ok := err.(*strconv.NumError); ok &&
e.Err == strconv.ErrRange {
p.panicf("Integer '%s' is out of the range of 64-bit "+
"signed integers.", it.val)
} else {
p.bug("Expected integer value, but got '%s'.", it.val)
}
}
return num, p.typeOfPrimitive(it)
case itemFloat:
parts := strings.FieldsFunc(it.val, func(r rune) bool {
switch r {
case '.', 'e', 'E':
return true
}
return false
})
for _, part := range parts {
if !numUnderscoresOK(part) {
p.panicf("Invalid float %q: underscores must be "+
"surrounded by digits", it.val)
}
}
if !numPeriodsOK(it.val) {
// As a special case, numbers like '123.' or '1.e2',
// which are valid as far as Go/strconv are concerned,
// must be rejected because TOML says that a fractional
// part consists of '.' followed by 1+ digits.
p.panicf("Invalid float %q: '.' must be followed "+
"by one or more digits", it.val)
}
val := strings.Replace(it.val, "_", "", -1)
num, err := strconv.ParseFloat(val, 64)
if err != nil {
if e, ok := err.(*strconv.NumError); ok &&
e.Err == strconv.ErrRange {
p.panicf("Float '%s' is out of the range of 64-bit "+
"IEEE-754 floating-point numbers.", it.val)
} else {
p.panicf("Invalid float value: %q", it.val)
}
}
return num, p.typeOfPrimitive(it)
case itemDatetime:
var t time.Time
var ok bool
var err error
for _, format := range []string{
"2006-01-02T15:04:05Z07:00",
"2006-01-02T15:04:05",
"2006-01-02",
} {
t, err = time.ParseInLocation(format, it.val, time.Local)
if err == nil {
ok = true
break
}
}
if !ok {
p.panicf("Invalid TOML Datetime: %q.", it.val)
}
return t, p.typeOfPrimitive(it)
case itemArray:
array := make([]interface{}, 0)
types := make([]tomlType, 0)
for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
if it.typ == itemCommentStart {
p.expect(itemText)
continue
}
val, typ := p.value(it)
array = append(array, val)
types = append(types, typ)
}
return array, p.typeOfArray(types)
case itemInlineTableStart:
var (
hash = make(map[string]interface{})
outerContext = p.context
outerKey = p.currentKey
)
p.context = append(p.context, p.currentKey)
p.currentKey = ""
for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
if it.typ == itemCommentStart {
p.expect(itemText)
continue
}
if it.typ != itemKeyStart {
p.bug("Expected key start but instead found %q, around line %d",
it.val, p.approxLine)
}
// retrieve key
k := p.next()
p.approxLine = k.line
kname := p.keyString(k)
// retrieve value
p.currentKey = kname
val, typ := p.value(p.next())
// make sure we keep metadata up to date
p.setType(kname, typ)
p.ordered = append(p.ordered, p.context.add(p.currentKey))
hash[kname] = val
}
p.context = outerContext
p.currentKey = outerKey
return hash, tomlHash
}
p.bug("Unexpected value type: %s", it.typ)
panic("unreachable")
}
// numUnderscoresOK checks whether each underscore in s is surrounded by
// characters that are not underscores.
func numUnderscoresOK(s string) bool {
accept := false
for _, r := range s {
if r == '_' {
if !accept {
return false
}
accept = false
continue
}
accept = true
}
return accept
}
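// For example, numUnderscoresOK accepts "1_000" and "1_2_3" but rejects
// "_1000", "1000_" and "1__000".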
// numPeriodsOK checks whether every period in s is followed by a digit.
func numPeriodsOK(s string) bool {
period := false
for _, r := range s {
if period && !isDigit(r) {
return false
}
period = r == '.'
}
return !period
}
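// For example, numPeriodsOK accepts "3.14" but rejects "3." and "1.e2":
// a fractional part must be '.' followed by one or more digits.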
// establishContext sets the current context of the parser,
// where the context is either a hash or an array of hashes. Which one is
// set depends on the value of the `array` parameter.
//
// Establishing the context also makes sure that the key isn't a duplicate, and
// will create implicit hashes automatically.
func (p *parser) establishContext(key Key, array bool) {
var ok bool
// Always start at the top level and drill down for our context.
hashContext := p.mapping
keyContext := make(Key, 0)
// We only need implicit hashes for key[0:-1]
for _, k := range key[0 : len(key)-1] {
_, ok = hashContext[k]
keyContext = append(keyContext, k)
// No key? Make an implicit hash and move on.
if !ok {
p.addImplicit(keyContext)
hashContext[k] = make(map[string]interface{})
}
// If the hash context is actually an array of tables, then set
// the hash context to the last element in that array.
//
// Otherwise, it better be a table, since this MUST be a key group (by
// virtue of it not being the last element in a key).
switch t := hashContext[k].(type) {
case []map[string]interface{}:
hashContext = t[len(t)-1]
case map[string]interface{}:
hashContext = t
default:
p.panicf("Key '%s' was already created as a hash.", keyContext)
}
}
p.context = keyContext
if array {
// If this is the first element for this array, then allocate a new
// list of tables for it.
k := key[len(key)-1]
if _, ok := hashContext[k]; !ok {
hashContext[k] = make([]map[string]interface{}, 0, 5)
}
// Add a new table. But make sure the key hasn't already been used
// for something else.
if hash, ok := hashContext[k].([]map[string]interface{}); ok {
hashContext[k] = append(hash, make(map[string]interface{}))
} else {
p.panicf("Key '%s' was already created and cannot be used as "+
"an array.", keyContext)
}
} else {
p.setValue(key[len(key)-1], make(map[string]interface{}))
}
p.context = append(p.context, key[len(key)-1])
}
// setValue sets the given key to the given value in the current context.
// It makes sure that the key hasn't already been defined, and accounts for
// implicit key groups.
func (p *parser) setValue(key string, value interface{}) {
var tmpHash interface{}
var ok bool
hash := p.mapping
keyContext := make(Key, 0)
for _, k := range p.context {
keyContext = append(keyContext, k)
if tmpHash, ok = hash[k]; !ok {
p.bug("Context for key '%s' has not been established.", keyContext)
}
switch t := tmpHash.(type) {
case []map[string]interface{}:
// The context is a table of hashes. Pick the most recent table
// defined as the current hash.
hash = t[len(t)-1]
case map[string]interface{}:
hash = t
default:
p.bug("Expected hash to have type 'map[string]interface{}', but "+
"it has '%T' instead.", tmpHash)
}
}
keyContext = append(keyContext, key)
if _, ok := hash[key]; ok {
// Typically, if the given key has already been set, then we have
// to raise an error since duplicate keys are disallowed. However,
// it's possible that a key was previously defined implicitly. In this
// case, it is allowed to be redefined concretely. (See the
// `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.)
//
// But we have to make sure to stop marking it as an implicit. (So that
// another redefinition provokes an error.)
//
// Note that since it has already been defined (as a hash), we don't
// want to overwrite it. So our business is done.
if p.isImplicit(keyContext) {
p.removeImplicit(keyContext)
return
}
// Otherwise, we have a concrete key trying to override a previous
// key, which is *always* wrong.
p.panicf("Key '%s' has already been defined.", keyContext)
}
hash[key] = value
}
// setType sets the type of a particular value at a given key.
// It should be called immediately AFTER setValue.
//
// Note that if `key` is empty, then the type given will be applied to the
// current context (which is either a table or an array of tables).
func (p *parser) setType(key string, typ tomlType) {
keyContext := make(Key, 0, len(p.context)+1)
for _, k := range p.context {
keyContext = append(keyContext, k)
}
if len(key) > 0 { // allow type setting for hashes
keyContext = append(keyContext, key)
}
p.types[keyContext.String()] = typ
}
// addImplicit sets the given Key as having been created implicitly.
func (p *parser) addImplicit(key Key) {
p.implicits[key.String()] = true
}
// removeImplicit stops tagging the given key as having been implicitly
// created.
func (p *parser) removeImplicit(key Key) {
p.implicits[key.String()] = false
}
// isImplicit returns true if the key group pointed to by the key was created
// implicitly.
func (p *parser) isImplicit(key Key) bool {
return p.implicits[key.String()]
}
// current returns the full key name of the current context.
func (p *parser) current() string {
if len(p.currentKey) == 0 {
return p.context.String()
}
if len(p.context) == 0 {
return p.currentKey
}
return fmt.Sprintf("%s.%s", p.context, p.currentKey)
}
func stripFirstNewline(s string) string {
if len(s) == 0 || s[0] != '\n' {
return s
}
return s[1:]
}
func stripEscapedWhitespace(s string) string {
esc := strings.Split(s, "\\\n")
if len(esc) > 1 {
for i := 1; i < len(esc); i++ {
esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace)
}
}
return strings.Join(esc, "")
}
func (p *parser) replaceEscapes(str string) string {
var replaced []rune
s := []byte(str)
r := 0
for r < len(s) {
if s[r] != '\\' {
c, size := utf8.DecodeRune(s[r:])
r += size
replaced = append(replaced, c)
continue
}
r += 1
if r >= len(s) {
p.bug("Escape sequence at end of string.")
return ""
}
switch s[r] {
default:
p.bug("Expected valid escape code after \\, but got %q.", s[r])
return ""
case 'b':
replaced = append(replaced, rune(0x0008))
r += 1
case 't':
replaced = append(replaced, rune(0x0009))
r += 1
case 'n':
replaced = append(replaced, rune(0x000A))
r += 1
case 'f':
replaced = append(replaced, rune(0x000C))
r += 1
case 'r':
replaced = append(replaced, rune(0x000D))
r += 1
case '"':
replaced = append(replaced, rune(0x0022))
r += 1
case '\\':
replaced = append(replaced, rune(0x005C))
r += 1
case 'u':
// At this point, we know we have a Unicode escape of the form
// `uXXXX` at [r, r+5). (Because the lexer guarantees this
// for us.)
escaped := p.asciiEscapeToUnicode(s[r+1 : r+5])
replaced = append(replaced, escaped)
r += 5
case 'U':
// At this point, we know we have a Unicode escape of the form
// `UXXXXXXXX` at [r, r+9). (Because the lexer guarantees this
// for us.)
escaped := p.asciiEscapeToUnicode(s[r+1 : r+9])
replaced = append(replaced, escaped)
r += 9
}
}
return string(replaced)
}
func (p *parser) asciiEscapeToUnicode(bs []byte) rune {
s := string(bs)
hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
if err != nil {
p.bug("Could not parse '%s' as a hexadecimal number, but the "+
"lexer claims it's OK: %s", s, err)
}
if !utf8.ValidRune(rune(hex)) {
p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s)
}
return rune(hex)
}
func isStringType(ty itemType) bool {
return ty == itemString || ty == itemMultilineString ||
ty == itemRawString || ty == itemRawMultilineString
}
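Putting the parser pieces together: underscores are vetted by numUnderscoresOK, datetimes are tried against the three layouts above, and escapes go through replaceEscapes. A minimal sketch using the package's public Decode function:

```go
package main

import (
	"fmt"
	"time"

	"github.com/BurntSushi/toml"
)

type doc struct {
	Max  int64     `toml:"max"`
	Pi   float64   `toml:"pi"`
	Born time.Time `toml:"born"`
	Msg  string    `toml:"msg"`
}

func main() {
	const data = `
max  = 1_000_000            # underscores vetted by numUnderscoresOK
pi   = 3.141_5
born = 1987-07-05T05:45:00Z # one of the three accepted layouts
msg  = "tab:\t e-acute:\u00E9"
`
	var d doc
	if _, err := toml.Decode(data, &d); err != nil {
		panic(err)
	}
	fmt.Println(d.Max, d.Pi, d.Born.Year(), d.Msg)
	// 1000000 3.1415 1987 tab:	 e-acute:é
}
```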

vendor/github.com/BurntSushi/toml/session.vim generated vendored Normal file (1 line)

@@ -0,0 +1 @@
au BufWritePost *.go silent!make tags > /dev/null 2>&1

vendor/github.com/BurntSushi/toml/type_check.go generated vendored Normal file (91 lines)

@@ -0,0 +1,91 @@
package toml
// tomlType represents any Go type that corresponds to a TOML type.
// While the first draft of the TOML spec has a simplistic type system that
// probably doesn't need this level of sophistication, the spec seems to be
// moving toward adding real composite types.
type tomlType interface {
typeString() string
}
// typeEqual accepts any two types and returns true if they are equal.
func typeEqual(t1, t2 tomlType) bool {
if t1 == nil || t2 == nil {
return false
}
return t1.typeString() == t2.typeString()
}
func typeIsHash(t tomlType) bool {
return typeEqual(t, tomlHash) || typeEqual(t, tomlArrayHash)
}
type tomlBaseType string
func (btype tomlBaseType) typeString() string {
return string(btype)
}
func (btype tomlBaseType) String() string {
return btype.typeString()
}
var (
tomlInteger tomlBaseType = "Integer"
tomlFloat tomlBaseType = "Float"
tomlDatetime tomlBaseType = "Datetime"
tomlString tomlBaseType = "String"
tomlBool tomlBaseType = "Bool"
tomlArray tomlBaseType = "Array"
tomlHash tomlBaseType = "Hash"
tomlArrayHash tomlBaseType = "ArrayHash"
)
// typeOfPrimitive returns a tomlType of any primitive value in TOML.
// Primitive values are: Integer, Float, Datetime, String and Bool.
//
// Passing a lexer item other than the following will cause a BUG message
// to occur: itemString, itemBool, itemInteger, itemFloat, itemDatetime.
func (p *parser) typeOfPrimitive(lexItem item) tomlType {
switch lexItem.typ {
case itemInteger:
return tomlInteger
case itemFloat:
return tomlFloat
case itemDatetime:
return tomlDatetime
case itemString:
return tomlString
case itemMultilineString:
return tomlString
case itemRawString:
return tomlString
case itemRawMultilineString:
return tomlString
case itemBool:
return tomlBool
}
p.bug("Cannot infer primitive type of lex item '%s'.", lexItem)
panic("unreachable")
}
// typeOfArray returns a tomlType for an array given a list of types of its
// values.
//
// In the current spec, if an array is homogeneous, then its type is always
// "Array". If the array is not homogeneous, an error is generated.
func (p *parser) typeOfArray(types []tomlType) tomlType {
// Empty arrays are cool.
if len(types) == 0 {
return tomlArray
}
theType := types[0]
for _, t := range types[1:] {
if !typeEqual(theType, t) {
p.panicf("Array contains values of type '%s' and '%s', but "+
"arrays must be homogeneous.", theType, t)
}
}
return tomlArray
}
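A short sketch of the homogeneity rule that typeOfArray enforces; the panic raised by p.panicf is recovered in parse and surfaces as an ordinary error from Decode:

```go
package main

import (
	"fmt"

	"github.com/BurntSushi/toml"
)

func main() {
	var v map[string]interface{}

	// Mixed element types make typeOfArray panic, which surfaces as an error.
	_, err := toml.Decode(`mixed = [1, "two"]`, &v)
	fmt.Println(err != nil) // true

	// Homogeneous (and empty) arrays are fine.
	if _, err := toml.Decode(`ok = [1, 2, 3]`, &v); err == nil {
		fmt.Println(v["ok"]) // [1 2 3]
	}
}
```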

vendor/github.com/BurntSushi/toml/type_fields.go generated vendored Normal file (242 lines)

@@ -0,0 +1,242 @@
package toml
// Struct field handling is adapted from code in encoding/json:
//
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the Go distribution.
import (
"reflect"
"sort"
"sync"
)
// A field represents a single field found in a struct.
type field struct {
name string // the name of the field (`toml` tag included)
tag bool // whether field has a `toml` tag
index []int // represents the depth of an anonymous field
typ reflect.Type // the type of the field
}
// byName sorts field by name, breaking ties with depth,
// then breaking ties with "name came from toml tag", then
// breaking ties with index sequence.
type byName []field
func (x byName) Len() int { return len(x) }
func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
func (x byName) Less(i, j int) bool {
if x[i].name != x[j].name {
return x[i].name < x[j].name
}
if len(x[i].index) != len(x[j].index) {
return len(x[i].index) < len(x[j].index)
}
if x[i].tag != x[j].tag {
return x[i].tag
}
return byIndex(x).Less(i, j)
}
// byIndex sorts field by index sequence.
type byIndex []field
func (x byIndex) Len() int { return len(x) }
func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
func (x byIndex) Less(i, j int) bool {
for k, xik := range x[i].index {
if k >= len(x[j].index) {
return false
}
if xik != x[j].index[k] {
return xik < x[j].index[k]
}
}
return len(x[i].index) < len(x[j].index)
}
// typeFields returns a list of fields that TOML should recognize for the given
// type. The algorithm is breadth-first search over the set of structs to
// include - the top struct and then any reachable anonymous structs.
func typeFields(t reflect.Type) []field {
// Anonymous fields to explore at the current level and the next.
current := []field{}
next := []field{{typ: t}}
// Count of queued names for current level and the next.
count := map[reflect.Type]int{}
nextCount := map[reflect.Type]int{}
// Types already visited at an earlier level.
visited := map[reflect.Type]bool{}
// Fields found.
var fields []field
for len(next) > 0 {
current, next = next, current[:0]
count, nextCount = nextCount, map[reflect.Type]int{}
for _, f := range current {
if visited[f.typ] {
continue
}
visited[f.typ] = true
// Scan f.typ for fields to include.
for i := 0; i < f.typ.NumField(); i++ {
sf := f.typ.Field(i)
if sf.PkgPath != "" && !sf.Anonymous { // unexported
continue
}
opts := getOptions(sf.Tag)
if opts.skip {
continue
}
index := make([]int, len(f.index)+1)
copy(index, f.index)
index[len(f.index)] = i
ft := sf.Type
if ft.Name() == "" && ft.Kind() == reflect.Ptr {
// Follow pointer.
ft = ft.Elem()
}
// Record found field and index sequence.
if opts.name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct {
tagged := opts.name != ""
name := opts.name
if name == "" {
name = sf.Name
}
fields = append(fields, field{name, tagged, index, ft})
if count[f.typ] > 1 {
// If there were multiple instances, add a second,
// so that the annihilation code will see a duplicate.
// It only cares about the distinction between 1 or 2,
// so don't bother generating any more copies.
fields = append(fields, fields[len(fields)-1])
}
continue
}
// Record new anonymous struct to explore in next round.
nextCount[ft]++
if nextCount[ft] == 1 {
f := field{name: ft.Name(), index: index, typ: ft}
next = append(next, f)
}
}
}
}
sort.Sort(byName(fields))
// Delete all fields that are hidden by the Go rules for embedded fields,
// except that fields with TOML tags are promoted.
// The fields are sorted in primary order of name, secondary order
// of field index length. Loop over names; for each name, delete
// hidden fields by choosing the one dominant field that survives.
out := fields[:0]
for advance, i := 0, 0; i < len(fields); i += advance {
// One iteration per name.
// Find the sequence of fields with the name of this first field.
fi := fields[i]
name := fi.name
for advance = 1; i+advance < len(fields); advance++ {
fj := fields[i+advance]
if fj.name != name {
break
}
}
if advance == 1 { // Only one field with this name
out = append(out, fi)
continue
}
dominant, ok := dominantField(fields[i : i+advance])
if ok {
out = append(out, dominant)
}
}
fields = out
sort.Sort(byIndex(fields))
return fields
}
// dominantField looks through the fields, all of which are known to
// have the same name, to find the single field that dominates the
// others using Go's embedding rules, modified by the presence of
// TOML tags. If there are multiple top-level fields, the boolean
// will be false: This condition is an error in Go and we skip all
// the fields.
func dominantField(fields []field) (field, bool) {
// The fields are sorted in increasing index-length order. The winner
// must therefore be one with the shortest index length. Drop all
// longer entries, which is easy: just truncate the slice.
length := len(fields[0].index)
tagged := -1 // Index of first tagged field.
for i, f := range fields {
if len(f.index) > length {
fields = fields[:i]
break
}
if f.tag {
if tagged >= 0 {
// Multiple tagged fields at the same level: conflict.
// Return no field.
return field{}, false
}
tagged = i
}
}
if tagged >= 0 {
return fields[tagged], true
}
// All remaining fields have the same length. If there's more than one,
// we have a conflict (two fields named "X" at the same level) and we
// return no field.
if len(fields) > 1 {
return field{}, false
}
return fields[0], true
}
var fieldCache struct {
sync.RWMutex
m map[reflect.Type][]field
}
// cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
func cachedTypeFields(t reflect.Type) []field {
fieldCache.RLock()
f := fieldCache.m[t]
fieldCache.RUnlock()
if f != nil {
return f
}
// Compute fields without lock.
// Might duplicate effort but won't hold other computations back.
f = typeFields(t)
if f == nil {
f = []field{}
}
fieldCache.Lock()
if fieldCache.m == nil {
fieldCache.m = map[reflect.Type][]field{}
}
fieldCache.m[t] = f
fieldCache.Unlock()
return f
}
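The decoder uses cachedTypeFields for struct matching, so the encoding/json-style dominance rules above apply: a field with a shorter index sequence hides a deeper embedded one of the same name. A hedged sketch:

```go
package main

import (
	"fmt"

	"github.com/BurntSushi/toml"
)

type Base struct {
	Name string `toml:"name"` // reached through embedding: index length 2
}

type Child struct {
	Base
	Name string `toml:"name"` // index length 1: wins under dominantField
}

func main() {
	var c Child
	if _, err := toml.Decode(`name = "outer"`, &c); err != nil {
		panic(err)
	}
	fmt.Printf("%q %q\n", c.Name, c.Base.Name) // "outer" ""
}
```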


@@ -60,6 +60,9 @@ NewWriterLevelDict(w io.Writer, level int, dict []byte) *Writer
// Write compresses the input data and write it to the underlying writer
(w *Writer) Write(p []byte) (int, error)
// Flush writes any unwritten data to the underlying writer
(w *Writer) Flush() error
// Close flushes the buffer and frees C zstd objects
(w *Writer) Close() error
```


@@ -1,35 +1,15 @@
/* ******************************************************************
bitstream
Part of FSE library
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
* bitstream
* Part of FSE library
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#ifndef BITSTREAM_H_MODULE
#define BITSTREAM_H_MODULE
@@ -37,7 +17,6 @@
#if defined (__cplusplus)
extern "C" {
#endif
/*
* This API consists of small unitary functions, which must be inlined for best performance.
* Since link-time-optimization is not available for all compilers,
@@ -48,6 +27,7 @@ extern "C" {
* Dependencies
******************************************/
#include "mem.h" /* unaligned access routines */
#include "compiler.h" /* UNLIKELY() */
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
#include "error_private.h" /* error codes and messages */
@@ -55,10 +35,12 @@ extern "C" {
/*=========================================
* Target specific
=========================================*/
#if defined(__BMI__) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental) */
#elif defined(__ICCARM__)
# include <intrinsics.h>
#ifndef ZSTD_NO_INTRINSICS
# if defined(__BMI__) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental) */
# elif defined(__ICCARM__)
# include <intrinsics.h>
# endif
#endif
#define STREAM_ACCUMULATOR_MIN_32 25
@@ -160,9 +142,12 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
_BitScanReverse ( &r, val );
return (unsigned) r;
# if STATIC_BMI2 == 1
return _lzcnt_u32(val) ^ 31;
# else
unsigned long r = 0;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
@@ -218,7 +203,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
size_t value, unsigned nbBits)
{
MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
assert(nbBits < BIT_MASK_SIZE);
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
@@ -291,7 +276,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
*/
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
{
if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
bitD->start = (const char*)srcBuffer;
bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
@@ -337,12 +322,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
return srcSize;
}
MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
{
return bitContainer >> start;
}
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
{
U32 const regMask = sizeof(bitContainer)*8 - 1;
/* if start > regMask, bitstream is corrupted, and result is undefined */
@@ -350,10 +335,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
}
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1
return _bzhi_u64(bitContainer, nbBits);
#else
assert(nbBits < BIT_MASK_SIZE);
return bitContainer & BIT_mask[nbBits];
#endif
}
/*! BIT_lookBits() :
@@ -362,7 +351,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
* On 32-bits, maxNbBits==24.
* On 64-bits, maxNbBits==56.
* @return : value extracted */
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{
/* arbitrate between double-shift and shift+mask */
#if 1
@@ -385,7 +374,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
}
MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
{
bitD->bitsConsumed += nbBits;
}
@@ -394,7 +383,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
* Read (consume) next n bits from local register and update.
* Pay attention to not read more than nbBits contained into local register.
* @return : extracted value. */
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
{
size_t const value = BIT_lookBits(bitD, nbBits);
BIT_skipBits(bitD, nbBits);
@@ -411,6 +400,23 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
return value;
}
/*! BIT_reloadDStreamFast() :
* Similar to BIT_reloadDStream(), but with two differences:
* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
* 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
* point you must use BIT_reloadDStream() to reload.
*/
MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
{
if (UNLIKELY(bitD->ptr < bitD->limitPtr))
return BIT_DStream_overflow;
assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
bitD->ptr -= bitD->bitsConsumed >> 3;
bitD->bitsConsumed &= 7;
bitD->bitContainer = MEM_readLEST(bitD->ptr);
return BIT_DStream_unfinished;
}
/*! BIT_reloadDStream() :
* Refill `bitD` from buffer previously set in BIT_initDStream() .
* This function is safe, it guarantees it will not read beyond src buffer.
@@ -422,10 +428,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
return BIT_DStream_overflow;
if (bitD->ptr >= bitD->limitPtr) {
bitD->ptr -= bitD->bitsConsumed >> 3;
bitD->bitsConsumed &= 7;
bitD->bitContainer = MEM_readLEST(bitD->ptr);
return BIT_DStream_unfinished;
return BIT_reloadDStreamFast(bitD);
}
if (bitD->ptr == bitD->start) {
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -17,7 +17,7 @@
/* force inlining */
#if !defined(ZSTD_NO_INLINE)
#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# define INLINE_KEYWORD inline
#else
# define INLINE_KEYWORD
@@ -38,6 +38,17 @@
#endif
/**
On MSVC qsort requires that functions passed into it use the __cdecl calling convention (CC).
This explicitly marks such functions as __cdecl so that the code will still compile
if a CC other than __cdecl has been made the default.
*/
#if defined(_MSC_VER)
# define WIN_CDECL __cdecl
#else
# define WIN_CDECL
#endif
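For illustration (cmpU32 is a hypothetical comparator, not from this change), any function handed to qsort() would carry the annotation:
static int WIN_CDECL cmpU32(const void* a, const void* b)
{
    U32 const x = *(const U32*)a;
    U32 const y = *(const U32*)b;
    return (x > y) - (x < y);  /* -1, 0, or 1 without overflow */
}
/* usage: qsort(values, count, sizeof(U32), cmpU32); */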
/**
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
* parameters. They must be inlined for the compiler to eliminate the constant
@@ -117,6 +128,9 @@
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
# elif defined(__aarch64__)
# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
# else
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
@@ -136,7 +150,7 @@
/* vectorization
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
#if !defined(__clang__) && defined(__GNUC__)
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
# else
@@ -146,6 +160,19 @@
# define DONT_VECTORIZE
#endif
/* Tell the compiler that a branch is likely or unlikely.
* Only use these macros if it causes the compiler to generate better code.
* If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
* and clang, please do.
*/
#if defined(__GNUC__)
#define LIKELY(x) (__builtin_expect((x), 1))
#define UNLIKELY(x) (__builtin_expect((x), 0))
#else
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#endif
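A minimal sketch of the intended use, in the spirit of the comment above: annotate only the rare branch, and only where the hint measurably helps code layout:
if (UNLIKELY(srcSize == 0))
    return ERROR(srcSize_wrong);  /* rare error path kept off the hot path */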
/* disable warnings */
#ifdef _MSC_VER /* Visual Studio */
# include <intrin.h> /* For Visual 2005 */
@@ -156,4 +183,106 @@
# pragma warning(disable : 4324) /* disable: C4324: padded structure */
#endif
/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
#ifndef STATIC_BMI2
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
# ifdef __AVX2__ // MSVC does not have a BMI2-specific flag, but every CPU that supports AVX2 also supports BMI2
# define STATIC_BMI2 1
# endif
# endif
#endif
#ifndef STATIC_BMI2
#define STATIC_BMI2 0
#endif
/* compat. with non-clang compilers */
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif
/* compat. with non-clang compilers */
#ifndef __has_feature
# define __has_feature(x) 0
#endif
/* detects whether we are being compiled under msan */
#ifndef ZSTD_MEMORY_SANITIZER
# if __has_feature(memory_sanitizer)
# define ZSTD_MEMORY_SANITIZER 1
# else
# define ZSTD_MEMORY_SANITIZER 0
# endif
#endif
#if ZSTD_MEMORY_SANITIZER
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
#include <stddef.h> /* size_t */
#define ZSTD_DEPS_NEED_STDINT
#include "zstd_deps.h" /* intptr_t */
/* Make memory region fully initialized (without changing its contents). */
void __msan_unpoison(const volatile void *a, size_t size);
/* Make memory region fully uninitialized (without changing its contents).
This is a legacy interface that does not update origin information. Use
__msan_allocated_memory() instead. */
void __msan_poison(const volatile void *a, size_t size);
/* Returns the offset of the first (at least partially) poisoned byte in the
memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
#endif
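A hedged usage sketch (ws and wsSize are hypothetical): asserting that a workspace is fully initialized before use under MSan:
intptr_t const firstBad = __msan_test_shadow(ws, wsSize);
assert(firstBad == -1);  /* -1 means every byte of [ws, ws+wsSize) is initialized */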
/* detects whether we are being compiled under asan */
#ifndef ZSTD_ADDRESS_SANITIZER
# if __has_feature(address_sanitizer)
# define ZSTD_ADDRESS_SANITIZER 1
# elif defined(__SANITIZE_ADDRESS__)
# define ZSTD_ADDRESS_SANITIZER 1
# else
# define ZSTD_ADDRESS_SANITIZER 0
# endif
#endif
#if ZSTD_ADDRESS_SANITIZER
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
#include <stddef.h> /* size_t */
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
*
* This memory must be previously allocated by your program. Instrumented
* code is forbidden from accessing addresses in this region until it is
* unpoisoned. This function is not guaranteed to poison the entire region -
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
* alignment restrictions.
*
* \note This function is not thread-safe because no two threads can poison or
* unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
*
* This memory must be previously allocated by your program. Accessing
* addresses in this region is allowed until this region is poisoned again.
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
* to ASan alignment restrictions.
*
* \note This function is not thread-safe because no two threads can
* poison or unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#endif
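And the matching ASan sketch (arena and arenaSize are hypothetical): a custom allocator poisons its free space and unpoisons regions as it hands them out:
__asan_poison_memory_region(arena, arenaSize);  /* any access now faults under ASan */
/* ... on allocating n bytes from the front ... */
__asan_unpoison_memory_region(arena, n);        /* handed-out region is addressable again */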
#endif /* ZSTD_COMPILER_H */

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -40,33 +40,42 @@
* Constants
***************************************/
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
#define DEFAULT_SPLITPOINT 1.0
#define COVER_DEFAULT_SPLITPOINT 1.0
/*-*************************************
* Console display
***************************************/
#ifndef LOCALDISPLAYLEVEL
static int g_displayLevel = 2;
#endif
#undef DISPLAY
#define DISPLAY(...) \
{ \
fprintf(stderr, __VA_ARGS__); \
fflush(stderr); \
}
#undef LOCALDISPLAYLEVEL
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
if (displayLevel >= l) { \
DISPLAY(__VA_ARGS__); \
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
#undef DISPLAYLEVEL
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
#ifndef LOCALDISPLAYUPDATE
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;
#endif
#undef LOCALDISPLAYUPDATE
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
if (displayLevel >= l) { \
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
g_time = clock(); \
DISPLAY(__VA_ARGS__); \
} \
}
#undef DISPLAYUPDATE
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;
/*-*************************************
* Hash table
@@ -120,9 +129,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
/**
* Internal hash function
*/
static const U32 prime4bytes = 2654435761U;
static const U32 COVER_prime4bytes = 2654435761U;
static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
return (key * prime4bytes) >> (32 - map->sizeLog);
return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
}
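For intuition (editor's example): this is Knuth-style multiplicative hashing; keeping the top sizeLog bits of the 32-bit product spreads keys evenly across the table:
U32 const sizeLog = 10;                                    /* 1024-slot table */
U32 const key     = 12345u;
U32 const slot    = (key * 2654435761u) >> (32 - sizeLog); /* always < (1u << sizeLog) */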
/**
@@ -215,7 +224,7 @@ typedef struct {
} COVER_ctx_t;
/* We need a global context for qsort... */
static COVER_ctx_t *g_ctx = NULL;
static COVER_ctx_t *g_coverCtx = NULL;
/*-*************************************
* Helper functions
@@ -258,11 +267,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
/**
* Same as COVER_cmp() except ties are broken by pointer value
* NOTE: g_ctx must be set to call this function. A global is required because
* NOTE: g_coverCtx must be set to call this function. A global is required because
* qsort doesn't take an opaque pointer.
*/
static int COVER_strict_cmp(const void *lp, const void *rp) {
int result = COVER_cmp(g_ctx, lp, rp);
static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
int result = COVER_cmp(g_coverCtx, lp, rp);
if (result == 0) {
result = lp < rp ? -1 : 1;
}
@@ -271,8 +280,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
/**
* Faster version for d <= 8.
*/
static int COVER_strict_cmp8(const void *lp, const void *rp) {
int result = COVER_cmp8(g_ctx, lp, rp);
static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
int result = COVER_cmp8(g_coverCtx, lp, rp);
if (result == 0) {
result = lp < rp ? -1 : 1;
}
@@ -603,7 +612,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
/* qsort doesn't take an opaque pointer, so pass as a global.
* On OpenBSD qsort() is not guaranteed to be stable; their mergesort() is.
*/
g_ctx = ctx;
g_coverCtx = ctx;
#if defined(__OpenBSD__)
mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
@@ -946,7 +955,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
free(selection.dictContent);
}
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
@@ -954,8 +963,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
size_t largestCompressed = 0;
BYTE* customDictContentEnd = customDictContent + dictContentSize;
BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
if (!largestDictbuffer || !candidateDictBuffer) {
@@ -967,7 +976,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
/* Initial dictionary size and compressed size */
memcpy(largestDictbuffer, customDictContent, dictContentSize);
dictContentSize = ZDICT_finalizeDictionary(
largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
if (ZDICT_isError(dictContentSize)) {
@@ -1001,7 +1010,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
while (dictContentSize < largestDict) {
memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
dictContentSize = ZDICT_finalizeDictionary(
candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
if (ZDICT_isError(dictContentSize)) {
@@ -1079,7 +1088,7 @@ static void COVER_tryParameters(void *opaque) {
{
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
dictBufferCapacity, parameters);
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
totalCompressedSize);
@@ -1106,7 +1115,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
/* constants */
const unsigned nbThreads = parameters->nbThreads;
const double splitPoint =
parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;

View File

@@ -1,3 +1,13 @@
/*
* Copyright (c) 2017-2020, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
@@ -142,6 +152,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
* smallest dictionary within a specified regression of the compressed size
* from the largest dictionary.
*/
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-present, Facebook, Inc.
* Copyright (c) 2018-2020, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -16,8 +16,6 @@
* https://github.com/facebook/folly/blob/master/folly/CpuId.h
*/
#include <string.h>
#include "mem.h"
#ifdef _MSC_VER

View File

@@ -1,35 +1,15 @@
/* ******************************************************************
debug
Part of FSE library
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
* debug
* Part of FSE library
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */

View File

@@ -1,35 +1,15 @@
/* ******************************************************************
debug
Part of FSE library
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
* debug
* Part of FSE library
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
@@ -71,15 +51,6 @@ extern "C" {
#endif
/* DEBUGFILE can be defined externally,
* typically through compiler command line.
* note : currently useless.
* Value must be stderr or stdout */
#ifndef DEBUGFILE
# define DEBUGFILE stderr
#endif
/* recommended values for DEBUGLEVEL :
* 0 : release mode, no debug, all run-time checks disabled
* 1 : enables assert() only, no display
@@ -96,7 +67,8 @@ extern "C" {
*/
#if (DEBUGLEVEL>=1)
# include <assert.h>
# define ZSTD_DEPS_NEED_ASSERT
# include "zstd_deps.h"
#else
# ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
# define assert(condition) ((void)0) /* disable assert (default) */
@@ -104,7 +76,8 @@ extern "C" {
#endif
#if (DEBUGLEVEL>=2)
# include <stdio.h>
# define ZSTD_DEPS_NEED_IO
# include "zstd_deps.h"
extern int g_debuglevel; /* the variable is only declared,
it actually lives in debug.c,
and is shared by the whole process.
@@ -112,14 +85,14 @@ extern int g_debuglevel; /* the variable is only declared,
It's useful when enabling very verbose levels
on selective conditions (such as position in src) */
# define RAWLOG(l, ...) { \
if (l<=g_debuglevel) { \
fprintf(stderr, __VA_ARGS__); \
# define RAWLOG(l, ...) { \
if (l<=g_debuglevel) { \
ZSTD_DEBUG_PRINT(__VA_ARGS__); \
} }
# define DEBUGLOG(l, ...) { \
if (l<=g_debuglevel) { \
fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
fprintf(stderr, " \n"); \
# define DEBUGLOG(l, ...) { \
if (l<=g_debuglevel) { \
ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
ZSTD_DEBUG_PRINT(" \n"); \
} }
#else
# define RAWLOG(l, ...) {} /* disabled */

View File

@@ -1,36 +1,16 @@
/*
Common functions of New Generation Entropy library
Copyright (C) 2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
*************************************************************************** */
/* ******************************************************************
* Common functions of New Generation Entropy library
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
* - Public forum : https://groups.google.com/forum/#!forum/lz4c
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* *************************************
* Dependencies
@@ -58,8 +38,31 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
/*-**************************************************************
* FSE NCount encoding-decoding
****************************************************************/
size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
static U32 FSE_ctz(U32 val)
{
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
return _BitScanForward(&r, val) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_ctz(val);
# elif defined(__ICCARM__) /* IAR Intrinsic */
return __CTZ(val);
# else /* Software version */
U32 count = 0;
while ((val & 1) == 0) {
val >>= 1;
++count;
}
return count;
# endif
}
}
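A worked example for intuition (editor's, with a hypothetical bit pattern): the repeat parser below counts consecutive 0b11 two-bit codes in one step via the trailing zeros of the inverted stream, and the | 0x80000000 keeps the argument nonzero so the assert in FSE_ctz cannot fire:
U32 const bits    = 0x2F;                             /* low bit-pairs: 11, 11, 10, ... */
int const repeats = FSE_ctz(~bits | 0x80000000) >> 1; /* ~bits has 4 trailing zeros -> 2 repeats */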
FORCE_INLINE_TEMPLATE
size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
const BYTE* const istart = (const BYTE*) headerBuffer;
const BYTE* const iend = istart + hbSize;
@@ -70,23 +73,23 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
U32 bitStream;
int bitCount;
unsigned charnum = 0;
unsigned const maxSV1 = *maxSVPtr + 1;
int previous0 = 0;
if (hbSize < 4) {
/* This function only works when hbSize >= 4 */
char buffer[4];
memset(buffer, 0, sizeof(buffer));
memcpy(buffer, headerBuffer, hbSize);
if (hbSize < 8) {
/* This function only works when hbSize >= 8 */
char buffer[8] = {0};
ZSTD_memcpy(buffer, headerBuffer, hbSize);
{ size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
buffer, sizeof(buffer));
if (FSE_isError(countSize)) return countSize;
if (countSize > hbSize) return ERROR(corruption_detected);
return countSize;
} }
assert(hbSize >= 4);
assert(hbSize >= 8);
/* init */
memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
bitStream = MEM_readLE32(ip);
nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
@@ -97,36 +100,58 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
threshold = 1<<nbBits;
nbBits++;
while ((remaining>1) & (charnum<=*maxSVPtr)) {
for (;;) {
if (previous0) {
unsigned n0 = charnum;
while ((bitStream & 0xFFFF) == 0xFFFF) {
n0 += 24;
if (ip < iend-5) {
ip += 2;
bitStream = MEM_readLE32(ip) >> bitCount;
/* Count the number of repeats. Each time the
* 2-bit repeat code is 0b11 there is another
* repeat.
* Avoid UB by setting the high bit to 1.
*/
int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
while (repeats >= 12) {
charnum += 3 * 12;
if (LIKELY(ip <= iend-7)) {
ip += 3;
} else {
bitStream >>= 16;
bitCount += 16;
} }
while ((bitStream & 3) == 3) {
n0 += 3;
bitStream >>= 2;
bitCount += 2;
bitCount -= (int)(8 * (iend - 7 - ip));
bitCount &= 31;
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> bitCount;
repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
}
n0 += bitStream & 3;
charnum += 3 * repeats;
bitStream >>= 2 * repeats;
bitCount += 2 * repeats;
/* Add the final repeat which isn't 0b11. */
assert((bitStream & 3) < 3);
charnum += bitStream & 3;
bitCount += 2;
if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
while (charnum < n0) normalizedCounter[charnum++] = 0;
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
/* This is an error, but break and return an error
* at the end, because returning out of a loop makes
* it harder for the compiler to optimize.
*/
if (charnum >= maxSV1) break;
/* We don't need to set the normalized count to 0
* because we already memset the whole buffer to 0.
*/
if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
assert((bitCount >> 3) <= 3); /* For first condition to work */
ip += bitCount>>3;
bitCount &= 7;
bitStream = MEM_readLE32(ip) >> bitCount;
} else {
bitStream >>= 2;
} }
{ int const max = (2*threshold-1) - remaining;
bitCount -= (int)(8 * (iend - 4 - ip));
bitCount &= 31;
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> bitCount;
}
{
int const max = (2*threshold-1) - remaining;
int count;
if ((bitStream & (threshold-1)) < (U32)max) {
@@ -139,24 +164,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
}
count--; /* extra accuracy */
remaining -= count < 0 ? -count : count; /* -1 means +1 */
/* When it matters (small blocks), this is a
* predictable branch, because we don't use -1.
*/
if (count >= 0) {
remaining -= count;
} else {
assert(count == -1);
remaining += count;
}
normalizedCounter[charnum++] = (short)count;
previous0 = !count;
while (remaining < threshold) {
nbBits--;
threshold >>= 1;
}
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
assert(threshold > 1);
if (remaining < threshold) {
/* This branch can be folded into the
* threshold update condition because we
* know that threshold > 1.
*/
if (remaining <= 1) break;
nbBits = BIT_highbit32(remaining) + 1;
threshold = 1 << (nbBits - 1);
}
if (charnum >= maxSV1) break;
if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
ip += bitCount>>3;
bitCount &= 7;
} else {
bitCount -= (int)(8 * (iend - 4 - ip));
bitCount &= 31;
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> (bitCount & 31);
} } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
bitStream = MEM_readLE32(ip) >> bitCount;
} }
if (remaining != 1) return ERROR(corruption_detected);
/* Only possible when there are too many zeros. */
if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
if (bitCount > 32) return ERROR(corruption_detected);
*maxSVPtr = charnum-1;
@@ -164,6 +208,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
return ip-istart;
}
/* Avoids the FORCE_INLINE of the _body() function. */
static size_t FSE_readNCount_body_default(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
#endif
size_t FSE_readNCount_bmi2(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize, int bmi2)
{
#if DYNAMIC_BMI2
if (bmi2) {
return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
#endif
(void)bmi2;
return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
size_t FSE_readNCount(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
}
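This default/bmi2 pair is a pattern the change repeats (HUF_readStats below gets the same treatment). In general form, with a hypothetical function name work:
FORCE_INLINE_TEMPLATE size_t work_body(const void* src, size_t n) { (void)src; return n; }
static size_t work_default(const void* src, size_t n) { return work_body(src, n); }
#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static size_t work_bmi2(const void* src, size_t n) { return work_body(src, n); }
#endif
size_t work(const void* src, size_t n, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) return work_bmi2(src, n);
#endif
    (void)bmi2;
    return work_default(src, n);  /* one inlined body, two compiled instantiations */
}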
/*! HUF_readStats() :
Read compact Huffman tree, saved by HUF_writeCTable().
@@ -175,6 +256,17 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize)
{
U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
}
FORCE_INLINE_TEMPLATE size_t
HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize,
int bmi2)
{
U32 weightTotal;
const BYTE* ip = (const BYTE*) src;
@@ -183,7 +275,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
if (!srcSize) return ERROR(srcSize_wrong);
iSize = ip[0];
/* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzers complain ... */
/* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzers complain ... */
if (iSize >= 128) { /* special header */
oSize = iSize - 127;
@@ -197,14 +289,14 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
huffWeight[n+1] = ip[n/2] & 15;
} } }
else { /* header compressed with FSE (normal case) */
FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
/* max (hwSize-1) values decoded, as last one is implied */
oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
if (FSE_isError(oSize)) return oSize;
}
/* collect weight stats */
memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
weightTotal = 0;
{ U32 n; for (n=0; n<oSize; n++) {
if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
@@ -234,3 +326,37 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
*nbSymbolsPtr = (U32)(oSize+1);
return iSize+1;
}
/* Avoids the FORCE_INLINE of the _body() function. */
static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)
{
return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
}
#if DYNAMIC_BMI2
static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)
{
return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
}
#endif
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize,
int bmi2)
{
#if DYNAMIC_BMI2
if (bmi2) {
return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
}
#endif
(void)bmi2;
return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -47,6 +47,8 @@ const char* ERR_getErrorString(ERR_enum code)
/* following error codes are not stable and may be removed or changed in a future version */
case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
case PREFIX(maxCode):
default: return notErrorCode;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -21,7 +21,7 @@ extern "C" {
/* ****************************************
* Dependencies
******************************************/
#include <stddef.h> /* size_t */
#include "zstd_deps.h" /* size_t */
#include "zstd_errors.h" /* enum list */
@@ -49,7 +49,7 @@ typedef ZSTD_ErrorCode ERR_enum;
/*-****************************************
* Error codes handling
******************************************/
#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */
#undef ERROR /* already defined on Visual Studio */
#define ERROR(name) ZSTD_ERROR(name)
#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
@@ -57,6 +57,10 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
/* check and forward error code */
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
/*-****************************************
* Error Strings

View File

@@ -1,3 +1,13 @@
/*
* Copyright (c) 2018-2020, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*-*************************************
* Dependencies
***************************************/
@@ -11,6 +21,7 @@
#include "threading.h"
#include "cover.h"
#include "zstd_internal.h" /* includes zstd.h */
#include "zstd_compress_internal.h" /* ZSTD_hash*() */
#ifndef ZDICT_STATIC_LINKING_ONLY
#define ZDICT_STATIC_LINKING_ONLY
#endif
@@ -23,7 +34,7 @@
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
#define FASTCOVER_MAX_F 31
#define FASTCOVER_MAX_ACCEL 10
#define DEFAULT_SPLITPOINT 0.75
#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
#define DEFAULT_F 20
#define DEFAULT_ACCEL 1
@@ -31,50 +42,50 @@
/*-*************************************
* Console display
***************************************/
#ifndef LOCALDISPLAYLEVEL
static int g_displayLevel = 2;
#endif
#undef DISPLAY
#define DISPLAY(...) \
{ \
fprintf(stderr, __VA_ARGS__); \
fflush(stderr); \
}
#undef LOCALDISPLAYLEVEL
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
if (displayLevel >= l) { \
DISPLAY(__VA_ARGS__); \
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
#undef DISPLAYLEVEL
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
#ifndef LOCALDISPLAYUPDATE
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;
#endif
#undef LOCALDISPLAYUPDATE
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
if (displayLevel >= l) { \
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
g_time = clock(); \
DISPLAY(__VA_ARGS__); \
} \
}
#undef DISPLAYUPDATE
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;
/*-*************************************
* Hash Functions
***************************************/
static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
/**
* Hash the d-byte value pointed to by p and mod 2^f
* Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
*/
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
if (d == 6) {
return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
return ZSTD_hash6Ptr(p, f);
}
return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
return ZSTD_hash8Ptr(p, f);
}
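A short sketch of the simplified contract (p is a hypothetical sample pointer): since ZSTD_hash6()/ZSTD_hash8() above already shift down to an h-bit result, passing f directly makes the old explicit masking unnecessary:
size_t const idx = FASTCOVER_hashPtrToIndex(p, /* f */ 20, /* d */ 8);
/* idx < ((size_t)1 << 20): the hash already returns an f-bit value */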
@@ -476,7 +487,7 @@ static void FASTCOVER_tryParameters(void *opaque)
parameters, segmentFreqs);
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
totalCompressedSize);
@@ -607,7 +618,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
/* constants */
const unsigned nbThreads = parameters->nbThreads;
const double splitPoint =
parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;

92 vendor/github.com/DataDog/zstd/fse.h generated vendored
View File

@@ -1,35 +1,15 @@
/* ******************************************************************
FSE : Finite State Entropy codec
Public Prototypes declaration
Copyright (C) 2013-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
* FSE : Finite State Entropy codec
* Public Prototypes declaration
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#if defined (__cplusplus)
@@ -43,7 +23,7 @@ extern "C" {
/*-*****************************************
* Dependencies
******************************************/
#include <stddef.h> /* size_t, ptrdiff_t */
#include "zstd_deps.h" /* size_t, ptrdiff_t */
/*-*****************************************
@@ -157,10 +137,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
/*! FSE_normalizeCount():
normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
useLowProbCount is a boolean parameter which trades off compressed size for
faster header decoding. When it is set to 1, the compressed data will be slightly
smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
is a good default, since header deserialization makes a big speed difference.
Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
@return : tableLog,
or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
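A hedged call-site sketch (norm, count, and friends assumed set up), mirroring the heuristic FSE_compress_wksp() adopts later in this change:
size_t const tl = FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue,
                                     /* useLowProbCount */ srcSize >= 2048);
if (FSE_isError(tl)) return tl;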
/*! FSE_NCountWriteBound():
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@@ -248,6 +234,13 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
const void* rBuffer, size_t rBuffSize);
/*! FSE_readNCount_bmi2():
* Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
*/
FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
const void* rBuffer, size_t rBuffSize, int bmi2);
/*! Constructor and Destructor of FSE_DTable.
Note that its size depends on 'tableLog' */
typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
@@ -308,12 +301,12 @@ If there is an error, the function will return an error code, which can be teste
*******************************************/
/* FSE buffer bounds */
#define FSE_NCOUNTBOUND 512
#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
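The added parentheses are macro hygiene (editor's worked example): with a compound argument such as x & 0xFFFFF, the old expansion of the size>>7 term read x & 0xFFFFF >> 7, which parses as x & (0xFFFFF >> 7) because >> binds tighter than &; the parenthesized (size)>>7 yields the intended (x & 0xFFFFF) >> 7.
size_t const bound = FSE_BLOCKBOUND(srcSize & 0xFFFFF);  /* now safe for compound arguments */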
/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
@@ -329,9 +322,9 @@ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsi
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
* FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
*/
#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
@@ -342,18 +335,30 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
/* FSE_buildCTable_wksp() :
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
* `wkspSize` must be >= `(1<<tableLog)`.
* `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
*/
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue))
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
typedef enum {
FSE_repeat_none, /**< Cannot use the previous table */
@@ -664,6 +669,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
#ifndef FSE_DEFAULT_MEMORY_USAGE
# define FSE_DEFAULT_MEMORY_USAGE 13
#endif
#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
#endif
/*!FSE_MAX_SYMBOL_VALUE :
* Maximum symbol value authorized.
@@ -697,7 +705,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
#endif
#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
#endif /* FSE_STATIC_LINKING_ONLY */

View File

@@ -1,42 +1,20 @@
/* ******************************************************************
FSE : Finite State Entropy encoder
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
* FSE : Finite State Entropy encoder
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
* - Public forum : https://groups.google.com/forum/#!forum/lz4c
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* **************************************************************
* Includes
****************************************************************/
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memcpy, memset */
#include "compiler.h"
#include "mem.h" /* U32, U16, etc. */
#include "debug.h" /* assert, DEBUGLOG */
@@ -45,6 +23,9 @@
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#include "error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
/* **************************************************************
@@ -94,13 +75,15 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
U32 const step = FSE_TABLESTEP(tableSize);
U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
U32* cumul = (U32*)workSpace;
FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
U32 highThreshold = tableSize-1;
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
/* CTable header */
if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
tableU16[-2] = (U16) tableLog;
tableU16[-1] = (U16) maxSymbolValue;
assert(tableLog < 16); /* required for threshold strategy to work */
@@ -109,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
#ifdef __clang_analyzer__
memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
#endif
/* symbol start positions */
@@ -188,12 +171,13 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
return 0;
}
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
}
#endif
@@ -327,10 +311,10 @@ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
size_t size;
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
return (FSE_CTable*)malloc(size);
return (FSE_CTable*)ZSTD_malloc(size);
}
void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
@@ -361,11 +345,10 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
}
/* Secondary normalization method.
To be used when primary method fails. */
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
{
short const NOT_YET_ASSIGNED = -2;
U32 s;
@@ -382,7 +365,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
continue;
}
if (count[s] <= lowThreshold) {
norm[s] = -1;
norm[s] = lowProbCount;
distributed++;
total -= count[s];
continue;
@@ -434,7 +417,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
{ U64 const vStepLog = 62 - tableLog;
U64 const mid = (1ULL << (vStepLog-1)) - 1;
U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
U64 tmpTotal = mid;
for (s=0; s<=maxSymbolValue; s++) {
if (norm[s]==NOT_YET_ASSIGNED) {
@@ -451,10 +434,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
return 0;
}
size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
const unsigned* count, size_t total,
unsigned maxSymbolValue)
unsigned maxSymbolValue, unsigned useLowProbCount)
{
/* Sanity checks */
if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
@@ -463,8 +445,9 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
{ static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
short const lowProbCount = useLowProbCount ? -1 : 1;
U64 const scale = 62 - tableLog;
U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! */
U64 const vStep = 1ULL<<(scale-20);
int stillToDistribute = 1<<tableLog;
unsigned s;
@@ -476,7 +459,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
if (count[s] == total) return 0; /* rle special case */
if (count[s] == 0) { normalizedCounter[s]=0; continue; }
if (count[s] <= lowThreshold) {
normalizedCounter[s] = -1;
normalizedCounter[s] = lowProbCount;
stillToDistribute--;
} else {
short proba = (short)((count[s]*step) >> scale);
@@ -490,7 +473,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
} }
if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
/* corner case, need another normalization method */
size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
if (FSE_isError(errorCode)) return errorCode;
}
else normalizedCounter[largest] += (short)stillToDistribute;
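To see what the fast path above computes: with scale = 62 - tableLog, proba = (count[s] * step) >> scale ≈ count[s] * 2^tableLog / total. With illustrative numbers, say tableLog = 11, total = 1000 and count[s] = 300, we get step = 2^62 / 1000 and proba = (300 * step) >> 51 ≈ 300 * 2048 / 1000 = 614. The per-symbol estimates therefore sum to roughly 2^11 = 2048, and stillToDistribute hands the rounding remainder to the largest symbol, or falls through to FSE_normalizeM2 in the corner case above.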
@@ -645,9 +628,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* `wkspSize` size must be `(1<<tableLog)`.
@@ -666,7 +647,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
/* init conditions */
if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
if (srcSize <= 1) return 0; /* Not compressible */
if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
@@ -679,7 +660,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
}
tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );
/* Write table description header */
{ CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
@@ -701,13 +682,16 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
typedef struct {
FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
union {
U32 hist_wksp[HIST_WKSP_SIZE_U32];
BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
} workspace;
} fseWkspMax_t;
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
{
fseWkspMax_t scratchBuffer;
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here mean scratchBuffer is not large enough */
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here mean scratchBuffer is not large enough */
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
}
@@ -716,6 +700,6 @@ size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcS
{
return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
}
#endif
#endif /* FSE_COMMONDEFS_ONLY */


@@ -1,48 +1,29 @@
/* ******************************************************************
FSE : Finite State Entropy decoder
Copyright (C) 2013-2015, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
* FSE : Finite State Entropy decoder
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
* - Public forum : https://groups.google.com/forum/#!forum/lz4c
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* **************************************************************
* Includes
****************************************************************/
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memcpy, memset */
#include "debug.h" /* assert */
#include "bitstream.h"
#include "compiler.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#include "error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h"
/* **************************************************************
@@ -51,11 +32,6 @@
#define FSE_isError ERR_isError
#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
/* check and forward error code */
#ifndef CHECK_F
#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
#endif
/* **************************************************************
* Templates
@@ -84,25 +60,27 @@
FSE_DTable* FSE_createDTable (unsigned tableLog)
{
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
}
void FSE_freeDTable (FSE_DTable* dt)
{
free(dt);
ZSTD_free(dt);
}
size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
U16* symbolNext = (U16*)workSpace;
BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
U32 const maxSV1 = maxSymbolValue + 1;
U32 const tableSize = 1 << tableLog;
U32 highThreshold = tableSize-1;
/* Sanity Checks */
if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
@@ -120,11 +98,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
symbolNext[s] = normalizedCounter[s];
} } }
memcpy(dt, &DTableH, sizeof(DTableH));
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
}
/* Spread symbols */
{ U32 const tableMask = tableSize-1;
if (highThreshold == tableSize - 1) {
size_t const tableMask = tableSize-1;
size_t const step = FSE_TABLESTEP(tableSize);
/* First lay down the symbols in order.
* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
* misses since small blocks generally have small table logs, so nearly
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
* our buffer to handle the over-write.
*/
{
U64 const add = 0x0101010101010101ull;
size_t pos = 0;
U64 sv = 0;
U32 s;
for (s=0; s<maxSV1; ++s, sv += add) {
int i;
int const n = normalizedCounter[s];
MEM_write64(spread + pos, sv);
for (i = 8; i < n; i += 8) {
MEM_write64(spread + pos + i, sv);
}
pos += n;
}
}
/* Now we spread those positions across the table.
* The benefit of doing it in two stages is that we avoid the
* variable-size inner loop, which caused lots of branch misses.
* Now we can run through all the positions without any branch misses.
* We unroll the loop twice, since that is what empirically worked best.
*/
{
size_t position = 0;
size_t s;
size_t const unroll = 2;
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
for (s = 0; s < (size_t)tableSize; s += unroll) {
size_t u;
for (u = 0; u < unroll; ++u) {
size_t const uPosition = (position + (u * step)) & tableMask;
tableDecode[uPosition].symbol = spread[s + u];
}
position = (position + (unroll * step)) & tableMask;
}
assert(position == 0);
}
} else {
U32 const tableMask = tableSize-1;
U32 const step = FSE_TABLESTEP(tableSize);
U32 s, position = 0;
for (s=0; s<maxSV1; s++) {
@@ -149,6 +173,11 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
return 0;
}
size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
}
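The two-stage spread added above is easier to read outside the diff. Below is a minimal self-contained sketch of stage 2 only (hypothetical names; the real code writes FSE_DECODE_TYPE entries and takes step from FSE_TABLESTEP):

#include <stddef.h>
#include <stdint.h>

/* `laid` already holds each symbol repeated by its normalized count
 * (stage 1); `step` must be odd so the strided walk over the
 * power-of-2 table visits every slot exactly once. */
static void spread_positions(uint8_t* table, size_t size, size_t step,
                             const uint8_t* laid)
{
    size_t const mask = size - 1;
    size_t pos = 0;
    size_t s;
    for (s = 0; s < size; s += 2) {   /* unrolled twice, no data-dependent branch */
        table[pos] = laid[s];
        table[(pos + step) & mask] = laid[s + 1];
        pos = (pos + 2 * step) & mask;
    }
}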
#ifndef FSE_COMMONDEFS_ONLY
@@ -276,36 +305,89 @@ size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
}
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
}
FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
void* dst, size_t dstCapacity,
const void* cSrc, size_t cSrcSize,
unsigned maxLog, void* workSpace, size_t wkspSize,
int bmi2)
{
const BYTE* const istart = (const BYTE*)cSrc;
const BYTE* ip = istart;
short counting[FSE_MAX_SYMBOL_VALUE+1];
unsigned tableLog;
unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
FSE_DTable* const dtable = (FSE_DTable*)workSpace;
/* normal FSE decoding mode */
size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
if (FSE_isError(NCountLength)) return NCountLength;
//if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
assert(NCountLength <= cSrcSize);
ip += NCountLength;
cSrcSize -= NCountLength;
CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog);
wkspSize -= FSE_DTABLE_SIZE(tableLog);
return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */
CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) );
{
const void* ptr = dtable;
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
const U32 fastMode = DTableH->fastMode;
/* select fast mode (static) */
if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
}
}
/* Avoids the FORCE_INLINE of the _body() function. */
static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
}
#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
}
#endif
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
{
#if DYNAMIC_BMI2
if (bmi2) {
return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
}
#endif
(void)bmi2;
return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
}
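The _default/_bmi2 pair above is an instance of zstd's runtime-dispatch idiom: one FORCE_INLINE_TEMPLATE body compiled into two concrete functions, one of them with a bmi2 target attribute, selected by a flag. A generic, self-contained sketch of the shape (hypothetical names; the real TARGET_ATTRIBUTE and DYNAMIC_BMI2 macros live in zstd's compiler.h):

#include <stddef.h>

/* the shared body; in zstd the flag steers BMI2-only code paths */
static inline size_t sum_body(const unsigned char* p, size_t n, int bmi2)
{
    size_t acc = 0, i;
    (void)bmi2;
    for (i = 0; i < n; i++) acc += p[i];
    return acc;
}

static size_t sum_default(const unsigned char* p, size_t n)
{
    return sum_body(p, n, 0);
}

#if defined(__GNUC__) && defined(__x86_64__)
__attribute__((target("bmi2")))   /* compiler may emit BMI2 instructions here */
static size_t sum_bmi2(const unsigned char* p, size_t n)
{
    return sum_body(p, n, 1);
}
#endif

size_t sum_dispatch(const unsigned char* p, size_t n, int bmi2)
{
#if defined(__GNUC__) && defined(__x86_64__)
    if (bmi2) return sum_bmi2(p, n);
#endif
    (void)bmi2;
    return sum_default(p, n);
}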
typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
{
DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) {
U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)];
return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));
}
size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
{
/* Static analyzer seems unable to understand this table will be properly initialized later */
U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp));
}
#endif
#endif /* FSE_COMMONDEFS_ONLY */

vendor/github.com/DataDog/zstd/go.mod generated vendored Normal file

@@ -0,0 +1,3 @@
module github.com/DataDog/zstd
go 1.14


@@ -1,36 +1,16 @@
/* ******************************************************************
hist : Histogram functions
part of Finite State Entropy project
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
* hist : Histogram functions
* part of Finite State Entropy project
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
* - Public forum : https://groups.google.com/forum/#!forum/lz4c
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* --- dependencies --- */
@@ -54,7 +34,7 @@ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
unsigned maxSymbolValue = *maxSymbolValuePtr;
unsigned largestCount=0;
memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
while (ip<end) {
@@ -80,9 +60,9 @@ typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
* this design makes better use of OoO cpus,
* and is noticeably faster when some values are heavily repeated.
* But it needs some additional workspace for intermediate tables.
* `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
* `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
* @return : largest histogram frequency,
* or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
* or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
static size_t HIST_count_parallel_wksp(
unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
@@ -91,22 +71,21 @@ static size_t HIST_count_parallel_wksp(
{
const BYTE* ip = (const BYTE*)source;
const BYTE* const iend = ip+sourceSize;
unsigned maxSymbolValue = *maxSymbolValuePtr;
size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
unsigned max=0;
U32* const Counting1 = workSpace;
U32* const Counting2 = Counting1 + 256;
U32* const Counting3 = Counting2 + 256;
U32* const Counting4 = Counting3 + 256;
memset(workSpace, 0, 4*256*sizeof(unsigned));
/* safety checks */
assert(*maxSymbolValuePtr <= 255);
if (!sourceSize) {
memset(count, 0, maxSymbolValue + 1);
ZSTD_memset(count, 0, countSize);
*maxSymbolValuePtr = 0;
return 0;
}
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
/* by stripes of 16 bytes */
{ U32 cached = MEM_read32(ip); ip += 4;
@@ -138,21 +117,18 @@ static size_t HIST_count_parallel_wksp(
/* finish last symbols */
while (ip<iend) Counting1[*ip++]++;
if (check) { /* verify stats will fit into destination table */
U32 s; for (s=255; s>maxSymbolValue; s--) {
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
} }
{ U32 s;
if (maxSymbolValue > 255) maxSymbolValue = 255;
for (s=0; s<=maxSymbolValue; s++) {
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
if (count[s] > max) max = count[s];
for (s=0; s<256; s++) {
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
if (Counting1[s] > max) max = Counting1[s];
} }
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
{ unsigned maxSymbolValue = 255;
while (!Counting1[maxSymbolValue]) maxSymbolValue--;
if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
*maxSymbolValuePtr = maxSymbolValue;
ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */
}
return (size_t)max;
}
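The four Counting tables exist to break the dependency chain on a single counter when input bytes repeat: four independent increment streams keep an out-of-order core busy. A stripped-down sketch of just that idea (illustrative only; the real code additionally reads 16-byte stripes through a cached U32):

#include <stddef.h>
#include <stdint.h>

static unsigned hist4(unsigned count[256], const uint8_t* src, size_t n)
{
    unsigned c1[256] = {0}, c2[256] = {0}, c3[256] = {0}, c4[256] = {0};
    size_t i = 0;
    unsigned s, max = 0;
    for (; i + 4 <= n; i += 4) {   /* four independent update chains */
        c1[src[i]]++;
        c2[src[i+1]]++;
        c3[src[i+2]]++;
        c4[src[i+3]]++;
    }
    for (; i < n; i++) c1[src[i]]++;   /* tail */
    for (s = 0; s < 256; s++) {
        count[s] = c1[s] + c2[s] + c3[s] + c4[s];
        if (count[s] > max) max = count[s];
    }
    return max;
}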
@@ -172,14 +148,6 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
}
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize)
{
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
}
/* HIST_count_wksp() :
* Same as HIST_count(), but using an externally provided scratch buffer.
* `workSpace` must be a table of >= HIST_WKSP_SIZE_U32 unsigned */
@@ -195,9 +163,19 @@ size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
}
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize)
{
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
}
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
}
#endif


@@ -1,40 +1,20 @@
/* ******************************************************************
hist : Histogram functions
part of Finite State Entropy project
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
* hist : Histogram functions
* part of Finite State Entropy project
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
* - Public forum : https://groups.google.com/forum/#!forum/lz4c
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* --- dependencies --- */
#include <stddef.h> /* size_t */
#include "zstd_deps.h" /* size_t */
/* --- simple histogram functions --- */

vendor/github.com/DataDog/zstd/huf.h generated vendored

@@ -1,35 +1,15 @@
/* ******************************************************************
huff0 huffman codec,
part of Finite State Entropy library
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
* huff0 huffman codec,
* part of Finite State Entropy library
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#if defined (__cplusplus)
@@ -40,7 +20,7 @@ extern "C" {
#define HUF_H_298734234
/* *** Dependencies *** */
#include <stddef.h> /* size_t */
#include "zstd_deps.h" /* size_t */
/* *** library symbols visibility *** */
@@ -110,7 +90,7 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
/** HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
* `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
#define HUF_WORKSPACE_SIZE (6 << 10)
#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@@ -131,6 +111,8 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
/* *** Dependencies *** */
#include "mem.h" /* U32 */
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
/* *** Constants *** */
@@ -153,12 +135,16 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
/* static allocation of HUF's Compression Table */
/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
struct HUF_CElt_s {
U16 val;
BYTE nbBits;
}; /* typedef'd to HUF_CElt */
typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */
#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
void* name##hv = &(name##hb); \
HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
/* static allocation of HUF's DTable */
typedef U32 HUF_DTable;
@@ -204,10 +190,11 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
* or to save and regenerate 'CTable' using external methods.
*/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
typedef enum {
HUF_repeat_none, /**< Cannot use the previous table */
@@ -244,9 +231,22 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize);
/*! HUF_readStats_wksp() :
* Same as HUF_readStats() but takes an external workspace which must be
* 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
*/
#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workspace, size_t wkspSize,
int bmi2);
/** HUF_readCTable() :
* Loading a CTable saved with HUF_writeCTable() */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
/** HUF_getNbBits() :
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
@@ -350,6 +350,9 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
#endif
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
#endif
#endif /* HUF_STATIC_LINKING_ONLY */


@@ -1,35 +1,15 @@
/* ******************************************************************
Huffman encoder, part of New Generation Entropy library
Copyright (C) 2013-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
* Huffman encoder, part of New Generation Entropy library
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
* - Public forum : https://groups.google.com/forum/#!forum/lz4c
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* **************************************************************
@@ -43,8 +23,7 @@
/* **************************************************************
* Includes
****************************************************************/
#include <string.h> /* memcpy, memset */
#include <stdio.h> /* printf (debug) */
#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
#include "compiler.h"
#include "bitstream.h"
#include "hist.h"
@@ -60,8 +39,6 @@
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
/* **************************************************************
@@ -92,7 +69,7 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
unsigned count[HUF_TABLELOG_MAX+1];
S16 norm[HUF_TABLELOG_MAX+1];
@@ -107,29 +84,24 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
}
tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
/* Write table description header */
{ CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
op += hSize;
}
/* Compress */
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
if (cSize == 0) return 0; /* not enough space for compressed data */
op += cSize;
}
return op-ostart;
return (size_t)(op-ostart);
}
struct HUF_CElt_s {
U16 val;
BYTE nbBits;
}; /* typedef'd to HUF_CElt within "huf.h" */
/*! HUF_writeCTable() :
`CTable` : Huffman tree to save, using huf representation.
@return : size of saved CTable */
@@ -169,7 +141,7 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
}
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
@@ -178,6 +150,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
/* get symbol weights */
CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
*hasZeroWeights = (rankVal[0] > 0);
/* check result */
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
@@ -186,15 +159,15 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
/* Prepare base value per rank */
{ U32 n, nextRankStart = 0;
for (n=1; n<=tableLog; n++) {
U32 current = nextRankStart;
U32 curr = nextRankStart;
nextRankStart += (rankVal[n] << (n-1));
rankVal[n] = current;
rankVal[n] = curr;
} }
/* fill nbBits */
{ U32 n; for (n=0; n<nbSymbols; n++) {
const U32 w = huffWeight[n];
CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
} }
/* fill val */
@@ -232,106 +205,202 @@ typedef struct nodeElt_s {
BYTE nbBits;
} nodeElt;
/**
* HUF_setMaxHeight():
* Enforces maxNbBits on the Huffman tree described in huffNode.
*
* It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
* the tree so that it is a valid canonical Huffman tree.
*
* @pre The sum of the ranks of each symbol == 2^largestBits,
* where largestBits == huffNode[lastNonNull].nbBits.
* @post The sum of the ranks of each symbol == 2^largestBits,
* where largestBits is the return value <= maxNbBits.
*
* @param huffNode The Huffman tree modified in place to enforce maxNbBits.
* @param lastNonNull The symbol with the lowest count in the Huffman tree.
* @param maxNbBits The maximum allowed number of bits, which the Huffman tree
* may not respect. After this function the Huffman tree will
* respect maxNbBits.
* @return The maximum number of bits of the Huffman tree after adjustment,
* necessarily no more than maxNbBits.
*/
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
const U32 largestBits = huffNode[lastNonNull].nbBits;
if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */
/* early exit : no elt > maxNbBits, so the tree is already valid. */
if (largestBits <= maxNbBits) return largestBits;
/* there are several too large elements (at least >= 2) */
{ int totalCost = 0;
const U32 baseCost = 1 << (largestBits - maxNbBits);
U32 n = lastNonNull;
int n = (int)lastNonNull;
/* Adjust any ranks > maxNbBits to maxNbBits.
* Compute totalCost, which is how far over 2^largestBits the sum
* of the ranks is after adjusting the offending ranks.
*/
while (huffNode[n].nbBits > maxNbBits) {
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
huffNode[n].nbBits = (BYTE)maxNbBits;
n --;
} /* n stops at huffNode[n].nbBits <= maxNbBits */
while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */
n--;
}
/* n stops at huffNode[n].nbBits <= maxNbBits */
assert(huffNode[n].nbBits <= maxNbBits);
/* n end at index of smallest symbol using < maxNbBits */
while (huffNode[n].nbBits == maxNbBits) --n;
/* renorm totalCost */
totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
/* renorm totalCost from 2^largestBits to 2^maxNbBits
* note : totalCost is necessarily a multiple of baseCost */
assert((totalCost & (baseCost - 1)) == 0);
totalCost >>= (largestBits - maxNbBits);
assert(totalCost > 0);
/* repay normalized cost */
{ U32 const noSymbol = 0xF0F0F0F0;
U32 rankLast[HUF_TABLELOG_MAX+2];
int pos;
/* Get pos of last (smallest) symbol per rank */
memset(rankLast, 0xF0, sizeof(rankLast));
/* Get pos of last (smallest = lowest cum. count) symbol per rank */
ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
{ U32 currentNbBits = maxNbBits;
int pos;
for (pos=n ; pos >= 0; pos--) {
if (huffNode[pos].nbBits >= currentNbBits) continue;
currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
rankLast[maxNbBits-currentNbBits] = pos;
rankLast[maxNbBits-currentNbBits] = (U32)pos;
} }
while (totalCost > 0) {
U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
/* Try to reduce the next power of 2 above totalCost because we
* gain back half the rank.
*/
U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
U32 highPos = rankLast[nBitsToDecrease];
U32 lowPos = rankLast[nBitsToDecrease-1];
U32 const highPos = rankLast[nBitsToDecrease];
U32 const lowPos = rankLast[nBitsToDecrease-1];
if (highPos == noSymbol) continue;
/* Decrease highPos if no symbols of lowPos or if it is
* not cheaper to remove 2 lowPos than highPos.
*/
if (lowPos == noSymbol) break;
{ U32 const highTotal = huffNode[highPos].count;
U32 const lowTotal = 2 * huffNode[lowPos].count;
if (highTotal <= lowTotal) break;
} }
/* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
/* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
nBitsToDecrease ++;
nBitsToDecrease++;
assert(rankLast[nBitsToDecrease] != noSymbol);
/* Increase the number of bits to gain back half the rank cost. */
totalCost -= 1 << (nBitsToDecrease-1);
huffNode[rankLast[nBitsToDecrease]].nbBits++;
/* Fix up the new rank.
* If the new rank was empty, this symbol is now its smallest.
* Otherwise, this symbol will be the largest in the new rank so no adjustment.
*/
if (rankLast[nBitsToDecrease-1] == noSymbol)
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
huffNode[rankLast[nBitsToDecrease]].nbBits ++;
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
/* Fix up the old rank.
* If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
* it must be the only symbol in its rank, so the old rank now has no symbols.
* Otherwise, since the Huffman nodes are sorted by count, the previous position is now
* the smallest node in the rank. If the previous position belongs to a different rank,
* then the rank is now empty.
*/
if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
rankLast[nBitsToDecrease] = noSymbol;
else {
rankLast[nBitsToDecrease]--;
if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
} } /* while (totalCost > 0) */
}
} /* while (totalCost > 0) */
/* If we've removed too much weight, then we have to add it back.
* To avoid overshooting again, we only adjust the smallest rank.
* We take the largest nodes from the lowest rank 0 and move them
* to rank 1. There's guaranteed to be enough rank 0 symbols because
* TODO.
*/
while (totalCost < 0) { /* Sometimes, cost correction overshoot */
if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
/* special case : no rank 1 symbol (using maxNbBits-1);
* let's create one from largest rank 0 (using maxNbBits).
*/
if (rankLast[1] == noSymbol) {
while (huffNode[n].nbBits == maxNbBits) n--;
huffNode[n+1].nbBits--;
rankLast[1] = n+1;
assert(n >= 0);
rankLast[1] = (U32)(n+1);
totalCost++;
continue;
}
huffNode[ rankLast[1] + 1 ].nbBits--;
rankLast[1]++;
totalCost ++;
} } } /* there are several too large elements (at least >= 2) */
}
} /* repay normalized cost */
} /* there are several too large elements (at least >= 2) */
return maxNbBits;
}
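A worked example of the cost accounting above (illustrative numbers only): take largestBits = 13 and maxNbBits = 11, so baseCost = 1 << 2 = 4. Counting in depth-13 slots of the 2^13 Kraft budget, truncating a depth-13 leaf to 11 bits over-fills the budget by 4 - 1 = 3 slots, and truncating a depth-12 leaf by 4 - 2 = 2; two depth-13 leaves plus one depth-12 leaf therefore give totalCost = 8, a multiple of baseCost as the assert demands, and the renorm (>> 2) converts it to 2 slots at depth-11 granularity. Each repayment then pushes the last leaf of some rank r (i.e. depth maxNbBits - r) one bit deeper, halving its Kraft weight from 2^r to 2^(r-1) slots, which is exactly the totalCost -= 1 << (nBitsToDecrease-1) step in the loop.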
typedef struct {
U32 base;
U32 current;
U32 curr;
} rankPos;
static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue)
{
rankPos rank[32];
U32 n;
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
memset(rank, 0, sizeof(rank));
for (n=0; n<=maxSymbolValue; n++) {
U32 r = BIT_highbit32(count[n] + 1);
rank[r].base ++;
#define RANK_POSITION_TABLE_SIZE 32
typedef struct {
huffNodeTable huffNodeTbl;
rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
} HUF_buildCTable_wksp_tables;
/**
* HUF_sort():
* Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
*
* @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
* Must have (maxSymbolValue + 1) entries.
* @param[in] count Histogram of the symbols.
* @param[in] maxSymbolValue Maximum symbol value.
* @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
*/
static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
{
int n;
int const maxSymbolValue1 = (int)maxSymbolValue + 1;
/* Compute base and set curr to base.
* For symbol s let lowerRank = BIT_highbit32(count[s]+1) and rank = lowerRank + 1.
* Then 2^lowerRank <= count[s]+1 <= 2^rank.
* We attribute each symbol to lowerRank's base value, because we want to know where
* each rank begins in the output, so for rank R we want to count ranks R+1 and above.
*/
ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
for (n = 0; n < maxSymbolValue1; ++n) {
U32 lowerRank = BIT_highbit32(count[n] + 1);
rankPosition[lowerRank].base++;
}
for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
for (n=0; n<32; n++) rank[n].current = rank[n].base;
for (n=0; n<=maxSymbolValue; n++) {
assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
rankPosition[n-1].base += rankPosition[n].base;
rankPosition[n-1].curr = rankPosition[n-1].base;
}
/* Sort */
for (n = 0; n < maxSymbolValue1; ++n) {
U32 const c = count[n];
U32 const r = BIT_highbit32(c+1) + 1;
U32 pos = rank[r].current++;
while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
U32 pos = rankPosition[r].curr++;
/* Insert into the correct position in the rank.
* We have at most 256 symbols, so this insertion should be fine.
*/
while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
huffNode[pos] = huffNode[pos-1];
pos--;
}
@@ -343,45 +412,40 @@ static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValu
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
*/
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
/* HUF_buildTree():
* Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
*
* @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array.
* @param maxSymbolValue The maximum symbol value.
* @return The smallest node in the Huffman tree (by count).
*/
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
{
nodeElt* const huffNode0 = (nodeElt*)workSpace;
nodeElt* const huffNode = huffNode0+1;
U32 n, nonNullRank;
nodeElt* const huffNode0 = huffNode - 1;
int nonNullRank;
int lowS, lowN;
U16 nodeNb = STARTNODE;
U32 nodeRoot;
/* safety checks */
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */
HUF_sort(huffNode, count, maxSymbolValue);
int nodeNb = STARTNODE;
int n, nodeRoot;
/* init for parents */
nonNullRank = maxSymbolValue;
nonNullRank = (int)maxSymbolValue;
while(huffNode[nonNullRank].count == 0) nonNullRank--;
lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
nodeNb++; lowS-=2;
for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
/* create parents */
while (nodeNb <= nodeRoot) {
U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
huffNode[n1].parent = huffNode[n2].parent = nodeNb;
huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
nodeNb++;
}
@@ -392,42 +456,73 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
for (n=0; n<=nonNullRank; n++)
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
/* enforce maxTableLog */
maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
return nonNullRank;
}
/* fill result into tree (val, nbBits) */
{ U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
for (n=0; n<=nonNullRank; n++)
nbPerRank[huffNode[n].nbBits]++;
/* determine stating value per rank */
{ U16 min = 0;
for (n=maxNbBits; n>0; n--) {
valPerRank[n] = min; /* get starting value within each rank */
min += nbPerRank[n];
min >>= 1;
} }
for (n=0; n<=maxSymbolValue; n++)
tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
for (n=0; n<=maxSymbolValue; n++)
tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
}
/**
* HUF_buildCTableFromTree():
* Build the CTable given the Huffman tree in huffNode.
*
* @param[out] CTable The output Huffman CTable.
* @param huffNode The Huffman tree.
* @param nonNullRank The last and smallest node in the Huffman tree.
* @param maxSymbolValue The maximum symbol value.
* @param maxNbBits The exact maximum number of bits used in the Huffman tree.
*/
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{
/* fill result into ctable (val, nbBits) */
int n;
U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
int const alphabetSize = (int)(maxSymbolValue + 1);
for (n=0; n<=nonNullRank; n++)
nbPerRank[huffNode[n].nbBits]++;
/* determine starting value per rank */
{ U16 min = 0;
for (n=(int)maxNbBits; n>0; n--) {
valPerRank[n] = min; /* get starting value within each rank */
min += nbPerRank[n];
min >>= 1;
} }
for (n=0; n<alphabetSize; n++)
CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
for (n=0; n<alphabetSize; n++)
CTable[n].val = valPerRank[CTable[n].nbBits]++; /* assign value within rank, symbol order */
}
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
nodeElt* const huffNode = huffNode0+1;
int nonNullRank;
/* safety checks */
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
return ERROR(workSpace_tooSmall);
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
return ERROR(maxSymbolValue_tooLarge);
ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
/* build tree */
nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
/* enforce maxTableLog */
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
return maxNbBits;
}
/** HUF_buildCTable() :
* @return : maxNbBits
* Note : count is used before tree is written, so they can safely overlap
*/
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
{
huffNodeTable nodeTable;
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
}
static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
size_t nbBits = 0;
int s;
@@ -437,7 +532,7 @@ static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count
return nbBits >> 3;
}
static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
int bad = 0;
int s;
for (s = 0; s <= (int)maxSymbolValue; ++s) {
@@ -476,7 +571,7 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
/* init */
if (dstSize < 8) return 0; /* not enough space to compress */
{ size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
{ size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
if (HUF_isError(initErr)) return 0; }
n = srcSize & ~3; /* join to mod 4 */
@@ -573,7 +668,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
if (srcSize < 12) return 0; /* no saving possible : too small input */
op += 6; /* jumpTable */
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
MEM_writeLE16(ostart, (U16)cSize);
@@ -581,7 +677,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
}
ip += segmentSize;
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
MEM_writeLE16(ostart+2, (U16)cSize);
@@ -589,7 +686,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
}
ip += segmentSize;
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
MEM_writeLE16(ostart+4, (U16)cSize);
@@ -597,12 +695,14 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
}
ip += segmentSize;
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
assert(op <= oend);
assert(ip <= iend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
if (cSize==0) return 0;
op += cSize;
}
return op-ostart;
return (size_t)(op-ostart);
}
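/* Format note, read off the writes above: the 4-stream variant emits a
 * 6-byte jump table ahead of the payload,
 *   [csize1 LE16][csize2 LE16][csize3 LE16][stream1][stream2][stream3][stream4],
 * so a decoder can seek to streams 2..4; the size of the 4th stream is
 * implied by the total compressed size. */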
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
@@ -618,40 +718,44 @@ static size_t HUF_compressCTable_internal(
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
{
size_t const cSize = (nbStreams==HUF_singleStream) ?
HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
if (HUF_isError(cSize)) { return cSize; }
if (cSize==0) { return 0; } /* incompressible */
op += cSize;
/* check compressibility */
assert(op >= ostart);
if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
return op-ostart;
return (size_t)(op-ostart);
}
typedef struct {
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
huffNodeTable nodeTable;
HUF_buildCTable_wksp_tables buildCTable_wksp;
} HUF_compress_tables_t;
/* HUF_compress_internal() :
* `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
* `workSpace_align4` must be aligned on 4-bytes boundaries,
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
HUF_nbStreams_e nbStreams,
void* workSpace, size_t wkspSize,
void* workSpace_align4, size_t wkspSize,
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
const int bmi2)
{
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
assert(((size_t)workSpace_align4 & 3) == 0); /* must be aligned on 4-bytes boundaries */
/* checks & inits */
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
if (!srcSize) return 0; /* Uncompressed */
if (!dstSize) return 0; /* cannot fit anything within dst budget */
@@ -669,7 +773,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
}
/* Scan input and build symbol stats */
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
}
@@ -691,11 +795,11 @@ HUF_compress_internal (void* dst, size_t dstSize,
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
maxSymbolValue, huffLog,
table->nodeTable, sizeof(table->nodeTable));
&table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
CHECK_F(maxBits);
huffLog = (U32)maxBits;
/* Zero unused symbols in CTable, so we can check it for validity */
memset(table->CTable + (maxSymbolValue + 1), 0,
ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
}
@@ -716,7 +820,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
op += hSize;
if (repeat) { *repeat = HUF_repeat_none; }
if (oldHufTable)
memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
}
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
@@ -747,14 +851,6 @@ size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
repeat, preferRepeat, bmi2);
}
size_t HUF_compress1X (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
/* HUF_compress4X_repeat():
* compress input using 4 streams.
* provide workspace to generate compression tables */
@@ -784,6 +880,25 @@ size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
hufTable, repeat, preferRepeat, bmi2);
}
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/** HUF_buildCTable() :
* @return : maxNbBits
* Note : count is used before tree is written, so they can safely overlap
*/
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
{
HUF_buildCTable_wksp_tables workspace;
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
}
size_t HUF_compress1X (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
size_t HUF_compress2 (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
@@ -796,3 +911,4 @@ size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSi
{
return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
}
#endif

View File

@@ -1,41 +1,21 @@
/* ******************************************************************
huff0 huffman decoder,
part of Finite State Entropy library
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
* huff0 huffman decoder,
* part of Finite State Entropy library
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* **************************************************************
* Dependencies
****************************************************************/
#include <string.h> /* memcpy, memset */
#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
#include "compiler.h"
#include "bitstream.h" /* BIT_* */
#include "fse.h" /* to compress headers */
@@ -61,9 +41,6 @@
* Error Management
****************************************************************/
#define HUF_isError ERR_isError
#ifndef CHECK_F
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
#endif
/* **************************************************************
@@ -126,7 +103,7 @@ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved;
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
{
DTableDesc dtd;
memcpy(&dtd, table, sizeof(dtd));
ZSTD_memcpy(&dtd, table, sizeof(dtd));
return dtd;
}
@@ -138,29 +115,51 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
/*-***************************/
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
/**
* Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
* a time.
*/
static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
U64 D4;
if (MEM_isLittleEndian()) {
D4 = symbol + (nbBits << 8);
} else {
D4 = (symbol << 8) + nbBits;
}
D4 *= 0x0001000100010001ULL;
return D4;
}
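/* Worked example (illustrative): on a little-endian target,
 * HUF_DEltX1_set4(0x41, 3) computes D4 = 0x41 + (3 << 8) = 0x0341, and
 * 0x0341 * 0x0001000100010001ULL = 0x0341034103410341, i.e. four
 * identical { byte = 0x41, nbBits = 3 } entries written with a single
 * 64-bit store. */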
typedef struct {
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
} HUF_ReadDTableX1_Workspace;
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
}
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
{
U32 tableLog = 0;
U32 nbSymbols = 0;
size_t iSize;
void* const dtPtr = DTable + 1;
HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
U32* rankVal;
BYTE* huffWeight;
size_t spaceUsed32 = 0;
rankVal = (U32 *)workSpace + spaceUsed32;
spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
/* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
if (HUF_isError(iSize)) return iSize;
/* Table header */
@@ -168,40 +167,117 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
dtd.tableType = 0;
dtd.tableLog = (BYTE)tableLog;
memcpy(DTable, &dtd, sizeof(dtd));
ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
}
/* Calculate starting value for each rank */
{ U32 n, nextRankStart = 0;
for (n=1; n<tableLog+1; n++) {
U32 const current = nextRankStart;
nextRankStart += (rankVal[n] << (n-1));
rankVal[n] = current;
} }
/* fill DTable */
{ U32 n;
for (n=0; n<nbSymbols; n++) {
U32 const w = huffWeight[n];
U32 const length = (1 << w) >> 1;
U32 u;
HUF_DEltX1 D;
D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
for (u = rankVal[w]; u < rankVal[w] + length; u++)
dt[u] = D;
rankVal[w] += length;
} }
/* Compute symbols and rankStart given rankVal:
*
* rankVal already contains the number of values of each weight.
*
* symbols contains the symbols ordered by weight. First are the rankVal[0]
* weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
* symbols[0] is filled (but unused) to avoid a branch.
*
* rankStart contains the offset where each rank belongs in the DTable.
* rankStart[0] is not filled because there are no entries in the table for
* weight 0.
*/
{
int n;
int nextRankStart = 0;
int const unroll = 4;
int const nLimit = (int)nbSymbols - unroll + 1;
for (n=0; n<(int)tableLog+1; n++) {
U32 const curr = nextRankStart;
nextRankStart += wksp->rankVal[n];
wksp->rankStart[n] = curr;
}
for (n=0; n < nLimit; n += unroll) {
int u;
for (u=0; u < unroll; ++u) {
size_t const w = wksp->huffWeight[n+u];
wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
}
}
for (; n < (int)nbSymbols; ++n) {
size_t const w = wksp->huffWeight[n];
wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
}
}
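/* Worked example (illustrative): with rankVal = {1, 2, 3} for weights
 * 0..2, the exclusive prefix sum gives rankStart = {0, 1, 3}; the
 * counting pass then places the weight-1 symbols at symbols[1..2] and
 * the weight-2 symbols at symbols[3..5], each bucket advancing its own
 * rankStart cursor as it fills. */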
/* fill DTable
* We fill all entries of each weight in order.
* That way length is a constant for each iteration of the outer loop.
* We can switch based on the length to a different inner loop which is
* optimized for that particular case.
*/
{
U32 w;
int symbol=wksp->rankVal[0];
int rankStart=0;
for (w=1; w<tableLog+1; ++w) {
int const symbolCount = wksp->rankVal[w];
int const length = (1 << w) >> 1;
int uStart = rankStart;
BYTE const nbBits = (BYTE)(tableLog + 1 - w);
int s;
int u;
switch (length) {
case 1:
for (s=0; s<symbolCount; ++s) {
HUF_DEltX1 D;
D.byte = wksp->symbols[symbol + s];
D.nbBits = nbBits;
dt[uStart] = D;
uStart += 1;
}
break;
case 2:
for (s=0; s<symbolCount; ++s) {
HUF_DEltX1 D;
D.byte = wksp->symbols[symbol + s];
D.nbBits = nbBits;
dt[uStart+0] = D;
dt[uStart+1] = D;
uStart += 2;
}
break;
case 4:
for (s=0; s<symbolCount; ++s) {
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
MEM_write64(dt + uStart, D4);
uStart += 4;
}
break;
case 8:
for (s=0; s<symbolCount; ++s) {
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
MEM_write64(dt + uStart, D4);
MEM_write64(dt + uStart + 4, D4);
uStart += 8;
}
break;
default:
for (s=0; s<symbolCount; ++s) {
U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
for (u=0; u < length; u += 16) {
MEM_write64(dt + uStart + u + 0, D4);
MEM_write64(dt + uStart + u + 4, D4);
MEM_write64(dt + uStart + u + 8, D4);
MEM_write64(dt + uStart + u + 12, D4);
}
assert(u == length);
uStart += length;
}
break;
}
symbol += symbolCount;
rankStart += symbolCount * length;
}
}
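/* Worked example (illustrative): a weight-w symbol decodes with
 * nbBits = tableLog + 1 - w and owns length = (1 << w) >> 1 consecutive
 * entries. With tableLog = 11, a weight-8 symbol uses nbBits = 4 and
 * fills 128 entries, which the default path above writes 16 at a time
 * through four 64-bit stores per iteration. */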
return iSize;
}
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_readDTableX1_wksp(DTable, src, srcSize,
workSpace, sizeof(workSpace));
}
FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
{
@@ -282,6 +358,7 @@ HUF_decompress4X1_usingDTable_internal_body(
{ const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
BYTE* const olimit = oend - 3;
const void* const dtPtr = DTable + 1;
const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
@@ -306,9 +383,9 @@ HUF_decompress4X1_usingDTable_internal_body(
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal = BIT_DStream_unfinished;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
U32 endSignal = 1;
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
@@ -317,8 +394,7 @@ HUF_decompress4X1_usingDTable_internal_body(
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
for ( ; (endSignal) & (op4 < olimit) ; ) {
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
@@ -335,10 +411,10 @@ HUF_decompress4X1_usingDTable_internal_body(
HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
BIT_reloadDStream(&bitD1);
BIT_reloadDStream(&bitD2);
BIT_reloadDStream(&bitD3);
BIT_reloadDStream(&bitD4);
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
}
/* check corruption */
@@ -400,20 +476,6 @@ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
}
size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
}
size_t HUF_decompress4X1_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
@@ -430,8 +492,7 @@ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size
{
const BYTE* ip = (const BYTE*) cSrc;
size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
workSpace, wkspSize);
size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
@@ -447,18 +508,6 @@ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
}
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
#endif /* HUF_FORCE_DECOMPRESS_X2 */
@@ -485,7 +534,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 co
U32 rankVal[HUF_TABLELOG_MAX + 1];
/* get pre-calculated rankVal */
memcpy(rankVal, rankValOrigin, sizeof(rankVal));
ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
/* fill skipped values */
if (minWeight>1) {
@@ -527,7 +576,7 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
const U32 minBits = nbBitsBaseline - maxWeight;
U32 s;
memcpy(rankVal, rankValOrigin, sizeof(rankVal));
ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
/* fill DTable */
for (s=0; s<sortedListSize; s++) {
@@ -592,11 +641,11 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
rankStart = rankStart0 + 1;
memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
/* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
if (HUF_isError(iSize)) return iSize;
@@ -610,9 +659,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
/* Get start index of each weight */
{ U32 w, nextRankStart = 0;
for (w=1; w<maxW+1; w++) {
U32 current = nextRankStart;
U32 curr = nextRankStart;
nextRankStart += rankStats[w];
rankStart[w] = current;
rankStart[w] = curr;
}
rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list */
sizeOfSort = nextRankStart;
@@ -635,9 +684,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
U32 nextRankVal = 0;
U32 w;
for (w=1; w<maxW+1; w++) {
U32 current = nextRankVal;
U32 curr = nextRankVal;
nextRankVal += rankStats[w] << (w+rescale);
rankVal0[w] = current;
rankVal0[w] = curr;
} }
{ U32 const minBits = tableLog+1 - maxW;
U32 consumed;
@@ -655,23 +704,16 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
dtd.tableLog = (BYTE)maxTableLog;
dtd.tableType = 1;
memcpy(DTable, &dtd, sizeof(dtd));
ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
return iSize;
}
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_readDTableX2_wksp(DTable, src, srcSize,
workSpace, sizeof(workSpace));
}
FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
memcpy(op, dt+val, 2);
ZSTD_memcpy(op, dt+val, 2);
BIT_skipBits(DStream, dt[val].nbBits);
return dt[val].length;
}
@@ -680,7 +722,7 @@ FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
memcpy(op, dt+val, 1);
ZSTD_memcpy(op, dt+val, 1);
if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
else {
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
@@ -757,7 +799,6 @@ HUF_decompress1X2_usingDTable_internal_body(
return dstSize;
}
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
void* dst, size_t dstSize,
@@ -769,6 +810,7 @@ HUF_decompress4X2_usingDTable_internal_body(
{ const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
BYTE* const olimit = oend - (sizeof(size_t)-1);
const void* const dtPtr = DTable+1;
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
@@ -793,7 +835,7 @@ HUF_decompress4X2_usingDTable_internal_body(
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
U32 endSignal = 1;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
@@ -804,8 +846,29 @@ HUF_decompress4X2_usingDTable_internal_body(
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
for ( ; (endSignal) & (op4 < olimit); ) {
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
#else
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
@@ -822,8 +885,12 @@ HUF_decompress4X2_usingDTable_internal_body(
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
endSignal = (U32)LIKELY(
(BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
& (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
& (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
& (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
#endif
}
/* check corruption */
@@ -876,20 +943,6 @@ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
}
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
size_t HUF_decompress4X2_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
@@ -923,20 +976,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
}
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
#endif /* HUF_FORCE_DECOMPRESS_X1 */
@@ -1037,67 +1076,6 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
}
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
#endif
/* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall);
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
(void)algoNb;
assert(algoNb == 0);
return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
(void)algoNb;
assert(algoNb == 1);
return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
#else
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
#endif
}
}
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
/* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall);
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
(void)algoNb;
assert(algoNb == 0);
return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
(void)algoNb;
assert(algoNb == 1);
return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#else
return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
#endif
}
}
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
size_t dstSize, const void* cSrc,
size_t cSrcSize, void* workSpace,
@@ -1131,8 +1109,8 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
/* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall);
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
@@ -1154,14 +1132,6 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
}
}
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
@@ -1185,7 +1155,7 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
{
const BYTE* ip = (const BYTE*) cSrc;
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
@@ -1232,3 +1202,149 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
#endif
}
}
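/* Note (assumption, inferred from the guard below): the convenience
 * wrappers that allocate large workspaces on the stack are collected
 * under ZSTD_NO_UNUSED_FUNCTIONS so that builds which never call them
 * can compile them out entirely. */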
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_readDTableX1_wksp(DTable, src, srcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
}
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_readDTableX2_wksp(DTable, src, srcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
#endif
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
#endif
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
#endif
/* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall);
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
(void)algoNb;
assert(algoNb == 0);
return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
(void)algoNb;
assert(algoNb == 1);
return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
#else
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
#endif
}
}
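/* Note (illustrative summary): HUF_selectDecoder chooses between the
 * single-symbol (X1) and double-symbol (X2) table layouts purely from
 * the (dstSize, cSrcSize) pair, unless one of the
 * HUF_FORCE_DECOMPRESS_* macros pins the choice at build time; in that
 * case the assert checks, in debug builds, that the heuristic would
 * have agreed. */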
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
/* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall);
if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
(void)algoNb;
assert(algoNb == 0);
return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
(void)algoNb;
assert(algoNb == 1);
return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#else
return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
#endif
}
}
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
#endif

161
vendor/github.com/DataDog/zstd/mem.h generated vendored
View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -18,8 +18,10 @@ extern "C" {
/*-****************************************
* Dependencies
******************************************/
#include <stddef.h> /* size_t, ptrdiff_t */
#include <string.h> /* memcpy */
#include <stddef.h> /* size_t, ptrdiff_t */
#include "compiler.h" /* __has_builtin */
#include "debug.h" /* DEBUG_STATIC_ASSERT */
#include "zstd_deps.h" /* ZSTD_memcpy */
/*-****************************************
@@ -39,93 +41,15 @@ extern "C" {
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif
#ifndef __has_builtin
# define __has_builtin(x) 0 /* compat. with non-clang compilers */
#endif
/* code only tested on 32 and 64 bits systems */
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
/* detects whether we are being compiled under msan */
#if defined (__has_feature)
# if __has_feature(memory_sanitizer)
# define MEMORY_SANITIZER 1
# endif
#endif
#if defined (MEMORY_SANITIZER)
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
#include <stdint.h> /* intptr_t */
/* Make memory region fully initialized (without changing its contents). */
void __msan_unpoison(const volatile void *a, size_t size);
/* Make memory region fully uninitialized (without changing its contents).
This is a legacy interface that does not update origin information. Use
__msan_allocated_memory() instead. */
void __msan_poison(const volatile void *a, size_t size);
/* Returns the offset of the first (at least partially) poisoned byte in the
memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
#endif
/* detects whether we are being compiled under asan */
#if defined (__has_feature)
# if __has_feature(address_sanitizer)
# define ADDRESS_SANITIZER 1
# endif
#elif defined(__SANITIZE_ADDRESS__)
# define ADDRESS_SANITIZER 1
#endif
#if defined (ADDRESS_SANITIZER)
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
*
* This memory must be previously allocated by your program. Instrumented
* code is forbidden from accessing addresses in this region until it is
* unpoisoned. This function is not guaranteed to poison the entire region -
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
* alignment restrictions.
*
* \note This function is not thread-safe because no two threads can poison or
* unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
*
* This memory must be previously allocated by your program. Accessing
* addresses in this region is allowed until this region is poisoned again.
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
* to ASan alignment restrictions.
*
* \note This function is not thread-safe because no two threads can
* poison or unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#endif
/*-**************************************************************
* Basic Types
*****************************************************************/
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;
@@ -157,7 +81,53 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
/*-**************************************************************
* Memory I/O
* Memory I/O API
*****************************************************************/
/*=== Static platform detection ===*/
MEM_STATIC unsigned MEM_32bits(void);
MEM_STATIC unsigned MEM_64bits(void);
MEM_STATIC unsigned MEM_isLittleEndian(void);
/*=== Native unaligned read/write ===*/
MEM_STATIC U16 MEM_read16(const void* memPtr);
MEM_STATIC U32 MEM_read32(const void* memPtr);
MEM_STATIC U64 MEM_read64(const void* memPtr);
MEM_STATIC size_t MEM_readST(const void* memPtr);
MEM_STATIC void MEM_write16(void* memPtr, U16 value);
MEM_STATIC void MEM_write32(void* memPtr, U32 value);
MEM_STATIC void MEM_write64(void* memPtr, U64 value);
/*=== Little endian unaligned read/write ===*/
MEM_STATIC U16 MEM_readLE16(const void* memPtr);
MEM_STATIC U32 MEM_readLE24(const void* memPtr);
MEM_STATIC U32 MEM_readLE32(const void* memPtr);
MEM_STATIC U64 MEM_readLE64(const void* memPtr);
MEM_STATIC size_t MEM_readLEST(const void* memPtr);
MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
/*=== Big endian unaligned read/write ===*/
MEM_STATIC U32 MEM_readBE32(const void* memPtr);
MEM_STATIC U64 MEM_readBE64(const void* memPtr);
MEM_STATIC size_t MEM_readBEST(const void* memPtr);
MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
/*=== Byteswap ===*/
MEM_STATIC U32 MEM_swap32(U32 in);
MEM_STATIC U64 MEM_swap64(U64 in);
MEM_STATIC size_t MEM_swapST(size_t in);
/*-**************************************************************
* Memory I/O Implementation
*****************************************************************/
/* MEM_FORCE_MEMORY_ACCESS :
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
@@ -236,37 +206,37 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v =
MEM_STATIC U16 MEM_read16(const void* memPtr)
{
U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC U32 MEM_read32(const void* memPtr)
{
U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC U64 MEM_read64(const void* memPtr)
{
U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC size_t MEM_readST(const void* memPtr)
{
size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
}
MEM_STATIC void MEM_write16(void* memPtr, U16 value)
{
memcpy(memPtr, &value, sizeof(value));
ZSTD_memcpy(memPtr, &value, sizeof(value));
}
MEM_STATIC void MEM_write32(void* memPtr, U32 value)
{
memcpy(memPtr, &value, sizeof(value));
ZSTD_memcpy(memPtr, &value, sizeof(value));
}
MEM_STATIC void MEM_write64(void* memPtr, U64 value)
{
memcpy(memPtr, &value, sizeof(value));
ZSTD_memcpy(memPtr, &value, sizeof(value));
}
#endif /* MEM_FORCE_MEMORY_ACCESS */
@@ -445,6 +415,9 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
MEM_writeBE64(memPtr, (U64)val);
}
/* code only tested on 32 and 64 bits systems */
MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
#if defined (__cplusplus)
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -10,9 +10,9 @@
/* ====== Dependencies ======= */
#include <stddef.h> /* size_t */
#include "zstd_deps.h" /* size_t */
#include "debug.h" /* assert */
#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
#include "zstd_internal.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "pool.h"
/* ====== Compiler specifics ====== */
@@ -105,6 +105,10 @@ static void* POOL_thread(void* opaque) {
assert(0); /* Unreachable */
}
POOL_ctx* ZSTD_createThreadPool(size_t numThreads) {
return POOL_create (numThreads, 0);
}
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
}
@@ -115,14 +119,14 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
/* Check parameters */
if (!numThreads) { return NULL; }
/* Allocate the context and zero initialize */
ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem);
if (!ctx) { return NULL; }
/* Initialize the job queue.
* It needs one extra space since one space is wasted to differentiate
* empty and full queues.
*/
ctx->queueSize = queueSize + 1;
ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem);
ctx->queueHead = 0;
ctx->queueTail = 0;
ctx->numThreadsBusy = 0;
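/* Worked example (illustrative): the "one wasted slot" scheme above
 * means queueSize == N + 1 slots can hold at most N jobs while keeping
 * the two boundary states distinguishable:
 *   empty : queueHead == queueTail
 *   full  : (queueTail + 1) % queueSize == queueHead
 * The predicate shapes are assumed from the comment; the exact field
 * usage lives in parts of pool.c outside this hunk. */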
@@ -136,7 +140,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
}
ctx->shutdown = 0;
/* Allocate space for the thread handles */
ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
ctx->threadCapacity = 0;
ctx->customMem = customMem;
/* Check for errors */
@@ -179,12 +183,14 @@ void POOL_free(POOL_ctx *ctx) {
ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
ZSTD_free(ctx->queue, ctx->customMem);
ZSTD_free(ctx->threads, ctx->customMem);
ZSTD_free(ctx, ctx->customMem);
ZSTD_customFree(ctx->queue, ctx->customMem);
ZSTD_customFree(ctx->threads, ctx->customMem);
ZSTD_customFree(ctx, ctx->customMem);
}
void ZSTD_freeThreadPool (ZSTD_threadPool* pool) {
POOL_free (pool);
}
size_t POOL_sizeof(POOL_ctx *ctx) {
if (ctx==NULL) return 0; /* supports sizeof NULL */
@@ -203,11 +209,11 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
return 0;
}
/* numThreads > threadCapacity */
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
if (!threadPool) return 1;
/* replace existing thread pool */
memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
ZSTD_free(ctx->threads, ctx->customMem);
ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
ZSTD_customFree(ctx->threads, ctx->customMem);
ctx->threads = threadPool;
/* Initialize additional threads */
{ size_t threadId;
@@ -301,7 +307,7 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
struct POOL_ctx_s {
int dummy;
};
static POOL_ctx g_ctx;
static POOL_ctx g_poolCtx;
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
@@ -311,11 +317,11 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customM
(void)numThreads;
(void)queueSize;
(void)customMem;
return &g_ctx;
return &g_poolCtx;
}
void POOL_free(POOL_ctx* ctx) {
assert(!ctx || ctx == &g_ctx);
assert(!ctx || ctx == &g_poolCtx);
(void)ctx;
}
@@ -337,7 +343,7 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
size_t POOL_sizeof(POOL_ctx* ctx) {
if (ctx==NULL) return 0; /* supports sizeof NULL */
assert(ctx == &g_ctx);
assert(ctx == &g_poolCtx);
return sizeof(*ctx);
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -16,7 +16,7 @@ extern "C" {
#endif
#include <stddef.h> /* size_t */
#include "zstd_deps.h"
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */
#include "zstd.h"

View File

@@ -2,12 +2,13 @@
* Copyright (c) 2016 Tino Reichardt
* All rights reserved.
*
* You can contact the author at:
* - zstdmt source repository: https://github.com/mcmilk/zstdmt
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*
* You can contact the author at:
* - zstdmt source repository: https://github.com/mcmilk/zstdmt
* You may select, at your option, one of the above-listed licenses.
*/
/**
@@ -77,11 +78,12 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
#include <stdlib.h>
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h"
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
{
*mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
*mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t));
if (!*mutex)
return 1;
return pthread_mutex_init(*mutex, attr);
@@ -93,14 +95,14 @@ int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
return 0;
{
int const ret = pthread_mutex_destroy(*mutex);
free(*mutex);
ZSTD_free(*mutex);
return ret;
}
}
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
{
*cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
*cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t));
if (!*cond)
return 1;
return pthread_cond_init(*cond, attr);
@@ -112,7 +114,7 @@ int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
return 0;
{
int const ret = pthread_cond_destroy(*cond);
free(*cond);
ZSTD_free(*cond);
return ret;
}
}

View File

@@ -2,12 +2,13 @@
* Copyright (c) 2016 Tino Reichardt
* All rights reserved.
*
* You can contact the author at:
* - zstdmt source repository: https://github.com/mcmilk/zstdmt
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*
* You can contact the author at:
* - zstdmt source repository: https://github.com/mcmilk/zstdmt
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef THREADING_H_938743

View File

@@ -1,6 +1,8 @@
#!/bin/bash
# Get utilities
yum -y -q -e 0 install wget tar unzip gcc
#yum -y -q -e 0 install wget tar unzip gcc
apt-get update
apt-get -y install wget tar unzip gcc
# Get Go
wget -q https://dl.google.com/go/go1.13.linux-386.tar.gz
@@ -13,5 +15,5 @@ unzip mr.zip
# Build and run tests
go build
PAYLOAD=$(pwd)/mr go test -v
PAYLOAD=$(pwd)/mr go test -bench .
DISABLE_BIG_TESTS=1 PAYLOAD=$(pwd)/mr go test -v
DISABLE_BIG_TESTS=1 PAYLOAD=$(pwd)/mr go test -bench .

View File

@@ -1,35 +1,15 @@
/*
* xxHash - Fast Hash algorithm
* Copyright (C) 2012-2016, Yann Collet
*
* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at :
* - xxHash homepage: http://www.xxhash.com
* - xxHash source repository : https://github.com/Cyan4973/xxHash
* xxHash - Fast Hash algorithm
* Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - xxHash homepage: http://www.xxhash.com
* - xxHash source repository : https://github.com/Cyan4973/xxHash
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
@@ -97,14 +77,12 @@
* Includes & Memory related functions
***************************************/
/* Modify the local functions below should you wish to use some other memory routines */
/* for malloc(), free() */
#include <stdlib.h>
#include <stddef.h> /* size_t */
static void* XXH_malloc(size_t s) { return malloc(s); }
static void XXH_free (void* p) { free(p); }
/* for memcpy() */
#include <string.h>
static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
/* for ZSTD_malloc(), ZSTD_free() */
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" /* size_t, ZSTD_malloc, ZSTD_free, ZSTD_memcpy */
static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); }
static void XXH_free (void* p) { ZSTD_free(p); }
static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); }
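/* Note (assumption, inferred from the pattern of this change): routing
 * malloc/free/memcpy through the ZSTD_* wrappers in "zstd_deps.h"
 * funnels every libc dependency through one header, so the vendored
 * library can be rebuilt for freestanding environments by redefining
 * those macros in a single place. */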
#ifndef XXH_STATIC_LINKING_ONLY
# define XXH_STATIC_LINKING_ONLY
@@ -115,49 +93,13 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
/* *************************************
* Compiler Specific Options
***************************************/
#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# define INLINE_KEYWORD inline
#else
# define INLINE_KEYWORD
#endif
#if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
#else
# define FORCE_INLINE_ATTR
#endif
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
#ifdef _MSC_VER
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif
#include "compiler.h"
/* *************************************
* Basic Types
***************************************/
#ifndef MEM_MODULE
# define MEM_MODULE
# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
# else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
# endif
#endif
#include "mem.h" /* BYTE, U32, U64, size_t */
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
@@ -183,14 +125,14 @@ static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
static U32 XXH_read32(const void* memPtr)
{
U32 val;
memcpy(&val, memPtr, sizeof(val));
ZSTD_memcpy(&val, memPtr, sizeof(val));
return val;
}
static U64 XXH_read64(const void* memPtr)
{
U64 val;
memcpy(&val, memPtr, sizeof(val));
ZSTD_memcpy(&val, memPtr, sizeof(val));
return val;
}
@@ -327,12 +269,12 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
****************************/
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState)
{
memcpy(dstState, srcState, sizeof(*dstState));
ZSTD_memcpy(dstState, srcState, sizeof(*dstState));
}
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
{
memcpy(dstState, srcState, sizeof(*dstState));
ZSTD_memcpy(dstState, srcState, sizeof(*dstState));
}
@@ -574,12 +516,12 @@ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
{
XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
ZSTD_memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
state.v1 = seed + PRIME32_1 + PRIME32_2;
state.v2 = seed + PRIME32_2;
state.v3 = seed + 0;
state.v4 = seed - PRIME32_1;
memcpy(statePtr, &state, sizeof(state));
ZSTD_memcpy(statePtr, &state, sizeof(state));
return XXH_OK;
}
@@ -587,12 +529,12 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int s
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
{
XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
ZSTD_memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
state.v1 = seed + PRIME64_1 + PRIME64_2;
state.v2 = seed + PRIME64_2;
state.v3 = seed + 0;
state.v4 = seed - PRIME64_1;
memcpy(statePtr, &state, sizeof(state));
ZSTD_memcpy(statePtr, &state, sizeof(state));
return XXH_OK;
}
@@ -729,7 +671,9 @@ FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, c
state->total_len += len;
if (state->memsize + len < 32) { /* fill in tmp buffer */
XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
if (input != NULL) {
XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
}
state->memsize += (U32)len;
return XXH_OK;
}
@@ -861,14 +805,14 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
{
XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
memcpy(dst, &hash, sizeof(*dst));
ZSTD_memcpy(dst, &hash, sizeof(*dst));
}
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
{
XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
memcpy(dst, &hash, sizeof(*dst));
ZSTD_memcpy(dst, &hash, sizeof(*dst));
}
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)


@@ -1,35 +1,15 @@
/*
xxHash - Extremely Fast Hash algorithm
Header File
Copyright (C) 2012-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- xxHash source repository : https://github.com/Cyan4973/xxHash
* xxHash - Extremely Fast Hash algorithm
* Header File
* Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - xxHash source repository : https://github.com/Cyan4973/xxHash
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* Notice extracted from xxHash homepage :
@@ -75,7 +55,7 @@ extern "C" {
/* ****************************
* Definitions
******************************/
#include <stddef.h> /* size_t */
#include "zstd_deps.h"
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -48,6 +48,7 @@
# define ZDICT_STATIC_LINKING_ONLY
#endif
#include "zdict.h"
#include "zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
/*-*************************************
@@ -61,14 +62,15 @@
#define NOISELENGTH 32
static const int g_compressionLevel_default = 3;
static const U32 g_selectivity_default = 9;
/*-*************************************
* Console display
***************************************/
#undef DISPLAY
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
#undef DISPLAYLEVEL
#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
@@ -99,6 +101,26 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
return MEM_readLE32((const char*)dictBuffer + 4);
}
size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
{
size_t headerSize;
if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
{ ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
if (!bs || !wksp) {
headerSize = ERROR(memory_allocation);
} else {
ZSTD_reset_compressedBlockState(bs);
headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
}
free(bs);
free(wksp);
}
return headerSize;
}
/*-********************************************************
* Dictionary training functions
@@ -508,6 +530,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
clock_t displayClock = 0;
clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
# undef DISPLAYUPDATE
# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
if (ZDICT_clockSpan(displayClock) > refreshRate) \
{ displayClock = clock(); DISPLAY(__VA_ARGS__); \
@@ -588,12 +611,12 @@ typedef struct
#define MAXREPOFFSET 1024
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
const void* src, size_t srcSize,
U32 notificationLevel)
{
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog);
size_t cSize;
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
@@ -682,7 +705,7 @@ static void ZDICT_flatLit(unsigned* countLit)
#define OFFCODE_MAX 30 /* only applicable to first block */
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
unsigned compressionLevel,
int compressionLevel,
const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
const void* dictBuffer, size_t dictBufferSize,
unsigned notificationLevel)
@@ -717,7 +740,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
memset(repOffset, 0, sizeof(repOffset));
repOffset[1] = repOffset[4] = repOffset[8] = 1;
memset(bestRepOffset, 0, sizeof(bestRepOffset));
if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT;
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
@@ -731,7 +754,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* collect stats on all samples */
for (u=0; u<nbFiles; u++) {
ZDICT_countEStats(esr, params,
ZDICT_countEStats(esr, &params,
countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
(const char*)srcBuffer + pos, fileSizes[u],
notificationLevel);
@@ -762,7 +785,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* note : the result of this phase should be used to better appreciate the impact on statistics */
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1);
if (FSE_isError(errorCode)) {
eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
@@ -771,7 +794,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
Offlog = (U32)errorCode;
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1);
if (FSE_isError(errorCode)) {
eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
@@ -780,7 +803,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
mlLog = (U32)errorCode;
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1);
if (FSE_isError(errorCode)) {
eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
@@ -869,7 +892,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
size_t hSize;
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
BYTE header[HBUFFSIZE];
int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
U32 const notificationLevel = params.notificationLevel;
/* check conditions */
@@ -915,7 +938,7 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t params)
{
int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
U32 const notificationLevel = params.notificationLevel;
size_t hSize = 8;
@@ -1090,8 +1113,8 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
memset(&params, 0, sizeof(params));
params.d = 8;
params.steps = 4;
/* Default to level 6 since no compression level information is available */
params.zParams.compressionLevel = 3;
/* Use default level since no compression level information is available */
params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
params.zParams.notificationLevel = DEBUGLEVEL;
#endif


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -61,9 +61,57 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples);
typedef struct {
int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value) */
} ZDICT_params_t;
/*! ZDICT_finalizeDictionary():
* Given a custom content as a basis for dictionary, and a set of samples,
* finalize dictionary by adding headers and statistics according to the zstd
* dictionary format.
*
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each
* sample in order. The samples are used to construct the statistics, so they
* should be representative of what you will compress with this dictionary.
*
* The compression level can be set in `parameters`. You should pass the
* compression level you expect to use in production. The statistics for each
* compression level differ, so tuning the dictionary for the compression level
* can help quite a bit.
*
* You can set an explicit dictionary ID in `parameters`, or allow us to pick
* a random dictionary ID for you, but we can't guarantee no collisions.
*
* The dstDictBuffer and the dictContent may overlap, and the content will be
* appended to the end of the header. If the header + the content doesn't fit in
* maxDictSize the beginning of the content is truncated to make room, since it
* is presumed that the most profitable content is at the end of the dictionary,
* since that is the cheapest to reference.
*
* `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
* `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN).
*
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
* or an error code, which can be tested by ZDICT_isError().
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if
* instructed to, using notificationLevel>0.
* NOTE: This function currently may fail in several edge cases including:
* * Not enough samples
* * Samples are uncompressible
* * Samples are all exactly the same
*/
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
const void* dictContent, size_t dictContentSize,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t parameters);
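A minimal usage sketch of the declaration above; the wrapper name and buffers are illustrative, and error handling is elided:

    #include <string.h>
    #include "zdict.h"

    /* Wrap raw dictionary content in a zstd dictionary header, tuned for
     * the compression level that will be used in production. */
    size_t finalize_dict(void* dstDict, size_t dstCap,
                         const void* content, size_t contentSize,
                         const void* samples, const size_t* sampleSizes,
                         unsigned nbSamples)
    {
        ZDICT_params_t params;
        memset(&params, 0, sizeof(params));
        params.compressionLevel = 3;   /* 0 would mean "use the default" */
        return ZDICT_finalizeDictionary(dstDict, dstCap,
                                        content, contentSize,
                                        samples, sampleSizes, nbSamples,
                                        params);
    }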
/*====== Helper functions ======*/
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
@@ -78,11 +126,8 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
* Use them only in association with static linking.
* ==================================================================================== */
typedef struct {
int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */
unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
} ZDICT_params_t;
#define ZDICT_CONTENTSIZE_MIN 128
#define ZDICT_DICTSIZE_MIN 256
/*! ZDICT_cover_params_t:
* k and d are the only required parameters.
@@ -198,28 +243,6 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
const size_t* samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t* parameters);
/*! ZDICT_finalizeDictionary():
* Given a custom content as a basis for dictionary, and a set of samples,
* finalize dictionary by adding headers and statistics.
*
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
*
* dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
*
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
* or an error code, which can be tested by ZDICT_isError().
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
* Note 2: dictBuffer and dictContent can overlap
*/
#define ZDICT_CONTENTSIZE_MIN 128
#define ZDICT_DICTSIZE_MIN 256
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
const void* dictContent, size_t dictContentSize,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t parameters);
typedef struct {
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
ZDICT_params_t zParams;
@@ -256,7 +279,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
# elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
# elif (ZDICT_GCC_VERSION >= 301)
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))


@@ -3,27 +3,12 @@ package zstd
/*
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "stdint.h" // for uintptr_t
// The following *_wrapper functions are used for removing superfluous
// memory allocations when calling the wrapped functions from Go code.
// See https://github.com/golang/go/issues/24450 for details.
static size_t ZSTD_compress_wrapper(uintptr_t dst, size_t maxDstSize, const uintptr_t src, size_t srcSize, int compressionLevel) {
return ZSTD_compress((void*)dst, maxDstSize, (const void*)src, srcSize, compressionLevel);
}
static size_t ZSTD_decompress_wrapper(uintptr_t dst, size_t maxDstSize, uintptr_t src, size_t srcSize) {
return ZSTD_decompress((void*)dst, maxDstSize, (const void *)src, srcSize);
}
*/
import "C"
import (
"bytes"
"errors"
"io/ioutil"
"runtime"
"unsafe"
)
@@ -73,19 +58,18 @@ func CompressLevel(dst, src []byte, level int) ([]byte, error) {
dst = make([]byte, bound)
}
srcPtr := C.uintptr_t(uintptr(0)) // Do not point anywhere, if src is empty
var srcPtr *byte // Do not point anywhere, if src is empty
if len(src) > 0 {
srcPtr = C.uintptr_t(uintptr(unsafe.Pointer(&src[0])))
srcPtr = &src[0]
}
cWritten := C.ZSTD_compress_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
cWritten := C.ZSTD_compress(
unsafe.Pointer(&dst[0]),
C.size_t(len(dst)),
srcPtr,
unsafe.Pointer(srcPtr),
C.size_t(len(src)),
C.int(level))
runtime.KeepAlive(src)
written := int(cWritten)
// Check if the return is an Error code
if err := getError(written); err != nil {
@@ -103,13 +87,12 @@ func Decompress(dst, src []byte) ([]byte, error) {
}
decompress := func(dst, src []byte) ([]byte, error) {
cWritten := C.ZSTD_decompress_wrapper(
C.uintptr_t(uintptr(unsafe.Pointer(&dst[0]))),
cWritten := C.ZSTD_decompress(
unsafe.Pointer(&dst[0]),
C.size_t(len(dst)),
C.uintptr_t(uintptr(unsafe.Pointer(&src[0]))),
unsafe.Pointer(&src[0]),
C.size_t(len(src)))
runtime.KeepAlive(src)
written := int(cWritten)
// Check error
if err := getError(written); err != nil {
@@ -120,8 +103,7 @@ func Decompress(dst, src []byte) ([]byte, error) {
if len(dst) == 0 {
// Attempt to use zStd to determine decompressed size (may result in error or 0)
size := int(C.size_t(C.ZSTD_getDecompressedSize(unsafe.Pointer(&src[0]), C.size_t(len(src)))))
size := int(C.ZSTD_getDecompressedSize(unsafe.Pointer(&src[0]), C.size_t(len(src))))
if err := getError(size); err != nil {
return nil, err
}

vendor/github.com/DataDog/zstd/zstd.h (generated, vendored)

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -72,16 +72,21 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 4
#define ZSTD_VERSION_RELEASE 4
#define ZSTD_VERSION_RELEASE 8
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
/*! ZSTD_versionNumber() :
* Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */
ZSTDLIB_API unsigned ZSTD_versionNumber(void);
#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
#define ZSTD_QUOTE(str) #str
#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */
/*! ZSTD_versionString() :
* Return runtime library version, like "1.4.5". Requires v1.3.0+. */
ZSTDLIB_API const char* ZSTD_versionString(void);
/* *************************************
* Default constant
@@ -274,7 +279,10 @@ typedef enum {
* Default level is ZSTD_CLEVEL_DEFAULT==3.
* Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
* Note 1 : it's possible to pass a negative compression level.
* Note 2 : setting a level resets all other compression parameters to default */
* Note 2 : setting a level does not automatically set all other compression parameters
* to default. Setting this will however eventually dynamically impact the compression
* parameters which have not been manually set. The manually set
* ones will 'stick'. */
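A short sketch of the "stick" behavior described in Note 2, assuming an existing ZSTD_CCtx:

    void pin_window_log(ZSTD_CCtx* cctx)
    {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 25);         /* pinned by hand */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);  /* windowLog stays 25 */
        /* Only parameters that were never set manually are re-derived
         * from the new compression level. */
    }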
/* Advanced compression parameters :
* It's possible to pin down compression parameters to some specific values.
* In which case, these values are no longer dynamically selected by the compressor */
@@ -331,7 +339,9 @@ typedef enum {
* for large inputs, by finding large matches at long distance.
* It increases memory usage and window size.
* Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
* except when expressly set to a different value. */
* except when expressly set to a different value.
* Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and
* compression strategy >= ZSTD_btopt (== compression level 16+) */
ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2.
* Larger values increase memory usage and compression ratio,
* but decrease compression speed.
@@ -362,16 +372,20 @@ typedef enum {
ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */
/* multi-threading parameters */
/* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
* They return an error otherwise. */
/* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
* Otherwise, trying to set any other value than default (0) will be a no-op and return an error.
* In a situation where it's unknown if the linked library supports multi-threading or not,
* setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property.
*/
ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel.
* When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() :
* When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() :
* ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller,
* while compression work is performed in parallel, within worker threads.
* while compression is performed in parallel, within worker thread(s).
* (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
* in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
* More workers improve speed, but also increase memory usage.
* Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
* Default value is `0`, aka "single-threaded mode" : no worker is spawned,
* compression is performed inside Caller's thread, and all invocations are blocking */
ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
* Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters.
@@ -400,6 +414,11 @@ typedef enum {
* ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
* ZSTD_c_srcSizeHint
* ZSTD_c_enableDedicatedDictSearch
* ZSTD_c_stableInBuffer
* ZSTD_c_stableOutBuffer
* ZSTD_c_blockDelimiters
* ZSTD_c_validateSequences
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@@ -410,7 +429,12 @@ typedef enum {
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003,
ZSTD_c_experimentalParam7=1004
ZSTD_c_experimentalParam7=1004,
ZSTD_c_experimentalParam8=1005,
ZSTD_c_experimentalParam9=1006,
ZSTD_c_experimentalParam10=1007,
ZSTD_c_experimentalParam11=1008,
ZSTD_c_experimentalParam12=1009
} ZSTD_cParameter;
typedef struct {
@@ -519,11 +543,15 @@ typedef enum {
/* note : additional experimental parameters are also available
* within the experimental section of the API.
* At the time of this writing, they include :
* ZSTD_c_format
* ZSTD_d_format
* ZSTD_d_stableOutBuffer
* ZSTD_d_forceIgnoreChecksum
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly
*/
ZSTD_d_experimentalParam1=1000
ZSTD_d_experimentalParam1=1000,
ZSTD_d_experimentalParam2=1001,
ZSTD_d_experimentalParam3=1002
} ZSTD_dParameter;
@@ -659,8 +687,9 @@ typedef enum {
* - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
* - output->pos must be <= dstCapacity, input->pos must be <= srcSize
* - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
* - endOp must be a valid directive
* - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
* - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available,
* - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available,
* and then immediately returns, just indicating that there is some data remaining to be flushed.
* The function nonetheless guarantees forward progress : it will return only after it reads or writes at least 1 byte.
* - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
@@ -763,7 +792,7 @@ ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
/* This function is redundant with the advanced API and equivalent to:
*
* ZSTD_DCtx_reset(zds);
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
* ZSTD_DCtx_refDDict(zds, NULL);
*/
ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
@@ -1095,21 +1124,40 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
typedef struct {
unsigned int matchPos; /* Match pos in dst */
/* If seqDef.offset > 3, then this is seqDef.offset - 3
* If seqDef.offset < 3, then this is the corresponding repeat offset
* But if seqDef.offset < 3 and litLength == 0, this is the
* repeat offset before the corresponding repeat offset
* And if seqDef.offset == 3 and litLength == 0, this is the
* most recent repeat offset - 1
*/
unsigned int offset;
unsigned int litLength; /* Literal length */
unsigned int matchLength; /* Match length */
/* 0 when seq not rep and seqDef.offset otherwise
* when litLength == 0 this will be <= 4, otherwise <= 3 like normal
*/
unsigned int rep;
unsigned int offset; /* The offset of the match. (NOT the same as the offset code)
* If offset == 0 and matchLength == 0, this sequence represents the last
* literals in the block of litLength size.
*/
unsigned int litLength; /* Literal length of the sequence. */
unsigned int matchLength; /* Match length of the sequence. */
/* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0.
* In this case, we will treat the sequence as a marker for a block boundary.
*/
unsigned int rep; /* Represents which repeat offset is represented by the field 'offset'.
* Ranges from [0, 3].
*
* Repeat offsets are essentially previous offsets from previous sequences sorted in
* recency order. For more detail, see doc/zstd_compression_format.md
*
* If rep == 0, then 'offset' does not contain a repeat offset.
* If rep > 0:
* If litLength != 0:
* rep == 1 --> offset == repeat_offset_1
* rep == 2 --> offset == repeat_offset_2
* rep == 3 --> offset == repeat_offset_3
* If litLength == 0:
* rep == 1 --> offset == repeat_offset_2
* rep == 2 --> offset == repeat_offset_3
* rep == 3 --> offset == repeat_offset_1 - 1
*
* Note: This field is optional. ZSTD_generateSequences() will calculate the value of
* 'rep', but repeat offsets do not necessarily need to be calculated from an external
* sequence provider's perspective. For example, ZSTD_compressSequences() does not
* use this 'rep' field at all (as of now).
*/
} ZSTD_Sequence;
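A worked numeric example of the rep mapping above (history values are illustrative): with repeat-offset history {repeat_offset_1 = 16, repeat_offset_2 = 32, repeat_offset_3 = 48}, a sequence with litLength != 0 and rep == 2 has offset == 32; with litLength == 0, rep == 1 also yields offset == 32 (the mapping shifts by one), and rep == 3 yields offset == repeat_offset_1 - 1 == 15.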
typedef struct {
@@ -1151,6 +1199,12 @@ typedef enum {
* Decoder cannot recognise automatically this format, requiring this instruction. */
} ZSTD_format_e;
typedef enum {
/* Note: this enum controls ZSTD_d_forceIgnoreChecksum */
ZSTD_d_validateChecksum = 0,
ZSTD_d_ignoreChecksum = 1
} ZSTD_forceIgnoreChecksum_e;
typedef enum {
/* Note: this enum and the behavior it controls are effectively internal
* implementation details of the compressor. They are expected to continue
@@ -1248,14 +1302,74 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
* or an error code (if srcSize is too small) */
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
/*! ZSTD_getSequences() :
* Extract sequences from the sequence store
typedef enum {
ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */
} ZSTD_sequenceFormat_e;
/*! ZSTD_generateSequences() :
* Generate sequences using ZSTD_compress2, given a source buffer.
*
* Each block will end with a dummy sequence
* with offset == 0, matchLength == 0, and litLength == length of last literals.
* litLength may be == 0, and if so, the sequence (of: 0, ml: 0, ll: 0)
* simply acts as a block delimiter.
*
* zc can be used to insert custom compression params.
* This function invokes ZSTD_compress2
* @return : number of sequences extracted
*
* The output of this function can be fed into ZSTD_compressSequences() with CCtx
* setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
* @return : number of sequences generated
*/
ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
size_t outSeqsSize, const void* src, size_t srcSize);
ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
size_t outSeqsSize, const void* src, size_t srcSize);
/*! ZSTD_mergeBlockDelimiters() :
* Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
* by merging them into the literals of the next sequence.
*
* As such, the final generated result has no explicit representation of block boundaries,
* and the final last literals segment is not represented in the sequences.
*
* The output of this function can be fed into ZSTD_compressSequences() with CCtx
* setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
* @return : number of sequences left after merging
*/
ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
/*! ZSTD_compressSequences() :
* Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
* If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
* The entire source is compressed into a single frame.
*
* The compression behavior changes based on cctx params. In particular:
* If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain
* no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on
* the block size derived from the cctx, and sequences may be split. This is the default setting.
*
* If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
* block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
*
* If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
* behavior. If ZSTD_c_validateSequences == 1, then if a sequence is invalid (see doc/zstd_compression_format.md for
* specifics regarding offset/matchlength requirements), the function will bail out and return an error.
*
* In addition to the two adjustable experimental params, there are other important cctx params.
* - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
* - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
* - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
* is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
*
* Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
* Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
* and cannot emit an RLE block that disagrees with the repcode history
* @return : final compressed size or a ZSTD error.
*/
ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize);
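Taken together, a hedged sketch of the round trip these three declarations describe; error handling is elided and the function name is illustrative:

    #define ZSTD_STATIC_LINKING_ONLY
    #include "zstd.h"

    /* Re-compress src by way of externally visible sequences. */
    size_t compress_via_sequences(void* dst, size_t dstCap,
                                  const void* src, size_t srcSize,
                                  ZSTD_Sequence* seqs, size_t seqsCap)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t nbSeqs = ZSTD_generateSequences(cctx, seqs, seqsCap, src, srcSize);
        /* Drop block delimiters so the default ZSTD_sf_noBlockDelimiters applies. */
        nbSeqs = ZSTD_mergeBlockDelimiters(seqs, nbSeqs);
        {   size_t const cSize = ZSTD_compressSequences(cctx, dst, dstCap,
                                                        seqs, nbSeqs, src, srcSize);
            ZSTD_freeCCtx(cctx);
            return cSize;
        }
    }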
/***************************************
@@ -1263,23 +1377,28 @@ ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
***************************************/
/*! ZSTD_estimate*() :
* These functions make it possible to estimate memory usage of a future
* {D,C}Ctx, before its creation.
* These functions make it possible to estimate memory usage
* of a future {D,C}Ctx, before its creation.
*
* ZSTD_estimateCCtxSize() will provide a budget large enough for any
* compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(),
* this estimate does not include space for a window buffer, so this estimate
* is guaranteed to be enough for single-shot compressions, but not streaming
* compressions. It will however assume the input may be arbitrarily large,
* which is the worst case. If srcSize is known to always be small,
* ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
* ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with
* ZSTD_getCParams() to create cParams from compressionLevel.
* ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with
* ZSTD_CCtxParams_setParameter().
* ZSTD_estimateCCtxSize() will provide a memory budget large enough
* for any compression level up to selected one.
* Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
* does not include space for a window buffer.
* Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
* The estimate will assume the input may be arbitrarily large,
* which is the worst case.
*
* Note: only single-threaded compression is supported. This function will
* return an error code if ZSTD_c_nbWorkers is >= 1. */
* When srcSize can be bound by a known and rather "small" value,
* this fact can be used to provide a tighter estimation
* because the CCtx compression context will need less memory.
* This tighter estimation can be provided by more advanced functions
* ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
* and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
* Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
*
* Note 2 : only single-threaded compression is supported.
* ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
*/
ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
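A sketch of the single-shot budgeting described above, paired with ZSTD_initStaticCCtx() from the same experimental section (not shown in this hunk); the helper name is illustrative:

    #include <stdlib.h>
    #define ZSTD_STATIC_LINKING_ONLY
    #include "zstd.h"

    ZSTD_CCtx* make_budgeted_cctx(int level)
    {
        size_t const need = ZSTD_estimateCCtxSize(level);  /* worst-case srcSize */
        void* const workspace = malloc(need);              /* must outlive the CCtx */
        return workspace ? ZSTD_initStaticCCtx(workspace, need) : NULL;
    }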
@@ -1362,7 +1481,11 @@ ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict(
typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
typedef void (*ZSTD_freeFunction) (void* opaque, void* address);
typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */
static
#ifdef __GNUC__
__attribute__((__unused__))
#endif
ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
@@ -1375,13 +1498,36 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS
ZSTD_compressionParameters cParams,
ZSTD_customMem customMem);
/* ! Thread pool :
* These prototypes make it possible to share a thread pool among multiple compression contexts.
* This can limit resources for applications with multiple threads where each one uses
* a threaded compression mode (via ZSTD_c_nbWorkers parameter).
* ZSTD_createThreadPool creates a new thread pool with a given number of threads.
* Note that the lifetime of such pool must exist while being used.
* ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
* to use an internal thread pool).
* ZSTD_freeThreadPool frees a thread pool.
*/
typedef struct POOL_ctx_s ZSTD_threadPool;
ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);
ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
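A sketch of the sharing pattern described above, combined with the ZSTD_c_nbWorkers probe suggested earlier in this header; error handling is mostly elided:

    #define ZSTD_STATIC_LINKING_ONLY
    #include "zstd.h"

    void share_pool_example(void)
    {
        ZSTD_threadPool* const pool = ZSTD_createThreadPool(4);
        ZSTD_CCtx* const a = ZSTD_createCCtx();
        ZSTD_CCtx* const b = ZSTD_createCCtx();
        /* Errors here if the library was built without ZSTD_MULTITHREAD: */
        if (!ZSTD_isError(ZSTD_CCtx_setParameter(a, ZSTD_c_nbWorkers, 4))) {
            ZSTD_CCtx_setParameter(b, ZSTD_c_nbWorkers, 4);
            ZSTD_CCtx_refThreadPool(a, pool);   /* both contexts share 4 threads */
            ZSTD_CCtx_refThreadPool(b, pool);
            /* ... compress with a and b; the pool must outlive both ... */
        }
        ZSTD_freeCCtx(a);
        ZSTD_freeCCtx(b);
        ZSTD_freeThreadPool(pool);
    }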
/*
* This API is temporary and is expected to change or disappear in the future!
*/
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
ZSTD_dictContentType_e dictContentType,
const ZSTD_CCtx_params* cctxParams,
ZSTD_customMem customMem);
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
ZSTD_dictContentType_e dictContentType,
ZSTD_customMem customMem);
/***************************************
* Advanced compression functions
***************************************/
@@ -1394,6 +1540,12 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictS
* note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
/*! ZSTD_getDictID_fromCDict() :
* Provides the dictID of the dictionary loaded into `cdict`.
* If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
* Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
/*! ZSTD_getCParams() :
* @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
* `estimatedSrcSize` value is optional, select 0 if not known */
@@ -1508,6 +1660,143 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
* but compression ratio may regress significantly if guess considerably underestimates */
#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
/* Controls whether the new and experimental "dedicated dictionary search
* structure" can be used. This feature is still rough around the edges, be
* prepared for surprising behavior!
*
* How to use it:
*
* When using a CDict, whether to use this feature or not is controlled at
* CDict creation, and it must be set in a CCtxParams set passed into that
* construction (via ZSTD_createCDict_advanced2()). A compression will then
* use the feature or not based on how the CDict was constructed; the value of
* this param, set in the CCtx, will have no effect.
*
* However, when a dictionary buffer is passed into a CCtx, such as via
* ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control
* whether the CDict that is created internally can use the feature or not.
*
* What it does:
*
* Normally, the internal data structures of the CDict are analogous to what
* would be stored in a CCtx after compressing the contents of a dictionary.
* To an approximation, a compression using a dictionary can then use those
* data structures to simply continue what is effectively a streaming
* compression where the simulated compression of the dictionary left off.
* Which is to say, the search structures in the CDict are normally the same
* format as in the CCtx.
*
* It is possible to do better, since the CDict is not like a CCtx: the search
* structures are written once during CDict creation, and then are only read
* after that, while the search structures in the CCtx are both read and
* written as the compression goes along. This means we can choose a search
* structure for the dictionary that is read-optimized.
*
* This feature enables the use of that different structure.
*
* Note that some of the members of the ZSTD_compressionParameters struct have
* different semantics and constraints in the dedicated search structure. It is
* highly recommended that you simply set a compression level in the CCtxParams
* you pass into the CDict creation call, and avoid messing with the cParams
* directly.
*
* Effects:
*
* This will only have any effect when the selected ZSTD_strategy
* implementation supports this feature. Currently, that's limited to
* ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2.
*
* Note that this means that the CDict tables can no longer be copied into the
* CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be
* useable. The dictionary can only be attached or reloaded.
*
* In general, you should expect compression to be faster--sometimes very much
* so--and CDict creation to be slightly slower. Eventually, we will probably
* make this mode the default.
*/
#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8
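A hedged sketch of the CDict-creation path this parameter requires, using ZSTD_createCDict_advanced2() declared earlier in this diff; per the advice above, only a compression level is set rather than raw cParams:

    ZSTD_CDict* make_dds_cdict(const void* dict, size_t dictSize, int level)
    {
        ZSTD_CCtx_params* const p = ZSTD_createCCtxParams();
        ZSTD_CCtxParams_setParameter(p, ZSTD_c_compressionLevel, level);
        ZSTD_CCtxParams_setParameter(p, ZSTD_c_enableDedicatedDictSearch, 1);
        {   ZSTD_CDict* const cdict = ZSTD_createCDict_advanced2(
                    dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto,
                    p, ZSTD_defaultCMem);
            ZSTD_freeCCtxParams(p);
            return cdict;
        }
    }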
/* ZSTD_c_stableInBuffer
* Experimental parameter.
* Default is 0 == disabled. Set to 1 to enable.
*
* Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same
* between calls, except for the modifications that zstd makes to pos (the
* caller must not modify pos). This is checked by the compressor, and
* compression will fail if it ever changes. This means the only flush
* mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end
* is not used. The data in the ZSTD_inBuffer in the range [src, src + pos)
* MUST not be modified during compression or you will get data corruption.
*
* When this flag is enabled zstd won't allocate an input window buffer,
* because the user guarantees it can reference the ZSTD_inBuffer until
* the frame is complete. But, it will still allocate an output buffer
* large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
* avoid the memcpy() from the input buffer to the input window buffer.
*
* NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
* That means this flag cannot be used with ZSTD_compressStream().
*
* NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
* this flag is ALWAYS memory safe, and will never access out-of-bounds
* memory. However, compression WILL fail if you violate the preconditions.
*
* WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
* not be modified during compression or you will get data corruption. This
* is because zstd needs to reference data in the ZSTD_inBuffer to find
* matches. Normally zstd maintains its own window buffer for this purpose,
* but passing this flag tells zstd to use the user provided buffer.
*/
#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
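A sketch of the single-frame pattern this flag implies; per the notes above, ZSTD_e_end is mandatory and src must stay valid and unmodified until the frame completes:

    size_t compress_stable_input(ZSTD_CCtx* cctx,
                                 void* dst, size_t dstCap,
                                 const void* src, size_t srcSize)
    {
        ZSTD_inBuffer  in  = { src, srcSize, 0 };
        ZSTD_outBuffer out = { dst, dstCap, 0 };
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_stableInBuffer, 1);
        {   size_t const remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
            /* remaining == 0 means the frame is complete; nonzero (and not an
             * error) means dst was too small for the whole frame. */
            return ZSTD_isError(remaining) ? remaining : out.pos;
        }
    }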
/* ZSTD_c_stableOutBuffer
* Experimental parameter.
* Default is 0 == disabled. Set to 1 to enable.
*
* Tells the compressor that the ZSTD_outBuffer will not be resized between
* calls. Specifically: (out.size - out.pos) will never grow. This gives the
* compressor the freedom to say: If the compressed data doesn't fit in the
* output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
* always write compressed data directly into the output buffer, instead of
* writing it into an internal buffer and copying to the output buffer.
*
* When this flag is enabled zstd won't allocate an output buffer, because
* it can write directly to the ZSTD_outBuffer. It will still allocate the
* input window buffer (see ZSTD_c_stableInBuffer).
*
* Zstd will check that (out.size - out.pos) never grows and return an error
* if it does. While not strictly necessary, this should prevent surprises.
*/
#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
/* ZSTD_c_blockDelimiters
* Default is 0 == ZSTD_sf_noBlockDelimiters.
*
* For use with sequence compression API: ZSTD_compressSequences().
*
* Designates whether or not the given array of ZSTD_Sequence contains block delimiters
* and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
* See the definition of ZSTD_Sequence for more specifics.
*/
#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
/* ZSTD_c_validateSequences
* Default is 0 == disabled. Set to 1 to enable sequence validation.
*
* For use with sequence compression API: ZSTD_compressSequences().
* Designates whether or not we validate sequences provided to ZSTD_compressSequences()
* during function execution.
*
* Without validation, providing a sequence that does not conform to the zstd spec will cause
* undefined behavior, and may produce a corrupted block.
*
* With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
* specifics regarding offset/matchlength requirements) then the function will bail out and
* return an error.
*
*/
#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
@@ -1556,8 +1845,10 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, Z
/*! ZSTD_CCtxParams_setParameter() :
* Similar to ZSTD_CCtx_setParameter.
* Set one compression parameter, selected by enum ZSTD_cParameter.
* Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams().
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
* Parameters must be applied to a ZSTD_CCtx using
* ZSTD_CCtx_setParametersUsingCCtxParams().
* @result : a code representing success or failure (which can be tested with
* ZSTD_isError()).
*/
ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
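A sketch of the apply step the note describes, together with ZSTD_CCtx_setParametersUsingCCtxParams() (declared elsewhere in this experimental section); the function name is illustrative:

    void apply_params_example(ZSTD_CCtx* cctx)
    {
        ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
        ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, 7);
        ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
        ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);  /* applies both at once */
        ZSTD_freeCCtxParams(params);
    }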
@@ -1637,11 +1928,60 @@ ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* pre
*/
ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
/*! ZSTD_DCtx_getParameter() :
* Get the requested decompression parameter value, selected by enum ZSTD_dParameter,
* and store it into int* value.
* @return : 0, or an error code (which can be tested with ZSTD_isError()).
*/
ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
/* ZSTD_d_format
* experimental parameter,
* allowing selection between ZSTD_format_e input compression formats
*/
#define ZSTD_d_format ZSTD_d_experimentalParam1
/* ZSTD_d_stableOutBuffer
* Experimental parameter.
* Default is 0 == disabled. Set to 1 to enable.
*
* Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
* between calls, except for the modifications that zstd makes to pos (the
* caller must not modify pos). This is checked by the decompressor, and
* decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
* MUST be large enough to fit the entire decompressed frame. This will be
* checked when the frame content size is known. The data in the ZSTD_outBuffer
* in the range [dst, dst + pos) MUST not be modified during decompression
* or you will get data corruption.
*
* When this flag is enabled zstd won't allocate an output buffer, because
* it can write directly to the ZSTD_outBuffer, but it will still allocate
* an input buffer large enough to fit any compressed block. This will also
* avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
* If you need to avoid the input buffer allocation use the buffer-less
* streaming API.
*
* NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
* this flag is ALWAYS memory safe, and will never access out-of-bounds
* memory. However, decompression WILL fail if you violate the preconditions.
*
* WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
* not be modified during decompression or you will get data corruption. This
* is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
* matches. Normally zstd maintains its own buffer for this purpose, but passing
* this flag tells zstd to use the user provided buffer.
*/
#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
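A sketch of the whole-frame decompression pattern this flag requires; dstCap must cover the entire decompressed frame, and names are illustrative:

    size_t decompress_stable_output(ZSTD_DCtx* dctx,
                                    void* dst, size_t dstCap,  /* >= full frame size */
                                    const void* src, size_t srcSize)
    {
        ZSTD_outBuffer out = { dst, dstCap, 0 };
        ZSTD_inBuffer  in  = { src, srcSize, 0 };
        ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1);
        while (in.pos < in.size) {
            size_t const ret = ZSTD_decompressStream(dctx, &out, &in);
            if (ZSTD_isError(ret)) return ret;
            if (ret == 0) break;   /* frame complete */
        }
        return out.pos;
    }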
/* ZSTD_d_forceIgnoreChecksum
* Experimental parameter.
* Default is 0 == disabled. Set to 1 to enable
*
* Tells the decompressor to skip checksum validation during decompression, regardless
* of whether checksumming was specified during compression. This offers some
* slight performance benefits, and may be useful for debugging.
* Param has values of type ZSTD_forceIgnoreChecksum_e
*/
#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
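A sketch combining this flag with the ZSTD_DCtx_getParameter() read-back introduced above:

    void skip_checksum_example(ZSTD_DCtx* dctx)
    {
        int value = 0;
        ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceIgnoreChecksum,
                               ZSTD_d_ignoreChecksum);
        ZSTD_DCtx_getParameter(dctx, ZSTD_d_forceIgnoreChecksum, &value);
        /* value == ZSTD_d_ignoreChecksum: checksums will not be verified. */
    }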
/*! ZSTD_DCtx_setFormat() :
* Instruct the decoder context about what kind of data to decode next.
@@ -1670,7 +2010,8 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
********************************************************************/
/*===== Advanced Streaming compression functions =====*/
/**! ZSTD_initCStream_srcSize() :
/*! ZSTD_initCStream_srcSize() :
* This function is deprecated, and equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
@@ -1687,7 +2028,7 @@ ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
int compressionLevel,
unsigned long long pledgedSrcSize);
/**! ZSTD_initCStream_usingDict() :
/*! ZSTD_initCStream_usingDict() :
* This function is deprecated, and is equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
@@ -1704,7 +2045,7 @@ ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
int compressionLevel);
/**! ZSTD_initCStream_advanced() :
/*! ZSTD_initCStream_advanced() :
* This function is deprecated, and is approximately equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* // Pseudocode: Set each zstd parameter and leave the rest as-is.
@@ -1725,7 +2066,7 @@ ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
ZSTD_parameters params,
unsigned long long pledgedSrcSize);
/**! ZSTD_initCStream_usingCDict() :
/*! ZSTD_initCStream_usingCDict() :
* This function is deprecated, and equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_refCDict(zcs, cdict);
@@ -1735,7 +2076,7 @@ ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
*/
ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
/**! ZSTD_initCStream_usingCDict_advanced() :
/*! ZSTD_initCStream_usingCDict_advanced() :
* This function is DEPRECATED, and is approximately equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
@@ -1808,7 +2149,8 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
/*===== Advanced Streaming decompression functions =====*/
/**
/*!
* This function is deprecated, and is equivalent to:
*
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
@@ -1819,7 +2161,7 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
*/
ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
/**
/*!
* This function is deprecated, and is equivalent to:
*
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
@@ -1830,7 +2172,7 @@ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dic
*/
ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
/**
/*!
* This function is deprecated, and is equivalent to:
*
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
@@ -1892,7 +2234,7 @@ ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstC
ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/*-
/**
Buffer-less streaming decompression (synchronous mode)
A ZSTD_DCtx object is required to track streaming operations.


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -13,8 +13,8 @@
/*-*************************************
* Dependencies
***************************************/
#include <stdlib.h> /* malloc, calloc, free */
#include <string.h> /* memset */
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
#include "error_private.h"
#include "zstd_internal.h"
@@ -53,31 +53,31 @@ const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString
/*=**************************************************************
* Custom allocator
****************************************************************/
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc)
return customMem.customAlloc(customMem.opaque, size);
return malloc(size);
return ZSTD_malloc(size);
}
void* ZSTD_calloc(size_t size, ZSTD_customMem customMem)
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc) {
/* calloc implemented as malloc+memset;
* not as efficient as calloc, but next best guess for custom malloc */
void* const ptr = customMem.customAlloc(customMem.opaque, size);
memset(ptr, 0, size);
ZSTD_memset(ptr, 0, size);
return ptr;
}
return calloc(1, size);
return ZSTD_calloc(1, size);
}
void ZSTD_free(void* ptr, ZSTD_customMem customMem)
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
{
if (ptr!=NULL) {
if (customMem.customFree)
customMem.customFree(customMem.opaque, ptr);
else
free(ptr);
ZSTD_free(ptr);
}
}
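/* Usage sketch (illustrative; myAlloc/myFree are hypothetical names): a caller
 * routes every zstd allocation through its own allocator by passing a
 * ZSTD_customMem at context creation, e.g. :
 *
 *   static void* myAlloc(void* opaque, size_t size) { (void)opaque; return malloc(size); }
 *   static void  myFree (void* opaque, void* address) { (void)opaque; free(address); }
 *
 *   ZSTD_customMem const cMem = { myAlloc, myFree, NULL };
 *   ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cMem);
 */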

File diff suppressed because it is too large

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -28,7 +28,6 @@
extern "C" {
#endif
/*-*************************************
* Constants
***************************************/
@@ -64,7 +63,7 @@ typedef struct {
} ZSTD_localDict;
typedef struct {
U32 CTable[HUF_CTABLE_SIZE_U32(255)];
HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
HUF_repeat repeatMode;
} ZSTD_hufCTables_t;
@@ -83,10 +82,27 @@ typedef struct {
} ZSTD_entropyCTables_t;
typedef struct {
U32 off;
U32 len;
U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */
U32 len; /* Raw length of match */
} ZSTD_match_t;
typedef struct {
U32 offset; /* Offset of sequence */
U32 litLength; /* Length of literals prior to match */
U32 matchLength; /* Raw length of match */
} rawSeq;
typedef struct {
rawSeq* seq; /* The start of the sequences */
size_t pos; /* The index in seq where reading stopped. pos <= size. */
size_t posInSequence; /* The position within the sequence at seq[pos] where reading
stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
size_t size; /* The number of sequences. <= capacity. */
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
typedef struct {
int price;
U32 off;
@@ -147,9 +163,13 @@ struct ZSTD_matchState_t {
U32* hashTable;
U32* hashTable3;
U32* chainTable;
int dedicatedDictSearch; /* Indicates whether this matchState is using the
* dedicated dictionary search structure.
*/
optState_t opt; /* optimal parser state */
const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams;
const rawSeqStore_t* ldmSeqStore;
};
typedef struct {
@@ -166,6 +186,7 @@ typedef struct {
typedef struct {
ZSTD_window_t window; /* State for the window round buffer management */
ldmEntry_t* hashTable;
U32 loadedDictEnd;
BYTE* bucketOffsets; /* Next position in bucket to insert entry */
U64 hashPower; /* Used to compute the rolling hash.
* Depends on ldmParams.minMatchLength */
@@ -180,19 +201,6 @@ typedef struct {
U32 windowLog; /* Window log for the LDM */
} ldmParams_t;
typedef struct {
U32 offset;
U32 litLength;
U32 matchLength;
} rawSeq;
typedef struct {
rawSeq* seq; /* The start of the sequences */
size_t pos; /* The position where reading stopped. <= size. */
size_t size; /* The number of sequences. <= capacity. */
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
typedef struct {
int collectSequences;
ZSTD_Sequence* seqStart;
@@ -227,10 +235,34 @@ struct ZSTD_CCtx_params_s {
/* Long distance matching parameters */
ldmParams_t ldmParams;
/* Dedicated dict search algorithm trigger */
int enableDedicatedDictSearch;
/* Input/output buffer modes */
ZSTD_bufferMode_e inBufferMode;
ZSTD_bufferMode_e outBufferMode;
/* Sequence compression API */
ZSTD_sequenceFormat_e blockDelimiters;
int validateSequences;
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
/**
* Indicates whether this compression proceeds directly from user-provided
* source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
* whether the context needs to buffer the input/output (ZSTDb_buffered).
*/
typedef enum {
ZSTDb_not_buffered,
ZSTDb_buffered
} ZSTD_buffered_policy_e;
struct ZSTD_CCtx_s {
ZSTD_compressionStage_e stage;
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
@@ -246,9 +278,11 @@ struct ZSTD_CCtx_s {
unsigned long long producedCSize;
XXH64_state_t xxhState;
ZSTD_customMem customMem;
ZSTD_threadPool* pool;
size_t staticSize;
SeqCollector seqCollector;
int isFirstBlock;
int initialized;
seqStore_t seqStore; /* sequences storage ptrs */
ldmState_t ldmState; /* long distance matching state */
@@ -256,7 +290,10 @@ struct ZSTD_CCtx_s {
size_t maxNbLdmSequences;
rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
ZSTD_blockState_t blockState;
U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
/* Whether we are streaming or not */
ZSTD_buffered_policy_e bufferedPolicy;
/* streaming */
char* inBuff;
@@ -271,6 +308,10 @@ struct ZSTD_CCtx_s {
ZSTD_cStreamStage streamStage;
U32 frameEnded;
/* Stable in/out buffer verification */
ZSTD_inBuffer expectedInBuffer;
size_t expectedOutBufferSize;
/* Dictionary */
ZSTD_localDict localDict;
const ZSTD_CDict* cdict;
@@ -284,8 +325,32 @@ struct ZSTD_CCtx_s {
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
typedef enum {
ZSTD_noDict = 0,
ZSTD_extDict = 1,
ZSTD_dictMatchState = 2,
ZSTD_dedicatedDictSearch = 3
} ZSTD_dictMode_e;
typedef enum {
ZSTD_cpm_noAttachDict = 0, /* Compression with ZSTD_noDict or ZSTD_extDict.
* In this mode we use both the srcSize and the dictSize
* when selecting and adjusting parameters.
*/
ZSTD_cpm_attachDict = 1, /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
* In this mode we only take the srcSize into account when selecting
* and adjusting parameters.
*/
ZSTD_cpm_createCDict = 2, /* Creating a CDict.
* In this mode we take both the source size and the dictionary size
* into account when selecting and adjusting the parameters.
*/
ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
* We don't know what these parameters are for. We default to the legacy
* behavior of taking both the source size and the dict size into account
* when selecting and adjusting parameters.
*/
} ZSTD_cParamMode_e;
typedef size_t (*ZSTD_blockCompressor) (
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -324,6 +389,31 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}
typedef struct repcodes_s {
U32 rep[3];
} repcodes_t;
MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
{
repcodes_t newReps;
if (offset >= ZSTD_REP_NUM) { /* full offset */
newReps.rep[2] = rep[1];
newReps.rep[1] = rep[0];
newReps.rep[0] = offset - ZSTD_REP_MOVE;
} else { /* repcode */
U32 const repCode = offset + ll0;
if (repCode > 0) { /* note : if repCode==0, no change */
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
newReps.rep[1] = rep[0];
newReps.rep[0] = currentOffset;
} else { /* repCode == 0 */
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
}
}
return newReps;
}
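/* Worked example for ZSTD_updateRep() (illustrative values): starting from
 * rep = {8, 4, 2} with ll0 == 0 :
 * - offset == 1 (repCode 1) promotes rep[1], giving {4, 8, 2} ;
 * - offset == 0 (repCode 0) leaves {8, 4, 2} unchanged ;
 * - offset == 2 with ll0 == 1 (repCode 3) uses rep[0] - 1, giving {7, 8, 4} ;
 * - a full offset of 100 stores 100 - ZSTD_REP_MOVE and shifts the others
 *   down, giving {98, 8, 4}. */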
/* ZSTD_cParam_withinBounds:
* @return 1 if value is within cParam bounds,
* 0 otherwise */
@@ -336,6 +426,30 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
return 1;
}
/* ZSTD_noCompressBlock() :
* Writes uncompressed block to dst buffer from given src.
* Returns the size of the block */
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{
U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
dstSize_tooSmall, "dst buf too small for uncompressed block");
MEM_writeLE24(dst, cBlockHeader24);
ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
return ZSTD_blockHeaderSize + srcSize;
}
MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{
BYTE* const op = (BYTE*)dst;
U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
MEM_writeLE24(op, cBlockHeader);
op[3] = src;
return 4;
}
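/* Header layout shared by the two helpers above (illustrative numbers): the
 * 24-bit little-endian block header packs lastBlock in bit 0, the block type
 * in bits 1-2, and srcSize in bits 3-23. A non-last 100-byte raw block is
 * therefore headed by 100<<3 == 0x320. An RLE block advertises the regenerated
 * size in the same field but stores only the single repeated byte, hence the
 * fixed 4-byte output. */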
/* ZSTD_minGain() :
* minimum compression required
* to generate a compress block or a compressed literals section.
@@ -348,6 +462,21 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
return (srcSize >> minlog) + 2;
}
MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
{
switch (cctxParams->literalCompressionMode) {
case ZSTD_lcm_huffman:
return 0;
case ZSTD_lcm_uncompressed:
return 1;
default:
assert(0 /* impossible: pre-validated */);
/* fall-through */
case ZSTD_lcm_auto:
return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
}
}
/*! ZSTD_safecopyLiterals() :
* memcpy() function that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
* Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
@@ -432,9 +561,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
_BitScanForward64( &r, (U64)val );
return (unsigned)(r>>3);
# if STATIC_BMI2
return _tzcnt_u64(val) >> 3;
# else
unsigned long r = 0;
return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (__builtin_ctzll((U64)val) >> 3);
# else
@@ -451,8 +583,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
} else { /* 32 bits */
# if defined(_MSC_VER)
unsigned long r=0;
_BitScanForward( &r, (U32)val );
return (unsigned)(r>>3);
return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_ctz((U32)val) >> 3);
# else
@@ -466,9 +597,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
} else { /* Big Endian CPU */
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
unsigned long r = 0;
_BitScanReverse64( &r, val );
return (unsigned)(r>>3);
# if STATIC_BMI2
return _lzcnt_u64(val) >> 3;
# else
unsigned long r = 0;
return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (__builtin_clzll(val) >> 3);
# else
@@ -482,8 +616,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
} else { /* 32 bits */
# if defined(_MSC_VER)
unsigned long r = 0;
_BitScanReverse( &r, (unsigned long)val );
return (unsigned)(r>>3);
return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_clz((U32)val) >> 3);
# else
@@ -564,7 +697,8 @@ static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
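/* The helpers above use multiplicative (Fibonacci-style) hashing : multiply by
 * a large odd constant, then keep the top h bits. A minimal standalone sketch
 * of the same idea, assuming only standard C (hashTop is a hypothetical name) :
 *
 *   static size_t hashTop(unsigned long long u, unsigned bits)
 *   { return (size_t)((u * 0xCF1BBCDCB7A56463ULL) >> (64 - bits)); }
 */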
MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
switch(mls)
{
@@ -680,7 +814,7 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
return ZSTD_window_hasExtDict(ms->window) ?
ZSTD_extDict :
ms->dictMatchState != NULL ?
ZSTD_dictMatchState :
(ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
ZSTD_noDict;
}
@@ -692,8 +826,8 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
void const* srcEnd)
{
U32 const current = (U32)((BYTE const*)srcEnd - window.base);
return current > ZSTD_CURRENT_MAX;
U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
return curr > ZSTD_CURRENT_MAX;
}
/**
@@ -729,18 +863,30 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
*/
U32 const cycleMask = (1U << cycleLog) - 1;
U32 const current = (U32)((BYTE const*)src - window->base);
U32 const newCurrent = (current & cycleMask) + maxDist;
U32 const correction = current - newCurrent;
U32 const curr = (U32)((BYTE const*)src - window->base);
U32 const currentCycle0 = curr & cycleMask;
/* Exclude zero so that newCurrent - maxDist >= 1. */
U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
U32 const newCurrent = currentCycle1 + maxDist;
U32 const correction = curr - newCurrent;
assert((maxDist & cycleMask) == 0);
assert(current > newCurrent);
assert(curr > newCurrent);
/* Loose bound, should be around 1<<29 (see above) */
assert(correction > 1<<28);
window->base += correction;
window->dictBase += correction;
window->lowLimit -= correction;
window->dictLimit -= correction;
if (window->lowLimit <= correction) window->lowLimit = 1;
else window->lowLimit -= correction;
if (window->dictLimit <= correction) window->dictLimit = 1;
else window->dictLimit -= correction;
/* Ensure we can still reference the full window. */
assert(newCurrent >= maxDist);
assert(newCurrent - maxDist >= 1);
/* Ensure that lowLimit and dictLimit didn't underflow. */
assert(window->lowLimit <= newCurrent);
assert(window->dictLimit <= newCurrent);
DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
window->lowLimit);
@@ -844,6 +990,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
} } }
}
MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
ZSTD_memset(window, 0, sizeof(*window));
window->base = (BYTE const*)"";
window->dictBase = (BYTE const*)"";
window->dictLimit = 1; /* start from 1, so that 1st position is valid */
window->lowLimit = 1; /* ensures that first and later CCtx usages compress the same */
window->nextSrc = window->base + 1; /* see issue #1241 */
}
/**
* ZSTD_window_update():
* Updates the window by appending [src, src + srcSize) to the window.
@@ -857,6 +1012,10 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
BYTE const* const ip = (BYTE const*)src;
U32 contiguous = 1;
DEBUGLOG(5, "ZSTD_window_update");
if (srcSize == 0)
return contiguous;
assert(window->base != NULL);
assert(window->dictBase != NULL);
/* Check if blocks follow each other */
if (src != window->nextSrc) {
/* not contiguous */
@@ -867,7 +1026,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
window->dictLimit = (U32)distanceFromBase;
window->dictBase = window->base;
window->base = ip - distanceFromBase;
// ms->nextToUpdate = window->dictLimit;
/* ms->nextToUpdate = window->dictLimit; */
if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
contiguous = 0;
}
@@ -883,12 +1042,35 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
return contiguous;
}
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
/**
* Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
*/
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
U32 const maxDistance = 1U << windowLog;
U32 const lowestValid = ms->window.lowLimit;
U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
U32 const isDictionary = (ms->loadedDictEnd != 0);
/* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
* is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
* valid for the entire block. So this check is sufficient to find the lowest valid match index.
*/
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
return matchLowest;
}
/**
* Returns the lowest allowed match index in the prefix.
*/
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
U32 const maxDistance = 1U << windowLog;
U32 const lowestValid = ms->window.dictLimit;
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
U32 const isDictionary = (ms->loadedDictEnd != 0);
/* When computing the lowest prefix index we need to take the dictionary into account to handle
* the edge case where the dictionary and the source are contiguous in memory.
*/
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
return matchLowest;
}
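/* Worked example (illustrative values): with windowLog == 20, curr == 3<<20,
 * lowLimit == 1 and no dictionary loaded, curr - lowestValid exceeds
 * maxDistance, so the lowest allowed match index is curr - (1<<20) == 2<<20.
 * With a dictionary loaded (loadedDictEnd != 0), the full range down to
 * lowestValid remains addressable. */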
@@ -931,6 +1113,20 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
}
#endif
/* ===============================================================
* Shared internal declarations
* These prototypes may be called from sources not in lib/compress
* =============================================================== */
/* ZSTD_loadCEntropy() :
* dict : must point at beginning of a valid zstd dictionary.
* return : size of dictionary header (size of magic number + dict ID + entropy tables)
* assumptions : magic number supposed already checked
* and dictSize >= 8 */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
const void* const dict, size_t dictSize);
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
/* ==============================================================
* Private declarations
@@ -940,9 +1136,10 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
/* ZSTD_getCParamsFromCCtxParams() :
* cParams are built depending on compressionLevel, src size hints,
* LDM and manually set compression parameters.
* Note: srcSizeHint == 0 means 0!
*/
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
/*! ZSTD_initCStream_internal() :
* Private use only. Init streaming operation.
@@ -999,5 +1196,8 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
*/
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
/** ZSTD_cycleLog() :
* condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
#endif /* ZSTD_COMPRESS_H */

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -18,7 +18,7 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
BYTE* const ostart = (BYTE* const)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall);
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
switch(flSize)
{
@@ -35,7 +35,8 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
assert(0);
}
memcpy(ostart + flSize, src, srcSize);
ZSTD_memcpy(ostart + flSize, src, srcSize);
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
return srcSize + flSize;
}
@@ -62,6 +63,7 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
}
ostart[flSize] = *(const BYTE*)src;
DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
return flSize+1;
}
@@ -80,11 +82,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
symbolEncodingType_e hType = set_compressed;
size_t cLitSize;
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
disableLiteralCompression);
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
disableLiteralCompression, (U32)srcSize);
/* Prepare nextEntropy assuming reusing the existing table */
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
if (disableLiteralCompression)
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
@@ -102,24 +104,25 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
cLitSize = singleStream ?
HUF_compress1X_repeat(
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
255, 11, entropyWorkspace, entropyWorkspaceSize,
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
HUF_compress4X_repeat(
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
255, 11, entropyWorkspace, entropyWorkspaceSize,
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
if (repeat != HUF_repeat_none) {
/* reused the existing table */
DEBUGLOG(5, "Reusing previous huffman table");
hType = set_repeat;
}
}
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
if (cLitSize==1) {
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
}
@@ -150,5 +153,6 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
default: /* not possible : lhSize is {3,4,5} */
assert(0);
}
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
return lhSize+cLitSize;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -50,6 +50,19 @@ static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
return maxSymbolValue;
}
/**
* Returns true if we should use ncount=-1 else we should
* use ncount=1 for low probability symbols instead.
*/
static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
{
/* Heuristic: This should cover most blocks <= 16K and
* start to fade out after 16K to about 32K depending on
* compressibility.
*/
return nbSeq >= 2048;
}
/**
* Returns the cost in bytes of encoding the normalized count header.
* Returns an error if any of the helper functions return an error.
@@ -60,7 +73,7 @@ static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
BYTE wksp[FSE_NCOUNTBOUND];
S16 norm[MaxSeq + 1];
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
}
@@ -86,7 +99,7 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t
* Returns the cost in bits of encoding the distribution in count using ctable.
* Returns an error if ctable cannot represent all the symbols in count.
*/
static size_t ZSTD_fseBitCost(
size_t ZSTD_fseBitCost(
FSE_CTable const* ctable,
unsigned const* count,
unsigned const max)
@@ -96,18 +109,22 @@ static size_t ZSTD_fseBitCost(
unsigned s;
FSE_CState_t cstate;
FSE_initCState(&cstate, ctable);
RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC,
"Repeat FSE_CTable has maxSymbolValue %u < %u",
if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
ZSTD_getFSEMaxSymbolValue(ctable), max);
return ERROR(GENERIC);
}
for (s = 0; s <= max; ++s) {
unsigned const tableLog = cstate.stateLog;
unsigned const badCost = (tableLog + 1) << kAccuracyLog;
unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
if (count[s] == 0)
continue;
RETURN_ERROR_IF(bitCost >= badCost, GENERIC,
"Repeat FSE_CTable has Prob[%u] == 0", s);
cost += count[s] * bitCost;
if (bitCost >= badCost) {
DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
return ERROR(GENERIC);
}
cost += (size_t)count[s] * bitCost;
}
return cost >> kAccuracyLog;
}
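/* Note on units (illustrative): FSE_bitCost() returns fixed-point costs in
 * units of 1/(1<<kAccuracyLog) bits, so the accumulated total is shifted right
 * by kAccuracyLog to produce whole bits; e.g. a symbol costing 2.5 bits
 * contributes count[s] * (5 << (kAccuracyLog-1)) before the final shift. */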
@@ -117,15 +134,15 @@ static size_t ZSTD_fseBitCost(
* table described by norm. The max symbol support by norm is assumed >= max.
* norm must be valid for every symbol with non-zero probability in count.
*/
static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
unsigned const* count, unsigned const max)
size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
unsigned const* count, unsigned const max)
{
unsigned const shift = 8 - accuracyLog;
size_t cost = 0;
unsigned s;
assert(accuracyLog <= 8);
for (s = 0; s <= max; ++s) {
unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
unsigned const norm256 = normAcc << shift;
assert(norm256 > 0);
assert(norm256 < 256);
@@ -230,15 +247,15 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
switch (type) {
case set_rle:
FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max));
RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall);
FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
*op = codeTable[0];
return 1;
case set_repeat:
memcpy(nextCTable, prevCTable, prevCTableSize);
ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
return 0;
case set_basic:
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize)); /* note : could be pre-calculated */
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */
return 0;
case set_compressed: {
S16 norm[MaxSeq + 1];
@@ -249,14 +266,15 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
nbSeq_1--;
}
assert(nbSeq_1 > 1);
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
assert(entropyWorkspaceSize >= FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog));
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
FORWARD_IF_ERROR(NCountSize);
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize));
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
return NCountSize;
}
}
default: assert(0); RETURN_ERROR(GENERIC);
default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
}
}
@@ -290,7 +308,7 @@ ZSTD_encodeSequences_body(
if (MEM_32bits()) BIT_flushBits(&blockStream);
if (longOffsets) {
U32 const ofBits = ofCodeTable[nbSeq-1];
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
BIT_flushBits(&blockStream);
@@ -327,7 +345,7 @@ ZSTD_encodeSequences_body(
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
if (longOffsets) {
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
BIT_flushBits(&blockStream); /* (7)*/

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -44,4 +44,11 @@ size_t ZSTD_encodeSequences(
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
size_t ZSTD_fseBitCost(
FSE_CTable const* ctable,
unsigned const* count,
unsigned const max);
size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
unsigned const* count, unsigned const max);
#endif /* ZSTD_COMPRESS_SEQUENCES_H */

View File

@@ -0,0 +1,849 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*-*************************************
* Dependencies
***************************************/
#include "zstd_compress_superblock.h"
#include "zstd_internal.h" /* ZSTD_getSequenceLength */
#include "hist.h" /* HIST_countFast_wksp */
#include "zstd_compress_internal.h"
#include "zstd_compress_sequences.h"
#include "zstd_compress_literals.h"
/*-*************************************
* Superblock entropy buffer structs
***************************************/
/** ZSTD_hufCTablesMetadata_t :
* Stores Literals Block Type for a super-block in hType, and
* huffman tree description in hufDesBuffer.
* hufDesSize refers to the size of huffman tree description in bytes.
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
typedef struct {
symbolEncodingType_e hType;
BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;
/** ZSTD_fseCTablesMetadata_t :
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
* fse tables in fseTablesBuffer.
* fseTablesSize refers to the size of fse tables in bytes.
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
typedef struct {
symbolEncodingType_e llType;
symbolEncodingType_e ofType;
symbolEncodingType_e mlType;
BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
size_t fseTablesSize;
size_t lastCountSize; /* This is to account for a bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
} ZSTD_fseCTablesMetadata_t;
typedef struct {
ZSTD_hufCTablesMetadata_t hufMetadata;
ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;
/** ZSTD_buildSuperBlockEntropy_literal() :
* Builds entropy for the super-block literals.
* Stores literals block type (raw, rle, compressed, repeat) and
* huffman description table to hufMetadata.
* @return : size of huffman description table or error code */
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
const ZSTD_hufCTables_t* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_hufCTablesMetadata_t* hufMetadata,
const int disableLiteralsCompression,
void* workspace, size_t wkspSize)
{
BYTE* const wkspStart = (BYTE*)workspace;
BYTE* const wkspEnd = wkspStart + wkspSize;
BYTE* const countWkspStart = wkspStart;
unsigned* const countWksp = (unsigned*)workspace;
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
BYTE* const nodeWksp = countWkspStart + countWkspSize;
const size_t nodeWkspSize = wkspEnd-nodeWksp;
unsigned maxSymbolValue = 255;
unsigned huffLog = HUF_TABLELOG_DEFAULT;
HUF_repeat repeat = prevHuf->repeatMode;
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
/* Prepare nextEntropy assuming reusing the existing table */
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
if (disableLiteralsCompression) {
DEBUGLOG(5, "set_basic - disabled");
hufMetadata->hType = set_basic;
return 0;
}
/* small ? don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) {
DEBUGLOG(5, "set_basic - too small");
hufMetadata->hType = set_basic;
return 0;
}
}
/* Scan input and build symbol stats */
{ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
if (largest == srcSize) {
DEBUGLOG(5, "set_rle");
hufMetadata->hType = set_rle;
return 0;
}
if (largest <= (srcSize >> 7)+4) {
DEBUGLOG(5, "set_basic - no gain");
hufMetadata->hType = set_basic;
return 0;
}
}
/* Validate the previous Huffman table */
if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
repeat = HUF_repeat_none;
}
/* Build Huffman Tree */
ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
maxSymbolValue, huffLog,
nodeWksp, nodeWkspSize);
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
huffLog = (U32)maxBits;
{ /* Build and write the CTable */
size_t const newCSize = HUF_estimateCompressedSize(
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
size_t const hSize = HUF_writeCTable(
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
/* Check against repeating the previous CTable */
if (repeat != HUF_repeat_none) {
size_t const oldCSize = HUF_estimateCompressedSize(
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
DEBUGLOG(5, "set_repeat - smaller");
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
hufMetadata->hType = set_repeat;
return 0;
}
}
if (newCSize + hSize >= srcSize) {
DEBUGLOG(5, "set_basic - no gains");
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
hufMetadata->hType = set_basic;
return 0;
}
DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
hufMetadata->hType = set_compressed;
nextHuf->repeatMode = HUF_repeat_check;
return hSize;
}
}
}
/** ZSTD_buildSuperBlockEntropy_sequences() :
* Builds entropy for the super-block sequences.
* Stores symbol compression modes and fse table to fseMetadata.
* @return : size of fse tables or error code */
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
const ZSTD_fseCTables_t* prevEntropy,
ZSTD_fseCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
ZSTD_fseCTablesMetadata_t* fseMetadata,
void* workspace, size_t wkspSize)
{
BYTE* const wkspStart = (BYTE*)workspace;
BYTE* const wkspEnd = wkspStart + wkspSize;
BYTE* const countWkspStart = wkspStart;
unsigned* const countWksp = (unsigned*)workspace;
const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
BYTE* const cTableWksp = countWkspStart + countWkspSize;
const size_t cTableWkspSize = wkspEnd-cTableWksp;
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
const BYTE* const llCodeTable = seqStorePtr->llCode;
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
BYTE* const ostart = fseMetadata->fseTablesBuffer;
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
BYTE* op = ostart;
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
ZSTD_memset(workspace, 0, wkspSize);
fseMetadata->lastCountSize = 0;
/* convert length/distances into codes */
ZSTD_seqToCodes(seqStorePtr);
/* build CTable for Literal Lengths */
{ U32 LLtype;
unsigned max = MaxLL;
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
DEBUGLOG(5, "Building LL table");
nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
countWksp, max, mostFrequent, nbSeq,
LLFSELog, prevEntropy->litlengthCTable,
LL_defaultNorm, LL_defaultNormLog,
ZSTD_defaultAllowed, strategy);
assert(set_basic < set_compressed && set_rle < set_compressed);
assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
cTableWksp, cTableWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
if (LLtype == set_compressed)
fseMetadata->lastCountSize = countSize;
op += countSize;
fseMetadata->llType = (symbolEncodingType_e) LLtype;
} }
/* build CTable for Offsets */
{ U32 Offtype;
unsigned max = MaxOff;
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
DEBUGLOG(5, "Building OF table");
nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
countWksp, max, mostFrequent, nbSeq,
OffFSELog, prevEntropy->offcodeCTable,
OF_defaultNorm, OF_defaultNormLog,
defaultPolicy, strategy);
assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
cTableWksp, cTableWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
if (Offtype == set_compressed)
fseMetadata->lastCountSize = countSize;
op += countSize;
fseMetadata->ofType = (symbolEncodingType_e) Offtype;
} }
/* build CTable for MatchLengths */
{ U32 MLtype;
unsigned max = MaxML;
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
countWksp, max, mostFrequent, nbSeq,
MLFSELog, prevEntropy->matchlengthCTable,
ML_defaultNorm, ML_defaultNormLog,
ZSTD_defaultAllowed, strategy);
assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
cTableWksp, cTableWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
if (MLtype == set_compressed)
fseMetadata->lastCountSize = countSize;
op += countSize;
fseMetadata->mlType = (symbolEncodingType_e) MLtype;
} }
assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
return op-ostart;
}
/** ZSTD_buildSuperBlockEntropy() :
* Builds entropy for the super-block.
* @return : 0 on success or error code */
static size_t
ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy,
ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize)
{
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
entropyMetadata->hufMetadata.hufDesSize =
ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
&prevEntropy->huf, &nextEntropy->huf,
&entropyMetadata->hufMetadata,
ZSTD_disableLiteralsCompression(cctxParams),
workspace, wkspSize);
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
entropyMetadata->fseMetadata.fseTablesSize =
ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
&prevEntropy->fse, &nextEntropy->fse,
cctxParams,
&entropyMetadata->fseMetadata,
workspace, wkspSize);
FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
return 0;
}
/** ZSTD_compressSubBlock_literal() :
* Compresses literals section for a sub-block.
* When we have to write the Huffman table we will sometimes choose a header
* size larger than necessary. This is because we have to pick the header size
* before we know the table size + compressed size, so we have a bound on the
* table size. If we guessed incorrectly, we fall back to uncompressed literals.
*
* We write the header when writeEntropy=1 and set entropyWritten=1 when we succeed
* in writing the header, otherwise it is set to 0.
*
* hufMetadata->hType has literals block type info.
* If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block.
* If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block.
* If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block
* and the following sub-blocks' literals sections will be Treeless_Literals_Block.
* @return : compressed size of literals section of a sub-block
* Or 0 if it is unable to compress.
* Or error code */
static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
const ZSTD_hufCTablesMetadata_t* hufMetadata,
const BYTE* literals, size_t litSize,
void* dst, size_t dstSize,
const int bmi2, int writeEntropy, int* entropyWritten)
{
size_t const header = writeEntropy ? 200 : 0;
size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart + lhSize;
U32 const singleStream = lhSize == 3;
symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
size_t cLitSize = 0;
(void)bmi2; /* TODO bmi2... */
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
*entropyWritten = 0;
if (litSize == 0 || hufMetadata->hType == set_basic) {
DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal");
return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
} else if (hufMetadata->hType == set_rle) {
DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal");
return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize);
}
assert(litSize > 0);
assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
if (writeEntropy && hufMetadata->hType == set_compressed) {
ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
op += hufMetadata->hufDesSize;
cLitSize += hufMetadata->hufDesSize;
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
}
/* TODO bmi2 */
{ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
op += cSize;
cLitSize += cSize;
if (cSize == 0 || ERR_isError(cSize)) {
DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize));
return 0;
}
/* If we expand and we aren't writing a header then emit uncompressed */
if (!writeEntropy && cLitSize >= litSize) {
DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible");
return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
}
/* If we are writing headers then allow expansion that doesn't change our header size. */
if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) {
assert(cLitSize > litSize);
DEBUGLOG(5, "Literals expanded beyond allowed header size");
return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
}
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
}
/* Build header */
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
case 4: /* 2 - 2 - 14 - 14 */
{ U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18);
MEM_writeLE32(ostart, lhc);
break;
}
case 5: /* 2 - 2 - 18 - 18 */
{ U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22);
MEM_writeLE32(ostart, lhc);
ostart[4] = (BYTE)(cLitSize >> 10);
break;
}
default: /* not possible : lhSize is {3,4,5} */
assert(0);
}
*entropyWritten = 1;
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
return op-ostart;
}
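/* Worked example for the 3-byte header case above (illustrative values): with
 * hType == set_repeat (3), a single stream, litSize == 1000 and cLitSize ==
 * 512, lhc == 3 + (0<<2) + (1000<<4) + (512<<14), written little-endian by
 * MEM_writeLE24(). */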
static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
const seqDef* const sstart = sequences;
const seqDef* const send = sequences + nbSeq;
const seqDef* sp = sstart;
size_t matchLengthSum = 0;
size_t litLengthSum = 0;
while (send-sp > 0) {
ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
litLengthSum += seqLen.litLength;
matchLengthSum += seqLen.matchLength;
sp++;
}
assert(litLengthSum <= litSize);
if (!lastSequence) {
assert(litLengthSum == litSize);
}
return matchLengthSum + litSize;
}
/** ZSTD_compressSubBlock_sequences() :
* Compresses sequences section for a sub-block.
* fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have
* symbol compression modes for the super-block.
* The first successfully compressed block will have these in its header.
* We set entropyWritten=1 when we succeed in compressing the sequences.
* The following sub-blocks will always have repeat mode.
* @return : compressed size of sequences section of a sub-block
* Or 0 if it is unable to compress
* Or error code. */
static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
const ZSTD_fseCTablesMetadata_t* fseMetadata,
const seqDef* sequences, size_t nbSeq,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
const int bmi2, int writeEntropy, int* entropyWritten)
{
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstCapacity;
BYTE* op = ostart;
BYTE* seqHead;
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);
*entropyWritten = 0;
/* Sequences Header */
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
dstSize_tooSmall, "");
if (nbSeq < 0x7F)
*op++ = (BYTE)nbSeq;
else if (nbSeq < LONGNBSEQ)
op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
else
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
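/* Illustrative encodings for the nbSeq header above: nbSeq == 42 emits the
 * single byte 0x2A; nbSeq == 300 (0x012C) emits 0x81 0x2C; nbSeq == 0x8000
 * emits 0xFF followed by LE16(0x8000 - LONGNBSEQ), LONGNBSEQ being 0x7F00 in
 * zstd_internal.h. */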
if (nbSeq==0) {
return op - ostart;
}
/* seqHead : flags for FSE encoding type */
seqHead = op++;
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart));
if (writeEntropy) {
const U32 LLtype = fseMetadata->llType;
const U32 Offtype = fseMetadata->ofType;
const U32 MLtype = fseMetadata->mlType;
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
op += fseMetadata->fseTablesSize;
} else {
const U32 repeat = set_repeat;
*seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2));
}
{ size_t const bitstreamSize = ZSTD_encodeSequences(
op, oend - op,
fseTables->matchlengthCTable, mlCode,
fseTables->offcodeCTable, ofCode,
fseTables->litlengthCTable, llCode,
sequences, nbSeq,
longOffsets, bmi2);
FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
op += bitstreamSize;
/* zstd versions <= 1.3.4 mistakenly report corruption when
* FSE_readNCount() receives a buffer < 4 bytes.
* Fixed by https://github.com/facebook/zstd/pull/1146.
* This can happen when the last set_compressed table present is 2
* bytes and the bitstream is only one byte.
* In this exceedingly rare case, we will simply emit an uncompressed
* block, since it isn't worth optimizing.
*/
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) {
/* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
assert(fseMetadata->lastCountSize + bitstreamSize == 3);
DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
"emitting an uncompressed block.");
return 0;
}
#endif
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
}
/* zstd versions <= 1.4.0 mistakenly report error when
* sequences section body size is less than 3 bytes.
* Fixed by https://github.com/facebook/zstd/pull/1664.
* This can happen when the previous sequences section block is compressed
* with rle mode and the current block's sequences section is compressed
* with repeat mode where sequences section body size can be 1 byte.
*/
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
if (op-seqHead < 4) {
DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting "
"an uncompressed block when sequences are < 4 bytes");
return 0;
}
#endif
*entropyWritten = 1;
return op - ostart;
}
/** ZSTD_compressSubBlock() :
* Compresses a single sub-block.
* @return : compressed size of the sub-block
* Or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
const seqDef* sequences, size_t nbSeq,
const BYTE* literals, size_t litSize,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
const int bmi2,
int writeLitEntropy, int writeSeqEntropy,
int* litEntropyWritten, int* seqEntropyWritten,
U32 lastBlock)
{
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstCapacity;
BYTE* op = ostart + ZSTD_blockHeaderSize;
DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)",
litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
{ size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
&entropyMetadata->hufMetadata, literals, litSize,
op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
if (cLitSize == 0) return 0;
op += cLitSize;
}
{ size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse,
&entropyMetadata->fseMetadata,
sequences, nbSeq,
llCode, mlCode, ofCode,
cctxParams,
op, oend-op,
bmi2, writeSeqEntropy, seqEntropyWritten);
FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
if (cSeqSize == 0) return 0;
op += cSeqSize;
}
/* Write block header */
{ size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
MEM_writeLE24(ostart, cBlockHeader24);
}
return op-ostart;
}
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
const ZSTD_hufCTables_t* huf,
const ZSTD_hufCTablesMetadata_t* hufMetadata,
void* workspace, size_t wkspSize,
int writeEntropy)
{
unsigned* const countWksp = (unsigned*)workspace;
unsigned maxSymbolValue = 255;
size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
if (hufMetadata->hType == set_basic) return litSize;
else if (hufMetadata->hType == set_rle) return 1;
else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
if (ZSTD_isError(largest)) return litSize;
{ size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
return cLitSizeEstimate + literalSectionHeaderSize;
} }
assert(0); /* impossible */
return 0;
}
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
const BYTE* codeTable, unsigned maxCode,
size_t nbSeq, const FSE_CTable* fseCTable,
const U32* additionalBits,
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
void* workspace, size_t wkspSize)
{
unsigned* const countWksp = (unsigned*)workspace;
const BYTE* ctp = codeTable;
const BYTE* const ctStart = ctp;
const BYTE* const ctEnd = ctStart + nbSeq;
size_t cSymbolTypeSizeEstimateInBits = 0;
unsigned max = maxCode;
HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */
if (type == set_basic) {
/* We selected this encoding type, so it must be valid. */
assert(max <= defaultMax);
cSymbolTypeSizeEstimateInBits = max <= defaultMax
? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max)
: ERROR(GENERIC);
} else if (type == set_rle) {
cSymbolTypeSizeEstimateInBits = 0;
} else if (type == set_compressed || type == set_repeat) {
cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
}
if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10;
while (ctp < ctEnd) {
if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
ctp++;
}
return cSymbolTypeSizeEstimateInBits / 8;
}
static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
const BYTE* llCodeTable,
const BYTE* mlCodeTable,
size_t nbSeq,
const ZSTD_fseCTables_t* fseTables,
const ZSTD_fseCTablesMetadata_t* fseMetadata,
void* workspace, size_t wkspSize,
int writeEntropy)
{
size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
size_t cSeqSizeEstimate = 0;
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
nbSeq, fseTables->offcodeCTable, NULL,
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
workspace, wkspSize);
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
nbSeq, fseTables->litlengthCTable, LL_bits,
LL_defaultNorm, LL_defaultNormLog, MaxLL,
workspace, wkspSize);
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
nbSeq, fseTables->matchlengthCTable, ML_bits,
ML_defaultNorm, ML_defaultNormLog, MaxML,
workspace, wkspSize);
if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
return cSeqSizeEstimate + sequencesSectionHeaderSize;
}
static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
const BYTE* ofCodeTable,
const BYTE* llCodeTable,
const BYTE* mlCodeTable,
size_t nbSeq,
const ZSTD_entropyCTables_t* entropy,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize,
int writeLitEntropy, int writeSeqEntropy) {
size_t cSizeEstimate = 0;
cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
&entropy->huf, &entropyMetadata->hufMetadata,
workspace, wkspSize, writeLitEntropy);
cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
workspace, wkspSize, writeSeqEntropy);
return cSizeEstimate + ZSTD_blockHeaderSize;
}
static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
{
if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle)
return 1;
if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle)
return 1;
if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle)
return 1;
return 0;
}
/** ZSTD_compressSubBlock_multi() :
* Breaks super-block into multiple sub-blocks and compresses them.
* Entropy will be written to the first block.
* The following blocks will use repeat mode to compress.
* All sub-blocks are compressed blocks (no raw or rle blocks).
* @return : compressed size of the super block (which is multiple ZSTD blocks)
* Or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
const ZSTD_compressedBlockState_t* prevCBlock,
ZSTD_compressedBlockState_t* nextCBlock,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const int bmi2, U32 lastBlock,
void* workspace, size_t wkspSize)
{
const seqDef* const sstart = seqStorePtr->sequencesStart;
const seqDef* const send = seqStorePtr->sequences;
const seqDef* sp = sstart;
const BYTE* const lstart = seqStorePtr->litStart;
const BYTE* const lend = seqStorePtr->lit;
const BYTE* lp = lstart;
BYTE const* ip = (BYTE const*)src;
BYTE const* const iend = ip + srcSize;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstCapacity;
BYTE* op = ostart;
const BYTE* llCodePtr = seqStorePtr->llCode;
const BYTE* mlCodePtr = seqStorePtr->mlCode;
const BYTE* ofCodePtr = seqStorePtr->ofCode;
size_t targetCBlockSize = cctxParams->targetCBlockSize;
size_t litSize, seqCount;
int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
int writeSeqEntropy = 1;
int lastSequence = 0;
DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
(unsigned)(lend-lp), (unsigned)(send-sstart));
litSize = 0;
seqCount = 0;
do {
size_t cBlockSizeEstimate = 0;
if (sstart == send) {
lastSequence = 1;
} else {
const seqDef* const sequence = sp + seqCount;
lastSequence = sequence == send - 1;
litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
seqCount++;
}
if (lastSequence) {
assert(lp <= lend);
assert(litSize <= (size_t)(lend - lp));
litSize = (size_t)(lend - lp);
}
/* I think there is an optimization opportunity here.
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
* since it recalculates the estimate from scratch.
* For example, it recounts the literal distribution and symbol codes every time.
*/
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
&nextCBlock->entropy, entropyMetadata,
workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
int litEntropyWritten = 0;
int seqEntropyWritten = 0;
const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
sp, seqCount,
lp, litSize,
llCodePtr, mlCodePtr, ofCodePtr,
cctxParams,
op, oend-op,
bmi2, writeLitEntropy, writeSeqEntropy,
&litEntropyWritten, &seqEntropyWritten,
lastBlock && lastSequence);
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
if (cSize > 0 && cSize < decompressedSize) {
DEBUGLOG(5, "Committed the sub-block");
assert(ip + decompressedSize <= iend);
ip += decompressedSize;
sp += seqCount;
lp += litSize;
op += cSize;
llCodePtr += seqCount;
mlCodePtr += seqCount;
ofCodePtr += seqCount;
litSize = 0;
seqCount = 0;
/* Entropy only needs to be written once */
if (litEntropyWritten) {
writeLitEntropy = 0;
}
if (seqEntropyWritten) {
writeSeqEntropy = 0;
}
}
}
} while (!lastSequence);
if (writeLitEntropy) {
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
}
if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
/* If we haven't written our entropy tables, then we've violated our contract and
* must emit an uncompressed block.
*/
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
return 0;
}
if (ip < iend) {
size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
assert(cSize != 0);
op += cSize;
/* We have to regenerate the repcodes because we've skipped some sequences */
if (sp < send) {
seqDef const* seq;
repcodes_t rep;
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
for (seq = sstart; seq < sp; ++seq) {
rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
}
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
}
}
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
return op-ostart;
}
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
void* dst, size_t dstCapacity,
void const* src, size_t srcSize,
unsigned lastBlock) {
ZSTD_entropyCTablesMetadata_t entropyMetadata;
FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
&zc->blockState.prevCBlock->entropy,
&zc->blockState.nextCBlock->entropy,
&zc->appliedParams,
&entropyMetadata,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
return ZSTD_compressSubBlock_multi(&zc->seqStore,
zc->blockState.prevCBlock,
zc->blockState.nextCBlock,
&entropyMetadata,
&zc->appliedParams,
dst, dstCapacity,
src, srcSize,
zc->bmi2, lastBlock,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
}
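
The splitting loop above only runs when a caller opts into a target compressed block size. A minimal sketch of driving this path through zstd's public API, assuming the vendored version where ZSTD_c_targetCBlockSize is still an advanced parameter (so ZSTD_STATIC_LINKING_ONLY must be defined before including zstd.h):

#define ZSTD_STATIC_LINKING_ONLY
#include <stdio.h>
#include <stdlib.h>
#include "zstd.h"

int main(void) {
    const char src[] = "a payload large enough to span several sub-blocks ...";
    size_t const dstCap = ZSTD_compressBound(sizeof(src));
    void* const dst = malloc(dstCap);
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    if (dst == NULL || cctx == NULL) return 1;
    /* Ask for compressed blocks of roughly 1 KB each; the encoder then
     * routes block compression through ZSTD_compressSuperBlock(). */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 1024);
    {   size_t const cSize = ZSTD_compress2(cctx, dst, dstCap, src, sizeof(src));
        if (ZSTD_isError(cSize)) fprintf(stderr, "%s\n", ZSTD_getErrorName(cSize));
        else printf("compressed %zu -> %zu bytes\n", sizeof(src), cSize);
    }
    ZSTD_freeCCtx(cctx);
    free(dst);
    return 0;
}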

View File

@@ -0,0 +1,32 @@
/*
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_COMPRESS_ADVANCED_H
#define ZSTD_COMPRESS_ADVANCED_H
/*-*************************************
* Dependencies
***************************************/
#include "zstd.h" /* ZSTD_CCtx */
/*-*************************************
* Target Compressed Block Size
***************************************/
/* ZSTD_compressSuperBlock() :
* Used to compress a super block when targetCBlockSize is being used.
* The given block will be compressed into multiple sub-blocks of roughly targetCBlockSize bytes each. */
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
void* dst, size_t dstCapacity,
void const* src, size_t srcSize,
unsigned lastBlock);
#endif /* ZSTD_COMPRESS_ADVANCED_H */

139 vendor/github.com/DataDog/zstd/zstd_ctx.go generated vendored Normal file
View File

@@ -0,0 +1,139 @@
package zstd
/*
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
*/
import "C"
import (
"bytes"
"io/ioutil"
"runtime"
"unsafe"
)
type Ctx interface {
// Compress src into dst. If you have a buffer to use, you can pass it to
// prevent allocation. If it is too small, or if nil is passed, a new buffer
// will be allocated and returned.
Compress(dst, src []byte) ([]byte, error)
// CompressLevel is the same as Compress but you can pass a compression level
CompressLevel(dst, src []byte, level int) ([]byte, error)
// Decompress src into dst. If you have a buffer to use, you can pass it to
// prevent allocation. If it is too small, or if nil is passed, a new buffer
// will be allocated and returned.
Decompress(dst, src []byte) ([]byte, error)
}
type ctx struct {
cctx *C.ZSTD_CCtx
dctx *C.ZSTD_DCtx
}
// NewCtx creates a new zstd context.
// When compressing/decompressing many times, it is recommended to allocate a
// context just once and re-use it for each successive operation.
// This makes the workload friendlier to the system's memory.
// Note: re-using a context is only a speed/resource optimization;
// it doesn't change the compression ratio, which remains identical.
// Note 2: in multi-threaded environments,
// use a separate context per thread for parallel execution.
func NewCtx() Ctx {
c := &ctx{
cctx: C.ZSTD_createCCtx(),
dctx: C.ZSTD_createDCtx(),
}
runtime.SetFinalizer(c, finalizeCtx)
return c
}
func (c *ctx) Compress(dst, src []byte) ([]byte, error) {
return c.CompressLevel(dst, src, DefaultCompression)
}
func (c *ctx) CompressLevel(dst, src []byte, level int) ([]byte, error) {
bound := CompressBound(len(src))
if cap(dst) >= bound {
dst = dst[0:bound] // Reuse dst buffer
} else {
dst = make([]byte, bound)
}
var srcPtr *byte // Remains nil if src is empty
if len(src) > 0 {
srcPtr = &src[0]
}
cWritten := C.ZSTD_compressCCtx(
c.cctx,
unsafe.Pointer(&dst[0]),
C.size_t(len(dst)),
unsafe.Pointer(srcPtr),
C.size_t(len(src)),
C.int(level))
written := int(cWritten)
// Check if the return is an Error code
if err := getError(written); err != nil {
return nil, err
}
return dst[:written], nil
}
func (c *ctx) Decompress(dst, src []byte) ([]byte, error) {
if len(src) == 0 {
return []byte{}, ErrEmptySlice
}
decompress := func(dst, src []byte) ([]byte, error) {
cWritten := C.ZSTD_decompressDCtx(
c.dctx,
unsafe.Pointer(&dst[0]),
C.size_t(len(dst)),
unsafe.Pointer(&src[0]),
C.size_t(len(src)))
written := int(cWritten)
// Check error
if err := getError(written); err != nil {
return nil, err
}
return dst[:written], nil
}
if len(dst) == 0 {
// Attempt to have zstd determine the decompressed size (may result in an error or 0)
size := int(C.size_t(C.ZSTD_getDecompressedSize(unsafe.Pointer(&src[0]), C.size_t(len(src)))))
if err := getError(size); err != nil {
return nil, err
}
if size > 0 {
dst = make([]byte, size)
} else {
dst = make([]byte, len(src)*3) // starting guess
}
}
for i := 0; i < 3; i++ { // 3 tries to allocate a bigger buffer
result, err := decompress(dst, src)
if !IsDstSizeTooSmallError(err) {
return result, err
}
dst = make([]byte, len(dst)*2) // Double the buffer and retry
}
// We failed to get a dst buffer of the right size; fall back to the streaming API
r := NewReader(bytes.NewReader(src))
defer r.Close()
return ioutil.ReadAll(r)
}
func finalizeCtx(c *ctx) {
C.ZSTD_freeCCtx(c.cctx)
C.ZSTD_freeDCtx(c.dctx)
}
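
The type above is a thin cgo binding over zstd's context API. The same reuse pattern expressed directly against the underlying C API looks like this (a sketch; handleOutput is a hypothetical consumer, not part of zstd):

#include "zstd.h"

extern void handleOutput(const void* frame, size_t size); /* hypothetical consumer */

/* One context, many compressions: what the Go ctx type does under the hood. */
static int compressAll(const void* const* srcs, const size_t* srcSizes, int n,
                       void* dst, size_t dstCap, int level) {
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    int i;
    if (cctx == NULL) return -1;
    for (i = 0; i < n; i++) {
        size_t const cSize = ZSTD_compressCCtx(cctx, dst, dstCap,
                                               srcs[i], srcSizes[i], level);
        if (ZSTD_isError(cSize)) { ZSTD_freeCCtx(cctx); return -1; }
        handleOutput(dst, cSize);
    }
    ZSTD_freeCCtx(cctx);
    return 0;
}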

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -24,16 +24,6 @@ extern "C" {
* Constants
***************************************/
/* define "workspace is too large" as this number of times larger than needed */
#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
/* when workspace is continuously too large
* during at least this number of times,
* context's memory usage is considered wasteful,
* because it's sized to handle a worst case scenario which rarely happens.
* In which case, resize it down to free some memory */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
/* Since the workspace is effectively its own little malloc implementation /
* arena, when we run under ASAN, we should similarly insert redzones between
* each internal element of the workspace, so ASAN will catch overruns that
@@ -54,6 +44,16 @@ typedef enum {
ZSTD_cwksp_alloc_aligned
} ZSTD_cwksp_alloc_phase_e;
/**
* Used to describe whether the workspace is statically allocated (and will not
* necessarily ever be freed), or if it's dynamically allocated and we can
* expect a well-formed caller to free this.
*/
typedef enum {
ZSTD_cwksp_dynamic_alloc,
ZSTD_cwksp_static_alloc
} ZSTD_cwksp_static_alloc_e;
/**
* Zstd fits all its internal data structures into a single contiguous buffer,
* so that it only needs to perform a single OS allocation (or so that a buffer
@@ -102,7 +102,7 @@ typedef enum {
*
* - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
* so that literally everything fits in a single buffer. Note: if present,
* this must be the first object in the workspace, since ZSTD_free{CCtx,
* this must be the first object in the workspace, since ZSTD_customFree{CCtx,
* CDict}() rely on a pointer comparison to see whether one or two frees are
* required.
*
@@ -147,9 +147,10 @@ typedef struct {
void* tableValidEnd;
void* allocStart;
int allocFailed;
BYTE allocFailed;
int workspaceOversizedDuration;
ZSTD_cwksp_alloc_phase_e phase;
ZSTD_cwksp_static_alloc_e isStatic;
} ZSTD_cwksp;
/*-*************************************
@@ -188,7 +189,9 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
* else is though.
*/
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
if (size == 0)
return 0;
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#else
return size;
@@ -238,7 +241,10 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
ZSTD_cwksp_internal_advance_phase(ws, phase);
alloc = (BYTE *)ws->allocStart - bytes;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
if (bytes == 0)
return NULL;
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* over-reserve space */
alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
@@ -257,11 +263,13 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
}
ws->allocStart = alloc;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
* either side. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
__asan_unpoison_memory_region(alloc, bytes);
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
__asan_unpoison_memory_region(alloc, bytes);
}
#endif
return alloc;
@@ -306,8 +314,10 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
}
ws->tableEnd = end;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
__asan_unpoison_memory_region(alloc, bytes);
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
__asan_unpoison_memory_region(alloc, bytes);
}
#endif
return alloc;
@@ -321,7 +331,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
void* alloc = ws->objectEnd;
void* end = (BYTE*)alloc + roundedBytes;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* over-reserve space */
end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
@@ -342,11 +352,13 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
ws->tableEnd = end;
ws->tableValidEnd = end;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
* either side. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
__asan_unpoison_memory_region(alloc, bytes);
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
__asan_unpoison_memory_region(alloc, bytes);
}
#endif
return alloc;
@@ -355,7 +367,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the table re-use logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table
* space every time we mark it dirty. */
@@ -390,7 +402,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart);
if (ws->tableValidEnd < ws->tableEnd) {
memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
}
ZSTD_cwksp_mark_tables_clean(ws);
}
@@ -402,8 +414,12 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: clearing tables!");
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
{
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* We don't do this when the workspace is statically allocated, because
* when that is the case, we have no capability to hook into the end of the
* workspace's lifecycle to unpoison the memory.
*/
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
__asan_poison_memory_region(ws->objectEnd, size);
}
@@ -420,7 +436,7 @@ MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: clearing!");
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the context re-use logic is sound, and that we don't
* access stuff that this compression hasn't initialized, we re-"poison"
* the workspace (or at least the non-static, non-table parts of it)
@@ -431,8 +447,12 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
}
#endif
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
{
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* We don't do this when the workspace is statically allocated, because
* when that is the case, we have no capability to hook into the end of the
* workspace's lifecycle to unpoison the memory.
*/
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
__asan_poison_memory_region(ws->objectEnd, size);
}
@@ -452,7 +472,7 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
* Any existing values in the workspace are ignored (the previously managed
* buffer, if present, must be separately freed).
*/
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) {
DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
ws->workspace = start;
@@ -460,24 +480,25 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
ws->objectEnd = ws->workspace;
ws->tableValidEnd = ws->objectEnd;
ws->phase = ZSTD_cwksp_alloc_objects;
ws->isStatic = isStatic;
ZSTD_cwksp_clear(ws);
ws->workspaceOversizedDuration = 0;
ZSTD_cwksp_assert_internal_consistency(ws);
}
MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
void* workspace = ZSTD_malloc(size, customMem);
void* workspace = ZSTD_customMalloc(size, customMem);
DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
RETURN_ERROR_IF(workspace == NULL, memory_allocation);
ZSTD_cwksp_init(ws, workspace, size);
RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc);
return 0;
}
MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
void *ptr = ws->workspace;
DEBUGLOG(4, "cwksp: freeing workspace");
memset(ws, 0, sizeof(ZSTD_cwksp));
ZSTD_free(ptr, customMem);
ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
ZSTD_customFree(ptr, customMem);
}
/**
@@ -486,13 +507,18 @@ MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
*/
MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
*dst = *src;
memset(src, 0, sizeof(ZSTD_cwksp));
ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
}
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
}
MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+ (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
}
MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
return ws->allocFailed;
}
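
The isStatic flag threaded through above corresponds to the public static-init entry points: a context carved out of caller-owned memory cannot hook the end of its own lifetime, so the workspace must not be left poisoned. A caller-side sketch (ZSTD_estimateCCtxSize and ZSTD_initStaticCCtx sit behind ZSTD_STATIC_LINKING_ONLY):

#define ZSTD_STATIC_LINKING_ONLY
#include <stdlib.h>
#include "zstd.h"

size_t compressInArena(void* dst, size_t dstCap, const void* src, size_t srcSize) {
    size_t const wkspSize = ZSTD_estimateCCtxSize(3);  /* sized for level 3 */
    void* const wksp = malloc(wkspSize);               /* must be 8-byte aligned */
    ZSTD_CCtx* const cctx = wksp ? ZSTD_initStaticCCtx(wksp, wkspSize) : NULL;
    size_t cSize = (size_t)-1;                         /* reads as an error to ZSTD_isError() */
    if (cctx != NULL)                                  /* NULL if wksp is too small */
        cSize = ZSTD_compressCCtx(cctx, dst, dstCap, src, srcSize, 3);
    free(wksp);  /* no ZSTD_freeCCtx(): the context lives inside wksp */
    return cSize;
}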

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,7 @@
/*-*******************************************************
* Dependencies
*********************************************************/
#include <string.h> /* memcpy, memmove, memset */
#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "cpu.h" /* bmi2 */
#include "mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
@@ -25,7 +25,7 @@
#include "zstd_ddict.h"
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
# include "zstd_legacy.h"
# include "../legacy/zstd_legacy.h"
#endif
@@ -65,6 +65,10 @@ void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
dctx->virtualStart = ddict->dictContent;
dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
dctx->previousDstEnd = dctx->dictEnd;
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
dctx->dictContentBeginForFuzzing = dctx->prefixStart;
dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif
if (ddict->entropyPresent) {
dctx->litEntropy = 1;
dctx->fseEntropy = 1;
@@ -107,7 +111,7 @@ ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
/* load entropy tables */
RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
&ddict->entropy, ddict->dictContent, ddict->dictSize)),
dictionary_corrupted);
dictionary_corrupted, "");
ddict->entropyPresent = 1;
return 0;
}
@@ -123,17 +127,17 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
ddict->dictContent = dict;
if (!dict) dictSize = 0;
} else {
void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
ddict->dictBuffer = internalBuffer;
ddict->dictContent = internalBuffer;
if (!internalBuffer) return ERROR(memory_allocation);
memcpy(internalBuffer, dict, dictSize);
ZSTD_memcpy(internalBuffer, dict, dictSize);
}
ddict->dictSize = dictSize;
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
/* parse dictionary content */
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
return 0;
}
@@ -143,9 +147,9 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
ZSTD_dictContentType_e dictContentType,
ZSTD_customMem customMem)
{
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
if (ddict == NULL) return NULL;
ddict->cMem = customMem;
{ size_t const initResult = ZSTD_initDDict_internal(ddict,
@@ -194,7 +198,7 @@ const ZSTD_DDict* ZSTD_initStaticDDict(
if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
if (sBufferSize < neededSpace) return NULL;
if (dictLoadMethod == ZSTD_dlm_byCopy) {
memcpy(ddict+1, dict, dictSize); /* local copy */
ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
dict = ddict+1;
}
if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
@@ -209,8 +213,8 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
if (ddict==NULL) return 0; /* support free on NULL */
{ ZSTD_customMem const cMem = ddict->cMem;
ZSTD_free(ddict->dictBuffer, cMem);
ZSTD_free(ddict, cMem);
ZSTD_customFree(ddict->dictBuffer, cMem);
ZSTD_customFree(ddict, cMem);
return 0;
}
}
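
For context, the DDict initialized here backs the public dictionary-decompression API; a minimal usage sketch with a prebuilt dictionary buffer:

#include "zstd.h"

size_t decompressWithDict(void* dst, size_t dstCap,
                          const void* src, size_t srcSize,
                          const void* dictBuf, size_t dictSize) {
    ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuf, dictSize); /* copies dict, parses entropy once */
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    size_t const dSize = ZSTD_decompress_usingDDict(dctx, dst, dstCap,
                                                    src, srcSize, ddict);
    ZSTD_freeDCtx(dctx);
    ZSTD_freeDDict(ddict); /* free on NULL is supported, as above */
    return dSize;          /* check with ZSTD_isError() */
}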

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
/*-*******************************************************
* Dependencies
*********************************************************/
#include <stddef.h> /* size_t */
#include "zstd_deps.h" /* size_t */
#include "zstd.h" /* ZSTD_DDict, and several public functions */

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
/*-*******************************************************
* Dependencies
*********************************************************/
#include <stddef.h> /* size_t */
#include "zstd_deps.h" /* size_t */
#include "zstd.h" /* DCtx, and some public functions */
#include "zstd_internal.h" /* blockProperties_t, and some public functions */
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
@@ -48,12 +48,15 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
* this function must be called with valid parameters only
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
* in which case it cannot fail.
* The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
* defined in zstd_decompress_internal.h.
* Internal use only.
*/
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
const short* normalizedCounter, unsigned maxSymbolValue,
const U32* baseValue, const U32* nbAdditionalBits,
unsigned tableLog);
unsigned tableLog, void* wksp, size_t wkspSize,
int bmi2);
#endif /* ZSTD_DEC_BLOCK_H */

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -27,26 +27,26 @@
/*-*******************************************************
* Constants
*********************************************************/
static const U32 LL_base[MaxLL+1] = {
static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 18, 20, 22, 24, 28, 32, 40,
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
0x2000, 0x4000, 0x8000, 0x10000 };
static const U32 OF_base[MaxOff+1] = {
static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
static const U32 OF_bits[MaxOff+1] = {
static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31 };
static const U32 ML_base[MaxML+1] = {
static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26,
@@ -73,12 +73,16 @@ static const U32 ML_base[MaxML+1] = {
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
typedef struct {
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
U32 rep[ZSTD_REP_NUM];
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
} ZSTD_entropyDTables_t;
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
@@ -117,6 +121,8 @@ struct ZSTD_DCtx_s
XXH64_state_t xxhState;
size_t headerSize;
ZSTD_format_e format;
ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
const BYTE* litPtr;
ZSTD_customMem customMem;
size_t litSize;
@@ -147,10 +153,19 @@ struct ZSTD_DCtx_s
U32 legacyVersion;
U32 hostageByte;
int noForwardProgress;
ZSTD_bufferMode_e outBufferMode;
ZSTD_outBuffer expectedOutBuffer;
/* workspace */
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
size_t oversizedDuration;
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
void const* dictContentBeginForFuzzing;
void const* dictContentEndForFuzzing;
#endif
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
@@ -160,7 +175,7 @@ struct ZSTD_DCtx_s
/*! ZSTD_loadDEntropy() :
* dict : must point at beginning of a valid zstd dictionary.
* @return : size of entropy tables read */
* @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
const void* const dict, size_t const dictSize);

111 vendor/github.com/DataDog/zstd/zstd_deps.h generated vendored Normal file
View File

@@ -0,0 +1,111 @@
/*
* Copyright (c) 2016-2020, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* This file provides common libc dependencies that zstd requires.
* The purpose is to allow replacing this file with a custom implementation
* to compile zstd without libc support.
*/
/* Need:
* NULL
* INT_MAX
* UINT_MAX
* ZSTD_memcpy()
* ZSTD_memset()
* ZSTD_memmove()
*/
#ifndef ZSTD_DEPS_COMMON
#define ZSTD_DEPS_COMMON
#include <limits.h>
#include <stddef.h>
#include <string.h>
#if defined(__GNUC__) && __GNUC__ >= 4
# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l))
# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l))
# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l))
#else
# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l))
# define ZSTD_memmove(d,s,l) memmove((d),(s),(l))
# define ZSTD_memset(p,v,l) memset((p),(v),(l))
#endif
#endif /* ZSTD_DEPS_COMMON */
/* Need:
* ZSTD_malloc()
* ZSTD_free()
* ZSTD_calloc()
*/
#ifdef ZSTD_DEPS_NEED_MALLOC
#ifndef ZSTD_DEPS_MALLOC
#define ZSTD_DEPS_MALLOC
#include <stdlib.h>
#define ZSTD_malloc(s) malloc(s)
#define ZSTD_calloc(n,s) calloc((n), (s))
#define ZSTD_free(p) free((p))
#endif /* ZSTD_DEPS_MALLOC */
#endif /* ZSTD_DEPS_NEED_MALLOC */
/*
* Provides 64-bit math support.
* Need:
* U64 ZSTD_div64(U64 dividend, U32 divisor)
*/
#ifdef ZSTD_DEPS_NEED_MATH64
#ifndef ZSTD_DEPS_MATH64
#define ZSTD_DEPS_MATH64
#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor))
#endif /* ZSTD_DEPS_MATH64 */
#endif /* ZSTD_DEPS_NEED_MATH64 */
/* Need:
* assert()
*/
#ifdef ZSTD_DEPS_NEED_ASSERT
#ifndef ZSTD_DEPS_ASSERT
#define ZSTD_DEPS_ASSERT
#include <assert.h>
#endif /* ZSTD_DEPS_ASSERT */
#endif /* ZSTD_DEPS_NEED_ASSERT */
/* Need:
* ZSTD_DEBUG_PRINT()
*/
#ifdef ZSTD_DEPS_NEED_IO
#ifndef ZSTD_DEPS_IO
#define ZSTD_DEPS_IO
#include <stdio.h>
#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
#endif /* ZSTD_DEPS_IO */
#endif /* ZSTD_DEPS_NEED_IO */
/* Only requested when <stdint.h> is known to be present.
* Need:
* intptr_t
*/
#ifdef ZSTD_DEPS_NEED_STDINT
#ifndef ZSTD_DEPS_STDINT
#define ZSTD_DEPS_STDINT
#include <stdint.h>
#endif /* ZSTD_DEPS_STDINT */
#endif /* ZSTD_DEPS_NEED_STDINT */
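
As the comments above note, a libc-free build replaces this file rather than patching call sites. A sketch of what the common section of such a replacement might look like, where my_memcpy/my_memmove/my_memset are hypothetical freestanding routines supplied by the embedder:

/* zstd_deps.h, replacement sketch for a freestanding build */
#ifndef ZSTD_DEPS_COMMON
#define ZSTD_DEPS_COMMON
#include <limits.h>  /* INT_MAX, UINT_MAX: freestanding headers remain available */
#include <stddef.h>  /* NULL, size_t */
void* my_memcpy(void* dst, const void* src, size_t n);   /* hypothetical */
void* my_memmove(void* dst, const void* src, size_t n);  /* hypothetical */
void* my_memset(void* p, int v, size_t n);               /* hypothetical */
#define ZSTD_memcpy(d,s,l)  my_memcpy((d),(s),(l))
#define ZSTD_memmove(d,s,l) my_memmove((d),(s),(l))
#define ZSTD_memset(p,v,l)  my_memset((p),(v),(l))
#endif /* ZSTD_DEPS_COMMON */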

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -31,15 +31,15 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
* is empty.
*/
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
U32 const current = (U32)(ip - base);
U32 const curr = (U32)(ip - base);
U32 i;
for (i = 0; i < fastHashFillStep; ++i) {
size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
if (i == 0)
hashSmall[smHash] = current + i;
hashSmall[smHash] = curr + i;
if (i == 0 || hashLarge[lgHash] == 0)
hashLarge[lgHash] = current + i;
hashLarge[lgHash] = curr + i;
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
@@ -63,10 +63,8 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowestValid = ms->window.dictLimit;
const U32 maxDistance = 1U << cParams->windowLog;
/* presumes that, if there is a dictionary, it must be using Attach mode */
const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
const BYTE* const prefixLowest = base + prefixLowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -96,7 +94,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
dictCParams->hashLog : hBitsL;
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
dictCParams->chainLog : hBitsS;
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
@@ -104,13 +102,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
/* if a dictionary is attached, it must be within window range */
if (dictMode == ZSTD_dictMatchState) {
assert(lowestValid + maxDistance >= endIndex);
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
}
/* init */
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
U32 const maxRep = (U32)(ip - prefixLowest);
U32 const curr = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
U32 const maxRep = curr - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
@@ -129,17 +129,17 @@ size_t ZSTD_compressBlock_doubleFast_generic(
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
U32 const current = (U32)(ip-base);
U32 const curr = (U32)(ip-base);
U32 const matchIndexL = hashLong[h2];
U32 matchIndexS = hashSmall[h];
const BYTE* matchLong = base + matchIndexL;
const BYTE* match = base + matchIndexS;
const U32 repIndex = current + 1 - offset_1;
const U32 repIndex = curr + 1 - offset_1;
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
&& repIndex < prefixLowestIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
hashLong[h2] = hashSmall[h] = current; /* update hash tables */
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
/* check dictMatchState repcode */
if (dictMode == ZSTD_dictMatchState
@@ -177,7 +177,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
goto _match_found;
} }
@@ -198,6 +198,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
} }
ip += ((ip-anchor) >> kSearchStrength) + 1;
#if defined(__aarch64__)
PREFETCH_L1(ip+256);
#endif
continue;
_search_next_long:
@@ -206,7 +209,7 @@ _search_next_long:
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3];
const BYTE* matchL3 = base + matchIndexL3;
hashLong[hl3] = current + 1;
hashLong[hl3] = curr + 1;
/* check prefix long +1 match */
if (matchIndexL3 > prefixLowestIndex) {
@@ -225,7 +228,7 @@ _search_next_long:
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
ip++;
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
goto _match_found;
} } }
@@ -233,7 +236,7 @@ _search_next_long:
/* if no long +1 match, explore the short match we found */
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
offset = (U32)(current - matchIndexS);
offset = (U32)(curr - matchIndexS);
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
} else {
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
@@ -257,7 +260,7 @@ _match_stored:
if (ip <= ilimit) {
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = current+2;
{ U32 const indexToInsert = curr+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -271,7 +274,7 @@ _match_stored:
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
&& repIndex2 < prefixLowestIndex ?
dictBase - dictIndexDelta + repIndex2 :
dictBase + repIndex2 - dictIndexDelta :
base + repIndex2;
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
@@ -398,12 +401,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
const BYTE* matchLong = matchLongBase + matchLongIndex;
const U32 current = (U32)(ip-base);
const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
const U32 curr = (U32)(ip-base);
const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
size_t mLength;
hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
& (repIndex > dictStartIndex))
@@ -418,7 +421,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
U32 offset;
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
offset = current - matchLongIndex;
offset = curr - matchLongIndex;
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
@@ -430,19 +433,19 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
const BYTE* match3 = match3Base + matchIndex3;
U32 offset;
hashLong[h3] = current + 1;
hashLong[h3] = curr + 1;
if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
ip++;
offset = current+1 - matchIndex3;
offset = curr+1 - matchIndex3;
while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
} else {
const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
offset = current - matchIndex;
offset = curr - matchIndex;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
}
offset_2 = offset_1;
@@ -461,7 +464,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
if (ip <= ilimit) {
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = current+2;
{ U32 const indexToInsert = curr+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -76,6 +76,8 @@ typedef enum {
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
ZSTD_error_frameIndex_tooLarge = 100,
ZSTD_error_seekableIO = 102,
ZSTD_error_dstBuffer_wrong = 104,
ZSTD_error_srcBuffer_wrong = 105,
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
} ZSTD_ErrorCode;
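
These code ranges are what wrappers key off; the Go Decompress retry loop earlier in this diff, for instance, only grows its buffer on the dst-too-small condition. The equivalent check through the public error API, as a sketch:

#include "zstd.h"
#include "zstd_errors.h"

/* Returns 1 when ret encodes "destination buffer too small", so the caller
 * can grow dst and retry, which is the same test the Go wrapper performs. */
int isDstTooSmall(size_t ret) {
    return ZSTD_isError(ret)
        && (ZSTD_getErrorCode(ret) == ZSTD_error_dstSize_tooSmall);
}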

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -29,16 +29,16 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
* Insert the other positions if their hash entry is empty.
*/
for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
U32 const current = (U32)(ip - base);
U32 const curr = (U32)(ip - base);
size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
hashTable[hash0] = current;
hashTable[hash0] = curr;
if (dtlm == ZSTD_dtlm_fast) continue;
/* Only load extra positions for ZSTD_dtlm_full */
{ U32 p;
for (p = 1; p < fastHashFillStep; ++p) {
size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
if (hashTable[hash] == 0) { /* not yet filled */
hashTable[hash] = current + p;
hashTable[hash] = curr + p;
} } } }
}
@@ -61,9 +61,7 @@ ZSTD_compressBlock_fast_generic(
const BYTE* ip1;
const BYTE* anchor = istart;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validStartIndex = ms->window.dictLimit;
const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -74,12 +72,21 @@ ZSTD_compressBlock_fast_generic(
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
ip0 += (ip0 == prefixStart);
ip1 = ip0 + 1;
{ U32 const maxRep = (U32)(ip0 - prefixStart);
{ U32 const curr = (U32)(ip0 - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
U32 const maxRep = curr - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
/* Main Search Loop */
#ifdef __INTEL_COMPILER
/* From intel 'The vector pragma indicates that the loop should be
* vectorized if it is legal to do so'. Can be used together with
* #pragma ivdep (but we have opted to exclude that because Intel
* warns against using it). */
#pragma vector always
#endif
while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
size_t mLength;
BYTE const* ip2 = ip0 + 2;
@@ -91,19 +98,25 @@ ZSTD_compressBlock_fast_generic(
U32 const current1 = (U32)(ip1-base);
U32 const matchIndex0 = hashTable[h0];
U32 const matchIndex1 = hashTable[h1];
BYTE const* repMatch = ip2-offset_1;
BYTE const* repMatch = ip2 - offset_1;
const BYTE* match0 = base + matchIndex0;
const BYTE* match1 = base + matchIndex1;
U32 offcode;
#if defined(__aarch64__)
PREFETCH_L1(ip0+256);
#endif
hashTable[h0] = current0; /* update hash table */
hashTable[h1] = current1; /* update hash table */
assert(ip0 + 1 == ip1);
if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
ip0 = ip2 - mLength;
match0 = repMatch - mLength;
mLength += 4;
offcode = 0;
goto _match;
}
@@ -128,19 +141,18 @@ _offset: /* Requires: ip0, match0 */
offset_2 = offset_1;
offset_1 = (U32)(ip0-match0);
offcode = offset_1 + ZSTD_REP_MOVE;
mLength = 0;
mLength = 4;
/* Count the backwards match length */
while (((ip0>anchor) & (match0>prefixStart))
&& (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
_match: /* Requires: ip0, match0, offcode */
/* Count the forward length */
mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
/* match found */
ip0 += mLength;
anchor = ip0;
ip1 = ip0 + 1;
if (ip0 <= ilimit) {
/* Fill Table */
@@ -148,19 +160,18 @@ _match: /* Requires: ip0, match0, offcode */
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
while ( ((ip0 <= ilimit) & (offset_2>0)) /* offset_2==0 means offset_2 is invalidated */
&& (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
/* store sequence */
size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
ip0 += rLength;
ip1 = ip0 + 1;
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
anchor = ip0;
continue; /* faster when present (confirmed on gcc-8) ... (?) */
}
}
if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
/* store sequence */
size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
ip0 += rLength;
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
anchor = ip0;
continue; /* faster when present (confirmed on gcc-8) ... (?) */
} } }
ip1 = ip0 + 1;
}
/* save reps for next block */
@@ -247,14 +258,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
size_t mLength;
size_t const h = ZSTD_hashPtr(ip, hlog, mls);
U32 const current = (U32)(ip-base);
U32 const curr = (U32)(ip-base);
U32 const matchIndex = hashTable[h];
const BYTE* match = base + matchIndex;
const U32 repIndex = current + 1 - offset_1;
const U32 repIndex = curr + 1 - offset_1;
const BYTE* repMatch = (repIndex < prefixStartIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
hashTable[h] = current; /* update hash table */
hashTable[h] = curr; /* update hash table */
if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
@@ -273,7 +284,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
continue;
} else {
/* found a dict match */
U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
while (((ip>anchor) & (dictMatch>dictStart))
&& (ip[-1] == dictMatch[-1])) {
@@ -305,8 +316,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
if (ip <= ilimit) {
/* Fill Table */
assert(base+current+2 > istart); /* check base overflow */
hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
assert(base+curr+2 > istart); /* check base overflow */
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
/* check immediate repcode */
@@ -387,7 +398,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1];
DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic");
DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
@@ -399,12 +410,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const U32 matchIndex = hashTable[h];
const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
const BYTE* match = matchBase + matchIndex;
const U32 current = (U32)(ip-base);
const U32 repIndex = current + 1 - offset_1;
const U32 curr = (U32)(ip-base);
const U32 repIndex = curr + 1 - offset_1;
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
hashTable[h] = current; /* update hash table */
assert(offset_1 <= current +1); /* check repIndex */
hashTable[h] = curr; /* update hash table */
DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
assert(offset_1 <= curr +1); /* check repIndex */
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
@@ -423,7 +435,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
}
{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
U32 const offset = current - matchIndex;
U32 const offset = curr - matchIndex;
size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1; offset_1 = offset; /* update offset history */
@@ -434,7 +446,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
if (ip <= ilimit) {
/* Fill Table */
hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
/* check immediate repcode */
while (ip <= ilimit) {
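
The immediate-repcode check that loop performs boils down to a cheap 4-byte probe before any full match counting. The core test in isolation, as a sketch with a portable stand-in for zstd's MEM_read32:

#include <stdint.h>
#include <string.h>

static uint32_t read32(const void* p) { uint32_t v; memcpy(&v, p, sizeof v); return v; }

/* Does a match of at least 4 bytes start at ip against the position
 * `offset` bytes back? offset==0 means the offset history is invalidated. */
static int repcodeHit(const uint8_t* ip, uint32_t offset) {
    return (offset > 0) && (read32(ip) == read32(ip - offset));
}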

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -19,6 +19,9 @@
/*-*************************************
* Dependencies
***************************************/
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
#include <arm_neon.h>
#endif
#include "compiler.h"
#include "mem.h"
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
@@ -53,6 +56,31 @@ extern "C" {
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
/**
* Ignore: this is an internal helper.
*
* This is a helper function used to enforce C99 correctness during compilation.
* Under strict compilation modes, variadic macro arguments can't be empty.
* However, variadic function arguments can be. Using a function therefore lets
* us statically check that at least one (string) argument was passed,
* independent of the compilation flags.
*/
static INLINE_KEYWORD UNUSED_ATTR
void _force_has_format_string(const char *format, ...) {
(void)format;
}
/**
* Ignore: this is an internal helper.
*
* We want to force this function invocation to be syntactically correct, but
* we don't want to force runtime evaluation of its arguments.
*/
#define _FORCE_HAS_FORMAT_STRING(...) \
if (0) { \
_force_has_format_string(__VA_ARGS__); \
}
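/* Illustration (a sketch):
*     RETURN_ERROR_IF(srcSize < 4, srcSize_wrong);             => compile error
*     RETURN_ERROR_IF(srcSize < 4, srcSize_wrong, "need 4B");  => OK, free at runtime
* The first form expands to _force_has_format_string() with no argument,
* which cannot match the (const char*, ...) prototype, while the `if (0)`
* wrapper keeps the call from ever executing. */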
/**
* Return the specified error if the condition evaluates to true.
*
@@ -62,7 +90,9 @@ extern "C" {
*/
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
__FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
@@ -75,7 +105,9 @@ extern "C" {
*/
#define RETURN_ERROR(err, ...) \
do { \
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
__FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
@@ -90,7 +122,9 @@ extern "C" {
do { \
size_t const err_code = (err); \
if (ERR_isError(err_code)) { \
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
__FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return err_code; \
@@ -105,7 +139,7 @@ extern "C" {
#define ZSTD_REP_NUM 3 /* number of repcodes */
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
#define KB *(1 <<10)
#define MB *(1 <<20)
@@ -119,15 +153,17 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
#define BIT0 1
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
#define ZSTD_FRAMEIDSIZE 4 /* magic number size */
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow a `static const` variable to be initialized from another `static const` variable */
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
#define ZSTD_FRAMECHECKSUMSIZE 4
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
@@ -150,51 +186,77 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
#define OffFSELog 8
#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3,
4, 6, 7, 8, 9,10,11,12,
13,14,15,16 };
static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2,
2, 3, 2, 1, 1, 1, 1, 1,
-1,-1,-1,-1 };
#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
/* Each table cannot take more than #symbols * FSELog bits */
#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
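/* Worked bound (illustrative, using zstd's usual constants: MaxML == 52,
 * MLFSELog == 9, MaxLL == 35, LLFSELog == 9, MaxOff == 31, OffFSELog == 8):
 * (53*9 + 36*9 + 32*8 + 7) / 8 == (477 + 324 + 256 + 7) / 8 == 133 bytes. */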
static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3,
4, 6, 7, 8, 9,10,11,12,
13,14,15,16
};
static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
4, 3, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2,
2, 3, 2, 1, 1, 1, 1, 1,
-1,-1,-1,-1
};
#define LL_DEFAULTNORMLOG 6 /* for static allocation */
static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3,
4, 4, 5, 7, 8, 9,10,11,
12,13,14,15,16 };
static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2,
2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1,-1,-1,
-1,-1,-1,-1,-1 };
static UNUSED_ATTR const U32 ML_bits[MaxML+1] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3,
4, 4, 5, 7, 8, 9,10,11,
12,13,14,15,16
};
static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
1, 4, 3, 2, 2, 2, 2, 2,
2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1,-1,-1,
-1,-1,-1,-1,-1
};
#define ML_DEFAULTNORMLOG 6 /* for static allocation */
static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2,
2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1 };
static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
1, 1, 1, 1, 1, 1, 2, 2,
2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
-1,-1,-1,-1,-1
};
#define OF_DEFAULTNORMLOG 5 /* for static allocation */
static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
/*-*******************************************
* Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
static void ZSTD_copy8(void* dst, const void* src) {
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
#else
ZSTD_memcpy(dst, src, 8);
#endif
}
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
static void ZSTD_copy16(void* dst, const void* src) {
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
#else
ZSTD_memcpy(dst, src, 16);
#endif
}
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
#define WILDCOPY_OVERLENGTH 32
@@ -207,13 +269,13 @@ typedef enum {
} ZSTD_overlap_e;
/*! ZSTD_wildcopy() :
* Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
* Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
* @param ovtype controls the overlap detection
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
* - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
* The src buffer must be before the dst buffer.
*/
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
MEM_STATIC FORCE_INLINE_ATTR
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
@@ -230,47 +292,110 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
} while (op < oend);
} else {
assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
/* Separate out the first two COPY16() calls because the copy length is
/* Separate out the first COPY16() call because the copy length is
* almost certain to be short, so the branches have different
* probabilities.
* On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
* On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
* probabilities. Since it is almost certain to be short, only do
* one COPY16() in the first call. Then, do two calls per loop since
* at that point it is more likely to have a high trip count.
*/
COPY16(op, ip);
COPY16(op, ip);
if (op >= oend) return;
#ifdef __aarch64__
do {
COPY16(op, ip);
}
while (op < oend);
#else
ZSTD_copy16(op, ip);
if (16 >= length) return;
op += 16;
ip += 16;
do {
COPY16(op, ip);
COPY16(op, ip);
}
while (op < oend);
#endif
}
}
MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
size_t const length = MIN(dstCapacity, srcSize);
if (length > 0) {
ZSTD_memcpy(dst, src, length);
}
return length;
}
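/* e.g. (illustrative) ZSTD_limitCopy(dst, 8, src, 16) copies only 8 bytes
 * and returns 8; a zero dstCapacity or srcSize copies nothing and returns 0. */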
/* define "workspace is too large" as this number of times larger than needed */
#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
/* when the workspace has been too large
 * for at least this many consecutive invocations,
 * the context's memory usage is considered wasteful,
 * because it's sized to handle a worst-case scenario which rarely happens.
 * In that case, resize it down to free some memory */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
/* Controls whether the input/output buffer is buffered or stable. */
typedef enum {
ZSTD_bm_buffered = 0, /* Buffer the input/output */
ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
} ZSTD_bufferMode_e;
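/* Sketch of the contract (illustrative): in ZSTD_bm_stable mode the caller
 * promises the ZSTD_inBuffer/ZSTD_outBuffer memory stays valid and unmoved
 * across calls, so the library may read/write it directly; ZSTD_bm_buffered
 * instead stages data through internal buffers. */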
/*-*******************************************
* Private declarations
*********************************************/
typedef struct seqDef_s {
U32 offset;
U32 offset; /* Offset code of the sequence */
U16 litLength;
U16 matchLength;
} seqDef;
typedef struct {
seqDef* sequencesStart;
seqDef* sequences;
seqDef* sequences; /* ptr to end of sequences */
BYTE* litStart;
BYTE* lit;
BYTE* lit; /* ptr to end of literals */
BYTE* llCode;
BYTE* mlCode;
BYTE* ofCode;
size_t maxNbSeq;
size_t maxNbLit;
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
U32 longLengthPos;
/* longLengthPos and longLengthID allow us to represent a single litLength or matchLength
 * in the seqStore whose value is larger than a U16 (if one exists). To do so, we increment
 * the existing value of that litLength or matchLength by 0x10000.
 */
U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
} seqStore_t;
typedef struct {
U32 litLength;
U32 matchLength;
} ZSTD_sequenceLength;
/**
 * Returns the ZSTD_sequenceLength for the given sequence. It handles the decoding of long sequences
* indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
*/
MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
{
ZSTD_sequenceLength seqLen;
seqLen.litLength = seq->litLength;
seqLen.matchLength = seq->matchLength + MINMATCH;
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
if (seqStore->longLengthID == 1) {
seqLen.litLength += 0xFFFF;
}
if (seqStore->longLengthID == 2) {
seqLen.matchLength += 0xFFFF;
}
}
return seqLen;
}
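/* Usage sketch (ZSTD_example_sumSequenceLengths is hypothetical): iterate a
 * populated seqStore and sum the regenerated bytes its sequences cover,
 * long lengths included. */
MEM_STATIC size_t ZSTD_example_sumSequenceLengths(const seqStore_t* seqStore)
{
    size_t total = 0;
    const seqDef* seq;
    for (seq = seqStore->sequencesStart; seq < seqStore->sequences; seq++) {
        ZSTD_sequenceLength const sl = ZSTD_getSequenceLength(seqStore, seq);
        total += sl.litLength + sl.matchLength; /* matchLength already includes MINMATCH */
    }
    return total;
}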
/**
* Contains the compressed frame size and an upper-bound for the decompressed frame size.
* Note: before using `compressedSize`, check for errors using ZSTD_isError().
@@ -286,9 +411,9 @@ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBu
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
/* custom memory allocation functions */
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
void ZSTD_free(void* ptr, ZSTD_customMem customMem);
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
@@ -296,9 +421,12 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
_BitScanReverse(&r, val);
return (unsigned)r;
# if STATIC_BMI2 == 1
return _lzcnt_u32(val)^31;
# else
unsigned long r=0;
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -58,11 +58,11 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
/** ZSTD_insertDUBT1() :
* sort one already inserted but unsorted position
* assumption : current >= btlow == (current - btmask)
* assumption : curr >= btlow == (curr - btmask)
* doesn't fail */
static void
ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
U32 current, const BYTE* inputEnd,
U32 curr, const BYTE* inputEnd,
U32 nbCompares, U32 btLow,
const ZSTD_dictMode_e dictMode)
{
@@ -74,41 +74,41 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
const BYTE* const base = ms->window.base;
const BYTE* const dictBase = ms->window.dictBase;
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* match;
U32* smallerPtr = bt + 2*(current&btMask);
U32* smallerPtr = bt + 2*(curr&btMask);
U32* largerPtr = smallerPtr + 1;
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
U32 dummy32; /* to be nullified at the end */
U32 const windowValid = ms->window.lowLimit;
U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
current, dictLimit, windowLow);
assert(current >= btLow);
curr, dictLimit, windowLow);
assert(curr >= btLow);
assert(ip < iend); /* condition for ZSTD_count */
while (nbCompares-- && (matchIndex > windowLow)) {
U32* const nextPtr = bt + 2*(matchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
assert(matchIndex < current);
assert(matchIndex < curr);
/* note : all candidates are now supposed sorted,
* but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
* when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
if ( (dictMode != ZSTD_extDict)
|| (matchIndex+matchLength >= dictLimit) /* both in current segment*/
|| (current < dictLimit) /* both in extDict */) {
|| (curr < dictLimit) /* both in extDict */) {
const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
|| (matchIndex+matchLength >= dictLimit)) ?
base : dictBase;
assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
|| (current < dictLimit) );
|| (curr < dictLimit) );
match = mBase + matchIndex;
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
} else {
@@ -119,7 +119,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
}
DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
current, matchIndex, (U32)matchLength);
curr, matchIndex, (U32)matchLength);
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
@@ -168,7 +168,7 @@ ZSTD_DUBT_findBetterDictMatch (
const BYTE* const base = ms->window.base;
const BYTE* const prefixStart = base + ms->window.dictLimit;
U32 const current = (U32)(ip-base);
U32 const curr = (U32)(ip-base);
const BYTE* const dictBase = dms->window.base;
const BYTE* const dictEnd = dms->window.nextSrc;
U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
@@ -195,10 +195,10 @@ ZSTD_DUBT_findBetterDictMatch (
if (matchLength > bestLength) {
U32 matchIndex = dictMatchIndex + dictIndexDelta;
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
}
if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
break; /* drop, to guarantee consistency (miss a little bit of compression) */
@@ -218,9 +218,9 @@ ZSTD_DUBT_findBetterDictMatch (
}
if (bestLength >= MINMATCH) {
U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
current, (U32)bestLength, (U32)*offsetPtr, mIndex);
curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
}
return bestLength;
@@ -241,13 +241,13 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
U32 matchIndex = hashTable[h];
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
U32 const curr = (U32)(ip-base);
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
U32 const btMask = (1 << btLog) - 1;
U32 const btLow = (btMask >= current) ? 0 : current - btMask;
U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
U32 const unsortLimit = MAX(btLow, windowLow);
U32* nextCandidate = bt + 2*(matchIndex&btMask);
@@ -256,8 +256,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
U32 nbCandidates = nbCompares;
U32 previousCandidate = 0;
DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
assert(ip <= iend-8); /* required for h calculation */
assert(dictMode != ZSTD_dedicatedDictSearch);
/* reach end of unsorted candidates list */
while ( (matchIndex > unsortLimit)
@@ -299,14 +300,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
U32 matchEndIdx = current + 8 + 1;
U32* smallerPtr = bt + 2*(curr&btMask);
U32* largerPtr = bt + 2*(curr&btMask) + 1;
U32 matchEndIdx = curr + 8 + 1;
U32 dummy32; /* to be nullified at the end */
size_t bestLength = 0;
matchIndex = hashTable[h];
hashTable[h] = current; /* Update Hash Table */
hashTable[h] = curr; /* Update Hash Table */
while (nbCompares-- && (matchIndex > windowLow)) {
U32* const nextPtr = bt + 2*(matchIndex & btMask);
@@ -326,8 +327,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
if (matchLength > bestLength) {
if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength;
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
if (dictMode == ZSTD_dictMatchState) {
nbCompares = 0; /* in addition to avoiding checking any
@@ -363,12 +364,12 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
mls, dictMode);
}
assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
if (bestLength >= MINMATCH) {
U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
current, (U32)bestLength, (U32)*offsetPtr, mIndex);
curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
}
return bestLength;
}
@@ -446,7 +447,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
/* Update chains up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndex_internal(
FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
ZSTD_matchState_t* ms,
const ZSTD_compressionParameters* const cParams,
const BYTE* ip, U32 const mls)
@@ -475,6 +476,121 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
}
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
{
const BYTE* const base = ms->window.base;
U32 const target = (U32)(ip - base);
U32* const hashTable = ms->hashTable;
U32* const chainTable = ms->chainTable;
U32 const chainSize = 1 << ms->cParams.chainLog;
U32 idx = ms->nextToUpdate;
U32 const minChain = chainSize < target ? target - chainSize : idx;
U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
U32 const cacheSize = bucketSize - 1;
U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
/* We know the hashtable is oversized by a factor of `bucketSize`.
* We are going to temporarily pretend `bucketSize == 1`, keeping only a
* single entry. We will use the rest of the space to construct a temporary
* chaintable.
*/
U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
U32* const tmpHashTable = hashTable;
U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
U32 hashIdx;
assert(ms->cParams.chainLog <= 24);
assert(ms->cParams.hashLog >= ms->cParams.chainLog);
assert(idx != 0);
assert(tmpMinChain <= minChain);
/* fill conventional hash table and conventional chain table */
for ( ; idx < target; idx++) {
U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
if (idx >= tmpMinChain) {
tmpChainTable[idx - tmpMinChain] = hashTable[h];
}
tmpHashTable[h] = idx;
}
/* sort chains into ddss chain table */
{
U32 chainPos = 0;
for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
U32 count;
U32 countBeyondMinChain = 0;
U32 i = tmpHashTable[hashIdx];
for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
/* skip through the chain to the first position that won't be
* in the hash cache bucket */
if (i < minChain) {
countBeyondMinChain++;
}
i = tmpChainTable[i - tmpMinChain];
}
if (count == cacheSize) {
for (count = 0; count < chainLimit;) {
if (i < minChain) {
if (!i || countBeyondMinChain++ > cacheSize) {
/* only allow pulling `cacheSize` number of entries
* into the cache or chainTable beyond `minChain`,
* to replace the entries pulled out of the
* chainTable into the cache. This lets us reach
* back further without increasing the total number
* of entries in the chainTable, guaranteeing the
* DDSS chain table will fit into the space
* allocated for the regular one. */
break;
}
}
chainTable[chainPos++] = i;
count++;
if (i < tmpMinChain) {
break;
}
i = tmpChainTable[i - tmpMinChain];
}
} else {
count = 0;
}
if (count) {
tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
} else {
tmpHashTable[hashIdx] = 0;
}
}
assert(chainPos <= chainSize); /* I believe this is guaranteed... */
}
/* move chain pointers into the last entry of each hash bucket */
for (hashIdx = (1 << hashLog); hashIdx; ) {
U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
U32 const chainPackedPointer = tmpHashTable[hashIdx];
U32 i;
for (i = 0; i < cacheSize; i++) {
hashTable[bucketIdx + i] = 0;
}
hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
}
/* fill the buckets of the hash table */
for (idx = ms->nextToUpdate; idx < target; idx++) {
U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
<< ZSTD_LAZY_DDSS_BUCKET_LOG;
U32 i;
/* Shift hash cache down 1. */
for (i = cacheSize - 1; i; i--)
hashTable[h + i] = hashTable[h + i - 1];
hashTable[h] = idx;
}
ms->nextToUpdate = target;
}
/* inlining is important to hardwire a hot branch (template emulation) */
FORCE_INLINE_TEMPLATE
@@ -493,20 +609,33 @@ size_t ZSTD_HcFindBestMatch_generic (
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const U32 current = (U32)(ip-base);
const U32 curr = (U32)(ip-base);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowestValid = ms->window.lowLimit;
const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
const U32 isDictionary = (ms->loadedDictEnd != 0);
const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
const U32 minChain = current > chainSize ? current - chainSize : 0;
const U32 minChain = curr > chainSize ? curr - chainSize : 0;
U32 nbAttempts = 1U << cParams->searchLog;
size_t ml=4-1;
/* HC4 match finder */
U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
U32 matchIndex;
if (dictMode == ZSTD_dedicatedDictSearch) {
const U32* entry = &dms->hashTable[ddsIdx];
PREFETCH_L1(entry);
}
/* HC4 match finder */
matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
size_t currentMl=0;
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
const BYTE* const match = base + matchIndex;
@@ -523,7 +652,7 @@ size_t ZSTD_HcFindBestMatch_generic (
/* save best solution */
if (currentMl > ml) {
ml = currentMl;
*offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
*offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
}
@@ -531,8 +660,92 @@ size_t ZSTD_HcFindBestMatch_generic (
matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
}
if (dictMode == ZSTD_dictMatchState) {
const ZSTD_matchState_t* const dms = ms->dictMatchState;
if (dictMode == ZSTD_dedicatedDictSearch) {
const U32 ddsLowestIndex = dms->window.dictLimit;
const BYTE* const ddsBase = dms->window.base;
const BYTE* const ddsEnd = dms->window.nextSrc;
const U32 ddsSize = (U32)(ddsEnd - ddsBase);
const U32 ddsIndexDelta = dictLimit - ddsSize;
const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
U32 ddsAttempt;
for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
}
{
U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
U32 const chainIndex = chainPackedPointer >> 8;
PREFETCH_L1(&dms->chainTable[chainIndex]);
}
for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
size_t currentMl=0;
const BYTE* match;
matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
match = ddsBase + matchIndex;
if (!matchIndex) {
return ml;
}
/* guaranteed by table construction */
(void)ddsLowestIndex;
assert(matchIndex >= ddsLowestIndex);
assert(match+4 <= ddsEnd);
if (MEM_read32(match) == MEM_read32(ip)) {
/* assumption : matchIndex <= dictLimit-4 (by table construction) */
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
}
/* save best solution */
if (currentMl > ml) {
ml = currentMl;
*offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
if (ip+currentMl == iLimit) {
/* best possible, avoids read overflow on next attempt */
return ml;
}
}
}
{
U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
U32 chainIndex = chainPackedPointer >> 8;
U32 const chainLength = chainPackedPointer & 0xFF;
U32 const chainAttempts = nbAttempts - ddsAttempt;
U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
U32 chainAttempt;
for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
}
for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
size_t currentMl=0;
const BYTE* match;
matchIndex = dms->chainTable[chainIndex];
match = ddsBase + matchIndex;
/* guaranteed by table construction */
assert(matchIndex >= ddsLowestIndex);
assert(match+4 <= ddsEnd);
if (MEM_read32(match) == MEM_read32(ip)) {
/* assumption : matchIndex <= dictLimit-4 (by table construction) */
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
}
/* save best solution */
if (currentMl > ml) {
ml = currentMl;
*offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
}
}
}
} else if (dictMode == ZSTD_dictMatchState) {
const U32* const dmsChainTable = dms->chainTable;
const U32 dmsChainSize = (1 << dms->cParams.chainLog);
const U32 dmsChainMask = dmsChainSize - 1;
@@ -545,7 +758,7 @@ size_t ZSTD_HcFindBestMatch_generic (
matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
size_t currentMl=0;
const BYTE* const match = dmsBase + matchIndex;
assert(match+4 <= dmsEnd);
@@ -555,11 +768,12 @@ size_t ZSTD_HcFindBestMatch_generic (
/* save best solution */
if (currentMl > ml) {
ml = currentMl;
*offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
*offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
}
if (matchIndex <= dmsMinChain) break;
matchIndex = dmsChainTable[matchIndex & dmsChainMask];
}
}
@@ -600,6 +814,22 @@ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
}
static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr)
{
switch(ms->cParams.minMatch)
{
default : /* includes case 3 */
case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
case 7 :
case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
}
}
FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* const iLimit,
@@ -641,35 +871,62 @@ ZSTD_compressBlock_lazy_generic(
typedef size_t (*searchMax_f)(
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
: ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
: ZSTD_HcFindBestMatch_selectMLS);
/**
* This table is indexed first by the four ZSTD_dictMode_e values, and then
* by the two searchMethod_e values. NULLs are placed for configurations
* that should never occur (extDict modes go to the other implementation
* below and there is no DDSS for binary tree search yet).
*/
const searchMax_f searchFuncs[4][2] = {
{
ZSTD_HcFindBestMatch_selectMLS,
ZSTD_BtFindBestMatch_selectMLS
},
{
NULL,
NULL
},
{
ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
ZSTD_BtFindBestMatch_dictMatchState_selectMLS
},
{
ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
NULL
}
};
searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
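/* e.g. (illustrative) dictMode == ZSTD_dictMatchState with
 * searchMethod == search_binaryTree selects searchFuncs[2][1], i.e.
 * ZSTD_BtFindBestMatch_dictMatchState_selectMLS; the NULL slots are
 * unreachable by construction. */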
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
const int isDMS = dictMode == ZSTD_dictMatchState;
const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
const int isDxS = isDMS || isDDS;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
dms->window.dictLimit : 0;
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
dms->window.base : NULL;
const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
dictBase + dictLowestIndex : NULL;
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
dms->window.nextSrc : NULL;
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL;
const U32 dictIndexDelta = isDxS ?
prefixLowestIndex - (U32)(dictEnd - dictBase) :
0;
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
assert(searchMax != NULL);
DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
/* init */
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
U32 const maxRep = (U32)(ip - prefixLowest);
U32 const curr = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
U32 const maxRep = curr - windowLow;
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
}
if (dictMode == ZSTD_dictMatchState) {
if (isDxS) {
/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
assert(offset_1 <= dictAndPrefixLength);
@@ -677,15 +934,21 @@ ZSTD_compressBlock_lazy_generic(
}
/* Match Loop */
#if defined(__GNUC__) && defined(__x86_64__)
/* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
 * code alignment is perturbed. To fix the instability, align the loop on 32 bytes.
 */
__asm__(".p2align 5");
#endif
while (ip < ilimit) {
size_t matchLength=0;
size_t offset=0;
const BYTE* start=ip+1;
/* check repCode */
if (dictMode == ZSTD_dictMatchState) {
if (isDxS) {
const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
&& repIndex < prefixLowestIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
@@ -726,7 +989,7 @@ ZSTD_compressBlock_lazy_generic(
if ((mlRep >= 4) && (gain2 > gain1))
matchLength = mlRep, offset = 0, start = ip;
}
if (dictMode == ZSTD_dictMatchState) {
if (isDxS) {
const U32 repIndex = (U32)(ip - base) - offset_1;
const BYTE* repMatch = repIndex < prefixLowestIndex ?
dictBase + (repIndex - dictIndexDelta) :
@@ -761,7 +1024,7 @@ ZSTD_compressBlock_lazy_generic(
if ((mlRep >= 4) && (gain2 > gain1))
matchLength = mlRep, offset = 0, start = ip;
}
if (dictMode == ZSTD_dictMatchState) {
if (isDxS) {
const U32 repIndex = (U32)(ip - base) - offset_1;
const BYTE* repMatch = repIndex < prefixLowestIndex ?
dictBase + (repIndex - dictIndexDelta) :
@@ -799,7 +1062,7 @@ ZSTD_compressBlock_lazy_generic(
&& (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
{ start--; matchLength++; }
}
if (dictMode == ZSTD_dictMatchState) {
if (isDxS) {
U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
@@ -815,12 +1078,11 @@ _storeSequence:
}
/* check immediate repcode */
if (dictMode == ZSTD_dictMatchState) {
if (isDxS) {
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex = current2 - offset_2;
const BYTE* repMatch = dictMode == ZSTD_dictMatchState
&& repIndex < prefixLowestIndex ?
const BYTE* repMatch = repIndex < prefixLowestIndex ?
dictBase - dictIndexDelta + repIndex :
base + repIndex;
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
@@ -915,6 +1177,28 @@ size_t ZSTD_compressBlock_greedy_dictMatchState(
}
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
}
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
}
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
}
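/* Naming sketch: the trailing integer passed to ZSTD_compressBlock_lazy_generic
 * is the search depth (0 == greedy, 1 == lazy, 2 == lazy2), so these three
 * wrappers differ only in how hard they search for a better match. */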
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_lazy_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
@@ -929,11 +1213,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
const BYTE* const ilimit = iend - 8;
const BYTE* const base = ms->window.base;
const U32 dictLimit = ms->window.dictLimit;
const U32 lowestIndex = ms->window.lowLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const dictStart = dictBase + lowestIndex;
const BYTE* const dictStart = dictBase + ms->window.lowLimit;
const U32 windowLog = ms->cParams.windowLog;
typedef size_t (*searchMax_f)(
ZSTD_matchState_t* ms,
@@ -942,21 +1226,30 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
U32 offset_1 = rep[0], offset_2 = rep[1];
DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
/* init */
ip += (ip == prefixStart);
/* Match Loop */
#if defined(__GNUC__) && defined(__x86_64__)
/* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
 * code alignment is perturbed. To fix the instability, align the loop on 32 bytes.
 */
__asm__(".p2align 5");
#endif
while (ip < ilimit) {
size_t matchLength=0;
size_t offset=0;
const BYTE* start=ip+1;
U32 current = (U32)(ip-base);
U32 curr = (U32)(ip-base);
/* check repCode */
{ const U32 repIndex = (U32)(current+1 - offset_1);
{ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
const U32 repIndex = (U32)(curr+1 - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
/* repcode detected we should take it */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -980,13 +1273,14 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
if (depth>=1)
while (ip<ilimit) {
ip ++;
current++;
curr++;
/* check repCode */
if (offset) {
const U32 repIndex = (U32)(current - offset_1);
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
const U32 repIndex = (U32)(curr - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1010,13 +1304,14 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
/* let's find an even better one */
if ((depth==2) && (ip<ilimit)) {
ip ++;
current++;
curr++;
/* check repCode */
if (offset) {
const U32 repIndex = (U32)(current - offset_1);
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
const U32 repIndex = (U32)(curr - offset_1);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1057,10 +1352,12 @@ _storeSequence:
/* check immediate repcode */
while (ip <= ilimit) {
const U32 repIndex = (U32)((ip-base) - offset_2);
const U32 repCurrent = (U32)(ip-base);
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
const U32 repIndex = repCurrent - offset_2;
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
if (MEM_read32(ip) == MEM_read32(repMatch)) {
/* repcode detected we should take it */
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -17,8 +17,18 @@ extern "C" {
#include "zstd_compress_internal.h"
/**
* Dedicated Dictionary Search Structure bucket log. In the
* ZSTD_dedicatedDictSearch mode, the hashTable has
* 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just
* one.
*/
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
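/* Indexing sketch (illustrative; cParams/ip/mls stand for a search's inputs):
 * each hash value owns 1 << ZSTD_LAZY_DDSS_BUCKET_LOG == 4 consecutive U32
 * slots. The first 3 cache the most recent match positions, and the last
 * packs the overflow chain as (chainStart << 8) | chainLength:
 *
 *   U32 const ddsHashLog = cParams->hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
 *   size_t const ddsIdx  = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG;
 *   U32 const packed     = hashTable[ddsIdx + (1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1];
 *   U32 const chainStart  = packed >> 8;
 *   U32 const chainLength = packed & 0xFF;
 */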
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
size_t ZSTD_compressBlock_btlazy2(
@@ -47,6 +57,16 @@ size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
View File
@@ -1,10 +1,11 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "zstd_ldm.h"
@@ -26,13 +27,6 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
if (cParams->strategy >= ZSTD_btopt) {
/* Get out of the way of the optimal parser */
U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
params->minMatchLength = minMatch;
}
if (params->hashLog == 0) {
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
@@ -149,10 +143,10 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
* We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
static size_t ZSTD_ldm_countBackwardsMatch(
const BYTE* pIn, const BYTE* pAnchor,
const BYTE* pMatch, const BYTE* pBase)
const BYTE* pMatch, const BYTE* pMatchBase)
{
size_t matchLength = 0;
while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
pIn--;
pMatch--;
matchLength++;
@@ -160,6 +154,27 @@ static size_t ZSTD_ldm_countBackwardsMatch(
return matchLength;
}
/** ZSTD_ldm_countBackwardsMatch_2segments() :
* Returns the number of bytes that match backwards from pMatch,
* even with the backwards match spanning 2 different segments.
*
* On reaching `pMatchBase`, start counting from mEnd */
static size_t ZSTD_ldm_countBackwardsMatch_2segments(
const BYTE* pIn, const BYTE* pAnchor,
const BYTE* pMatch, const BYTE* pMatchBase,
const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
{
size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
/* If backwards match is entirely in the extDict or prefix, immediately return */
return matchLength;
}
DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
DEBUGLOG(7, "final backwards match length = %zu", matchLength);
return matchLength;
}
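/* Worked example (hypothetical layout): if the backwards match runs from
 * pMatch down to exactly pMatchBase (the prefix start) without reaching
 * pAnchor, the first count stops at the segment boundary; counting then
 * resumes from pExtDictEnd, extending the same match into the extDict. */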
/** ZSTD_ldm_fillFastTables() :
*
* Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
@@ -223,6 +238,20 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
return rollingHash;
}
void ZSTD_ldm_fillHashTable(
ldmState_t* state, const BYTE* ip,
const BYTE* iend, ldmParams_t const* params)
{
DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
if ((size_t)(iend - ip) >= params->minMatchLength) {
U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
ZSTD_ldm_fillLdmHashTable(
state, startingHash, ip, iend - params->minMatchLength, state->window.base,
params->hashLog - params->bucketSizeLog,
*params);
}
}
/** ZSTD_ldm_limitTableUpdate() :
*
@@ -231,10 +260,10 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
* (after a long match, only update tables a limited amount). */
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
{
U32 const current = (U32)(anchor - ms->window.base);
if (current > ms->nextToUpdate + 1024) {
U32 const curr = (U32)(anchor - ms->window.base);
if (curr > ms->nextToUpdate + 1024) {
ms->nextToUpdate =
current - MIN(512, current - ms->nextToUpdate - 1024);
curr - MIN(512, curr - ms->nextToUpdate - 1024);
}
}
@@ -271,7 +300,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
while (ip <= ilimit) {
size_t mLength;
U32 const current = (U32)(ip - base);
U32 const curr = (U32)(ip - base);
size_t forwardMatchLength = 0, backwardMatchLength = 0;
ldmEntry_t* bestEntry = NULL;
if (ip != istart) {
@@ -321,8 +350,9 @@ static size_t ZSTD_ldm_generateSequences_internal(
continue;
}
curBackwardMatchLength =
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
lowMatchPtr);
ZSTD_ldm_countBackwardsMatch_2segments(ip, anchor,
pMatch, lowMatchPtr,
dictStart, dictEnd);
curTotalMatchLength = curForwardMatchLength +
curBackwardMatchLength;
} else { /* !extDict */
@@ -350,7 +380,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
/* No match found -- continue searching */
if (bestEntry == NULL) {
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
hBits, current,
hBits, curr,
*params);
ip++;
continue;
@@ -362,11 +392,11 @@ static size_t ZSTD_ldm_generateSequences_internal(
{
/* Store the sequence:
* ip = current - backwardMatchLength
* ip = curr - backwardMatchLength
* The match is at (bestEntry->offset - backwardMatchLength)
*/
U32 const matchIndex = bestEntry->offset;
U32 const offset = current - matchIndex;
U32 const offset = curr - matchIndex;
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
/* Out of sequence storage */
@@ -449,6 +479,8 @@ size_t ZSTD_ldm_generateSequences(
U32 const correction = ZSTD_window_correctOverflow(
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
/* invalidate dictionaries on overflow correction */
ldmState->loadedDictEnd = 0;
}
/* 2. We enforce the maximum offset allowed.
*
@@ -457,8 +489,14 @@ size_t ZSTD_ldm_generateSequences(
* TODO: * Test the chunk size.
* * Try invalidation after the sequence generation and test the
* the offset against maxDist directly.
*
* NOTE: Because of dictionaries + sequence splitting we MUST make sure
* that any offset used is valid at the END of the sequence, since it may
* be split into two sequences. This condition holds when using
* ZSTD_window_enforceMaxDist(), but if we move to checking offsets
* against maxDist directly, we'll have to carefully handle that case.
*/
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
ldmState, sequences, params, chunkStart, chunkSize);
@@ -539,6 +577,23 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
return sequence;
}
void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
if (currPos >= currSeq.litLength + currSeq.matchLength) {
currPos -= currSeq.litLength + currSeq.matchLength;
rawSeqStore->pos++;
} else {
rawSeqStore->posInSequence = currPos;
break;
}
}
if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
rawSeqStore->posInSequence = 0;
}
}
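/* Worked example (hypothetical contents): with three raw sequences covering
 * 100, 50 and 75 bytes (litLength + matchLength each) and pos == 0,
 * skipping 120 bytes consumes the first sequence (pos -> 1) and leaves
 * posInSequence == 20 inside the second. */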
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
@@ -554,6 +609,15 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
BYTE const* ip = istart;
DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
/* If using opt parser, use LDMs only as candidates rather than always accepting them */
if (cParams->strategy >= ZSTD_btopt) {
size_t lastLLSize;
ms->ldmSeqStore = rawSeqStore;
lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
return lastLLSize;
}
assert(rawSeqStore->pos <= rawSeqStore->size);
assert(rawSeqStore->size <= rawSeqStore->capacity);
/* Loop through each sequence and apply the block compressor to the lits */
@@ -566,14 +630,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
if (sequence.offset == 0)
break;
assert(sequence.offset <= (1U << cParams->windowLog));
assert(ip + sequence.litLength + sequence.matchLength <= iend);
/* Fill tables for block compressor */
ZSTD_ldm_limitTableUpdate(ms, ip);
ZSTD_ldm_fillFastTables(ms, ip);
/* Run the block compressor */
DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
{
size_t const newLitLength =
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
View File
@@ -1,10 +1,11 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_LDM_H
@@ -23,6 +24,10 @@ extern "C" {
#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
void ZSTD_ldm_fillHashTable(
ldmState_t* state, const BYTE* ip,
const BYTE* iend, ldmParams_t const* params);
/**
* ZSTD_ldm_generateSequences():
*
@@ -73,6 +78,12 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
U32 const minMatch);
/* ZSTD_ldm_skipRawSeqStoreBytes():
* Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'.
* Not to be used in conjunction with ZSTD_ldm_skipSequences().
 * Must be called for data which is not passed to ZSTD_ldm_blockCompress().
*/
void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes);
/** ZSTD_ldm_getTableSize() :
* Estimate the space needed for long distance matching tables or 0 if LDM is
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
View File
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -249,40 +249,6 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
}
}
/* ZSTD_litLengthContribution() :
* @return ( cost(litlength) - cost(0) )
* this value can then be added to rawLiteralsCost()
* to provide a cost which is directly comparable to a match ending at same position */
static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);
/* dynamic statistics */
{ U32 const llCode = ZSTD_LLcode(litLength);
int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
+ (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
- (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
#if 1
return contribution;
#else
return MAX(0, contribution); /* sometimes better, sometimes not ... */
#endif
}
}
/* ZSTD_literalsContribution() :
* creates a fake cost for the literals part of a sequence
* which can be compared to the ending cost of a match
* should a new match start at this position */
static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
const optState_t* const optPtr,
int optLevel)
{
int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
+ ZSTD_litLengthContribution(litLength, optPtr, optLevel);
return contribution;
}
/* ZSTD_getMatchPrice() :
* Provides the cost of the match part (offset + matchLength) of a sequence
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
@@ -420,32 +386,32 @@ static U32 ZSTD_insertBt1(
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* match;
const U32 current = (U32)(ip-base);
const U32 btLow = btMask >= current ? 0 : current - btMask;
U32* smallerPtr = bt + 2*(current&btMask);
const U32 curr = (U32)(ip-base);
const U32 btLow = btMask >= curr ? 0 : curr - btMask;
U32* smallerPtr = bt + 2*(curr&btMask);
U32* largerPtr = smallerPtr + 1;
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit;
U32 matchEndIdx = current+8+1;
U32 matchEndIdx = curr+8+1;
size_t bestLength = 8;
U32 nbCompares = 1U << cParams->searchLog;
#ifdef ZSTD_C_PREDICT
U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
predictedSmall += (predictedSmall>0);
predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
assert(ip <= iend-8); /* required for h calculation */
hashTable[h] = current; /* Update Hash Table */
hashTable[h] = curr; /* Update Hash Table */
assert(windowLow > 0);
while (nbCompares-- && (matchIndex >= windowLow)) {
U32* const nextPtr = bt + 2*(matchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
assert(matchIndex < current);
assert(matchIndex < curr);
#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
@@ -508,8 +474,8 @@ static U32 ZSTD_insertBt1(
*smallerPtr = *largerPtr = 0;
{ U32 positions = 0;
if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
assert(matchEndIdx > current + 8);
return MAX(positions, matchEndIdx - (current + 8));
assert(matchEndIdx > curr + 8);
return MAX(positions, matchEndIdx - (curr + 8));
}
}
@@ -553,7 +519,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const curr = (U32)(ip-base);
U32 const hashLog = cParams->hashLog;
U32 const minMatch = (mls==3) ? 3 : 4;
U32* const hashTable = ms->hashTable;
@@ -567,12 +533,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
U32 const dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
U32 const btLow = (btMask >= current) ? 0 : current - btMask;
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
U32 const matchLow = windowLow ? windowLow : 1;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
U32* smallerPtr = bt + 2*(curr&btMask);
U32* largerPtr = bt + 2*(curr&btMask) + 1;
U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
U32 dummy32; /* to be nullified at the end */
U32 mnum = 0;
U32 nbCompares = 1U << cParams->searchLog;
@@ -591,7 +557,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
size_t bestLength = lengthToBeat-1;
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
/* check repCode */
assert(ll0 <= 1); /* necessarily 1 or 0 */
@@ -599,26 +565,29 @@ U32 ZSTD_insertBtAndGetAllMatches (
U32 repCode;
for (repCode = ll0; repCode < lastR; repCode++) {
U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
U32 const repIndex = current - repOffset;
U32 const repIndex = curr - repOffset;
U32 repLen = 0;
assert(current >= dictLimit);
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
assert(curr >= dictLimit);
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
/* We must validate the repcode offset because when we're using a dictionary the
* valid offset range shrinks when the dictionary goes out of bounds.
*/
if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
}
} else { /* repIndex < dictLimit || repIndex >= current */
} else { /* repIndex < dictLimit || repIndex >= curr */
const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
dmsBase + repIndex - dmsIndexDelta :
dictBase + repIndex;
assert(current >= windowLow);
assert(curr >= windowLow);
if ( dictMode == ZSTD_extDict
&& ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
&& ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
& (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
}
if (dictMode == ZSTD_dictMatchState
&& ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */
&& ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
& ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
@@ -640,7 +609,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
if ((mls == 3) /*static*/ && (bestLength < mls)) {
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
if ((matchIndex3 >= matchLow)
& (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
& (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
size_t mlen;
if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
const BYTE* const match = base + matchIndex3;
@@ -655,26 +624,26 @@ U32 ZSTD_insertBtAndGetAllMatches (
DEBUGLOG(8, "found small match with hlog3, of length %u",
(U32)mlen);
bestLength = mlen;
assert(current > matchIndex3);
assert(curr > matchIndex3);
assert(mnum==0); /* no prior solution */
matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
matches[0].len = (U32)mlen;
mnum = 1;
if ( (mlen > sufficient_len) |
(ip+mlen == iLimit) ) { /* best possible length */
ms->nextToUpdate = current+1; /* skip insertion */
ms->nextToUpdate = curr+1; /* skip insertion */
return 1;
} } }
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
}
hashTable[h] = current; /* Update Hash Table */
hashTable[h] = curr; /* Update Hash Table */
while (nbCompares-- && (matchIndex >= matchLow)) {
U32* const nextPtr = bt + 2*(matchIndex & btMask);
const BYTE* match;
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
assert(current > matchIndex);
assert(curr > matchIndex);
if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
@@ -691,12 +660,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
if (matchLength > bestLength) {
DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
(U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
(U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
assert(matchEndIdx > matchIndex);
if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength;
bestLength = matchLength;
matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
matches[mnum].len = (U32)matchLength;
mnum++;
if ( (matchLength > ZSTD_OPT_NUM)
@@ -739,11 +708,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
if (matchLength > bestLength) {
matchIndex = dictMatchIndex + dmsIndexDelta;
DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
(U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
(U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength;
bestLength = matchLength;
matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
matches[mnum].len = (U32)matchLength;
mnum++;
if ( (matchLength > ZSTD_OPT_NUM)
@@ -764,7 +733,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
}
}
assert(matchEndIdx > current+8);
assert(matchEndIdx > curr+8);
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
return mnum;
}
@@ -795,34 +764,144 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
}
}
/*************************
* LDM helper functions *
*************************/
/* Struct containing info needed to make a decision about ldm inclusion */
typedef struct {
rawSeqStore_t seqStore; /* External match candidates store for this block */
U32 startPosInBlock; /* Start position of the current match candidate */
U32 endPosInBlock; /* End position of the current match candidate */
U32 offset; /* Offset of the match candidate */
} ZSTD_optLdm_t;
/* ZSTD_optLdm_skipRawSeqStoreBytes():
* Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
*/
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
if (currPos >= currSeq.litLength + currSeq.matchLength) {
currPos -= currSeq.litLength + currSeq.matchLength;
rawSeqStore->pos++;
} else {
rawSeqStore->posInSequence = currPos;
break;
}
}
if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
rawSeqStore->posInSequence = 0;
}
}
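/* Hedged illustration (not part of this change): skipping 14 bytes through a
 * hypothetical two-sequence store consumes all 12 bytes of seqs[0], then stops
 * 2 bytes into seqs[1]. Field order follows the rawSeq {offset, litLength,
 * matchLength} and rawSeqStore_t {seq, pos, posInSequence, size, capacity}
 * layouts assumed from zstd_compress_internal.h. */
static void example_skipRawSeqStoreBytes(void)
{
    rawSeq seqs[2] = { { 32, 4, 8 }, { 64, 2, 6 } };
    rawSeqStore_t store = { seqs, 0 /* pos */, 0 /* posInSequence */, 2 /* size */, 2 /* capacity */ };
    ZSTD_optLdm_skipRawSeqStoreBytes(&store, 14);   /* 14 = 12 (all of seqs[0]) + 2 */
    assert(store.pos == 1 && store.posInSequence == 2);
}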
/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
* Calculates the beginning and end of the next match in the current block.
* Updates 'pos' and 'posInSequence' of the ldmSeqStore.
*/
static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
U32 blockBytesRemaining) {
rawSeq currSeq;
U32 currBlockEndPos;
U32 literalsBytesRemaining;
U32 matchBytesRemaining;
/* Setting match end position to MAX to ensure we never use an LDM during this block */
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
optLdm->startPosInBlock = UINT_MAX;
optLdm->endPosInBlock = UINT_MAX;
return;
}
/* Calculate appropriate bytes left in matchLength and litLength after adjusting
based on ldmSeqStore->posInSequence */
currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
currBlockEndPos = currPosInBlock + blockBytesRemaining;
literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
0;
matchBytesRemaining = (literalsBytesRemaining == 0) ?
currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
currSeq.matchLength;
/* If there are more literal bytes than bytes remaining in block, no ldm is possible */
if (literalsBytesRemaining >= blockBytesRemaining) {
optLdm->startPosInBlock = UINT_MAX;
optLdm->endPosInBlock = UINT_MAX;
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
return;
}
/* Matches may be < MINMATCH by this process. In that case, we will reject them
when we are deciding whether or not to add the ldm */
optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
optLdm->offset = currSeq.offset;
if (optLdm->endPosInBlock > currBlockEndPos) {
/* Match ends after the block ends, we can't use the whole match */
optLdm->endPosInBlock = currBlockEndPos;
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
} else {
/* Consume nb of bytes equal to size of sequence left */
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
}
}
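/* Hedged illustration (not part of this change): with one pending sequence of
 * litLength 20 / matchLength 100 at the head of the store, a call at block
 * position 0 with 1000 bytes remaining yields a candidate spanning block
 * positions [20, 120); struct layouts as assumed above. */
static void example_getNextMatch(void)
{
    rawSeq seq = { 777 /* offset */, 20 /* litLength */, 100 /* matchLength */ };
    ZSTD_optLdm_t o;
    o.seqStore.seq = &seq;
    o.seqStore.pos = 0;
    o.seqStore.posInSequence = 0;
    o.seqStore.size = 1;
    o.seqStore.capacity = 1;
    ZSTD_opt_getNextMatchAndUpdateSeqStore(&o, 0 /* currPosInBlock */, 1000);
    assert(o.startPosInBlock == 20 && o.endPosInBlock == 120 && o.offset == 777);
}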
/* ZSTD_optLdm_maybeAddMatch():
 * Adds a match if it's long enough, based on its 'matchStartPosInBlock'
* and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
*/
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
/* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
/* Ensure that current block position is not outside of the match */
if (currPosInBlock < optLdm->startPosInBlock
|| currPosInBlock >= optLdm->endPosInBlock
|| candidateMatchLength < MINMATCH) {
return;
}
if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
candidateOffCode, candidateMatchLength, currPosInBlock);
matches[*nbMatches].len = candidateMatchLength;
matches[*nbMatches].off = candidateOffCode;
(*nbMatches)++;
}
}
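/* Worked example (hedged, not part of this change): a stored candidate covering
 * block positions [100, 140) queried at position 110 shrinks by posDiff == 10
 * to a 30-byte match, and is added with offCode == offset + ZSTD_REP_MOVE
 * because no better match exists yet. */
static void example_maybeAddMatch(void)
{
    ZSTD_match_t m[ZSTD_OPT_NUM];
    U32 nb = 0;
    ZSTD_optLdm_t o;                 /* seqStore is not consulted by maybeAddMatch */
    o.startPosInBlock = 100;
    o.endPosInBlock = 140;
    o.offset = 512;
    ZSTD_optLdm_maybeAddMatch(m, &nb, &o, 110 /* currPosInBlock */);
    assert(nb == 1 && m[0].len == 30 && m[0].off == 512 + ZSTD_REP_MOVE);
}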
/* ZSTD_optLdm_processMatchCandidate():
* Wrapper function to update ldm seq store and call ldm functions as necessary.
*/
static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
U32 currPosInBlock, U32 remainingBytes) {
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
return;
}
if (currPosInBlock >= optLdm->endPosInBlock) {
if (currPosInBlock > optLdm->endPosInBlock) {
/* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
* at the end of a match from the ldm seq store, and will often be some bytes
 * beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
*/
U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
}
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
}
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
}
/*-*******************************
* Optimal parser
*********************************/
typedef struct repcodes_s {
U32 rep[3];
} repcodes_t;
static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
{
repcodes_t newReps;
if (offset >= ZSTD_REP_NUM) { /* full offset */
newReps.rep[2] = rep[1];
newReps.rep[1] = rep[0];
newReps.rep[0] = offset - ZSTD_REP_MOVE;
} else { /* repcode */
U32 const repCode = offset + ll0;
if (repCode > 0) { /* note : if repCode==0, no change */
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
newReps.rep[1] = rep[0];
newReps.rep[0] = currentOffset;
} else { /* repCode == 0 */
memcpy(&newReps, rep, sizeof(newReps));
}
}
return newReps;
}
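/* Worked example (not part of this change): with rep history {8, 4, 2},
 * offset code 1 with ll0 == 0 selects repCode 1, moving 4 to the front;
 * a full offset (>= ZSTD_REP_NUM) would instead shift the whole history
 * and store offset - ZSTD_REP_MOVE at rep[0]. */
static void example_updateRep(void)
{
    U32 const rep[3] = { 8, 4, 2 };
    repcodes_t const r = ZSTD_updateRep(rep, 1 /* offset code */, 0 /* ll0 */);
    assert(r.rep[0] == 4 && r.rep[1] == 8 && r.rep[2] == 2);
}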
static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
@@ -839,7 +918,7 @@ listStats(const U32* table, int lastEltID)
int enb;
for (enb=0; enb < nbElts; enb++) {
(void)table;
//RAWLOG(2, "%3i:%3i, ", enb, table[enb]);
/* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
RAWLOG(2, "%4i,", table[enb]);
}
RAWLOG(2, " \n");
@@ -872,6 +951,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable;
ZSTD_optimal_t lastSequence;
ZSTD_optLdm_t optLdm;
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
/* init */
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
@@ -887,14 +971,21 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* find first match */
{ U32 const litlen = (U32)(ip - anchor);
U32 const ll0 = !litlen;
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
(U32)(ip-istart), (U32)(iend - ip));
if (!nbMatches) { ip++; continue; }
/* initialize opt[0] */
{ U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
opt[0].mlen = 0; /* means is_a_literal */
opt[0].litlen = litlen;
opt[0].price = ZSTD_literalsContribution(anchor, litlen, optStatePtr, optLevel);
/* We don't need to include the actual price of the literals because
* it is static for the duration of the forward pass, and is included
* in every price. We include the literal length to avoid negative
* prices when we subtract the previous literal length.
*/
opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
/* large match -> immediate encoding */
{ U32 const maxML = matches[nbMatches-1].len;
@@ -923,7 +1014,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offset = matches[matchNb].off;
U32 const end = matches[matchNb].len;
repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
for ( ; pos <= end ; pos++ ) {
U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
U32 const sequencePrice = literalsPrice + matchPrice;
@@ -933,8 +1023,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
opt[pos].off = offset;
opt[pos].litlen = litlen;
opt[pos].price = sequencePrice;
ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
} }
last_pos = pos-1;
}
@@ -961,7 +1049,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
opt[cur].off = 0;
opt[cur].litlen = litlen;
opt[cur].price = price;
memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
} else {
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
@@ -969,6 +1056,21 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
}
}
/* Set the repcodes of the current position. We must do it here
* because we rely on the repcodes of the 2nd to last sequence being
 * correct to set the next chunk's repcodes during the backward
* traversal.
*/
ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
assert(cur >= opt[cur].mlen);
if (opt[cur].mlen != 0) {
U32 const prev = cur - opt[cur].mlen;
repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
} else {
ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
}
/* last match must start at a minimum distance of 8 from oend */
if (inr > ilimit) continue;
@@ -984,8 +1086,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
U32 const previousPrice = opt[cur].price;
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
U32 matchNb;
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
(U32)(inr-istart), (U32)(iend-inr));
if (!nbMatches) {
DEBUGLOG(7, "rPos:%u : no match found", cur);
continue;
@@ -1009,7 +1115,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* set prices using matches found at position == cur */
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offset = matches[matchNb].off;
repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
U32 const lastML = matches[matchNb].len;
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
U32 mlen;
@@ -1029,8 +1134,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
opt[pos].off = offset;
opt[pos].litlen = litlen;
opt[pos].price = price;
ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
} else {
DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
@@ -1046,6 +1149,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
assert(opt[0].mlen == 0);
/* Set the next chunk's repcodes based on the repcodes of the beginning
* of the last match, and the last sequence. This avoids us having to
* update them while traversing the sequences.
*/
if (lastSequence.mlen != 0) {
repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
ZSTD_memcpy(rep, &reps, sizeof(reps));
} else {
ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
}
{ U32 const storeEnd = cur + 1;
U32 storeStart = storeEnd;
U32 seqPos = cur;
@@ -1082,20 +1196,6 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
continue; /* will finish */
}
/* repcodes update : like ZSTD_updateRep(), but update in place */
if (offCode >= ZSTD_REP_NUM) { /* full offset */
rep[2] = rep[1];
rep[1] = rep[0];
rep[0] = offCode - ZSTD_REP_MOVE;
} else { /* repcode */
U32 const repCode = offCode + (llen==0);
if (repCode) { /* note : if repCode==0, no change */
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
if (repCode >= 2) rep[2] = rep[1];
rep[1] = rep[0];
rep[0] = currentOffset;
} }
assert(anchor + llen <= iend);
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
@@ -1104,7 +1204,6 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
} }
ZSTD_setBasePrices(optStatePtr, optLevel);
}
} /* while (ip < ilimit) */
/* Return the last literals size */
@@ -1156,7 +1255,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
const void* src, size_t srcSize)
{
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
memcpy(tmpRep, rep, sizeof(tmpRep));
ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
assert(ms->opt.litLengthSum == 0); /* first block */
@@ -1189,7 +1288,7 @@ size_t ZSTD_compressBlock_btultra2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
U32 const current = (U32)((const BYTE*)src - ms->window.base);
U32 const curr = (U32)((const BYTE*)src - ms->window.base);
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
/* 2-pass strategy:
@@ -1204,7 +1303,7 @@ size_t ZSTD_compressBlock_btultra2(
if ( (ms->opt.litLengthSum==0) /* first block */
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
&& (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
&& (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
&& (srcSize > ZSTD_PREDEF_THRESHOLD)
) {
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@@ -2,7 +2,6 @@ package zstd
/*
#define ZSTD_STATIC_LINKING_ONLY
#include "stdint.h" // for uintptr_t
#include "zstd.h"
typedef struct compressStream2_result_s {
@@ -11,9 +10,10 @@ typedef struct compressStream2_result_s {
size_t bytes_written;
} compressStream2_result;
static void ZSTD_compressStream2_wrapper(compressStream2_result* result, ZSTD_CCtx* ctx, uintptr_t dst, size_t maxDstSize, const uintptr_t src, size_t srcSize) {
ZSTD_outBuffer outBuffer = { (void*)dst, maxDstSize, 0 };
ZSTD_inBuffer inBuffer = { (void*)src, srcSize, 0 };
static void ZSTD_compressStream2_wrapper(compressStream2_result* result, ZSTD_CCtx* ctx,
void* dst, size_t maxDstSize, const void* src, size_t srcSize) {
ZSTD_outBuffer outBuffer = { dst, maxDstSize, 0 };
ZSTD_inBuffer inBuffer = { src, srcSize, 0 };
size_t retCode = ZSTD_compressStream2(ctx, &outBuffer, &inBuffer, ZSTD_e_continue);
result->return_code = retCode;
@@ -21,9 +21,21 @@ static void ZSTD_compressStream2_wrapper(compressStream2_result* result, ZSTD_CC
result->bytes_written = outBuffer.pos;
}
static void ZSTD_compressStream2_finish(compressStream2_result* result, ZSTD_CCtx* ctx, uintptr_t dst, size_t maxDstSize, const uintptr_t src, size_t srcSize) {
ZSTD_outBuffer outBuffer = { (void*)dst, maxDstSize, 0 };
ZSTD_inBuffer inBuffer = { (void*)src, srcSize, 0 };
static void ZSTD_compressStream2_flush(compressStream2_result* result, ZSTD_CCtx* ctx,
void* dst, size_t maxDstSize, const void* src, size_t srcSize) {
ZSTD_outBuffer outBuffer = { dst, maxDstSize, 0 };
ZSTD_inBuffer inBuffer = { src, srcSize, 0 };
size_t retCode = ZSTD_compressStream2(ctx, &outBuffer, &inBuffer, ZSTD_e_flush);
result->return_code = retCode;
result->bytes_consumed = inBuffer.pos;
result->bytes_written = outBuffer.pos;
}
static void ZSTD_compressStream2_finish(compressStream2_result* result, ZSTD_CCtx* ctx,
void* dst, size_t maxDstSize, const void* src, size_t srcSize) {
ZSTD_outBuffer outBuffer = { dst, maxDstSize, 0 };
ZSTD_inBuffer inBuffer = { src, srcSize, 0 };
size_t retCode = ZSTD_compressStream2(ctx, &outBuffer, &inBuffer, ZSTD_e_end);
result->return_code = retCode;
@@ -38,9 +50,10 @@ typedef struct decompressStream2_result_s {
size_t bytes_written;
} decompressStream2_result;
static void ZSTD_decompressStream_wrapper(decompressStream2_result* result, ZSTD_DCtx* ctx, uintptr_t dst, size_t maxDstSize, const uintptr_t src, size_t srcSize) {
ZSTD_outBuffer outBuffer = { (void*)dst, maxDstSize, 0 };
ZSTD_inBuffer inBuffer = { (void*)src, srcSize, 0 };
static void ZSTD_decompressStream_wrapper(decompressStream2_result* result, ZSTD_DCtx* ctx,
void* dst, size_t maxDstSize, const void* src, size_t srcSize) {
ZSTD_outBuffer outBuffer = { dst, maxDstSize, 0 };
ZSTD_inBuffer inBuffer = { src, srcSize, 0 };
size_t retCode = ZSTD_decompressStream(ctx, &outBuffer, &inBuffer);
result->return_code = retCode;
@@ -155,20 +168,19 @@ func (w *Writer) Write(p []byte) (int, error) {
srcData = w.srcBuffer
}
srcPtr := C.uintptr_t(uintptr(0)) // Do not point anywhere, if src is empty
var srcPtr *byte // Do not point anywhere, if src is empty
if len(srcData) > 0 {
srcPtr = C.uintptr_t(uintptr(unsafe.Pointer(&srcData[0])))
srcPtr = &srcData[0]
}
C.ZSTD_compressStream2_wrapper(
w.resultBuffer,
w.ctx,
C.uintptr_t(uintptr(unsafe.Pointer(&w.dstBuffer[0]))),
unsafe.Pointer(&w.dstBuffer[0]),
C.size_t(len(w.dstBuffer)),
srcPtr,
unsafe.Pointer(srcPtr),
C.size_t(len(srcData)),
)
runtime.KeepAlive(p) // Ensure p is kept until here so pointer doesn't disappear during C call
ret := int(w.resultBuffer.return_code)
if err := getError(ret); err != nil {
return 0, err
@@ -203,6 +215,49 @@ func (w *Writer) Write(p []byte) (int, error) {
return len(p), err
}
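// Hedged sketch (not part of this change) of the cgo pattern the wrapper now
// follows; C.example_call is a placeholder, not a real symbol in this package.
// Passing the buffers as unsafe.Pointer arguments of the call itself keeps
// them visible to the Go runtime for the duration of the call, whereas the
// previous uintptr_t round-trip hid the references from the garbage collector.
func passBuffers(dst, src []byte) {
	var srcPtr *byte // stays nil when src is empty, so C receives NULL
	if len(src) > 0 {
		srcPtr = &src[0]
	}
	C.example_call(
		unsafe.Pointer(&dst[0]), C.size_t(len(dst)),
		unsafe.Pointer(srcPtr), C.size_t(len(src)),
	)
	runtime.KeepAlive(src) // keep the backing array alive until the call returns
}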
// Flush writes any unwritten data to the underlying io.Writer.
func (w *Writer) Flush() error {
if w.firstError != nil {
return w.firstError
}
ret := 1 // So we loop at least once
for ret > 0 {
var srcPtr *byte // Do not point anywhere, if src is empty
if len(w.srcBuffer) > 0 {
srcPtr = &w.srcBuffer[0]
}
C.ZSTD_compressStream2_flush(
w.resultBuffer,
w.ctx,
unsafe.Pointer(&w.dstBuffer[0]),
C.size_t(len(w.dstBuffer)),
unsafe.Pointer(srcPtr),
C.size_t(len(w.srcBuffer)),
)
ret = int(w.resultBuffer.return_code)
if err := getError(ret); err != nil {
return err
}
w.srcBuffer = w.srcBuffer[w.resultBuffer.bytes_consumed:]
written := int(w.resultBuffer.bytes_written)
_, err := w.underlyingWriter.Write(w.dstBuffer[:written])
if err != nil {
return err
}
if ret > 0 { // We have a hint if we need to resize the dstBuffer
w.dstBuffer = w.dstBuffer[:cap(w.dstBuffer)]
if len(w.dstBuffer) < ret {
w.dstBuffer = make([]byte, ret)
}
}
}
return nil
}
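// Hedged usage sketch (not part of this change), assuming the public
// github.com/DataDog/zstd API: Flush makes everything written so far
// decodable by the receiving side without ending the frame, which Close
// still does.
//
//	var buf bytes.Buffer
//	w := zstd.NewWriter(&buf)
//	if _, err := w.Write([]byte("hello")); err != nil {
//		log.Fatal(err)
//	}
//	if err := w.Flush(); err != nil { // buf now holds a decodable prefix
//		log.Fatal(err)
//	}
//	if err := w.Close(); err != nil { // ends the zstd frame
//		log.Fatal(err)
//	}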
// Close closes the Writer, flushing any unwritten data to the underlying
// io.Writer and freeing objects, but does not close the underlying io.Writer.
func (w *Writer) Close() error {
@@ -212,17 +267,17 @@ func (w *Writer) Close() error {
ret := 1 // So we loop at least once
for ret > 0 {
srcPtr := C.uintptr_t(uintptr(0)) // Do not point anywhere, if src is empty
var srcPtr *byte // Do not point anywhere, if src is empty
if len(w.srcBuffer) > 0 {
srcPtr = C.uintptr_t(uintptr(unsafe.Pointer(&w.srcBuffer[0])))
srcPtr = &w.srcBuffer[0]
}
C.ZSTD_compressStream2_finish(
w.resultBuffer,
w.ctx,
C.uintptr_t(uintptr(unsafe.Pointer(&w.dstBuffer[0]))),
unsafe.Pointer(&w.dstBuffer[0]),
C.size_t(len(w.dstBuffer)),
srcPtr,
unsafe.Pointer(srcPtr),
C.size_t(len(w.srcBuffer)),
)
ret = int(w.resultBuffer.return_code)
@@ -231,7 +286,11 @@ func (w *Writer) Close() error {
}
w.srcBuffer = w.srcBuffer[w.resultBuffer.bytes_consumed:]
written := int(w.resultBuffer.bytes_written)
w.underlyingWriter.Write(w.dstBuffer[:written])
_, err := w.underlyingWriter.Write(w.dstBuffer[:written])
if err != nil {
C.ZSTD_freeCStream(w.ctx)
return err
}
if ret > 0 { // We have a hint if we need to resize the dstBuffer
w.dstBuffer = w.dstBuffer[:cap(w.dstBuffer)]
@@ -388,17 +447,17 @@ func (r *reader) Read(p []byte) (int, error) {
src = src[:r.compressionLeft+n]
// C code
srcPtr := C.uintptr_t(uintptr(0)) // Do not point anywhere, if src is empty
var srcPtr *byte // Do not point anywhere, if src is empty
if len(src) > 0 {
srcPtr = C.uintptr_t(uintptr(unsafe.Pointer(&src[0])))
srcPtr = &src[0]
}
C.ZSTD_decompressStream_wrapper(
r.resultBuffer,
r.ctx,
C.uintptr_t(uintptr(unsafe.Pointer(&r.decompressionBuffer[0]))),
unsafe.Pointer(&r.decompressionBuffer[0]),
C.size_t(len(r.decompressionBuffer)),
srcPtr,
unsafe.Pointer(srcPtr),
C.size_t(len(src)),
)
retCode := int(r.resultBuffer.return_code)

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -257,7 +257,7 @@ static U64 FSE_read64(const void* memPtr)
U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
}
#endif // FSE_FORCE_MEMORY_ACCESS
#endif /* FSE_FORCE_MEMORY_ACCESS */
static U16 FSE_readLE16(const void* memPtr)
{
@@ -1078,7 +1078,7 @@ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const omax = op + maxDstSize;
BYTE* const olimit = omax-15;
BYTE* const olimit = maxDstSize < 15 ? op : omax-15;
const void* ptr = DTable;
const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
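/* Hedged illustration of the defect fixed above (not part of this change):
 * for outputs smaller than 15 bytes, 'omax - 15' computed a pointer before
 * the start of the buffer, which is undefined behaviour; the guarded form
 * pins olimit to op and simply skips the unrolled fast path. */
static void example_olimit(void)
{
    BYTE out[4];
    BYTE* const op = out;
    BYTE* const omax = op + sizeof(out);
    BYTE* const olimit = sizeof(out) < 15 ? op : omax - 15;  /* == op here */
    (void)olimit;
}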
@@ -1092,7 +1092,7 @@ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
const size_t length1 = FSE_readLE16(jumpTable);
const size_t length2 = FSE_readLE16(jumpTable+1);
const size_t length3 = FSE_readLE16(jumpTable+2);
const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; /* check coherency !! */
const char* const start1 = (const char*)(cSrc) + 6;
const char* const start2 = start1 + length1;
const char* const start3 = start2 + length2;
@@ -1150,11 +1150,11 @@ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
/* tail */
{
// bitTail = bitD1; // *much* slower : -20% !??!
/* bitTail = bitD1; */ /* *much* slower : -20% !??! */
FSE_DStream_t bitTail;
bitTail.ptr = bitD1.ptr;
bitTail.bitsConsumed = bitD1.bitsConsumed;
bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer
bitTail.bitContainer = bitD1.bitContainer; /* required in case of FSE_DStream_endOfBuffer */
bitTail.start = start1;
for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
{
@@ -1280,7 +1280,11 @@ static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, si
* Basic Types
*********************************************************/
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;
@@ -1483,7 +1487,9 @@ static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProper
static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
memcpy(dst, src, srcSize);
if (srcSize > 0) {
memcpy(dst, src, srcSize);
}
return srcSize;
}
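/* Why the new guard (hedged note, not part of this change): memcpy with a
 * null pointer is undefined behaviour even when the length is zero (C99
 * 7.1.4), and an empty block can legitimately present src == NULL with
 * srcSize == 0. The same size-check pattern is applied to every copy of a
 * possibly-empty range in the hunks below. */
static void safe_copy(void* dst, const void* src, size_t n)
{
    if (n > 0) memcpy(dst, src, n);   /* never hands NULL to memcpy */
}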
@@ -1502,7 +1508,7 @@ static size_t ZSTD_decompressLiterals(void* ctx,
if (srcSize <= 3) return ERROR(corruption_detected);
litSize = ip[1] + (ip[0]<<8);
litSize += ((ip[-3] >> 3) & 7) << 16; // mmmmh....
litSize += ((ip[-3] >> 3) & 7) << 16; /* mmmmh.... */
op = oend - litSize;
(void)ctx;
@@ -1541,7 +1547,9 @@ static size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
size_t rleSize = litbp.origSize;
if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
if (!srcSize) return ERROR(srcSize_wrong);
memset(oend - rleSize, *ip, rleSize);
if (rleSize > 0) {
memset(oend - rleSize, *ip, rleSize);
}
*litStart = oend - rleSize;
*litSize = rleSize;
ip++;
@@ -1901,8 +1909,10 @@ static size_t ZSTD_decompressSequences(
{
size_t lastLLSize = litEnd - litPtr;
if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
if (op != litPtr) memmove(op, litPtr, lastLLSize);
op += lastLLSize;
if (lastLLSize > 0) {
if (op != litPtr) memmove(op, litPtr, lastLLSize);
op += lastLLSize;
}
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -89,7 +89,11 @@ extern "C" {
* Basic Types
*****************************************************************/
#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;
@@ -189,7 +193,7 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value)
memcpy(memPtr, &value, sizeof(value));
}
#endif // MEM_FORCE_MEMORY_ACCESS
#endif /* MEM_FORCE_MEMORY_ACCESS */
MEM_STATIC U16 MEM_readLE16(const void* memPtr)
@@ -2836,7 +2840,9 @@ static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockPropertie
static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
memcpy(dst, src, srcSize);
if (srcSize > 0) {
memcpy(dst, src, srcSize);
}
return srcSize;
}
@@ -3229,8 +3235,10 @@ static size_t ZSTD_decompressSequences(
size_t lastLLSize = litEnd - litPtr;
if (litPtr > litEnd) return ERROR(corruption_detected);
if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
if (op != litPtr) memmove(op, litPtr, lastLLSize);
op += lastLLSize;
if (lastLLSize > 0) {
if (op != litPtr) memmove(op, litPtr, lastLLSize);
op += lastLLSize;
}
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -90,7 +90,11 @@ extern "C" {
* Basic Types
*****************************************************************/
#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;
@@ -191,7 +195,7 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value)
}
#endif // MEM_FORCE_MEMORY_ACCESS
#endif /* MEM_FORCE_MEMORY_ACCESS */
MEM_STATIC U16 MEM_readLE16(const void* memPtr)
@@ -2477,7 +2481,9 @@ static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockPropertie
static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
memcpy(dst, src, srcSize);
if (srcSize > 0) {
memcpy(dst, src, srcSize);
}
return srcSize;
}
@@ -2870,8 +2876,10 @@ static size_t ZSTD_decompressSequences(
size_t lastLLSize = litEnd - litPtr;
if (litPtr > litEnd) return ERROR(corruption_detected);
if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
if (op != litPtr) memmove(op, litPtr, lastLLSize);
op += lastLLSize;
if (lastLLSize > 0) {
if (op != litPtr) memmove(op, litPtr, lastLLSize);
op += lastLLSize;
}
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -52,7 +52,11 @@ extern "C" {
* Basic Types
*****************************************************************/
#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;
@@ -161,7 +165,7 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value)
memcpy(memPtr, &value, sizeof(value));
}
#endif // MEM_FORCE_MEMORY_ACCESS
#endif /* MEM_FORCE_MEMORY_ACCESS */
MEM_STATIC U16 MEM_readLE16(const void* memPtr)
@@ -2603,7 +2607,9 @@ static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockPropertie
static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
memcpy(dst, src, srcSize);
if (srcSize > 0) {
memcpy(dst, src, srcSize);
}
return srcSize;
}
@@ -3008,8 +3014,10 @@ static size_t ZSTD_decompressSequences(
size_t lastLLSize = litEnd - litPtr;
if (litPtr > litEnd) return ERROR(corruption_detected);
if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
if (op != litPtr) memcpy(op, litPtr, lastLLSize);
op += lastLLSize;
if (lastLLSize > 0) {
if (op != litPtr) memcpy(op, litPtr, lastLLSize);
op += lastLLSize;
}
}
}
@@ -3407,7 +3415,9 @@ static size_t ZBUFF_decompressWithDictionary(ZBUFF_DCtx* zbc, const void* src, s
static size_t ZBUFF_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
size_t length = MIN(maxDstSize, srcSize);
memcpy(dst, src, length);
if (length > 0) {
memcpy(dst, src, length);
}
return length;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -80,7 +80,11 @@ extern "C" {
* Basic Types
*****************************************************************/
#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# include <stdint.h>
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;
@@ -1804,7 +1808,7 @@ static size_t HUFv05_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
if (!srcSize) return ERROR(srcSize_wrong);
iSize = ip[0];
//memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */
/* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */
if (iSize >= 128) { /* special header */
if (iSize >= (242)) { /* RLE */
@@ -1879,7 +1883,7 @@ size_t HUFv05_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
HUFv05_DEltX2* const dt = (HUFv05_DEltX2*)dtPtr;
HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
//memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
iSize = HUFv05_readStats(huffWeight, HUFv05_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
if (HUFv05_isError(iSize)) return iSize;
@@ -2210,7 +2214,7 @@ size_t HUFv05_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize)
HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX4) == sizeof(unsigned)); /* if compilation fails here, assertion is false */
if (memLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
//memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */
/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
iSize = HUFv05_readStats(weightList, HUFv05_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
if (HUFv05_isError(iSize)) return iSize;
@@ -2539,9 +2543,9 @@ size_t HUFv05_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cS
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
//return HUFv05_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
//return HUFv05_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
//return HUFv05_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */
/* return HUFv05_decompress4X2(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams single-symbol decoding */
/* return HUFv05_decompress4X4(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams double-symbols decoding */
/* return HUFv05_decompress4X6(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams quad-symbols decoding */
}
/*
zstd - standard compression library
@@ -3362,8 +3366,10 @@ static size_t ZSTDv05_decompressSequences(
size_t lastLLSize = litEnd - litPtr;
if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
memcpy(op, litPtr, lastLLSize);
op += lastLLSize;
if (lastLLSize > 0) {
memcpy(op, litPtr, lastLLSize);
op += lastLLSize;
}
}
return op-ostart;
@@ -3791,7 +3797,9 @@ static size_t ZBUFFv05_blockHeaderSize = 3;
static size_t ZBUFFv05_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
size_t length = MIN(maxDstSize, srcSize);
memcpy(dst, src, length);
if (length > 0) {
memcpy(dst, src, length);
}
return length;
}
@@ -3928,7 +3936,7 @@ size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* zbc, void* dst, size_t* maxDst
*maxDstSizePtr = 0;
return headerSize - zbc->hPos;
}
// zbc->stage = ZBUFFv05ds_decodeHeader; break; /* useless : stage follows */
/* zbc->stage = ZBUFFv05ds_decodeHeader; break; */ /* useless : stage follows */
}
/* fall-through */
case ZBUFFv05ds_decodeHeader:
@@ -4001,7 +4009,7 @@ size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* zbc, void* dst, size_t* maxDst
if (!decodedSize) { zbc->stage = ZBUFFv05ds_read; break; } /* this was just a header */
zbc->outEnd = zbc->outStart + decodedSize;
zbc->stage = ZBUFFv05ds_flush;
// break; /* ZBUFFv05ds_flush follows */
/* break; */ /* ZBUFFv05ds_flush follows */
}
}
/* fall-through */
