storj/pkg/ranger/content.go

575 lines
14 KiB
Go
Raw Normal View History

2018-04-11 13:46:19 +01:00
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the Go project's LICENSE file.
//
// This file was lifted wholesale from the Go standard library with
// very minor tweaks by Storj Labs, Inc., 2018
package ranger
import (
"context"
2018-04-11 13:46:19 +01:00
"errors"
"fmt"
"io"
"io/ioutil"
"log"
2018-04-11 13:46:19 +01:00
"mime"
"mime/multipart"
"net/http"
"net/textproto"
"path/filepath"
"strconv"
"strings"
"time"
)
// ServeContent is the Go standard library's http.ServeContent but modified to
// work with Rangers.
func ServeContent(ctx context.Context, w http.ResponseWriter, r *http.Request, name string, modtime time.Time, content Ranger) {
defer mon.Task()(&ctx)(nil)
2018-04-11 13:46:19 +01:00
setLastModified(w, modtime)
done, rangeReq := checkPreconditions(w, r, modtime)
if done {
return
}
code := http.StatusOK
size := content.Size()
if size <= 0 {
w.WriteHeader(code)
return
}
// handle Content-Range header.
sendSize := size
sendContent := func() (io.ReadCloser, error) {
return content.Range(ctx, 0, size)
2018-04-11 13:46:19 +01:00
}
ranges, err := parseRange(rangeReq, size)
if err != nil {
if err == errNoOverlap {
w.Header().Set("Content-Range", fmt.Sprintf("bytes */%d", size))
}
http.Error(w, err.Error(), http.StatusRequestedRangeNotSatisfiable)
return
}
if sumRangesSize(ranges) > size {
// The total number of bytes in all the ranges
// is larger than the size of the file by
// itself, so this is probably an attack, or a
// dumb client. Ignore the range request.
ranges = nil
}
switch {
case len(ranges) == 1:
// RFC 2616, Section 14.16:
// "When an HTTP message includes the content of a single
// range (for example, a response to a request for a
// single range, or to a request for a set of ranges
// that overlap without any holes), this content is
// transmitted with a Content-Range header, and a
// Content-Length header showing the number of bytes
// actually transferred.
// ...
// A response to a request for a single range MUST NOT
// be sent using the multipart/byteranges media type."
ra := ranges[0]
sendContent = func() (io.ReadCloser, error) { return content.Range(ctx, ra.start, ra.length) }
2018-04-11 13:46:19 +01:00
sendSize = ra.length
code = http.StatusPartialContent
w.Header().Set("Content-Range", ra.contentRange(size))
case len(ranges) > 1:
// If Content-Type isn't set, use the file's extension to find it, but
// if the Content-Type is unset explicitly, do not sniff the type.
ctypes, haveType := w.Header()["Content-Type"]
var ctype string
if !haveType {
ctype = mime.TypeByExtension(filepath.Ext(name))
if ctype == "" {
// read a chunk to decide between utf-8 text and binary
var buf [sniffLen]byte
amount := content.Size()
if amount > sniffLen {
amount = sniffLen
}
// TODO: cache this somewhere so we don't have to pull it out again
r, err := content.Range(ctx, 0, amount)
if err == nil {
defer func() {
if err := r.Close(); err != nil {
log.Printf("Error Closing ranger: %s", err)
}
}()
n, err := io.ReadFull(r, buf[:])
if err != nil {
log.Printf("Error Reading full: %s", err)
}
ctype = http.DetectContentType(buf[:n])
}
}
if ctype != "" {
w.Header().Set("Content-Type", ctype)
}
} else if len(ctypes) > 0 {
ctype = ctypes[0]
}
2018-04-11 13:46:19 +01:00
sendSize = rangesMIMESize(ranges, ctype, size)
code = http.StatusPartialContent
pr, pw := io.Pipe()
mw := multipart.NewWriter(pw)
w.Header().Set("Content-Type",
Node Identity (#193) * peertls: don't log errors for double close understood that this part of the code is undergoing heavy change right now, but just want to make sure this fix gets incorporated somewhere * git cleanup: node-id stuff * cleanup * rename identity_util.go * wip `CertificateAuthority` refactor * refactoring * gitignore update * wip * Merge remote-tracking branch 'storj/doubleclose' into node-id3 * storj/doubleclose: peertls: don't log errors for double close * add peertls tests & gomports * wip: + refactor + style changes + cleanup + [wip] add version to CA and identity configs + [wip] heavy client setup * refactor * wip: + refactor + style changes + add `CAConfig.Load` + add `CAConfig.Save` * wip: + add `LoadOrCreate` and `Create` to CA and Identity configs + add overwrite to CA and identity configs + heavy client setup + refactor + style changes + cleanup * wip * fixing things * fixing things * wip hc setup * hc setup: + refactor + bugfixing * improvements based on reveiw feedback * goimports * improvements: + responding to review feedback + refactor * feedback-based improvements * feedback-based improvements * feedback-based improvements * feedback-based improvements * feedback-based improvements * feedback-based improvements * cleanup * refactoring CA and Identity structs * Merge branch 'master' into node-id3 * move version field to setup config structs for CA and identity * fix typo * responding to revieiw feedback * responding to revieiw feedback * responding to revieiw feedback * responding to revieiw feedback * responding to revieiw feedback * responding to revieiw feedback * Merge branch 'master' into node-id3 * fix gateway setup finally * go imports * fix `FullCertificateAuthority.GenerateIdentity` * cleanup overlay tests * bugfixing * update ca/identity setup * go imports * fix peertls test copy/paste fail * responding to review feedback * setup tweaking * update farmer setup
2018-08-13 09:39:45 +01:00
"multipart/byteranges; boundary="+mw.Boundary())
sendContent = func() (io.ReadCloser, error) { return ioutil.NopCloser(pr), nil }
2018-04-11 13:46:19 +01:00
// cause writing goroutine to fail and exit if CopyN doesn't finish.
defer func() {
if err := pr.Close(); err != nil {
log.Printf("Error Closing pipereader: %s", err)
}
}()
2018-04-11 13:46:19 +01:00
go func() {
for _, ra := range ranges {
part, err := mw.CreatePart(ra.mimeHeader(ctype, size))
if err != nil {
if err := pw.CloseWithError(err); err != nil {
log.Printf("Error Closing pipewriter with errors: %s", err)
}
2018-04-11 13:46:19 +01:00
return
}
partReader, err := content.Range(ctx, ra.start, ra.length)
if err != nil {
if err := pw.CloseWithError(err); err != nil {
log.Printf("Error Closing pipewriter with errors: %s", err)
}
return
}
defer func() {
if err := partReader.Close(); err != nil {
log.Printf("Error Closing partReader: %s", err)
}
}()
2018-04-11 13:46:19 +01:00
if _, err := io.Copy(part, partReader); err != nil {
if err := pw.CloseWithError(err); err != nil {
log.Printf("Error Closing pipewriter with errors: %s", err)
}
2018-04-11 13:46:19 +01:00
return
}
}
if err := mw.Close(); err != nil {
log.Printf("Error Closing writer: %s", err)
}
if err := pw.Close(); err != nil {
log.Printf("Error closing pipewriter: %s", err)
}
2018-04-11 13:46:19 +01:00
}()
}
w.Header().Set("Accept-Ranges", "bytes")
if w.Header().Get("Content-Encoding") == "" {
w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10))
}
w.WriteHeader(code)
2018-04-25 15:55:26 +01:00
if r.Method != http.MethodHead {
r, err := sendContent()
if err != nil {
return
}
if _, err := io.CopyN(w, r, sendSize); err != nil {
log.Printf("Error Copying bytes: %s", err)
}
if err := r.Close(); err != nil {
log.Printf("Error closing: %s", err)
}
2018-04-11 13:46:19 +01:00
}
}
var unixEpochTime = time.Unix(0, 0)
// isZeroTime reports whether t is obviously unspecified (either zero or
// Unix()=0).
func isZeroTime(t time.Time) bool {
return t.IsZero() || t.Equal(unixEpochTime)
}
func setLastModified(w http.ResponseWriter, modtime time.Time) {
if w == nil {
return
}
2018-04-11 13:46:19 +01:00
if !isZeroTime(modtime) {
w.Header().Set("Last-Modified", modtime.UTC().Format(http.TimeFormat))
}
}
// checkPreconditions evaluates request preconditions and reports whether a
// precondition resulted in sending StatusNotModified or
// StatusPreconditionFailed.
func checkPreconditions(w http.ResponseWriter, r *http.Request, modtime time.Time) (done bool, rangeHeader string) {
2018-04-11 13:46:19 +01:00
// This function carefully follows RFC 7232 section 6.
ch := checkIfMatch(w, r)
if ch == condNone {
ch = checkIfUnmodifiedSince(r, modtime)
}
if ch == condFalse {
w.WriteHeader(http.StatusPreconditionFailed)
return true, ""
}
switch checkIfNoneMatch(w, r) {
case condFalse:
2018-04-25 15:55:26 +01:00
if r.Method == http.MethodGet || r.Method == http.MethodHead {
2018-04-11 13:46:19 +01:00
writeNotModified(w)
return true, ""
}
2018-04-11 14:18:35 +01:00
w.WriteHeader(http.StatusPreconditionFailed)
return true, ""
2018-04-11 13:46:19 +01:00
case condNone:
if checkIfModifiedSince(r, modtime) == condFalse {
writeNotModified(w)
return true, ""
}
}
rangeHeader = r.Header.Get("Range")
if rangeHeader != "" && checkIfRange(w, r, modtime) == condFalse {
rangeHeader = ""
}
return false, rangeHeader
}
// condResult is the result of an HTTP request precondition check.
// See https://tools.ietf.org/html/rfc7232 section 3.
type condResult int
const (
Node Identity (#193) * peertls: don't log errors for double close understood that this part of the code is undergoing heavy change right now, but just want to make sure this fix gets incorporated somewhere * git cleanup: node-id stuff * cleanup * rename identity_util.go * wip `CertificateAuthority` refactor * refactoring * gitignore update * wip * Merge remote-tracking branch 'storj/doubleclose' into node-id3 * storj/doubleclose: peertls: don't log errors for double close * add peertls tests & gomports * wip: + refactor + style changes + cleanup + [wip] add version to CA and identity configs + [wip] heavy client setup * refactor * wip: + refactor + style changes + add `CAConfig.Load` + add `CAConfig.Save` * wip: + add `LoadOrCreate` and `Create` to CA and Identity configs + add overwrite to CA and identity configs + heavy client setup + refactor + style changes + cleanup * wip * fixing things * fixing things * wip hc setup * hc setup: + refactor + bugfixing * improvements based on reveiw feedback * goimports * improvements: + responding to review feedback + refactor * feedback-based improvements * feedback-based improvements * feedback-based improvements * feedback-based improvements * feedback-based improvements * feedback-based improvements * cleanup * refactoring CA and Identity structs * Merge branch 'master' into node-id3 * move version field to setup config structs for CA and identity * fix typo * responding to revieiw feedback * responding to revieiw feedback * responding to revieiw feedback * responding to revieiw feedback * responding to revieiw feedback * responding to revieiw feedback * Merge branch 'master' into node-id3 * fix gateway setup finally * go imports * fix `FullCertificateAuthority.GenerateIdentity` * cleanup overlay tests * bugfixing * update ca/identity setup * go imports * fix peertls test copy/paste fail * responding to review feedback * setup tweaking * update farmer setup
2018-08-13 09:39:45 +01:00
condNone condResult = iota
2018-04-11 13:46:19 +01:00
condTrue
condFalse
)
func checkIfMatch(w http.ResponseWriter, r *http.Request) condResult {
im := r.Header.Get("If-Match")
if im == "" {
return condNone
}
for {
im = textproto.TrimString(im)
if len(im) == 0 {
break
}
if im[0] == ',' {
im = im[1:]
continue
}
if im[0] == '*' {
return condTrue
}
etag, remain := scanETag(im)
if etag == "" {
break
}
if etagStrongMatch(etag, w.Header().Get("Etag")) {
return condTrue
}
im = remain
}
return condFalse
}
func checkIfUnmodifiedSince(r *http.Request, modtime time.Time) condResult {
ius := r.Header.Get("If-Unmodified-Since")
if ius == "" || isZeroTime(modtime) {
return condNone
}
if t, err := http.ParseTime(ius); err == nil {
// The Date-Modified header truncates sub-second precision, so
// use mtime < t+1s instead of mtime <= t to check for unmodified.
if modtime.Before(t.Add(1 * time.Second)) {
return condTrue
}
return condFalse
}
return condNone
}
func checkIfNoneMatch(w http.ResponseWriter, r *http.Request) condResult {
inm := r.Header.Get("If-None-Match")
if inm == "" {
return condNone
}
buf := inm
for {
buf = textproto.TrimString(buf)
if len(buf) == 0 {
break
}
if buf[0] == ',' {
buf = buf[1:]
continue
2018-04-11 13:46:19 +01:00
}
if buf[0] == '*' {
return condFalse
}
etag, remain := scanETag(buf)
if etag == "" {
break
}
if etagWeakMatch(etag, w.Header().Get("Etag")) {
return condFalse
}
buf = remain
}
return condTrue
}
func checkIfModifiedSince(r *http.Request, modtime time.Time) condResult {
2018-04-25 15:55:26 +01:00
if r.Method != http.MethodGet && r.Method != http.MethodHead {
2018-04-11 13:46:19 +01:00
return condNone
}
ims := r.Header.Get("If-Modified-Since")
if ims == "" || isZeroTime(modtime) {
return condNone
}
t, err := http.ParseTime(ims)
if err != nil {
return condNone
}
// The Date-Modified header truncates sub-second precision, so
// use mtime < t+1s instead of mtime <= t to check for unmodified.
if modtime.Before(t.Add(1 * time.Second)) {
return condFalse
}
return condTrue
}
func checkIfRange(w http.ResponseWriter, r *http.Request, modtime time.Time) (rv condResult) {
2018-04-25 15:55:26 +01:00
if r.Method != http.MethodGet && r.Method != http.MethodHead {
2018-04-11 13:46:19 +01:00
return condNone
}
ir := r.Header.Get("If-Range")
if ir == "" {
return condNone
}
etag, _ := scanETag(ir)
if etag != "" {
if etagStrongMatch(etag, w.Header().Get("Etag")) {
return condTrue
}
2018-04-11 14:18:35 +01:00
return condFalse
2018-04-11 13:46:19 +01:00
}
// The If-Range value is typically the ETag value, but it may also be
// the modtime date. See golang.org/issue/8367.
if isZeroTime(modtime) {
2018-04-11 13:46:19 +01:00
return condFalse
}
t, err := http.ParseTime(ir)
if err != nil {
return condFalse
}
if t.Unix() == modtime.Unix() {
return condTrue
}
return condFalse
}
func writeNotModified(w http.ResponseWriter) {
// RFC 7232 section 4.1:
// a sender SHOULD NOT generate representation metadata other than the
// above listed fields unless said metadata exists for the purpose of
// guiding cache updates (e.g., Last-Modified might be useful if the
// response does not have an ETag field).
h := w.Header()
delete(h, "Content-Type")
delete(h, "Content-Length")
if h.Get("Etag") != "" {
delete(h, "Last-Modified")
}
w.WriteHeader(http.StatusNotModified)
}
// scanETag determines if a syntactically valid ETag is present at s. If so,
// the ETag and remaining text after consuming ETag is returned. Otherwise,
// it returns "", "".
func scanETag(s string) (etag string, remain string) {
s = textproto.TrimString(s)
start := 0
if strings.HasPrefix(s, "W/") {
start = 2
}
if len(s[start:]) < 2 || s[start] != '"' {
return "", ""
}
// ETag is either W/"text" or "text".
// See RFC 7232 2.3.
for i := start + 1; i < len(s); i++ {
c := s[i]
switch {
// Character values allowed in ETags.
case c == 0x21 || c >= 0x23 && c <= 0x7A || c >= 0x80:
2018-04-11 13:46:19 +01:00
case c == '"':
return s[:i+1], s[i+1:]
default:
return "", ""
}
}
return "", ""
}
// etagStrongMatch reports whether a and b match using strong ETag comparison.
// Assumes a and b are valid ETags.
func etagStrongMatch(a, b string) bool {
return a == b && a != "" && a[0] == '"'
}
// etagWeakMatch reports whether a and b match using weak ETag comparison.
// Assumes a and b are valid ETags.
func etagWeakMatch(a, b string) bool {
return strings.TrimPrefix(a, "W/") == strings.TrimPrefix(b, "W/")
}
// The algorithm uses at most sniffLen bytes to make its decision.
const sniffLen = 512
// httpRange specifies the byte range to be sent to the client.
type httpRange struct {
start, length int64
}
func (r httpRange) contentRange(size int64) string {
return fmt.Sprintf("bytes %d-%d/%d", r.start, r.start+r.length-1, size)
}
func (r httpRange) mimeHeader(contentType string, size int64) (rv textproto.MIMEHeader) {
2018-04-11 13:46:19 +01:00
return textproto.MIMEHeader{
"Content-Range": {r.contentRange(size)},
"Content-Type": {contentType},
}
}
// parseRange parses a Range header string as per RFC 2616.
// errNoOverlap is returned if none of the ranges overlap.
func parseRange(s string, size int64) ([]httpRange, error) {
if s == "" {
return nil, nil // header not present
}
const b = "bytes="
if !strings.HasPrefix(s, b) {
return nil, errors.New("invalid range")
}
2018-04-25 15:55:26 +01:00
2018-04-11 13:46:19 +01:00
var ranges []httpRange
noOverlap := false
for _, ra := range strings.Split(s[len(b):], ",") {
ra = strings.TrimSpace(ra)
if ra == "" {
continue
}
i := strings.Index(ra, "-")
if i < 0 {
return nil, errors.New("invalid range")
}
start, end := strings.TrimSpace(ra[:i]), strings.TrimSpace(ra[i+1:])
var r httpRange
if start == "" {
// If no start is specified, end specifies the
// range start relative to the end of the file.
i, err := strconv.ParseInt(end, 10, 64)
if err != nil {
return nil, errors.New("invalid range")
}
if i > size {
i = size
}
r.start = size - i
r.length = size - r.start
} else {
i, err := strconv.ParseInt(start, 10, 64)
if err != nil || i < 0 {
return nil, errors.New("invalid range")
}
if i >= size {
// If the range begins after the size of the content,
// then it does not overlap.
noOverlap = true
continue
}
r.start = i
if end == "" {
// If no end is specified, range extends to end of the file.
r.length = size - r.start
} else {
i, err := strconv.ParseInt(end, 10, 64)
if err != nil || r.start > i {
return nil, errors.New("invalid range")
}
if i >= size {
i = size - 1
}
r.length = i - r.start + 1
}
}
ranges = append(ranges, r)
}
if noOverlap && len(ranges) == 0 {
// The specified ranges did not overlap with the content.
return nil, errNoOverlap
}
return ranges, nil
}
// countingWriter counts how many bytes have been written to it.
type countingWriter int64
func (w *countingWriter) Write(p []byte) (n int, err error) {
*w += countingWriter(len(p))
return len(p), nil
}
// rangesMIMESize returns the number of bytes it takes to encode the
// provided ranges as a multipart response.
func rangesMIMESize(ranges []httpRange, contentType string, contentSize int64) (encSize int64) {
2018-04-11 13:46:19 +01:00
var w countingWriter
mw := multipart.NewWriter(&w)
for _, ra := range ranges {
if _, err := mw.CreatePart(ra.mimeHeader(contentType, contentSize)); err != nil {
log.Printf("Failed to Create Part: %s", err)
}
2018-04-11 13:46:19 +01:00
encSize += ra.length
}
if err := mw.Close(); err != nil {
log.Printf("Failed to close: %s", err)
}
2018-04-11 13:46:19 +01:00
encSize += int64(w)
return encSize
2018-04-11 13:46:19 +01:00
}
func sumRangesSize(ranges []httpRange) (size int64) {
for _, ra := range ranges {
size += ra.length
}
return size
2018-04-11 13:46:19 +01:00
}
// errNoOverlap is returned by serveContent's parseRange if first-byte-pos of
// all of the byte-range-spec values is greater than the content size.
var errNoOverlap = errors.New("invalid range: failed to overlap")