// Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package sumdb import ( "bytes" "errors" "fmt" "log" "strings" "sync" "golang.org/x/mod/module" "golang.org/x/mod/sumdb/note" "golang.org/x/mod/sumdb/tlog" ) // A ClientOps provides the external operations // (file caching, HTTP fetches, and so on) needed by the Client. // The methods must be safe for concurrent use by multiple goroutines. type ClientOps interface { // ReadRemote reads and returns the content served at the given path // on the remote database server. The path begins with "/lookup" or "/tile/", // and there is no need to parse the path in any way. // It is the implementation's responsibility to turn that path into a full URL // and make the HTTP request. ReadRemote should return an error for // any non-200 HTTP response status. ReadRemote(path string) ([]byte, error) // ReadConfig reads and returns the content of the named configuration file. // There are only a fixed set of configuration files. // // "key" returns a file containing the verifier key for the server. // // serverName + "/latest" returns a file containing the latest known // signed tree from the server. // To signal that the client wishes to start with an "empty" signed tree, // ReadConfig can return a successful empty result (0 bytes of data). ReadConfig(file string) ([]byte, error) // WriteConfig updates the content of the named configuration file, // changing it from the old []byte to the new []byte. // If the old []byte does not match the stored configuration, // WriteConfig must return ErrWriteConflict. // Otherwise, WriteConfig should atomically replace old with new. // The "key" configuration file is never written using WriteConfig. WriteConfig(file string, old, new []byte) error // Log prints the given log message (such as with log.Print) Log(msg string) // SecurityError prints the given security error log message. // The Client returns ErrSecurity from any operation that invokes SecurityError, // but the return value is mainly for testing. In a real program, // SecurityError should typically print the message and call log.Fatal or os.Exit. SecurityError(msg string) } // ErrWriteConflict signals a write conflict during Client.WriteConfig. var ErrWriteConflict = errors.New("write conflict") // ErrSecurity is returned by Client operations that invoke Client.SecurityError. var ErrSecurity = errors.New("security error: misbehaving server") // A Client is a client connection to a checksum database. // All the methods are safe for simultaneous use by multiple goroutines. type Client struct { ops ClientOps // access to operations in the external world // one-time initialized data initOnce sync.Once initErr error // init error, if any name string // name of accepted verifier verifiers note.Verifiers // accepted verifiers (just one, but Verifiers for note.Open) tileReader tileReader tileHeight int nosumdb string record parCache // cache of record lookup, keyed by path@vers tileCache parCache // cache of c.readTile, keyed by tile latestMu sync.Mutex latest tlog.Tree // latest known tree head latestMsg []byte // encoded signed note for latest tileSavedMu sync.Mutex tileSaved map[tlog.Tile]bool // which tiles have been saved using c.ops.WriteCache already } // NewClient returns a new Client using the given Client. func NewClient(ops ClientOps) *Client { return &Client{ ops: ops, } } // init initiailzes the client (if not already initialized) // and returns any initialization error. func (c *Client) init() error { c.initOnce.Do(c.initWork) return c.initErr } // initWork does the actual initialization work. func (c *Client) initWork() { log.Printf("initializing sumdb client") defer func() { log.Printf("done init") if c.initErr != nil { c.initErr = fmt.Errorf("initializing sumdb.Client: %v", c.initErr) } }() c.tileReader.c = c c.tileHeight = 8 c.tileSaved = make(map[tlog.Tile]bool) vkey, err := c.ops.ReadConfig("key") if err != nil { c.initErr = err return } verifier, err := note.NewVerifier(strings.TrimSpace(string(vkey))) if err != nil { c.initErr = err return } c.verifiers = note.VerifierList(verifier) c.name = verifier.Name() log.Printf("init: reading latest") data, err := c.ops.ReadConfig(c.name + "/latest") if err != nil { c.initErr = err return } log.Printf("init: merging latest") if err := c.mergeLatest(data); err != nil { c.initErr = err return } } // Lookup returns the go.sum lines for the given module path and version. // The version may end in a /go.mod suffix, in which case Lookup returns // the go.sum lines for the module's go.mod-only hash. func (c *Client) Lookup(path, vers string) (lines []string, err error) { defer func() { if err != nil { err = fmt.Errorf("%s@%s: %v", path, vers, err) } }() if err := c.init(); err != nil { return nil, err } // Prepare encoded cache filename / URL. epath, err := module.EscapePath(path) if err != nil { return nil, err } evers, err := module.EscapeVersion(strings.TrimSuffix(vers, "/go.mod")) if err != nil { return nil, err } remotePath := "/lookup/" + epath + "@" + evers file := c.name + remotePath // Fetch the data. // The lookupCache avoids redundant ReadCache/GetURL operations // (especially since go.sum lines tend to come in pairs for a given // path and version) and also avoids having multiple of the same // request in flight at once. type cached struct { data []byte err error } result := c.record.Do(file, func() interface{} { data, err := c.ops.ReadRemote(remotePath) if err != nil { return cached{nil, err} } // Validate the record before using it for anything. id, text, treeMsg, err := tlog.ParseRecord(data) if err != nil { return cached{nil, err} } if err := c.mergeLatest(treeMsg); err != nil { return cached{nil, err} } if err := c.checkRecord(id, text); err != nil { return cached{nil, err} } return cached{data, nil} }).(cached) if result.err != nil { return nil, result.err } // Extract the lines for the specific version we want // (with or without /go.mod). prefix := path + " " + vers + " " var hashes []string for _, line := range strings.Split(string(result.data), "\n") { if strings.HasPrefix(line, prefix) { hashes = append(hashes, line) } } return hashes, nil } func (c *Client) FetchLatest() (*tlog.Tree, error) { if err := c.init(); err != nil { return nil, err } msg, err := c.ops.ReadRemote("/latest") if err != nil { return nil, err } note, err := note.Open(msg, c.verifiers) if err != nil { return nil, fmt.Errorf("reading latest note: %v\nnote:\n%s", err, msg) } tree, err := tlog.ParseTree([]byte(note.Text)) if err != nil { return nil, fmt.Errorf("reading tree: %v\ntree:\n%s", err, note.Text) } return &tree, nil } func (c *Client) FetchTreeProof(t *tlog.Tree) error { log.Printf("fetching proof for tree %v", t) if err := c.checkTrees(tlog.Tree{}, nil, *t, nil); err != nil { log.Printf("check tree error: %v", err) return err } return nil } // mergeLatest merges the tree head in msg // with the Client's current latest tree head, // ensuring the result is a consistent timeline. // If the result is inconsistent, mergeLatest calls c.ops.SecurityError // with a detailed security error message and then // (only if c.ops.SecurityError does not exit the program) returns ErrSecurity. // If the Client's current latest tree head moves forward, // mergeLatest updates the underlying configuration file as well, // taking care to merge any independent updates to that configuration. func (c *Client) mergeLatest(msg []byte) error { log.Printf("merge latest: start") // Merge msg into our in-memory copy of the latest tree head. when, err := c.mergeLatestMem(msg) if err != nil { return err } log.Printf("merge latest: when=%d", when) if when != msgFuture { // msg matched our present or was in the past. // No change to our present, so no update of config file. log.Printf("merge latest: future; done") return nil } // Flush our extended timeline back out to the configuration file. // If the configuration file has been updated in the interim, // we need to merge any updates made there as well. // Note that writeConfig is an atomic compare-and-swap. for { log.Printf("merge latest: reading latest...") msg, err := c.ops.ReadConfig(c.name + "/latest") if err != nil { return err } when, err := c.mergeLatestMem(msg) if err != nil { return err } if when != msgPast { // msg matched our present or was from the future, // and now our in-memory copy matches. return nil } // msg (== config) is in the past, so we need to update it. c.latestMu.Lock() latestMsg := c.latestMsg c.latestMu.Unlock() if err := c.ops.WriteConfig(c.name+"/latest", msg, latestMsg); err != ErrWriteConflict { // Success or a non-write-conflict error. return err } } } const ( msgPast = 1 + iota msgNow msgFuture ) // mergeLatestMem is like mergeLatest but is only concerned with // updating the in-memory copy of the latest tree head (c.latest) // not the configuration file. // The when result explains when msg happened relative to our // previous idea of c.latest: // msgPast means msg was from before c.latest, // msgNow means msg was exactly c.latest, and // msgFuture means msg was from after c.latest, which has now been updated. func (c *Client) mergeLatestMem(msg []byte) (when int, err error) { log.Printf("mergeLatestMem: start with msg=%v", msg) if len(msg) == 0 { // Accept empty msg as the unsigned, empty timeline. c.latestMu.Lock() latest := c.latest c.latestMu.Unlock() if latest.N == 0 { return msgNow, nil } return msgPast, nil } log.Printf("mergeMemLatest: reading tree note: %s", msg) note, err := note.Open(msg, c.verifiers) if err != nil { return 0, fmt.Errorf("reading tree note: %v\nnote:\n%s", err, msg) } tree, err := tlog.ParseTree([]byte(note.Text)) if err != nil { return 0, fmt.Errorf("reading tree: %v\ntree:\n%s", err, note.Text) } // Other lookups may be calling mergeLatest with other heads, // so c.latest is changing underfoot. We don't want to hold the // c.mu lock during tile fetches, so loop trying to update c.latest. c.latestMu.Lock() latest := c.latest latestMsg := c.latestMsg c.latestMu.Unlock() for { // If the tree head looks old, check that it is on our timeline. if tree.N <= latest.N { log.Printf("mergeLatestMem: looks old") if err := c.checkTrees(tree, msg, latest, latestMsg); err != nil { return 0, err } if tree.N < latest.N { return msgPast, nil } return msgNow, nil } // The tree head looks new. Check that we are on its timeline and try to move our timeline forward. if err := c.checkTrees(latest, latestMsg, tree, msg); err != nil { log.Printf("mergeLatestMem: looks new") return 0, err } log.Printf("mergeLatestMem: install msg if possible") // Install our msg if possible. // Otherwise we will go around again. c.latestMu.Lock() installed := false if c.latest == latest { installed = true c.latest = tree c.latestMsg = msg } else { latest = c.latest latestMsg = c.latestMsg } c.latestMu.Unlock() if installed { return msgFuture, nil } } } // checkTrees checks that older (from olderNote) is contained in newer (from newerNote). // If an error occurs, such as malformed data or a network problem, checkTrees returns that error. // If on the other hand checkTrees finds evidence of misbehavior, it prepares a detailed // message and calls log.Fatal. func (c *Client) checkTrees(older tlog.Tree, olderNote []byte, newer tlog.Tree, newerNote []byte) error { log.Printf("checking trees: older=%v, newer=%v", older, newer) thr := tlog.TileHashReader(newer, &c.tileReader) h, err := tlog.TreeHash(older.N, thr) if err != nil { if older.N == newer.N { return fmt.Errorf("checking tree#%d: %v", older.N, err) } return fmt.Errorf("checking tree#%d against tree#%d: %v", older.N, newer.N, err) } log.Printf("computed tree hash %v", h) if h == older.Hash { log.Printf("hashes match: %v", older.Hash) return nil } log.Printf("HASHES DO NOT MATCH h=%v older=%v", h, older.Hash) // Detected a fork in the tree timeline. // Start by reporting the inconsistent signed tree notes. var buf bytes.Buffer fmt.Fprintf(&buf, "SECURITY ERROR\n") fmt.Fprintf(&buf, "go.sum database server misbehavior detected!\n\n") indent := func(b []byte) []byte { return bytes.Replace(b, []byte("\n"), []byte("\n\t"), -1) } fmt.Fprintf(&buf, "old database:\n\t%s\n", indent(olderNote)) fmt.Fprintf(&buf, "new database:\n\t%s\n", indent(newerNote)) // The notes alone are not enough to prove the inconsistency. // We also need to show that the newer note's tree hash for older.N // does not match older.Hash. The consumer of this report could // of course consult the server to try to verify the inconsistency, // but we are holding all the bits we need to prove it right now, // so we might as well print them and make the report not depend // on the continued availability of the misbehaving server. // Preparing this data only reuses the tiled hashes needed for // tlog.TreeHash(older.N, thr) above, so assuming thr is caching tiles, // there are no new access to the server here, and these operations cannot fail. fmt.Fprintf(&buf, "proof of misbehavior:\n\t%v", h) if p, err := tlog.ProveTree(newer.N, older.N, thr); err != nil { fmt.Fprintf(&buf, "\tinternal error: %v\n", err) } else if err := tlog.CheckTree(p, newer.N, newer.Hash, older.N, h); err != nil { fmt.Fprintf(&buf, "\tinternal error: generated inconsistent proof\n") } else { for _, h := range p { fmt.Fprintf(&buf, "\n\t%v", h) } } c.ops.SecurityError(buf.String()) return ErrSecurity } // checkRecord checks that record #id's hash matches data. func (c *Client) checkRecord(id int64, data []byte) error { log.Printf("checking record %d with %s", id, data) c.latestMu.Lock() latest := c.latest c.latestMu.Unlock() if id >= latest.N { return fmt.Errorf("cannot validate record %d in tree of size %d", id, latest.N) } hashes, err := tlog.TileHashReader(latest, &c.tileReader).ReadHashes([]int64{tlog.StoredHashIndex(0, id)}) if err != nil { return err } log.Printf("checkRecord: got hashes %v", hashes) if hashes[0] == tlog.RecordHash(data) { log.Printf("tile hash %s matches record hash %s", hashes[0], data) return nil } return fmt.Errorf("cannot authenticate record data in server response") } // tileReader is a *Client wrapper that implements tlog.TileReader. // The separate type avoids exposing the ReadTiles and SaveTiles // methods on Client itself. type tileReader struct { c *Client } func (r *tileReader) Height() int { log.Printf("checking height") return r.c.tileHeight } // ReadTiles reads and returns the requested tiles, // either from the on-disk cache or the server. func (r *tileReader) ReadTiles(tiles []tlog.Tile) ([][]byte, error) { log.Printf("reading %d tiles", len(tiles)) // Read all the tiles in parallel. data := make([][]byte, len(tiles)) errs := make([]error, len(tiles)) var wg sync.WaitGroup for i, tile := range tiles { wg.Add(1) go func(i int, tile tlog.Tile) { defer wg.Done() log.Printf("reading tile %v", tile) data[i], errs[i] = r.c.readTile(tile) }(i, tile) } wg.Wait() for _, err := range errs { if err != nil { return nil, err } } return data, nil } // tileRemotePath returns the remote path for the tile. func (c *Client) tileRemotePath(tile tlog.Tile) string { return "/" + tile.Path() } // readTile reads a single tile, either from the on-disk cache or the server. func (c *Client) readTile(tile tlog.Tile) ([]byte, error) { return c.ops.ReadRemote(c.tileRemotePath(tile)) } // markTileSaved records that tile is already present in the on-disk cache, // so that a future SaveTiles for that tile can be ignored. func (c *Client) markTileSaved(tile tlog.Tile) { c.tileSavedMu.Lock() c.tileSaved[tile] = true c.tileSavedMu.Unlock() } // SaveTiles saves the now validated tiles. func (r *tileReader) SaveTiles(tiles []tlog.Tile, data [][]byte) { c := r.c // Determine which tiles need saving. // (Tiles that came from the cache need not be saved back.) save := make([]bool, len(tiles)) c.tileSavedMu.Lock() for i, tile := range tiles { if !c.tileSaved[tile] { save[i] = true c.tileSaved[tile] = true } } c.tileSavedMu.Unlock() }