// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package sumdb

import (
	"bytes"
	"errors"
	"fmt"
	"log"
	"strings"
	"sync"

	"golang.org/x/mod/module"
	"golang.org/x/mod/sumdb/note"
	"golang.org/x/mod/sumdb/tlog"
)

// A ClientOps provides the external operations
// (file caching, HTTP fetches, and so on) needed by the Client.
// The methods must be safe for concurrent use by multiple goroutines.
type ClientOps interface {
	// ReadRemote reads and returns the content served at the given path
	// on the remote database server. The path begins with "/lookup" or "/tile/",
	// and there is no need to parse the path in any way.
	// It is the implementation's responsibility to turn that path into a full URL
	// and make the HTTP request. ReadRemote should return an error for
	// any non-200 HTTP response status.
	ReadRemote(path string) ([]byte, error)

	// ReadConfig reads and returns the content of the named configuration file.
	// There are only a fixed set of configuration files.
	//
	// "key" returns a file containing the verifier key for the server.
	//
	// serverName + "/latest" returns a file containing the latest known
	// signed tree from the server.
	// To signal that the client wishes to start with an "empty" signed tree,
	// ReadConfig can return a successful empty result (0 bytes of data).
	ReadConfig(file string) ([]byte, error)

	// WriteConfig updates the content of the named configuration file,
	// changing it from the old []byte to the new []byte.
	// If the old []byte does not match the stored configuration,
	// WriteConfig must return ErrWriteConflict.
	// Otherwise, WriteConfig should atomically replace old with new.
	// The "key" configuration file is never written using WriteConfig.
	WriteConfig(file string, old, new []byte) error

	// Log prints the given log message (such as with log.Print)
	Log(msg string)

	// SecurityError prints the given security error log message.
	// The Client returns ErrSecurity from any operation that invokes SecurityError,
	// but the return value is mainly for testing. In a real program,
	// SecurityError should typically print the message and call log.Fatal or os.Exit.
	SecurityError(msg string)
}

// ErrWriteConflict signals a write conflict during Client.WriteConfig.
var ErrWriteConflict = errors.New("write conflict")

// ErrSecurity is returned by Client operations that invoke Client.SecurityError.
var ErrSecurity = errors.New("security error: misbehaving server")

// A Client is a client connection to a checksum database.
// All the methods are safe for simultaneous use by multiple goroutines.
type Client struct {
	ops ClientOps // access to operations in the external world

	// one-time initialized data
	initOnce   sync.Once
	initErr    error          // init error, if any
	name       string         // name of accepted verifier
	verifiers  note.Verifiers // accepted verifiers (just one, but Verifiers for note.Open)
	tileReader tileReader
	tileHeight int
	nosumdb    string

	record    parCache // cache of record lookup, keyed by path@vers
	tileCache parCache // cache of c.readTile, keyed by tile

	latestMu  sync.Mutex
	latest    tlog.Tree // latest known tree head
	latestMsg []byte    // encoded signed note for latest

	tileSavedMu sync.Mutex
	tileSaved   map[tlog.Tile]bool // which tiles have been saved using c.ops.WriteCache already
}

// NewClient returns a new Client using the given Client.
func NewClient(ops ClientOps) *Client {
	return &Client{
		ops: ops,
	}
}

// init initiailzes the client (if not already initialized)
// and returns any initialization error.
func (c *Client) init() error {
	c.initOnce.Do(c.initWork)
	return c.initErr
}

// initWork does the actual initialization work.
func (c *Client) initWork() {
	log.Printf("initializing sumdb client")
	defer func() {
		log.Printf("done init")
		if c.initErr != nil {
			c.initErr = fmt.Errorf("initializing sumdb.Client: %v", c.initErr)
		}
	}()

	c.tileReader.c = c
	c.tileHeight = 8
	c.tileSaved = make(map[tlog.Tile]bool)

	vkey, err := c.ops.ReadConfig("key")
	if err != nil {
		c.initErr = err
		return
	}
	verifier, err := note.NewVerifier(strings.TrimSpace(string(vkey)))
	if err != nil {
		c.initErr = err
		return
	}
	c.verifiers = note.VerifierList(verifier)
	c.name = verifier.Name()

	log.Printf("init: reading latest")
	data, err := c.ops.ReadConfig(c.name + "/latest")
	if err != nil {
		c.initErr = err
		return
	}

	log.Printf("init: merging latest")
	if err := c.mergeLatest(data); err != nil {
		c.initErr = err
		return
	}
}

// Lookup returns the go.sum lines for the given module path and version.
// The version may end in a /go.mod suffix, in which case Lookup returns
// the go.sum lines for the module's go.mod-only hash.
func (c *Client) Lookup(path, vers string) (lines []string, err error) {
	defer func() {
		if err != nil {
			err = fmt.Errorf("%s@%s: %v", path, vers, err)
		}
	}()

	if err := c.init(); err != nil {
		return nil, err
	}

	// Prepare encoded cache filename / URL.
	epath, err := module.EscapePath(path)
	if err != nil {
		return nil, err
	}
	evers, err := module.EscapeVersion(strings.TrimSuffix(vers, "/go.mod"))
	if err != nil {
		return nil, err
	}
	remotePath := "/lookup/" + epath + "@" + evers
	file := c.name + remotePath

	// Fetch the data.
	// The lookupCache avoids redundant ReadCache/GetURL operations
	// (especially since go.sum lines tend to come in pairs for a given
	// path and version) and also avoids having multiple of the same
	// request in flight at once.
	type cached struct {
		data []byte
		err  error
	}
	result := c.record.Do(file, func() interface{} {
		data, err := c.ops.ReadRemote(remotePath)
		if err != nil {
			return cached{nil, err}
		}

		// Validate the record before using it for anything.
		id, text, treeMsg, err := tlog.ParseRecord(data)
		if err != nil {
			return cached{nil, err}
		}
		if err := c.mergeLatest(treeMsg); err != nil {
			return cached{nil, err}
		}
		if err := c.checkRecord(id, text); err != nil {
			return cached{nil, err}
		}

		return cached{data, nil}
	}).(cached)
	if result.err != nil {
		return nil, result.err
	}

	// Extract the lines for the specific version we want
	// (with or without /go.mod).
	prefix := path + " " + vers + " "
	var hashes []string
	for _, line := range strings.Split(string(result.data), "\n") {
		if strings.HasPrefix(line, prefix) {
			hashes = append(hashes, line)
		}
	}
	return hashes, nil
}

func (c *Client) FetchLatest() (*tlog.Tree, error) {
	if err := c.init(); err != nil {
		return nil, err
	}

	msg, err := c.ops.ReadRemote("/latest")
	if err != nil {
		return nil, err
	}
	note, err := note.Open(msg, c.verifiers)
	if err != nil {
		return nil, fmt.Errorf("reading latest note: %v\nnote:\n%s", err, msg)
	}
	tree, err := tlog.ParseTree([]byte(note.Text))
	if err != nil {
		return nil, fmt.Errorf("reading tree: %v\ntree:\n%s", err, note.Text)
	}
	return &tree, nil
}

func (c *Client) FetchTreeProof(t *tlog.Tree) error {
	log.Printf("fetching proof for tree %v", t)

	if err := c.checkTrees(tlog.Tree{}, nil, *t, nil); err != nil {
		log.Printf("check tree error: %v", err)
		return err
	}

	return nil
}

// mergeLatest merges the tree head in msg
// with the Client's current latest tree head,
// ensuring the result is a consistent timeline.
// If the result is inconsistent, mergeLatest calls c.ops.SecurityError
// with a detailed security error message and then
// (only if c.ops.SecurityError does not exit the program) returns ErrSecurity.
// If the Client's current latest tree head moves forward,
// mergeLatest updates the underlying configuration file as well,
// taking care to merge any independent updates to that configuration.
func (c *Client) mergeLatest(msg []byte) error {
	log.Printf("merge latest: start")
	// Merge msg into our in-memory copy of the latest tree head.
	when, err := c.mergeLatestMem(msg)
	if err != nil {
		return err
	}
	log.Printf("merge latest: when=%d", when)
	if when != msgFuture {
		// msg matched our present or was in the past.
		// No change to our present, so no update of config file.
		log.Printf("merge latest: future; done")
		return nil
	}

	// Flush our extended timeline back out to the configuration file.
	// If the configuration file has been updated in the interim,
	// we need to merge any updates made there as well.
	// Note that writeConfig is an atomic compare-and-swap.
	for {
		log.Printf("merge latest: reading latest...")
		msg, err := c.ops.ReadConfig(c.name + "/latest")
		if err != nil {
			return err
		}
		when, err := c.mergeLatestMem(msg)
		if err != nil {
			return err
		}
		if when != msgPast {
			// msg matched our present or was from the future,
			// and now our in-memory copy matches.
			return nil
		}

		// msg (== config) is in the past, so we need to update it.
		c.latestMu.Lock()
		latestMsg := c.latestMsg
		c.latestMu.Unlock()
		if err := c.ops.WriteConfig(c.name+"/latest", msg, latestMsg); err != ErrWriteConflict {
			// Success or a non-write-conflict error.
			return err
		}
	}
}

const (
	msgPast = 1 + iota
	msgNow
	msgFuture
)

// mergeLatestMem is like mergeLatest but is only concerned with
// updating the in-memory copy of the latest tree head (c.latest)
// not the configuration file.
// The when result explains when msg happened relative to our
// previous idea of c.latest:
// msgPast means msg was from before c.latest,
// msgNow means msg was exactly c.latest, and
// msgFuture means msg was from after c.latest, which has now been updated.
func (c *Client) mergeLatestMem(msg []byte) (when int, err error) {
	log.Printf("mergeLatestMem: start with msg=%v", msg)
	if len(msg) == 0 {
		// Accept empty msg as the unsigned, empty timeline.
		c.latestMu.Lock()
		latest := c.latest
		c.latestMu.Unlock()
		if latest.N == 0 {
			return msgNow, nil
		}
		return msgPast, nil
	}

	log.Printf("mergeMemLatest: reading tree note: %s", msg)
	note, err := note.Open(msg, c.verifiers)
	if err != nil {
		return 0, fmt.Errorf("reading tree note: %v\nnote:\n%s", err, msg)
	}
	tree, err := tlog.ParseTree([]byte(note.Text))
	if err != nil {
		return 0, fmt.Errorf("reading tree: %v\ntree:\n%s", err, note.Text)
	}

	// Other lookups may be calling mergeLatest with other heads,
	// so c.latest is changing underfoot. We don't want to hold the
	// c.mu lock during tile fetches, so loop trying to update c.latest.
	c.latestMu.Lock()
	latest := c.latest
	latestMsg := c.latestMsg
	c.latestMu.Unlock()

	for {
		// If the tree head looks old, check that it is on our timeline.
		if tree.N <= latest.N {
			log.Printf("mergeLatestMem: looks old")
			if err := c.checkTrees(tree, msg, latest, latestMsg); err != nil {
				return 0, err
			}
			if tree.N < latest.N {
				return msgPast, nil
			}
			return msgNow, nil
		}

		// The tree head looks new. Check that we are on its timeline and try to move our timeline forward.
		if err := c.checkTrees(latest, latestMsg, tree, msg); err != nil {
			log.Printf("mergeLatestMem: looks new")
			return 0, err
		}

		log.Printf("mergeLatestMem: install msg if possible")

		// Install our msg if possible.
		// Otherwise we will go around again.
		c.latestMu.Lock()
		installed := false
		if c.latest == latest {
			installed = true
			c.latest = tree
			c.latestMsg = msg
		} else {
			latest = c.latest
			latestMsg = c.latestMsg
		}
		c.latestMu.Unlock()

		if installed {
			return msgFuture, nil
		}
	}
}

// checkTrees checks that older (from olderNote) is contained in newer (from newerNote).
// If an error occurs, such as malformed data or a network problem, checkTrees returns that error.
// If on the other hand checkTrees finds evidence of misbehavior, it prepares a detailed
// message and calls log.Fatal.
func (c *Client) checkTrees(older tlog.Tree, olderNote []byte, newer tlog.Tree, newerNote []byte) error {
	log.Printf("checking trees: older=%v, newer=%v", older, newer)
	thr := tlog.TileHashReader(newer, &c.tileReader)
	h, err := tlog.TreeHash(older.N, thr)
	if err != nil {
		if older.N == newer.N {
			return fmt.Errorf("checking tree#%d: %v", older.N, err)
		}
		return fmt.Errorf("checking tree#%d against tree#%d: %v", older.N, newer.N, err)
	}
	log.Printf("computed tree hash %v", h)
	if h == older.Hash {
		log.Printf("hashes match: %v", older.Hash)
		return nil
	}
	log.Printf("HASHES DO NOT MATCH h=%v older=%v", h, older.Hash)

	// Detected a fork in the tree timeline.
	// Start by reporting the inconsistent signed tree notes.
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "SECURITY ERROR\n")
	fmt.Fprintf(&buf, "go.sum database server misbehavior detected!\n\n")
	indent := func(b []byte) []byte {
		return bytes.Replace(b, []byte("\n"), []byte("\n\t"), -1)
	}
	fmt.Fprintf(&buf, "old database:\n\t%s\n", indent(olderNote))
	fmt.Fprintf(&buf, "new database:\n\t%s\n", indent(newerNote))

	// The notes alone are not enough to prove the inconsistency.
	// We also need to show that the newer note's tree hash for older.N
	// does not match older.Hash. The consumer of this report could
	// of course consult the server to try to verify the inconsistency,
	// but we are holding all the bits we need to prove it right now,
	// so we might as well print them and make the report not depend
	// on the continued availability of the misbehaving server.
	// Preparing this data only reuses the tiled hashes needed for
	// tlog.TreeHash(older.N, thr) above, so assuming thr is caching tiles,
	// there are no new access to the server here, and these operations cannot fail.
	fmt.Fprintf(&buf, "proof of misbehavior:\n\t%v", h)
	if p, err := tlog.ProveTree(newer.N, older.N, thr); err != nil {
		fmt.Fprintf(&buf, "\tinternal error: %v\n", err)
	} else if err := tlog.CheckTree(p, newer.N, newer.Hash, older.N, h); err != nil {
		fmt.Fprintf(&buf, "\tinternal error: generated inconsistent proof\n")
	} else {
		for _, h := range p {
			fmt.Fprintf(&buf, "\n\t%v", h)
		}
	}
	c.ops.SecurityError(buf.String())
	return ErrSecurity
}

// checkRecord checks that record #id's hash matches data.
func (c *Client) checkRecord(id int64, data []byte) error {
	log.Printf("checking record %d with %s", id, data)
	c.latestMu.Lock()
	latest := c.latest
	c.latestMu.Unlock()

	if id >= latest.N {
		return fmt.Errorf("cannot validate record %d in tree of size %d", id, latest.N)
	}
	hashes, err := tlog.TileHashReader(latest, &c.tileReader).ReadHashes([]int64{tlog.StoredHashIndex(0, id)})
	if err != nil {
		return err
	}
	log.Printf("checkRecord: got hashes %v", hashes)
	if hashes[0] == tlog.RecordHash(data) {
		log.Printf("tile hash %s matches record hash %s", hashes[0], data)
		return nil
	}
	return fmt.Errorf("cannot authenticate record data in server response")
}

// tileReader is a *Client wrapper that implements tlog.TileReader.
// The separate type avoids exposing the ReadTiles and SaveTiles
// methods on Client itself.
type tileReader struct {
	c *Client
}

func (r *tileReader) Height() int {
	log.Printf("checking height")
	return r.c.tileHeight
}

// ReadTiles reads and returns the requested tiles,
// either from the on-disk cache or the server.
func (r *tileReader) ReadTiles(tiles []tlog.Tile) ([][]byte, error) {
	log.Printf("reading %d tiles", len(tiles))
	// Read all the tiles in parallel.
	data := make([][]byte, len(tiles))
	errs := make([]error, len(tiles))
	var wg sync.WaitGroup
	for i, tile := range tiles {
		wg.Add(1)
		go func(i int, tile tlog.Tile) {
			defer wg.Done()
			log.Printf("reading tile %v", tile)
			data[i], errs[i] = r.c.readTile(tile)
		}(i, tile)
	}
	wg.Wait()

	for _, err := range errs {
		if err != nil {
			return nil, err
		}
	}

	return data, nil
}

// tileRemotePath returns the remote path for the tile.
func (c *Client) tileRemotePath(tile tlog.Tile) string {
	return "/" + tile.Path()
}

// readTile reads a single tile, either from the on-disk cache or the server.
func (c *Client) readTile(tile tlog.Tile) ([]byte, error) {
	return c.ops.ReadRemote(c.tileRemotePath(tile))
}

// markTileSaved records that tile is already present in the on-disk cache,
// so that a future SaveTiles for that tile can be ignored.
func (c *Client) markTileSaved(tile tlog.Tile) {
	c.tileSavedMu.Lock()
	c.tileSaved[tile] = true
	c.tileSavedMu.Unlock()
}

// SaveTiles saves the now validated tiles.
func (r *tileReader) SaveTiles(tiles []tlog.Tile, data [][]byte) {
	c := r.c

	// Determine which tiles need saving.
	// (Tiles that came from the cache need not be saved back.)
	save := make([]bool, len(tiles))
	c.tileSavedMu.Lock()
	for i, tile := range tiles {
		if !c.tileSaved[tile] {
			save[i] = true
			c.tileSaved[tile] = true
		}
	}
	c.tileSavedMu.Unlock()
}