feat/bust-cache-validators-change #14
128
internal/cache/cache.go
vendored
|
@ -4,20 +4,23 @@ import (
|
|||
"context"
|
||||
"crypto/sha1"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"git.numtide.com/numtide/treefmt/internal/format"
|
||||
"github.com/charmbracelet/log"
|
||||
|
||||
"github.com/adrg/xdg"
|
||||
"github.com/vmihailenco/msgpack/v5"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
const (
|
||||
modifiedBucket = "modified"
|
||||
pathsBucket = "paths"
|
||||
formattersBucket = "formatters"
|
||||
)
|
||||
|
||||
// Entry represents a cache entry, indicating the last size and modified time for a file path.
|
||||
|
@ -33,7 +36,9 @@ var db *bolt.DB
|
|||
//
|
||||
// The database will be located in `XDG_CACHE_DIR/treefmt/eval-cache/<id>.db`, where <id> is determined by hashing
|
||||
// the treeRoot path. This associates a given treeRoot with a given instance of the cache.
|
||||
func Open(treeRoot string, clean bool) (err error) {
|
||||
func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter) (err error) {
|
||||
l := log.WithPrefix("cache")
|
||||
|
||||
// determine a unique and consistent db name for the tree root
|
||||
h := sha1.New()
|
||||
h.Write([]byte(treeRoot))
|
||||
|
@ -45,27 +50,84 @@ func Open(treeRoot string, clean bool) (err error) {
|
|||
return fmt.Errorf("%w: could not resolve local path for the cache", err)
|
||||
}
|
||||
|
||||
// force a clean of the cache if specified
|
||||
if clean {
|
||||
err := os.Remove(path)
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
err = nil
|
||||
} else if err != nil {
|
||||
return fmt.Errorf("%w: failed to clear cache", err)
|
||||
}
|
||||
}
|
||||
|
||||
db, err = bolt.Open(path, 0o600, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: failed to open cache", err)
|
||||
}
|
||||
|
||||
err = db.Update(func(tx *bolt.Tx) error {
|
||||
_, err := tx.CreateBucket([]byte(modifiedBucket))
|
||||
if errors.Is(err, bolt.ErrBucketExists) {
|
||||
return nil
|
||||
// create bucket for tracking paths
|
||||
pathsBucket, err := tx.CreateBucketIfNotExists([]byte(pathsBucket))
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: failed to create paths bucket", err)
|
||||
}
|
||||
return err
|
||||
|
||||
// create bucket for tracking formatters
|
||||
formattersBucket, err := tx.CreateBucketIfNotExists([]byte(formattersBucket))
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: failed to create formatters bucket", err)
|
||||
}
|
||||
|
||||
// check for any newly configured or modified formatters
|
||||
for name, formatter := range formatters {
|
||||
|
||||
stat, err := os.Lstat(formatter.Executable())
|
||||
brianmcgee marked this conversation as resolved
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: failed to state formatter executable", err)
|
||||
}
|
||||
|
||||
entry, err := getEntry(formattersBucket, name)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: failed to retrieve entry for formatter", err)
|
||||
}
|
||||
|
||||
clean = clean || entry == nil || !(entry.Size == stat.Size() && entry.Modified == stat.ModTime())
|
||||
l.Debug(
|
||||
"checking if formatter has changed",
|
||||
"name", name,
|
||||
"clean", clean,
|
||||
"entry", entry,
|
||||
"stat", stat,
|
||||
)
|
||||
|
||||
// record formatters info
|
||||
entry = &Entry{
|
||||
Size: stat.Size(),
|
||||
Modified: stat.ModTime(),
|
||||
}
|
||||
brianmcgee marked this conversation as resolved
zimbatm
commented
This could be seen as a "CacheKey" or something like that. If any of the values have changed, invalidate. I know this is what you are doing already; just wondering about semantics a bit. This could be seen as a "CacheKey" or something like that. If any of the values have changed, invalidate. I know this is what you are doing already; just wondering about semantics a bit.
brianmcgee
commented
I currently think of it as a I currently think of it as a `cache.Entry` given the package it's within, as it's more of a Cache Entry for me than a Cache Key since it's not being used as the key in bolt.
|
||||
|
||||
if err = putEntry(formattersBucket, name, entry); err != nil {
|
||||
return fmt.Errorf("%w: failed to write formatter entry", err)
|
||||
}
|
||||
}
|
||||
|
||||
// check for any removed formatters
|
||||
if err = formattersBucket.ForEach(func(key []byte, _ []byte) error {
|
||||
_, ok := formatters[string(key)]
|
||||
if !ok {
|
||||
// remove the formatter entry from the cache
|
||||
if err = formattersBucket.Delete(key); err != nil {
|
||||
return fmt.Errorf("%w: failed to remove formatter entry", err)
|
||||
}
|
||||
// indicate a clean is required
|
||||
clean = true
|
||||
}
|
||||
return nil
|
||||
}); err != nil {
|
||||
return fmt.Errorf("%w: failed to check for removed formatters", err)
|
||||
}
|
||||
|
||||
if clean {
|
||||
// remove all path entries
|
||||
c := pathsBucket.Cursor()
|
||||
for k, v := c.First(); !(k == nil && v == nil); k, v = c.Next() {
|
||||
if err = c.Delete(); err != nil {
|
||||
return fmt.Errorf("%w: failed to remove path entry", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
return
|
||||
|
@ -93,11 +155,24 @@ func getEntry(bucket *bolt.Bucket, path string) (*Entry, error) {
|
|||
}
|
||||
}
|
||||
|
||||
// putEntry is a helper for writing cache entries into bolt.
|
||||
func putEntry(bucket *bolt.Bucket, path string, entry *Entry) error {
|
||||
bytes, err := msgpack.Marshal(entry)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: failed to marshal cache entry", err)
|
||||
}
|
||||
|
||||
if err = bucket.Put([]byte(path), bytes); err != nil {
|
||||
return fmt.Errorf("%w: failed to put cache entry", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ChangeSet is used to walk a filesystem, starting at root, and outputting any new or changed paths using pathsCh.
|
||||
// It determines if a path is new or has changed by comparing against cache entries.
|
||||
func ChangeSet(ctx context.Context, root string, pathsCh chan<- string) error {
|
||||
return db.Update(func(tx *bolt.Tx) error {
|
||||
bucket := tx.Bucket([]byte(modifiedBucket))
|
||||
bucket := tx.Bucket([]byte(pathsBucket))
|
||||
|
||||
return filepath.Walk(root, func(path string, info fs.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
|
@ -142,13 +217,9 @@ func Update(paths []string) (int, error) {
|
|||
var changes int
|
||||
|
||||
return changes, db.Update(func(tx *bolt.Tx) error {
|
||||
bucket := tx.Bucket([]byte(modifiedBucket))
|
||||
bucket := tx.Bucket([]byte(pathsBucket))
|
||||
|
||||
for _, path := range paths {
|
||||
if path == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
cached, err := getEntry(bucket, path)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -166,18 +237,13 @@ func Update(paths []string) (int, error) {
|
|||
continue
|
||||
}
|
||||
|
||||
cacheInfo := Entry{
|
||||
entry := Entry{
|
||||
Size: pathInfo.Size(),
|
||||
Modified: pathInfo.ModTime(),
|
||||
}
|
||||
|
||||
bytes, err := msgpack.Marshal(cacheInfo)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: failed to marshal mod time", err)
|
||||
}
|
||||
|
||||
if err = bucket.Put([]byte(path), bytes); err != nil {
|
||||
return fmt.Errorf("%w: failed to put mode time", err)
|
||||
if err = putEntry(bucket, path, &entry); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@ package cli
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
|
@ -71,7 +72,7 @@ func (f *Format) Run() error {
|
|||
}
|
||||
|
||||
err = formatter.Init(name, globalExcludes)
|
||||
if err == format.ErrFormatterNotFound && Cli.AllowMissingFormatter {
|
||||
if errors.Is(err, format.ErrFormatterNotFound) && Cli.AllowMissingFormatter {
|
||||
l.Debugf("formatter not found: %v", name)
|
||||
// remove this formatter
|
||||
delete(cfg.Formatters, name)
|
||||
|
@ -82,7 +83,7 @@ func (f *Format) Run() error {
|
|||
|
||||
ctx = format.RegisterFormatters(ctx, cfg.Formatters)
|
||||
|
||||
if err = cache.Open(Cli.TreeRoot, Cli.ClearCache); err != nil {
|
||||
if err = cache.Open(Cli.TreeRoot, Cli.ClearCache, cfg.Formatters); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -110,12 +111,15 @@ func (f *Format) Run() error {
|
|||
eg.Go(func() error {
|
||||
batchSize := 1024
|
||||
batch := make([]string, batchSize)
|
||||
batch = batch[:0]
|
||||
brianmcgee marked this conversation as resolved
zimbatm
commented
Aren't those two lines equivalent to Aren't those two lines equivalent to `batch := []string{}`?
brianmcgee
commented
What I believe I'm doing here is creating an instance of Looking at it with a debugger seems to confirm it's doing what I think it's doing, with the capacity being set to What I _believe_ I'm doing here is creating an instance of `batch` with a pre-initialised backing array of `batchSize` but resetting the slice's length to 0 so that subsequent logic looking at `len(batch)` and `range batch` all work correctly.
Looking at it with a debugger seems to confirm it's doing what I think it's doing, with the capacity being set to `1024` but length matching the latest batch to process.
|
||||
|
||||
var pending, completed, changes int
|
||||
|
||||
LOOP:
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case _, ok := <-pendingCh:
|
||||
if ok {
|
||||
pending += 1
|
||||
|
|
|
@ -2,6 +2,8 @@ package cli
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"testing"
|
||||
|
||||
"git.numtide.com/numtide/treefmt/internal/test"
|
||||
|
@ -54,28 +56,28 @@ func TestSpecifyingFormatters(t *testing.T) {
|
|||
},
|
||||
})
|
||||
|
||||
out, err := cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir)
|
||||
out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), "3 files changed")
|
||||
|
||||
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "elm,nix")
|
||||
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "elm,nix")
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), "2 files changed")
|
||||
|
||||
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "ruby,nix")
|
||||
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "ruby,nix")
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), "2 files changed")
|
||||
|
||||
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "nix")
|
||||
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "nix")
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), "1 files changed")
|
||||
|
||||
// test bad names
|
||||
|
||||
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "foo")
|
||||
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "foo")
|
||||
as.Errorf(err, "formatter not found in config: foo")
|
||||
|
||||
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "bar,foo")
|
||||
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "bar,foo")
|
||||
as.Errorf(err, "formatter not found in config: bar")
|
||||
}
|
||||
|
||||
|
@ -150,3 +152,139 @@ func TestIncludesAndExcludes(t *testing.T) {
|
|||
as.NoError(err)
|
||||
as.Contains(string(out), fmt.Sprintf("%d files changed", 2))
|
||||
}
|
||||
|
||||
func TestCache(t *testing.T) {
|
||||
as := require.New(t)
|
||||
|
||||
tempDir := test.TempExamples(t)
|
||||
configPath := tempDir + "/echo.toml"
|
||||
|
||||
// test without any excludes
|
||||
config := format.Config{
|
||||
Formatters: map[string]*format.Formatter{
|
||||
"echo": {
|
||||
Command: "echo",
|
||||
Includes: []string{"*"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
test.WriteConfig(t, configPath, config)
|
||||
out, err := cmd(t, "--config-file", configPath, "--tree-root", tempDir)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), fmt.Sprintf("%d files changed", 29))
|
||||
|
||||
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), "0 files changed")
|
||||
}
|
||||
|
||||
func TestBustCacheOnFormatterChange(t *testing.T) {
|
||||
as := require.New(t)
|
||||
|
||||
tempDir := test.TempExamples(t)
|
||||
configPath := tempDir + "/echo.toml"
|
||||
|
||||
// symlink some formatters into temp dir, so we can mess with their mod times
|
||||
binPath := tempDir + "/bin"
|
||||
as.NoError(os.Mkdir(binPath, 0o755))
|
||||
|
||||
binaries := []string{"black", "elm-format", "gofmt"}
|
||||
|
||||
for _, name := range binaries {
|
||||
src, err := exec.LookPath(name)
|
||||
as.NoError(err)
|
||||
as.NoError(os.Symlink(src, binPath+"/"+name))
|
||||
}
|
||||
|
||||
// prepend our test bin directory to PATH
|
||||
as.NoError(os.Setenv("PATH", binPath+":"+os.Getenv("PATH")))
|
||||
|
||||
// start with 2 formatters
|
||||
config := format.Config{
|
||||
Formatters: map[string]*format.Formatter{
|
||||
"python": {
|
||||
Command: "black",
|
||||
Includes: []string{"*.py"},
|
||||
},
|
||||
"elm": {
|
||||
Command: "elm-format",
|
||||
Options: []string{"--yes"},
|
||||
Includes: []string{"*.elm"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
test.WriteConfig(t, configPath, config)
|
||||
args := []string{"--config-file", configPath, "--tree-root", tempDir}
|
||||
out, err := cmd(t, args...)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), fmt.Sprintf("%d files changed", 3))
|
||||
|
||||
// tweak mod time of elm formatter
|
||||
as.NoError(test.RecreateSymlink(t, binPath+"/"+"elm-format"))
|
||||
|
||||
out, err = cmd(t, args...)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), fmt.Sprintf("%d files changed", 3))
|
||||
|
||||
// check cache is working
|
||||
out, err = cmd(t, args...)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), "0 files changed")
|
||||
|
||||
// tweak mod time of python formatter
|
||||
as.NoError(test.RecreateSymlink(t, binPath+"/"+"black"))
|
||||
|
||||
out, err = cmd(t, args...)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), fmt.Sprintf("%d files changed", 3))
|
||||
|
||||
// check cache is working
|
||||
out, err = cmd(t, args...)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), "0 files changed")
|
||||
|
||||
// add go formatter
|
||||
config.Formatters["go"] = &format.Formatter{
|
||||
Command: "gofmt",
|
||||
Options: []string{"-w"},
|
||||
Includes: []string{"*.go"},
|
||||
}
|
||||
test.WriteConfig(t, configPath, config)
|
||||
|
||||
out, err = cmd(t, args...)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), fmt.Sprintf("%d files changed", 4))
|
||||
|
||||
// check cache is working
|
||||
out, err = cmd(t, args...)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), "0 files changed")
|
||||
|
||||
// remove python formatter
|
||||
delete(config.Formatters, "python")
|
||||
test.WriteConfig(t, configPath, config)
|
||||
|
||||
out, err = cmd(t, args...)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), fmt.Sprintf("%d files changed", 2))
|
||||
|
||||
// check cache is working
|
||||
out, err = cmd(t, args...)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), "0 files changed")
|
||||
|
||||
// remove elm formatter
|
||||
delete(config.Formatters, "elm")
|
||||
test.WriteConfig(t, configPath, config)
|
||||
|
||||
out, err = cmd(t, args...)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), fmt.Sprintf("%d files changed", 1))
|
||||
|
||||
// check cache is working
|
||||
out, err = cmd(t, args...)
|
||||
as.NoError(err)
|
||||
as.Contains(string(out), "0 files changed")
|
||||
}
|
||||
|
|
|
@ -159,7 +159,8 @@ func (f *Formatter) apply(ctx context.Context) error {
|
|||
start := time.Now()
|
||||
cmd := exec.CommandContext(ctx, f.Command, args...)
|
||||
|
||||
if _, err := cmd.CombinedOutput(); err != nil {
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
f.log.Debugf("\n%v", string(out))
|
||||
// todo log output
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -36,3 +36,11 @@ func TempFile(t *testing.T, path string) *os.File {
|
|||
}
|
||||
return file
|
||||
}
|
||||
|
||||
func RecreateSymlink(t *testing.T, path string) error {
|
||||
t.Helper()
|
||||
src, err := os.Readlink(path)
|
||||
require.NoError(t, err, "failed to read symlink")
|
||||
require.NoError(t, os.Remove(path), "failed to remove symlink")
|
||||
return os.Symlink(src, path)
|
||||
}
|
||||
|
|
Probably we care more about which executable is running in case it's a symlink. I would use
os.Stat
here. And maybe store the resolved path as well?I went with
os.Lstat
here becausegofmt
resolved to a symlink which was then another relative symlink within the nix output path. This ensures we recursively resolve down to the underlying binary.For paths that are being formatted we already skip symlinks when traversing the filesystem