feat/bust-cache-validators-change (#14)

Tracks the mod time and size of a formatter's executable in bolt.

The cache is busted using the following criteria:

- a new formatter has been configured.
- an existing formatter has changed (mod time or size)
- an existing formatter has been removed from config

Also implemented better resolution of symlinks when determining a formatters executable path.

Reviewed-on: #14
Reviewed-by: Jonas Chevalier <zimbatm@noreply.git.numtide.com>
Co-authored-by: Brian McGee <brian@bmcgee.ie>
Co-committed-by: Brian McGee <brian@bmcgee.ie>
This commit is contained in:
Brian McGee 2024-01-03 08:08:57 +00:00 committed by Brian McGee
parent ada9a72a7e
commit 811f883a2b
5 changed files with 257 additions and 40 deletions

View File

@ -4,20 +4,23 @@ import (
"context"
"crypto/sha1"
"encoding/hex"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"time"
"git.numtide.com/numtide/treefmt/internal/format"
"github.com/charmbracelet/log"
"github.com/adrg/xdg"
"github.com/vmihailenco/msgpack/v5"
bolt "go.etcd.io/bbolt"
)
const (
modifiedBucket = "modified"
pathsBucket = "paths"
formattersBucket = "formatters"
)
// Entry represents a cache entry, indicating the last size and modified time for a file path.
@ -33,7 +36,9 @@ var db *bolt.DB
//
// The database will be located in `XDG_CACHE_DIR/treefmt/eval-cache/<id>.db`, where <id> is determined by hashing
// the treeRoot path. This associates a given treeRoot with a given instance of the cache.
func Open(treeRoot string, clean bool) (err error) {
func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter) (err error) {
l := log.WithPrefix("cache")
// determine a unique and consistent db name for the tree root
h := sha1.New()
h.Write([]byte(treeRoot))
@ -45,27 +50,84 @@ func Open(treeRoot string, clean bool) (err error) {
return fmt.Errorf("%w: could not resolve local path for the cache", err)
}
// force a clean of the cache if specified
if clean {
err := os.Remove(path)
if errors.Is(err, os.ErrNotExist) {
err = nil
} else if err != nil {
return fmt.Errorf("%w: failed to clear cache", err)
}
}
db, err = bolt.Open(path, 0o600, nil)
if err != nil {
return fmt.Errorf("%w: failed to open cache", err)
}
err = db.Update(func(tx *bolt.Tx) error {
_, err := tx.CreateBucket([]byte(modifiedBucket))
if errors.Is(err, bolt.ErrBucketExists) {
return nil
// create bucket for tracking paths
pathsBucket, err := tx.CreateBucketIfNotExists([]byte(pathsBucket))
if err != nil {
return fmt.Errorf("%w: failed to create paths bucket", err)
}
return err
// create bucket for tracking formatters
formattersBucket, err := tx.CreateBucketIfNotExists([]byte(formattersBucket))
if err != nil {
return fmt.Errorf("%w: failed to create formatters bucket", err)
}
// check for any newly configured or modified formatters
for name, formatter := range formatters {
stat, err := os.Lstat(formatter.Executable())
if err != nil {
return fmt.Errorf("%w: failed to state formatter executable", err)
}
entry, err := getEntry(formattersBucket, name)
if err != nil {
return fmt.Errorf("%w: failed to retrieve entry for formatter", err)
}
clean = clean || entry == nil || !(entry.Size == stat.Size() && entry.Modified == stat.ModTime())
l.Debug(
"checking if formatter has changed",
"name", name,
"clean", clean,
"entry", entry,
"stat", stat,
)
// record formatters info
entry = &Entry{
Size: stat.Size(),
Modified: stat.ModTime(),
}
if err = putEntry(formattersBucket, name, entry); err != nil {
return fmt.Errorf("%w: failed to write formatter entry", err)
}
}
// check for any removed formatters
if err = formattersBucket.ForEach(func(key []byte, _ []byte) error {
_, ok := formatters[string(key)]
if !ok {
// remove the formatter entry from the cache
if err = formattersBucket.Delete(key); err != nil {
return fmt.Errorf("%w: failed to remove formatter entry", err)
}
// indicate a clean is required
clean = true
}
return nil
}); err != nil {
return fmt.Errorf("%w: failed to check for removed formatters", err)
}
if clean {
// remove all path entries
c := pathsBucket.Cursor()
for k, v := c.First(); !(k == nil && v == nil); k, v = c.Next() {
if err = c.Delete(); err != nil {
return fmt.Errorf("%w: failed to remove path entry", err)
}
}
}
return nil
})
return
@ -93,11 +155,24 @@ func getEntry(bucket *bolt.Bucket, path string) (*Entry, error) {
}
}
// putEntry is a helper for writing cache entries into bolt.
func putEntry(bucket *bolt.Bucket, path string, entry *Entry) error {
bytes, err := msgpack.Marshal(entry)
if err != nil {
return fmt.Errorf("%w: failed to marshal cache entry", err)
}
if err = bucket.Put([]byte(path), bytes); err != nil {
return fmt.Errorf("%w: failed to put cache entry", err)
}
return nil
}
// ChangeSet is used to walk a filesystem, starting at root, and outputting any new or changed paths using pathsCh.
// It determines if a path is new or has changed by comparing against cache entries.
func ChangeSet(ctx context.Context, root string, pathsCh chan<- string) error {
return db.Update(func(tx *bolt.Tx) error {
bucket := tx.Bucket([]byte(modifiedBucket))
bucket := tx.Bucket([]byte(pathsBucket))
return filepath.Walk(root, func(path string, info fs.FileInfo, err error) error {
if err != nil {
@ -142,13 +217,9 @@ func Update(paths []string) (int, error) {
var changes int
return changes, db.Update(func(tx *bolt.Tx) error {
bucket := tx.Bucket([]byte(modifiedBucket))
bucket := tx.Bucket([]byte(pathsBucket))
for _, path := range paths {
if path == "" {
continue
}
cached, err := getEntry(bucket, path)
if err != nil {
return err
@ -166,18 +237,13 @@ func Update(paths []string) (int, error) {
continue
}
cacheInfo := Entry{
entry := Entry{
Size: pathInfo.Size(),
Modified: pathInfo.ModTime(),
}
bytes, err := msgpack.Marshal(cacheInfo)
if err != nil {
return fmt.Errorf("%w: failed to marshal mod time", err)
}
if err = bucket.Put([]byte(path), bytes); err != nil {
return fmt.Errorf("%w: failed to put mode time", err)
if err = putEntry(bucket, path, &entry); err != nil {
return err
}
}

View File

@ -2,6 +2,7 @@ package cli
import (
"context"
"errors"
"fmt"
"os"
"os/signal"
@ -71,7 +72,7 @@ func (f *Format) Run() error {
}
err = formatter.Init(name, globalExcludes)
if err == format.ErrFormatterNotFound && Cli.AllowMissingFormatter {
if errors.Is(err, format.ErrFormatterNotFound) && Cli.AllowMissingFormatter {
l.Debugf("formatter not found: %v", name)
// remove this formatter
delete(cfg.Formatters, name)
@ -82,7 +83,7 @@ func (f *Format) Run() error {
ctx = format.RegisterFormatters(ctx, cfg.Formatters)
if err = cache.Open(Cli.TreeRoot, Cli.ClearCache); err != nil {
if err = cache.Open(Cli.TreeRoot, Cli.ClearCache, cfg.Formatters); err != nil {
return err
}
@ -110,12 +111,15 @@ func (f *Format) Run() error {
eg.Go(func() error {
batchSize := 1024
batch := make([]string, batchSize)
batch = batch[:0]
var pending, completed, changes int
LOOP:
for {
select {
case <-ctx.Done():
return ctx.Err()
case _, ok := <-pendingCh:
if ok {
pending += 1

View File

@ -2,6 +2,8 @@ package cli
import (
"fmt"
"os"
"os/exec"
"testing"
"git.numtide.com/numtide/treefmt/internal/test"
@ -54,28 +56,28 @@ func TestSpecifyingFormatters(t *testing.T) {
},
})
out, err := cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir)
out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
as.Contains(string(out), "3 files changed")
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "elm,nix")
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "elm,nix")
as.NoError(err)
as.Contains(string(out), "2 files changed")
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "ruby,nix")
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "ruby,nix")
as.NoError(err)
as.Contains(string(out), "2 files changed")
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "nix")
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "nix")
as.NoError(err)
as.Contains(string(out), "1 files changed")
// test bad names
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "foo")
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "foo")
as.Errorf(err, "formatter not found in config: foo")
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "bar,foo")
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "bar,foo")
as.Errorf(err, "formatter not found in config: bar")
}
@ -150,3 +152,139 @@ func TestIncludesAndExcludes(t *testing.T) {
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 2))
}
func TestCache(t *testing.T) {
as := require.New(t)
tempDir := test.TempExamples(t)
configPath := tempDir + "/echo.toml"
// test without any excludes
config := format.Config{
Formatters: map[string]*format.Formatter{
"echo": {
Command: "echo",
Includes: []string{"*"},
},
},
}
test.WriteConfig(t, configPath, config)
out, err := cmd(t, "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 29))
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
as.Contains(string(out), "0 files changed")
}
func TestBustCacheOnFormatterChange(t *testing.T) {
as := require.New(t)
tempDir := test.TempExamples(t)
configPath := tempDir + "/echo.toml"
// symlink some formatters into temp dir, so we can mess with their mod times
binPath := tempDir + "/bin"
as.NoError(os.Mkdir(binPath, 0o755))
binaries := []string{"black", "elm-format", "gofmt"}
for _, name := range binaries {
src, err := exec.LookPath(name)
as.NoError(err)
as.NoError(os.Symlink(src, binPath+"/"+name))
}
// prepend our test bin directory to PATH
as.NoError(os.Setenv("PATH", binPath+":"+os.Getenv("PATH")))
// start with 2 formatters
config := format.Config{
Formatters: map[string]*format.Formatter{
"python": {
Command: "black",
Includes: []string{"*.py"},
},
"elm": {
Command: "elm-format",
Options: []string{"--yes"},
Includes: []string{"*.elm"},
},
},
}
test.WriteConfig(t, configPath, config)
args := []string{"--config-file", configPath, "--tree-root", tempDir}
out, err := cmd(t, args...)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 3))
// tweak mod time of elm formatter
as.NoError(test.RecreateSymlink(t, binPath+"/"+"elm-format"))
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 3))
// check cache is working
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), "0 files changed")
// tweak mod time of python formatter
as.NoError(test.RecreateSymlink(t, binPath+"/"+"black"))
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 3))
// check cache is working
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), "0 files changed")
// add go formatter
config.Formatters["go"] = &format.Formatter{
Command: "gofmt",
Options: []string{"-w"},
Includes: []string{"*.go"},
}
test.WriteConfig(t, configPath, config)
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 4))
// check cache is working
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), "0 files changed")
// remove python formatter
delete(config.Formatters, "python")
test.WriteConfig(t, configPath, config)
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 2))
// check cache is working
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), "0 files changed")
// remove elm formatter
delete(config.Formatters, "elm")
test.WriteConfig(t, configPath, config)
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 1))
// check cache is working
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), "0 files changed")
}

View File

@ -159,7 +159,8 @@ func (f *Formatter) apply(ctx context.Context) error {
start := time.Now()
cmd := exec.CommandContext(ctx, f.Command, args...)
if _, err := cmd.CombinedOutput(); err != nil {
if out, err := cmd.CombinedOutput(); err != nil {
f.log.Debugf("\n%v", string(out))
// todo log output
return err
}

View File

@ -36,3 +36,11 @@ func TempFile(t *testing.T, path string) *os.File {
}
return file
}
func RecreateSymlink(t *testing.T, path string) error {
t.Helper()
src, err := os.Readlink(path)
require.NoError(t, err, "failed to read symlink")
require.NoError(t, os.Remove(path), "failed to remove symlink")
return os.Symlink(src, path)
}