feat/bust-cache-validators-change #14

Merged
brianmcgee merged 2 commits from feat/bust-cache-validators-change into main 2024-01-03 08:08:57 +00:00
5 changed files with 257 additions and 40 deletions

View File

@ -4,20 +4,23 @@ import (
"context"
"crypto/sha1"
"encoding/hex"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"time"
"git.numtide.com/numtide/treefmt/internal/format"
"github.com/charmbracelet/log"
"github.com/adrg/xdg"
"github.com/vmihailenco/msgpack/v5"
bolt "go.etcd.io/bbolt"
)
const (
modifiedBucket = "modified"
pathsBucket = "paths"
formattersBucket = "formatters"
)
// Entry represents a cache entry, indicating the last size and modified time for a file path.
@ -33,7 +36,9 @@ var db *bolt.DB
//
// The database will be located in `XDG_CACHE_DIR/treefmt/eval-cache/<id>.db`, where <id> is determined by hashing
// the treeRoot path. This associates a given treeRoot with a given instance of the cache.
func Open(treeRoot string, clean bool) (err error) {
func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter) (err error) {
l := log.WithPrefix("cache")
// determine a unique and consistent db name for the tree root
h := sha1.New()
h.Write([]byte(treeRoot))
@ -45,27 +50,84 @@ func Open(treeRoot string, clean bool) (err error) {
return fmt.Errorf("%w: could not resolve local path for the cache", err)
}
// force a clean of the cache if specified
if clean {
err := os.Remove(path)
if errors.Is(err, os.ErrNotExist) {
err = nil
} else if err != nil {
return fmt.Errorf("%w: failed to clear cache", err)
}
}
db, err = bolt.Open(path, 0o600, nil)
if err != nil {
return fmt.Errorf("%w: failed to open cache", err)
}
err = db.Update(func(tx *bolt.Tx) error {
_, err := tx.CreateBucket([]byte(modifiedBucket))
if errors.Is(err, bolt.ErrBucketExists) {
return nil
// create bucket for tracking paths
pathsBucket, err := tx.CreateBucketIfNotExists([]byte(pathsBucket))
if err != nil {
return fmt.Errorf("%w: failed to create paths bucket", err)
}
return err
// create bucket for tracking formatters
formattersBucket, err := tx.CreateBucketIfNotExists([]byte(formattersBucket))
if err != nil {
return fmt.Errorf("%w: failed to create formatters bucket", err)
}
// check for any newly configured or modified formatters
for name, formatter := range formatters {
stat, err := os.Lstat(formatter.Executable())
brianmcgee marked this conversation as resolved
Review

Probably we care more about which executable is running in case it's a symlink. I would use os.Stat here. And maybe store the resolved path as well?

Probably we care more about which executable is running in case it's a symlink. I would use `os.Stat` here. And maybe store the resolved path as well?
Review

I went with os.Lstat here because gofmt resolved to a symlink which was then another relative symlink within the nix output path. This ensures we recursively resolve down to the underlying binary.

For paths that are being formatted we already skip symlinks when traversing the filesystem

internal/cache/cache.go Lines 102 to 115 in ada9a72a7e
return filepath.Walk(root, func(path string, info fs.FileInfo, err error) error {
if err != nil {
return fmt.Errorf("%w: failed to walk path", err)
} else if ctx.Err() != nil {
return ctx.Err()
} else if info.IsDir() {
// todo what about symlinks?
return nil
}
if info.Mode()&os.ModeSymlink == os.ModeSymlink {
// skip symlinks
return nil
}

I went with `os.Lstat` here because `gofmt` resolved to a symlink which was then another relative symlink within the nix output path. This ensures we recursively resolve down to the underlying binary. For paths that are being formatted we already skip symlinks when traversing the filesystem https://git.numtide.com/numtide/treefmt/src/commit/ada9a72a7e56ffea11af5899cb711b6bb66a7293/internal/cache/cache.go#L102-L115
if err != nil {
return fmt.Errorf("%w: failed to state formatter executable", err)
}
entry, err := getEntry(formattersBucket, name)
if err != nil {
return fmt.Errorf("%w: failed to retrieve entry for formatter", err)
}
clean = clean || entry == nil || !(entry.Size == stat.Size() && entry.Modified == stat.ModTime())
l.Debug(
"checking if formatter has changed",
"name", name,
"clean", clean,
"entry", entry,
"stat", stat,
)
// record formatters info
entry = &Entry{
Size: stat.Size(),
Modified: stat.ModTime(),
}
brianmcgee marked this conversation as resolved
Review

This could be seen as a "CacheKey" or something like that. If any of the values have changed, invalidate. I know this is what you are doing already; just wondering about semantics a bit.

This could be seen as a "CacheKey" or something like that. If any of the values have changed, invalidate. I know this is what you are doing already; just wondering about semantics a bit.
Review

I currently think of it as a cache.Entry given the package it's within, as it's more of a Cache Entry for me than a Cache Key since it's not being used as the key in bolt.

I currently think of it as a `cache.Entry` given the package it's within, as it's more of a Cache Entry for me than a Cache Key since it's not being used as the key in bolt.
if err = putEntry(formattersBucket, name, entry); err != nil {
return fmt.Errorf("%w: failed to write formatter entry", err)
}
}
// check for any removed formatters
if err = formattersBucket.ForEach(func(key []byte, _ []byte) error {
_, ok := formatters[string(key)]
if !ok {
// remove the formatter entry from the cache
if err = formattersBucket.Delete(key); err != nil {
return fmt.Errorf("%w: failed to remove formatter entry", err)
}
// indicate a clean is required
clean = true
}
return nil
}); err != nil {
return fmt.Errorf("%w: failed to check for removed formatters", err)
}
if clean {
// remove all path entries
c := pathsBucket.Cursor()
for k, v := c.First(); !(k == nil && v == nil); k, v = c.Next() {
if err = c.Delete(); err != nil {
return fmt.Errorf("%w: failed to remove path entry", err)
}
}
}
return nil
})
return
@ -93,11 +155,24 @@ func getEntry(bucket *bolt.Bucket, path string) (*Entry, error) {
}
}
// putEntry is a helper for writing cache entries into bolt.
func putEntry(bucket *bolt.Bucket, path string, entry *Entry) error {
bytes, err := msgpack.Marshal(entry)
if err != nil {
return fmt.Errorf("%w: failed to marshal cache entry", err)
}
if err = bucket.Put([]byte(path), bytes); err != nil {
return fmt.Errorf("%w: failed to put cache entry", err)
}
return nil
}
// ChangeSet is used to walk a filesystem, starting at root, and outputting any new or changed paths using pathsCh.
// It determines if a path is new or has changed by comparing against cache entries.
func ChangeSet(ctx context.Context, root string, pathsCh chan<- string) error {
return db.Update(func(tx *bolt.Tx) error {
bucket := tx.Bucket([]byte(modifiedBucket))
bucket := tx.Bucket([]byte(pathsBucket))
return filepath.Walk(root, func(path string, info fs.FileInfo, err error) error {
if err != nil {
@ -142,13 +217,9 @@ func Update(paths []string) (int, error) {
var changes int
return changes, db.Update(func(tx *bolt.Tx) error {
bucket := tx.Bucket([]byte(modifiedBucket))
bucket := tx.Bucket([]byte(pathsBucket))
for _, path := range paths {
if path == "" {
continue
}
cached, err := getEntry(bucket, path)
if err != nil {
return err
@ -166,18 +237,13 @@ func Update(paths []string) (int, error) {
continue
}
cacheInfo := Entry{
entry := Entry{
Size: pathInfo.Size(),
Modified: pathInfo.ModTime(),
}
bytes, err := msgpack.Marshal(cacheInfo)
if err != nil {
return fmt.Errorf("%w: failed to marshal mod time", err)
}
if err = bucket.Put([]byte(path), bytes); err != nil {
return fmt.Errorf("%w: failed to put mode time", err)
if err = putEntry(bucket, path, &entry); err != nil {
return err
}
}

View File

@ -2,6 +2,7 @@ package cli
import (
"context"
"errors"
"fmt"
"os"
"os/signal"
@ -71,7 +72,7 @@ func (f *Format) Run() error {
}
err = formatter.Init(name, globalExcludes)
if err == format.ErrFormatterNotFound && Cli.AllowMissingFormatter {
if errors.Is(err, format.ErrFormatterNotFound) && Cli.AllowMissingFormatter {
l.Debugf("formatter not found: %v", name)
// remove this formatter
delete(cfg.Formatters, name)
@ -82,7 +83,7 @@ func (f *Format) Run() error {
ctx = format.RegisterFormatters(ctx, cfg.Formatters)
if err = cache.Open(Cli.TreeRoot, Cli.ClearCache); err != nil {
if err = cache.Open(Cli.TreeRoot, Cli.ClearCache, cfg.Formatters); err != nil {
return err
}
@ -110,12 +111,15 @@ func (f *Format) Run() error {
eg.Go(func() error {
batchSize := 1024
batch := make([]string, batchSize)
batch = batch[:0]
brianmcgee marked this conversation as resolved
Review

Aren't those two lines equivalent to batch := []string{}?

Aren't those two lines equivalent to `batch := []string{}`?
Review

What I believe I'm doing here is creating an instance of batch with a pre-initialised backing array of batchSize but resetting the slice's length to 0 so that subsequent logic looking at len(batch) and range batch all work correctly.

Looking at it with a debugger seems to confirm it's doing what I think it's doing, with the capacity being set to 1024 but length matching the latest batch to process.

What I _believe_ I'm doing here is creating an instance of `batch` with a pre-initialised backing array of `batchSize` but resetting the slice's length to 0 so that subsequent logic looking at `len(batch)` and `range batch` all work correctly. Looking at it with a debugger seems to confirm it's doing what I think it's doing, with the capacity being set to `1024` but length matching the latest batch to process.
var pending, completed, changes int
LOOP:
for {
select {
case <-ctx.Done():
return ctx.Err()
case _, ok := <-pendingCh:
if ok {
pending += 1

View File

@ -2,6 +2,8 @@ package cli
import (
"fmt"
"os"
"os/exec"
"testing"
"git.numtide.com/numtide/treefmt/internal/test"
@ -54,28 +56,28 @@ func TestSpecifyingFormatters(t *testing.T) {
},
})
out, err := cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir)
out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
as.Contains(string(out), "3 files changed")
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "elm,nix")
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "elm,nix")
as.NoError(err)
as.Contains(string(out), "2 files changed")
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "ruby,nix")
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "ruby,nix")
as.NoError(err)
as.Contains(string(out), "2 files changed")
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "nix")
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "nix")
as.NoError(err)
as.Contains(string(out), "1 files changed")
// test bad names
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "foo")
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "foo")
as.Errorf(err, "formatter not found in config: foo")
out, err = cmd(t, "--clear-cache", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "bar,foo")
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "bar,foo")
as.Errorf(err, "formatter not found in config: bar")
}
@ -150,3 +152,139 @@ func TestIncludesAndExcludes(t *testing.T) {
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 2))
}
func TestCache(t *testing.T) {
as := require.New(t)
tempDir := test.TempExamples(t)
configPath := tempDir + "/echo.toml"
// test without any excludes
config := format.Config{
Formatters: map[string]*format.Formatter{
"echo": {
Command: "echo",
Includes: []string{"*"},
},
},
}
test.WriteConfig(t, configPath, config)
out, err := cmd(t, "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 29))
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
as.Contains(string(out), "0 files changed")
}
func TestBustCacheOnFormatterChange(t *testing.T) {
as := require.New(t)
tempDir := test.TempExamples(t)
configPath := tempDir + "/echo.toml"
// symlink some formatters into temp dir, so we can mess with their mod times
binPath := tempDir + "/bin"
as.NoError(os.Mkdir(binPath, 0o755))
binaries := []string{"black", "elm-format", "gofmt"}
for _, name := range binaries {
src, err := exec.LookPath(name)
as.NoError(err)
as.NoError(os.Symlink(src, binPath+"/"+name))
}
// prepend our test bin directory to PATH
as.NoError(os.Setenv("PATH", binPath+":"+os.Getenv("PATH")))
// start with 2 formatters
config := format.Config{
Formatters: map[string]*format.Formatter{
"python": {
Command: "black",
Includes: []string{"*.py"},
},
"elm": {
Command: "elm-format",
Options: []string{"--yes"},
Includes: []string{"*.elm"},
},
},
}
test.WriteConfig(t, configPath, config)
args := []string{"--config-file", configPath, "--tree-root", tempDir}
out, err := cmd(t, args...)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 3))
// tweak mod time of elm formatter
as.NoError(test.RecreateSymlink(t, binPath+"/"+"elm-format"))
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 3))
// check cache is working
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), "0 files changed")
// tweak mod time of python formatter
as.NoError(test.RecreateSymlink(t, binPath+"/"+"black"))
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 3))
// check cache is working
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), "0 files changed")
// add go formatter
config.Formatters["go"] = &format.Formatter{
Command: "gofmt",
Options: []string{"-w"},
Includes: []string{"*.go"},
}
test.WriteConfig(t, configPath, config)
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 4))
// check cache is working
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), "0 files changed")
// remove python formatter
delete(config.Formatters, "python")
test.WriteConfig(t, configPath, config)
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 2))
// check cache is working
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), "0 files changed")
// remove elm formatter
delete(config.Formatters, "elm")
test.WriteConfig(t, configPath, config)
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 1))
// check cache is working
out, err = cmd(t, args...)
as.NoError(err)
as.Contains(string(out), "0 files changed")
}

View File

@ -159,7 +159,8 @@ func (f *Formatter) apply(ctx context.Context) error {
start := time.Now()
cmd := exec.CommandContext(ctx, f.Command, args...)
if _, err := cmd.CombinedOutput(); err != nil {
if out, err := cmd.CombinedOutput(); err != nil {
f.log.Debugf("\n%v", string(out))
// todo log output
return err
}

View File

@ -36,3 +36,11 @@ func TempFile(t *testing.T, path string) *os.File {
}
return file
}
func RecreateSymlink(t *testing.T, path string) error {
t.Helper()
src, err := os.Readlink(path)
require.NoError(t, err, "failed to read symlink")
require.NoError(t, os.Remove(path), "failed to remove symlink")
return os.Symlink(src, path)
}