Merge pull request 'improve caching and tracking' (#34) from feat/improve-caching into main

Reviewed-on: #34
This commit is contained in:
Brian McGee 2024-05-02 10:41:16 +00:00
commit 404b0a951a
16 changed files with 394 additions and 307 deletions

86
cache/cache.go vendored
View File

@ -5,9 +5,7 @@ import (
"crypto/sha1"
"encoding/hex"
"fmt"
"io/fs"
"os"
"path/filepath"
"runtime"
"time"
@ -57,25 +55,25 @@ func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter)
name := hex.EncodeToString(digest)
path, err := xdg.CacheFile(fmt.Sprintf("treefmt/eval-cache/%v.db", name))
if err != nil {
return fmt.Errorf("%w: could not resolve local path for the cache", err)
return fmt.Errorf("could not resolve local path for the cache: %w", err)
}
db, err = bolt.Open(path, 0o600, nil)
if err != nil {
return fmt.Errorf("%w: failed to open cache", err)
return fmt.Errorf("failed to open cache at %v: %w", path, err)
}
err = db.Update(func(tx *bolt.Tx) error {
// create bucket for tracking paths
pathsBucket, err := tx.CreateBucketIfNotExists([]byte(pathsBucket))
if err != nil {
return fmt.Errorf("%w: failed to create paths bucket", err)
return fmt.Errorf("failed to create paths bucket: %w", err)
}
// create bucket for tracking formatters
formattersBucket, err := tx.CreateBucketIfNotExists([]byte(formattersBucket))
if err != nil {
return fmt.Errorf("%w: failed to create formatters bucket", err)
return fmt.Errorf("failed to create formatters bucket: %w", err)
}
// check for any newly configured or modified formatters
@ -83,12 +81,12 @@ func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter)
stat, err := os.Lstat(formatter.Executable())
if err != nil {
return fmt.Errorf("%w: failed to state formatter executable", err)
return fmt.Errorf("failed to stat formatter executable %v: %w", formatter.Executable(), err)
}
entry, err := getEntry(formattersBucket, name)
if err != nil {
return fmt.Errorf("%w: failed to retrieve entry for formatter", err)
return fmt.Errorf("failed to retrieve cache entry for formatter %v: %w", name, err)
}
clean = clean || entry == nil || !(entry.Size == stat.Size() && entry.Modified == stat.ModTime())
@ -107,7 +105,7 @@ func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter)
}
if err = putEntry(formattersBucket, name, entry); err != nil {
return fmt.Errorf("%w: failed to write formatter entry", err)
return fmt.Errorf("failed to write cache entry for formatter %v: %w", name, err)
}
}
@ -117,14 +115,14 @@ func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter)
if !ok {
// remove the formatter entry from the cache
if err = formattersBucket.Delete(key); err != nil {
return fmt.Errorf("%w: failed to remove formatter entry", err)
return fmt.Errorf("failed to remove cache entry for formatter %v: %w", key, err)
}
// indicate a clean is required
clean = true
}
return nil
}); err != nil {
return fmt.Errorf("%w: failed to check for removed formatters", err)
return fmt.Errorf("failed to check cache for removed formatters: %w", err)
}
if clean {
@ -132,7 +130,7 @@ func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter)
c := pathsBucket.Cursor()
for k, v := c.First(); !(k == nil && v == nil); k, v = c.Next() {
if err = c.Delete(); err != nil {
return fmt.Errorf("%w: failed to remove path entry", err)
return fmt.Errorf("failed to remove path entry: %w", err)
}
}
}
@ -157,7 +155,7 @@ func getEntry(bucket *bolt.Bucket, path string) (*Entry, error) {
if b != nil {
var cached Entry
if err := msgpack.Unmarshal(b, &cached); err != nil {
return nil, fmt.Errorf("%w: failed to unmarshal cache info for path '%v'", err, path)
return nil, fmt.Errorf("failed to unmarshal cache info for path '%v': %w", path, err)
}
return &cached, nil
} else {
@ -169,18 +167,18 @@ func getEntry(bucket *bolt.Bucket, path string) (*Entry, error) {
func putEntry(bucket *bolt.Bucket, path string, entry *Entry) error {
bytes, err := msgpack.Marshal(entry)
if err != nil {
return fmt.Errorf("%w: failed to marshal cache entry", err)
return fmt.Errorf("failed to marshal cache path %v: %w", path, err)
}
if err = bucket.Put([]byte(path), bytes); err != nil {
return fmt.Errorf("%w: failed to put cache entry", err)
return fmt.Errorf("failed to put cache path %v: %w", path, err)
}
return nil
}
// ChangeSet is used to walk a filesystem, starting at root, and outputting any new or changed paths using pathsCh.
// It determines if a path is new or has changed by comparing against cache entries.
func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) error {
func ChangeSet(ctx context.Context, walker walk.Walker, filesCh chan<- *walk.File) error {
start := time.Now()
defer func() {
@ -198,24 +196,21 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
}
}()
// for quick removal of tree root from paths
relPathOffset := len(walker.Root()) + 1
return walker.Walk(ctx, func(path string, info fs.FileInfo, err error) error {
return walker.Walk(ctx, func(file *walk.File, err error) error {
select {
case <-ctx.Done():
return ctx.Err()
default:
if err != nil {
return fmt.Errorf("%w: failed to walk path", err)
} else if info.IsDir() {
return fmt.Errorf("failed to walk path: %w", err)
} else if file.Info.IsDir() {
// ignore directories
return nil
}
}
// ignore symlinks
if info.Mode()&os.ModeSymlink == os.ModeSymlink {
if file.Info.Mode()&os.ModeSymlink == os.ModeSymlink {
return nil
}
@ -224,18 +219,17 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
if tx == nil {
tx, err = db.Begin(false)
if err != nil {
return fmt.Errorf("%w: failed to open a new read tx", err)
return fmt.Errorf("failed to open a new cache read tx: %w", err)
}
bucket = tx.Bucket([]byte(pathsBucket))
}
relPath := path[relPathOffset:]
cached, err := getEntry(bucket, relPath)
cached, err := getEntry(bucket, file.RelPath)
if err != nil {
return err
}
changedOrNew := cached == nil || !(cached.Modified == info.ModTime() && cached.Size == info.Size())
changedOrNew := cached == nil || !(cached.Modified == file.Info.ModTime() && cached.Size == file.Info.Size())
stats.Add(stats.Traversed, 1)
if !changedOrNew {
@ -250,7 +244,7 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
case <-ctx.Done():
return ctx.Err()
default:
pathsCh <- relPath
filesCh <- file
}
// close the current tx if we have reached the batch size
@ -266,47 +260,35 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
}
// Update is used to record updated cache information for the specified list of paths.
func Update(treeRoot string, paths []string) (int, error) {
func Update(files []*walk.File) error {
start := time.Now()
defer func() {
logger.Infof("finished updating %v paths in %v", len(paths), time.Since(start))
logger.Infof("finished processing %v paths in %v", len(files), time.Since(start))
}()
if len(paths) == 0 {
return 0, nil
if len(files) == 0 {
return nil
}
var changes int
return changes, db.Update(func(tx *bolt.Tx) error {
return db.Update(func(tx *bolt.Tx) error {
bucket := tx.Bucket([]byte(pathsBucket))
for _, path := range paths {
cached, err := getEntry(bucket, path)
for _, f := range files {
currentInfo, err := os.Stat(f.Path)
if err != nil {
return err
}
pathInfo, err := os.Stat(filepath.Join(treeRoot, path))
if err != nil {
return err
if !(f.Info.ModTime() == currentInfo.ModTime() && f.Info.Size() == currentInfo.Size()) {
stats.Add(stats.Formatted, 1)
}
if cached == nil || !(cached.Modified == pathInfo.ModTime() && cached.Size == pathInfo.Size()) {
changes += 1
} else {
// no change to write
continue
}
stats.Add(stats.Formatted, 1)
entry := Entry{
Size: pathInfo.Size(),
Modified: pathInfo.ModTime(),
Size: currentInfo.Size(),
Modified: currentInfo.ModTime(),
}
if err = putEntry(bucket, path, &entry); err != nil {
if err = putEntry(bucket, f.RelPath, &entry); err != nil {
return err
}
}

View File

@ -26,14 +26,14 @@ type Format struct {
Stdin bool `help:"Format the context passed in via stdin"`
}
func (f *Format) Configure() {
func ConfigureLogging() {
log.SetReportTimestamp(false)
if f.Verbosity == 0 {
if Cli.Verbosity == 0 {
log.SetLevel(log.WarnLevel)
} else if f.Verbosity == 1 {
} else if Cli.Verbosity == 1 {
log.SetLevel(log.InfoLevel)
} else if f.Verbosity > 1 {
} else if Cli.Verbosity > 1 {
log.SetLevel(log.DebugLevel)
}
}

View File

@ -5,13 +5,10 @@ import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"os/signal"
"path/filepath"
"runtime"
"slices"
"sort"
"strings"
"syscall"
@ -35,19 +32,17 @@ var (
globalExcludes []glob.Glob
formatters map[string]*format.Formatter
pipelines map[string]*format.Pipeline
pathsCh chan string
processedCh chan string
filesCh chan *walk.File
processedCh chan *walk.File
ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled")
)
func (f *Format) Run() (err error) {
stats.Init()
Cli.Configure()
// create a prefixed logger
l := log.WithPrefix("format")
// ensure cache is closed on return
defer func() {
if err := cache.Close(); err != nil {
l.Errorf("failed to close cache: %v", err)
@ -55,46 +50,23 @@ func (f *Format) Run() (err error) {
}()
// read config
cfg, err := config.ReadFile(Cli.ConfigFile)
cfg, err := config.ReadFile(Cli.ConfigFile, Cli.Formatters)
if err != nil {
return fmt.Errorf("%w: failed to read config file", err)
return fmt.Errorf("failed to read config file %v: %w", Cli.ConfigFile, err)
}
// compile global exclude globs
if globalExcludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil {
return fmt.Errorf("%w: failed to compile global globs", err)
return fmt.Errorf("failed to compile global excludes: %w", err)
}
// initialise pipelines
pipelines = make(map[string]*format.Pipeline)
formatters = make(map[string]*format.Formatter)
// filter formatters
if len(Cli.Formatters) > 0 {
// first check the cli formatter list is valid
for _, name := range Cli.Formatters {
_, ok := cfg.Formatters[name]
if !ok {
return fmt.Errorf("formatter not found in config: %v", name)
}
}
// next we remove any formatter configs that were not specified
for name := range cfg.Formatters {
if !slices.Contains(Cli.Formatters, name) {
delete(cfg.Formatters, name)
}
}
}
// sort the formatter names so that, as we construct pipelines, we add formatters in a determinstic fashion. This
// ensures a deterministic order even when all priority values are the same e.g. 0
names := make([]string, 0, len(cfg.Formatters))
for name := range cfg.Formatters {
names = append(names, name)
}
sort.Strings(names)
// init formatters
for _, name := range names {
// iterate the formatters in lexicographical order
for _, name := range cfg.Names {
// init formatter
formatterCfg := cfg.Formatters[name]
formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, globalExcludes)
if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter {
@ -104,8 +76,12 @@ func (f *Format) Run() (err error) {
return fmt.Errorf("%w: failed to initialise formatter: %v", err, name)
}
// store formatter by name
formatters[name] = formatter
// If no pipeline is configured, we add the formatter to a nominal pipeline of size 1 with the key being the
// formatter's name. If a pipeline is configured, we add the formatter to a pipeline keyed by
// 'p:<pipeline_name>' in which it is sorted by priority.
if formatterCfg.Pipeline == "" {
pipeline := format.Pipeline{}
pipeline.Add(formatter)
@ -137,17 +113,20 @@ func (f *Format) Run() (err error) {
cancel()
}()
// create some groups for concurrent processing and control flow
// initialise stats collection
stats.Init()
// create an overall error group for executing high level tasks concurrently
eg, ctx := errgroup.WithContext(ctx)
// create a channel for paths to be processed
// we use a multiple of batch size here to allow for greater concurrency
pathsCh = make(chan string, BatchSize*runtime.NumCPU())
// create a channel for files needing to be processed
// we use a multiple of batch size here as a rudimentary concurrency optimization based on the host machine
filesCh = make(chan *walk.File, BatchSize*runtime.NumCPU())
// create a channel for tracking paths that have been processed
processedCh = make(chan string, cap(pathsCh))
// create a channel for files that have been processed
processedCh = make(chan *walk.File, cap(filesCh))
// start concurrent processing tasks
// start concurrent processing tasks in reverse order
eg.Go(updateCache(ctx))
eg.Go(applyFormatters(ctx))
eg.Go(walkFilesystem(ctx))
@ -156,76 +135,15 @@ func (f *Format) Run() (err error) {
return eg.Wait()
}
func walkFilesystem(ctx context.Context) func() error {
return func() error {
paths := Cli.Paths
if len(paths) == 0 && Cli.Stdin {
cwd, err := os.Getwd()
if err != nil {
return fmt.Errorf("%w: failed to determine current working directory", err)
}
// read in all the paths
scanner := bufio.NewScanner(os.Stdin)
for scanner.Scan() {
path := scanner.Text()
if !strings.HasPrefix(path, "/") {
// append the cwd
path = filepath.Join(cwd, path)
}
paths = append(paths, path)
}
}
walker, err := walk.New(Cli.Walk, Cli.TreeRoot, paths)
if err != nil {
return fmt.Errorf("failed to create walker: %w", err)
}
defer close(pathsCh)
if Cli.NoCache {
return walker.Walk(ctx, func(path string, info fs.FileInfo, err error) error {
select {
case <-ctx.Done():
return ctx.Err()
default:
// ignore symlinks and directories
if !(info.IsDir() || info.Mode()&os.ModeSymlink == os.ModeSymlink) {
stats.Add(stats.Traversed, 1)
stats.Add(stats.Emitted, 1)
pathsCh <- path
}
return nil
}
})
}
if err = cache.ChangeSet(ctx, walker, pathsCh); err != nil {
return fmt.Errorf("failed to generate change set: %w", err)
}
return nil
}
}
func updateCache(ctx context.Context) func() error {
return func() error {
batch := make([]string, 0, BatchSize)
var changes int
// used to batch updates for more efficient txs
batch := make([]*walk.File, 0, BatchSize)
// apply a batch
processBatch := func() error {
if Cli.NoCache {
changes += len(batch)
} else {
count, err := cache.Update(Cli.TreeRoot, batch)
if err != nil {
return err
}
changes += count
if err := cache.Update(batch); err != nil {
return err
}
batch = batch[:0]
return nil
@ -234,13 +152,17 @@ func updateCache(ctx context.Context) func() error {
LOOP:
for {
select {
// detect ctx cancellation
case <-ctx.Done():
return ctx.Err()
case path, ok := <-processedCh:
// respond to processed files
case file, ok := <-processedCh:
if !ok {
// channel has been closed, no further files to process
break LOOP
}
batch = append(batch, path)
// append to batch and process if we have enough
batch = append(batch, file)
if len(batch) == BatchSize {
if err := processBatch(); err != nil {
return err
@ -254,48 +176,124 @@ func updateCache(ctx context.Context) func() error {
return err
}
if Cli.FailOnChange && changes != 0 {
// if fail on change has been enabled, check that no files were actually formatted, throwing an error if so
if Cli.FailOnChange && stats.Value(stats.Formatted) != 0 {
return ErrFailOnChange
}
// print stats to stdout
stats.Print()
return nil
}
}
func walkFilesystem(ctx context.Context) func() error {
return func() error {
paths := Cli.Paths
// we read paths from stdin if the cli flag has been set and no paths were provided as cli args
if len(paths) == 0 && Cli.Stdin {
// determine the current working directory
cwd, err := os.Getwd()
if err != nil {
return fmt.Errorf("failed to determine current working directory: %w", err)
}
// read in all the paths
scanner := bufio.NewScanner(os.Stdin)
for scanner.Scan() {
path := scanner.Text()
if !strings.HasPrefix(path, "/") {
// append the cwd
path = filepath.Join(cwd, path)
}
// append the fully qualified path to our paths list
paths = append(paths, path)
}
}
// create a filesystem walker
walker, err := walk.New(Cli.Walk, Cli.TreeRoot, paths)
if err != nil {
return fmt.Errorf("failed to create walker: %w", err)
}
// close the files channel when we're done walking the file system
defer close(filesCh)
// if no cache has been configured, we invoke the walker directly
if Cli.NoCache {
return walker.Walk(ctx, func(file *walk.File, err error) error {
select {
case <-ctx.Done():
return ctx.Err()
default:
// ignore symlinks and directories
if !(file.Info.IsDir() || file.Info.Mode()&os.ModeSymlink == os.ModeSymlink) {
stats.Add(stats.Traversed, 1)
stats.Add(stats.Emitted, 1)
filesCh <- file
}
return nil
}
})
}
// otherwise we pass the walker to the cache and have it generate files for processing based on whether or not
// they have been added/changed since the last invocation
if err = cache.ChangeSet(ctx, walker, filesCh); err != nil {
return fmt.Errorf("failed to generate change set: %w", err)
}
return nil
}
}
func applyFormatters(ctx context.Context) func() error {
// create our own errgroup for concurrent formatting tasks
fg, ctx := errgroup.WithContext(ctx)
batches := make(map[string][]string)
tryApply := func(key string, path string) {
batch, ok := batches[key]
if !ok {
batch = make([]string, 0, BatchSize)
}
batch = append(batch, path)
batches[key] = batch
// pre-initialise batches keyed by pipeline
batches := make(map[string][]*walk.File)
for key := range pipelines {
batches[key] = make([]*walk.File, 0, BatchSize)
}
// for a given pipeline key, add the provided file to the current batch and trigger a format if the batch size has
// been reached
tryApply := func(key string, file *walk.File) {
// append to batch
batches[key] = append(batches[key], file)
// check if the batch is full
batch := batches[key]
if len(batch) == BatchSize {
// get the pipeline
pipeline := pipelines[key]
// copy the batch
paths := make([]string, len(batch))
copy(paths, batch)
files := make([]*walk.File, len(batch))
copy(files, batch)
// apply to the pipeline
fg.Go(func() error {
if err := pipeline.Apply(ctx, paths); err != nil {
if err := pipeline.Apply(ctx, files); err != nil {
return err
}
for _, path := range paths {
for _, path := range files {
processedCh <- path
}
return nil
})
// reset the batch
batches[key] = batch[:0]
}
}
// format any partial batches
flushBatches := func() {
for key, pipeline := range pipelines {
@ -322,17 +320,21 @@ func applyFormatters(ctx context.Context) func() error {
close(processedCh)
}()
for path := range pathsCh {
// iterate the files channel, checking if any pipeline wants it, and attempting to apply if so.
for file := range filesCh {
var matched bool
for key, pipeline := range pipelines {
if !pipeline.Wants(path) {
if !pipeline.Wants(file) {
continue
}
matched = true
tryApply(key, path)
tryApply(key, file)
}
if matched {
stats.Add(stats.Matched, 1)
} else {
// no match, so we send it direct to the processed channel
processedCh <- file
}
}

View File

@ -51,42 +51,42 @@ func TestSpecifyingFormatters(t *testing.T) {
test.WriteConfig(t, configPath, config2.Config{
Formatters: map[string]*config2.Formatter{
"elm": {
Command: "echo",
Command: "touch",
Includes: []string{"*.elm"},
},
"nix": {
Command: "echo",
Command: "touch",
Includes: []string{"*.nix"},
},
"ruby": {
Command: "echo",
Command: "touch",
Includes: []string{"*.rb"},
},
},
})
out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
_, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
assertFormatted(t, as, out, 3)
assertStats(t, as, 31, 31, 3, 3)
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "elm,nix")
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "elm,nix")
as.NoError(err)
assertFormatted(t, as, out, 2)
assertStats(t, as, 31, 31, 2, 2)
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "ruby,nix")
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "ruby,nix")
as.NoError(err)
assertFormatted(t, as, out, 2)
assertStats(t, as, 31, 31, 2, 2)
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "nix")
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "nix")
as.NoError(err)
assertFormatted(t, as, out, 1)
assertStats(t, as, 31, 31, 1, 1)
// test bad names
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "foo")
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "foo")
as.Errorf(err, "formatter not found in config: foo")
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "bar,foo")
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--formatters", "bar,foo")
as.Errorf(err, "formatter not found in config: bar")
}
@ -94,7 +94,7 @@ func TestIncludesAndExcludes(t *testing.T) {
as := require.New(t)
tempDir := test.TempExamples(t)
configPath := tempDir + "/echo.toml"
configPath := tempDir + "/touch.toml"
// test without any excludes
cfg := config2.Config{
@ -107,25 +107,25 @@ func TestIncludesAndExcludes(t *testing.T) {
}
test.WriteConfig(t, configPath, cfg)
out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
_, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
assertFormatted(t, as, out, 31)
assertStats(t, as, 31, 31, 31, 0)
// globally exclude nix files
cfg.Global.Excludes = []string{"*.nix"}
test.WriteConfig(t, configPath, cfg)
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
assertFormatted(t, as, out, 30)
assertStats(t, as, 31, 31, 30, 0)
// add haskell files to the global exclude
cfg.Global.Excludes = []string{"*.nix", "*.hs"}
test.WriteConfig(t, configPath, cfg)
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
assertFormatted(t, as, out, 24)
assertStats(t, as, 31, 31, 24, 0)
echo := cfg.Formatters["echo"]
@ -133,40 +133,40 @@ func TestIncludesAndExcludes(t *testing.T) {
echo.Excludes = []string{"*.py"}
test.WriteConfig(t, configPath, cfg)
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
assertFormatted(t, as, out, 22)
assertStats(t, as, 31, 31, 22, 0)
// remove go files from the echo formatter
echo.Excludes = []string{"*.py", "*.go"}
test.WriteConfig(t, configPath, cfg)
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
assertFormatted(t, as, out, 21)
assertStats(t, as, 31, 31, 21, 0)
// adjust the includes for echo to only include elm files
echo.Includes = []string{"*.elm"}
test.WriteConfig(t, configPath, cfg)
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
assertFormatted(t, as, out, 1)
assertStats(t, as, 31, 31, 1, 0)
// add js files to echo formatter
echo.Includes = []string{"*.elm", "*.js"}
test.WriteConfig(t, configPath, cfg)
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
assertFormatted(t, as, out, 2)
assertStats(t, as, 31, 31, 2, 0)
}
func TestCache(t *testing.T) {
as := require.New(t)
tempDir := test.TempExamples(t)
configPath := tempDir + "/echo.toml"
configPath := tempDir + "/touch.toml"
// test without any excludes
cfg := config2.Config{
@ -181,7 +181,7 @@ func TestCache(t *testing.T) {
test.WriteConfig(t, configPath, cfg)
out, err := cmd(t, "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
assertFormatted(t, as, out, 31)
assertStats(t, as, 31, 31, 31, 0)
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
@ -190,7 +190,7 @@ func TestCache(t *testing.T) {
// clear cache
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "-c")
as.NoError(err)
assertFormatted(t, as, out, 31)
assertStats(t, as, 31, 31, 31, 0)
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
@ -199,7 +199,7 @@ func TestCache(t *testing.T) {
// clear cache
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "-c")
as.NoError(err)
assertFormatted(t, as, out, 31)
assertStats(t, as, 31, 31, 31, 0)
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
@ -208,7 +208,7 @@ func TestCache(t *testing.T) {
// no cache
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "--no-cache")
as.NoError(err)
assertStats(t, as, out, 31, 31, 31, 0)
assertStats(t, as, 31, 31, 31, 0)
}
func TestChangeWorkingDirectory(t *testing.T) {
@ -240,22 +240,22 @@ func TestChangeWorkingDirectory(t *testing.T) {
// by default, we look for ./treefmt.toml and use the cwd for the tree root
// this should fail if the working directory hasn't been changed first
out, err := cmd(t, "-C", tempDir)
_, err = cmd(t, "-C", tempDir)
as.NoError(err)
assertFormatted(t, as, out, 31)
assertStats(t, as, 31, 31, 31, 0)
}
func TestFailOnChange(t *testing.T) {
as := require.New(t)
tempDir := test.TempExamples(t)
configPath := tempDir + "/echo.toml"
configPath := tempDir + "/touch.toml"
// test without any excludes
cfg := config2.Config{
Formatters: map[string]*config2.Formatter{
"echo": {
Command: "echo",
"touch": {
Command: "touch",
Includes: []string{"*"},
},
},
@ -270,7 +270,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) {
as := require.New(t)
tempDir := test.TempExamples(t)
configPath := tempDir + "/echo.toml"
configPath := tempDir + "/touch.toml"
// symlink some formatters into temp dir, so we can mess with their mod times
binPath := tempDir + "/bin"
@ -304,33 +304,33 @@ func TestBustCacheOnFormatterChange(t *testing.T) {
test.WriteConfig(t, configPath, cfg)
args := []string{"--config-file", configPath, "--tree-root", tempDir}
out, err := cmd(t, args...)
_, err := cmd(t, args...)
as.NoError(err)
assertFormatted(t, as, out, 3)
assertStats(t, as, 31, 31, 3, 0)
// tweak mod time of elm formatter
as.NoError(test.RecreateSymlink(t, binPath+"/"+"elm-format"))
out, err = cmd(t, args...)
_, err = cmd(t, args...)
as.NoError(err)
assertFormatted(t, as, out, 3)
assertStats(t, as, 31, 31, 3, 0)
// check cache is working
out, err = cmd(t, args...)
_, err = cmd(t, args...)
as.NoError(err)
assertFormatted(t, as, out, 0)
assertStats(t, as, 31, 0, 0, 0)
// tweak mod time of python formatter
as.NoError(test.RecreateSymlink(t, binPath+"/"+"black"))
out, err = cmd(t, args...)
_, err = cmd(t, args...)
as.NoError(err)
assertFormatted(t, as, out, 3)
assertStats(t, as, 31, 31, 3, 0)
// check cache is working
out, err = cmd(t, args...)
_, err = cmd(t, args...)
as.NoError(err)
assertFormatted(t, as, out, 0)
assertStats(t, as, 31, 0, 0, 0)
// add go formatter
cfg.Formatters["go"] = &config2.Formatter{
@ -340,40 +340,40 @@ func TestBustCacheOnFormatterChange(t *testing.T) {
}
test.WriteConfig(t, configPath, cfg)
out, err = cmd(t, args...)
_, err = cmd(t, args...)
as.NoError(err)
assertFormatted(t, as, out, 4)
assertStats(t, as, 31, 31, 4, 0)
// check cache is working
out, err = cmd(t, args...)
_, err = cmd(t, args...)
as.NoError(err)
assertFormatted(t, as, out, 0)
assertStats(t, as, 31, 0, 0, 0)
// remove python formatter
delete(cfg.Formatters, "python")
test.WriteConfig(t, configPath, cfg)
out, err = cmd(t, args...)
_, err = cmd(t, args...)
as.NoError(err)
assertFormatted(t, as, out, 2)
assertStats(t, as, 31, 31, 2, 0)
// check cache is working
out, err = cmd(t, args...)
_, err = cmd(t, args...)
as.NoError(err)
assertFormatted(t, as, out, 0)
assertStats(t, as, 31, 0, 0, 0)
// remove elm formatter
delete(cfg.Formatters, "elm")
test.WriteConfig(t, configPath, cfg)
out, err = cmd(t, args...)
_, err = cmd(t, args...)
as.NoError(err)
assertFormatted(t, as, out, 1)
assertStats(t, as, 31, 31, 1, 0)
// check cache is working
out, err = cmd(t, args...)
_, err = cmd(t, args...)
as.NoError(err)
assertFormatted(t, as, out, 0)
assertStats(t, as, 31, 0, 0, 0)
}
func TestGitWorktree(t *testing.T) {
@ -407,28 +407,28 @@ func TestGitWorktree(t *testing.T) {
wt, err := repo.Worktree()
as.NoError(err, "failed to get git worktree")
run := func(formatted int) {
run := func(traversed int, emitted int, matched int, formatted int) {
out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err)
assertFormatted(t, as, out, formatted)
}
// run before adding anything to the worktree
run(0)
run(0, 0, 0, 0)
// add everything to the worktree
as.NoError(wt.AddGlob("."))
as.NoError(err)
run(31)
run(31, 31, 31, 0)
// remove python directory
as.NoError(wt.RemoveGlob("python/*"))
run(28)
run(28, 28, 28, 0)
// walk with filesystem instead of git
out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--walk", "filesystem")
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--walk", "filesystem")
as.NoError(err)
assertFormatted(t, as, out, 59)
assertStats(t, as, 59, 59, 59, 0)
}
func TestPathsArg(t *testing.T) {
@ -461,17 +461,17 @@ func TestPathsArg(t *testing.T) {
test.WriteConfig(t, configPath, cfg)
// without any path args
out, err := cmd(t, "-C", tempDir)
_, err = cmd(t, "-C", tempDir)
as.NoError(err)
assertFormatted(t, as, out, 31)
assertStats(t, as, 31, 31, 31, 0)
// specify some explicit paths
out, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Foo.hs")
_, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Foo.hs")
as.NoError(err)
assertFormatted(t, as, out, 2)
assertStats(t, as, 4, 4, 4, 0)
// specify a bad path
out, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Bar.hs")
_, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Bar.hs")
as.ErrorContains(err, "no such file or directory")
}
@ -526,9 +526,9 @@ go/main.go
_, _ = stdin.Seek(0, 0)
}()
out, err := cmd(t, "-C", tempDir, "--stdin")
_, err = cmd(t, "-C", tempDir, "--stdin")
as.NoError(err)
assertFormatted(t, as, out, 3)
assertStats(t, as, 6, 6, 6, 0)
}
func TestDeterministicOrderingInPipeline(t *testing.T) {

View File

@ -7,6 +7,8 @@ import (
"path/filepath"
"testing"
"git.numtide.com/numtide/treefmt/stats"
"git.numtide.com/numtide/treefmt/test"
"github.com/alecthomas/kong"
@ -55,12 +57,12 @@ func cmd(t *testing.T, args ...string) ([]byte, error) {
// reset and read the temporary output
if _, err = tempOut.Seek(0, 0); err != nil {
return nil, fmt.Errorf("%w: failed to reset temp output for reading", err)
return nil, fmt.Errorf("failed to reset temp output for reading: %w", err)
}
out, err := io.ReadAll(tempOut)
if err != nil {
return nil, fmt.Errorf("%w: failed to read temp output", err)
return nil, fmt.Errorf("failed to read temp output: %w", err)
}
// swap outputs back
@ -70,12 +72,12 @@ func cmd(t *testing.T, args ...string) ([]byte, error) {
return out, nil
}
func assertStats(t *testing.T, as *require.Assertions, output []byte, traversed int32, emitted int32, matched int32, formatted int32) {
func assertStats(t *testing.T, as *require.Assertions, traversed int32, emitted int32, matched int32, formatted int32) {
t.Helper()
as.Contains(string(output), fmt.Sprintf("traversed %d files", traversed))
as.Contains(string(output), fmt.Sprintf("emitted %d files", emitted))
as.Contains(string(output), fmt.Sprintf("matched %d files", matched))
as.Contains(string(output), fmt.Sprintf("formatted %d files", formatted))
as.Equal(traversed, stats.Value(stats.Traversed))
as.Equal(emitted, stats.Value(stats.Emitted))
as.Equal(matched, stats.Value(stats.Matched))
as.Equal(formatted, stats.Value(stats.Formatted))
}
func assertFormatted(t *testing.T, as *require.Assertions, output []byte, count int) {

View File

@ -1,6 +1,11 @@
package config
import "github.com/BurntSushi/toml"
import (
"fmt"
"sort"
"github.com/BurntSushi/toml"
)
// Config is used to represent the list of configured Formatters.
type Config struct {
@ -8,11 +13,39 @@ type Config struct {
// Excludes is an optional list of glob patterns used to exclude certain files from all formatters.
Excludes []string
}
Names []string `toml:"-"`
Formatters map[string]*Formatter `toml:"formatter"`
}
// ReadFile reads from path and unmarshals toml into a Config instance.
func ReadFile(path string) (cfg *Config, err error) {
_, err = toml.DecodeFile(path, &cfg)
func ReadFile(path string, names []string) (cfg *Config, err error) {
if _, err = toml.DecodeFile(path, &cfg); err != nil {
return nil, fmt.Errorf("failed to decode config file: %w", err)
}
// filter formatters based on provided names
if len(names) > 0 {
filtered := make(map[string]*Formatter)
// check if the provided names exist in the config
for _, name := range names {
formatterCfg, ok := cfg.Formatters[name]
if !ok {
return nil, fmt.Errorf("formatter %v not found in config", name)
}
filtered[name] = formatterCfg
}
// updated formatters
cfg.Formatters = filtered
}
// sort the formatter names so that, as we construct pipelines, we add formatters in a determinstic fashion. This
// ensures a deterministic order even when all priority values are the same e.g. 0
for name := range cfg.Formatters {
cfg.Names = append(cfg.Names, name)
}
sort.Strings(cfg.Names)
return
}

View File

@ -9,7 +9,7 @@ import (
func TestReadConfigFile(t *testing.T) {
as := require.New(t)
cfg, err := ReadFile("../test/examples/treefmt.toml")
cfg, err := ReadFile("../test/examples/treefmt.toml", nil)
as.NoError(err, "failed to read config file")
as.NotNil(cfg)

View File

@ -8,6 +8,8 @@ import (
"os/exec"
"time"
"git.numtide.com/numtide/treefmt/walk"
"git.numtide.com/numtide/treefmt/config"
"github.com/charmbracelet/log"
@ -38,7 +40,7 @@ func (f *Formatter) Executable() string {
return f.executable
}
func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) error {
func (f *Formatter) Apply(ctx context.Context, files []*walk.File, filter bool) error {
start := time.Now()
// construct args, starting with config
@ -52,9 +54,9 @@ func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) erro
f.batch = f.batch[:0]
// filter paths
for _, path := range paths {
if f.Wants(path) {
f.batch = append(f.batch, path)
for _, file := range files {
if f.Wants(file) {
f.batch = append(f.batch, file.RelPath)
}
}
@ -67,12 +69,14 @@ func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) erro
args = append(args, f.batch...)
} else {
// exit early if nothing to process
if len(paths) == 0 {
if len(files) == 0 {
return nil
}
// append paths to the args
args = append(args, paths...)
for _, file := range files {
args = append(args, file.RelPath)
}
}
// execute the command
@ -83,22 +87,22 @@ func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) erro
if len(out) > 0 {
_, _ = fmt.Fprintf(os.Stderr, "%s error:\n%s\n", f.name, out)
}
return fmt.Errorf("%w: formatter %s failed to apply", err, f.name)
return fmt.Errorf("formatter %s failed to apply: %w", f.name, err)
}
//
f.log.Infof("%v files processed in %v", len(paths), time.Now().Sub(start))
f.log.Infof("%v files processed in %v", len(files), time.Now().Sub(start))
return nil
}
// Wants is used to test if a Formatter wants a path based on it's configured Includes and Excludes patterns.
// Returns true if the Formatter should be applied to path, false otherwise.
func (f *Formatter) Wants(path string) bool {
match := !PathMatches(path, f.excludes) && PathMatches(path, f.includes)
func (f *Formatter) Wants(file *walk.File) bool {
match := !PathMatches(file.RelPath, f.excludes) && PathMatches(file.RelPath, f.includes)
if match {
f.log.Debugf("match: %v", path)
f.log.Debugf("match: %v", file)
}
return match
}
@ -107,7 +111,7 @@ func (f *Formatter) Wants(path string) bool {
func NewFormatter(
name string,
treeRoot string,
config *config.Formatter,
cfg *config.Formatter,
globalExcludes []glob.Glob,
) (*Formatter, error) {
var err error
@ -116,11 +120,11 @@ func NewFormatter(
// capture config and the formatter's name
f.name = name
f.config = config
f.config = cfg
f.workingDir = treeRoot
// test if the formatter is available
executable, err := exec.LookPath(config.Command)
executable, err := exec.LookPath(cfg.Command)
if errors.Is(err, exec.ErrNotFound) {
return nil, ErrCommandNotFound
} else if err != nil {
@ -129,20 +133,20 @@ func NewFormatter(
f.executable = executable
// initialise internal state
if config.Pipeline == "" {
if cfg.Pipeline == "" {
f.log = log.WithPrefix(fmt.Sprintf("format | %s", name))
} else {
f.log = log.WithPrefix(fmt.Sprintf("format | %s[%s]", config.Pipeline, name))
f.log = log.WithPrefix(fmt.Sprintf("format | %s[%s]", cfg.Pipeline, name))
}
f.includes, err = CompileGlobs(config.Includes)
f.includes, err = CompileGlobs(cfg.Includes)
if err != nil {
return nil, fmt.Errorf("%w: formatter '%v' includes", err, f.name)
return nil, fmt.Errorf("failed to compile formatter '%v' includes: %w", f.name, err)
}
f.excludes, err = CompileGlobs(config.Excludes)
f.excludes, err = CompileGlobs(cfg.Excludes)
if err != nil {
return nil, fmt.Errorf("%w: formatter '%v' excludes", err, f.name)
return nil, fmt.Errorf("failed to compile formatter '%v' excludes: %w", f.name, err)
}
f.excludes = append(f.excludes, globalExcludes...)

View File

@ -13,7 +13,7 @@ func CompileGlobs(patterns []string) ([]glob.Glob, error) {
for i, pattern := range patterns {
g, err := glob.Compile(pattern)
if err != nil {
return nil, fmt.Errorf("%w: failed to compile include pattern '%v'", err, pattern)
return nil, fmt.Errorf("failed to compile include pattern '%v': %w", pattern, err)
}
globs[i] = g
}

View File

@ -3,6 +3,8 @@ package format
import (
"context"
"slices"
"git.numtide.com/numtide/treefmt/walk"
)
type Pipeline struct {
@ -17,7 +19,7 @@ func (p *Pipeline) Add(f *Formatter) {
})
}
func (p *Pipeline) Wants(path string) bool {
func (p *Pipeline) Wants(path *walk.File) bool {
var match bool
for _, f := range p.sequence {
match = f.Wants(path)
@ -28,7 +30,7 @@ func (p *Pipeline) Wants(path string) bool {
return match
}
func (p *Pipeline) Apply(ctx context.Context, paths []string) error {
func (p *Pipeline) Apply(ctx context.Context, paths []*walk.File) error {
for _, f := range p.sequence {
if err := f.Apply(ctx, paths, len(p.sequence) > 1); err != nil {
return err

View File

@ -36,5 +36,6 @@ func main() {
}
ctx := kong.Parse(&cli.Cli)
cli.ConfigureLogging()
ctx.FatalIfErrorf(ctx.Run())
}

View File

@ -2,6 +2,7 @@
[formatter.deadnix]
command = "deadnix"
options = ["--edit"]
includes = ["*.nix"]
pipeline = "nix"
priority = 1

View File

@ -1,3 +1,3 @@
[formatter.echo]
command = "echo"
command = "touch"
includes = [ "*.*" ]

View File

@ -2,6 +2,7 @@ package walk
import (
"context"
"io/fs"
"os"
"path/filepath"
)
@ -15,18 +16,42 @@ func (f filesystemWalker) Root() string {
return f.root
}
func (f filesystemWalker) Walk(_ context.Context, fn filepath.WalkFunc) error {
func (f filesystemWalker) Walk(_ context.Context, fn WalkFunc) error {
relPathOffset := len(f.root) + 1
relPathFn := func(path string) (relPath string) {
if len(path) >= relPathOffset {
relPath = path[relPathOffset:]
}
return
}
walkFn := func(path string, info fs.FileInfo, err error) error {
file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
}
return fn(&file, err)
}
if len(f.paths) == 0 {
return filepath.Walk(f.root, fn)
return filepath.Walk(f.root, walkFn)
}
for _, path := range f.paths {
info, err := os.Stat(path)
if err = filepath.Walk(path, fn); err != nil {
if err = filepath.Walk(path, walkFn); err != nil {
return err
}
if err = fn(path, info, err); err != nil {
file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
}
if err = fn(&file, err); err != nil {
return err
}
}

View File

@ -24,10 +24,20 @@ func (g *gitWalker) Root() string {
return g.root
}
func (g *gitWalker) Walk(ctx context.Context, fn filepath.WalkFunc) error {
func (g *gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
// for quick relative paths
relPathOffset := len(g.root) + 1
relPathFn := func(path string) (relPath string) {
if len(path) >= relPathOffset {
relPath = path[relPathOffset:]
}
return
}
idx, err := g.repo.Storer.Index()
if err != nil {
return fmt.Errorf("%w: failed to open index", err)
return fmt.Errorf("failed to open git index: %w", err)
}
if len(g.paths) > 0 {
@ -49,7 +59,13 @@ func (g *gitWalker) Walk(ctx context.Context, fn filepath.WalkFunc) error {
return nil
}
return fn(path, info, err)
file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
}
return fn(&file, err)
})
if err != nil {
return err
@ -66,7 +82,14 @@ func (g *gitWalker) Walk(ctx context.Context, fn filepath.WalkFunc) error {
// stat the file
info, err := os.Lstat(path)
if err = fn(path, info, err); err != nil {
file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
}
if err = fn(&file, err); err != nil {
return err
}
}
@ -79,7 +102,7 @@ func (g *gitWalker) Walk(ctx context.Context, fn filepath.WalkFunc) error {
func NewGit(root string, paths []string) (Walker, error) {
repo, err := git.PlainOpen(root)
if err != nil {
return nil, fmt.Errorf("%w: failed to open git repo", err)
return nil, fmt.Errorf("failed to open git repo: %w", err)
}
return &gitWalker{root, paths, repo}, nil
}

View File

@ -3,7 +3,7 @@ package walk
import (
"context"
"fmt"
"path/filepath"
"io/fs"
)
type Type string
@ -14,9 +14,21 @@ const (
Filesystem Type = "filesystem"
)
type File struct {
Path string
RelPath string
Info fs.FileInfo
}
func (f File) String() string {
return f.Path
}
type WalkFunc func(file *File, err error) error
type Walker interface {
Root() string
Walk(ctx context.Context, fn filepath.WalkFunc) error
Walk(ctx context.Context, fn WalkFunc) error
}
func New(walkerType Type, root string, paths []string) (Walker, error) {