From 8af5b3c076017955bca062adab823add4d3f4e45 Mon Sep 17 00:00:00 2001
From: Brian McGee <brian@bmcgee.ie>
Date: Fri, 19 Apr 2024 10:57:41 +0100
Subject: [PATCH 1/6] feat: introduce concept of pipelines for better
 concurrency

Replaces the `Before` config option with an optional `Pipeline` key.

This is used to group formatters together in the order in which they are specified within the config file.

Signed-off-by: Brian McGee <brian@bmcgee.ie>
---
 cli/format.go              | 350 ++++++++++++++++++++-----------------
 cli/format_test.go         |  75 --------
 config/formatter.go        |   4 +-
 format/formatter.go        | 211 +++++-----------------
 format/pipeline.go         |  31 ++++
 test/examples/treefmt.toml |   6 +-
 6 files changed, 267 insertions(+), 410 deletions(-)
 create mode 100644 format/pipeline.go

diff --git a/cli/format.go b/cli/format.go
index 14ac16c..c248753 100644
--- a/cli/format.go
+++ b/cli/format.go
@@ -8,23 +8,38 @@ import (
 	"io/fs"
 	"os"
 	"os/signal"
-	"strings"
+	"slices"
 	"syscall"
 	"time"
 
+	"git.numtide.com/numtide/treefmt/format"
+	"github.com/gobwas/glob"
+
 	"git.numtide.com/numtide/treefmt/cache"
 	"git.numtide.com/numtide/treefmt/config"
-	format2 "git.numtide.com/numtide/treefmt/format"
 	"git.numtide.com/numtide/treefmt/walk"
 
 	"github.com/charmbracelet/log"
 	"golang.org/x/sync/errgroup"
 )
 
-var ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled")
+const (
+	BatchSize = 1024
+)
 
-func (f *Format) Run() error {
-	start := time.Now()
+var (
+	start          time.Time
+	globalExcludes []glob.Glob
+	formatters     map[string]*format.Formatter
+	pipelines      map[string]*format.Pipeline
+	pathsCh        chan string
+	processedCh    chan string
+
+	ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled")
+)
+
+func (f *Format) Run() (err error) {
+	start = time.Now()
 
 	Cli.Configure()
 
@@ -36,86 +51,40 @@ func (f *Format) Run() error {
 		}
 	}()
 
-	// create an overall context
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-
 	// read config
 	cfg, err := config.ReadFile(Cli.ConfigFile)
 	if err != nil {
 		return fmt.Errorf("%w: failed to read config file", err)
 	}
 
-	globalExcludes, err := format2.CompileGlobs(cfg.Global.Excludes)
-
-	// create optional formatter filter set
-	formatterSet := make(map[string]bool)
-	for _, name := range Cli.Formatters {
-		_, ok := cfg.Formatters[name]
-		if !ok {
-			return fmt.Errorf("%w: formatter not found in config: %v", err, name)
-		}
-		formatterSet[name] = true
+	if globalExcludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil {
+		return fmt.Errorf("%w: failed to compile global globs", err)
 	}
 
-	includeFormatter := func(name string) bool {
-		if len(formatterSet) == 0 {
-			return true
-		} else {
-			_, include := formatterSet[name]
-			return include
-		}
-	}
+	pipelines = make(map[string]*format.Pipeline)
+	formatters = make(map[string]*format.Formatter)
 
-	formatters := make(map[string]*format2.Formatter)
-
-	// detect broken dependencies
-	for name, formatterCfg := range cfg.Formatters {
-		before := formatterCfg.Before
-		if before != "" {
-			// check child formatter exists
-			_, ok := cfg.Formatters[before]
+	// filter formatters
+	if len(Cli.Formatters) > 0 {
+		// first check the cli formatter list is valid
+		for _, name := range Cli.Formatters {
+			_, ok := cfg.Formatters[name]
 			if !ok {
-				return fmt.Errorf("formatter %v is before %v but config for %v was not found", name, before, before)
+				return fmt.Errorf("formatter not found in config: %v", name)
 			}
 		}
-	}
-
-	// dependency cycle detection
-	for name, formatterCfg := range cfg.Formatters {
-		var ok bool
-		var history []string
-		childName := name
-		for {
-			// add to history
-			history = append(history, childName)
-
-			if formatterCfg.Before == "" {
-				break
-			} else if formatterCfg.Before == name {
-				return fmt.Errorf("formatter cycle detected %v", strings.Join(history, " -> "))
-			}
-
-			// load child config
-			childName = formatterCfg.Before
-			formatterCfg, ok = cfg.Formatters[formatterCfg.Before]
-			if !ok {
-				return fmt.Errorf("formatter not found: %v", formatterCfg.Before)
+		// next we remove any formatter configs that were not specified
+		for name := range cfg.Formatters {
+			if !slices.Contains(Cli.Formatters, name) {
+				delete(cfg.Formatters, name)
 			}
 		}
 	}
 
 	// init formatters
 	for name, formatterCfg := range cfg.Formatters {
-		if !includeFormatter(name) {
-			// remove this formatter
-			delete(cfg.Formatters, name)
-			l.Debugf("formatter %v is not in formatter list %v, skipping", name, Cli.Formatters)
-			continue
-		}
-
-		formatter, err := format2.NewFormatter(name, formatterCfg, globalExcludes)
-		if errors.Is(err, format2.ErrCommandNotFound) && Cli.AllowMissingFormatter {
+		formatter, err := format.NewFormatter(name, formatterCfg, globalExcludes)
+		if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter {
 			l.Debugf("formatter not found: %v", name)
 			continue
 		} else if err != nil {
@@ -123,49 +92,101 @@ func (f *Format) Run() error {
 		}
 
 		formatters[name] = formatter
-	}
 
-	// iterate the initialised formatters configuring parent/child relationships
-	for _, formatter := range formatters {
-		if formatter.Before() != "" {
-			child, ok := formatters[formatter.Before()]
+		if formatterCfg.Pipeline == "" {
+			pipeline := format.Pipeline{}
+			pipeline.Add(formatter)
+			pipelines[name] = &pipeline
+		} else {
+			key := fmt.Sprintf("p:%s", formatterCfg.Pipeline)
+			pipeline, ok := pipelines[key]
 			if !ok {
-				// formatter has been filtered out by the user
-				formatter.ResetBefore()
-				continue
+				pipeline = &format.Pipeline{}
+				pipelines[key] = pipeline
 			}
-			formatter.SetChild(child)
-			child.SetParent(formatter)
+			pipeline.Add(formatter)
 		}
 	}
 
+	// open the cache
 	if err = cache.Open(Cli.TreeRoot, Cli.ClearCache, formatters); err != nil {
 		return err
 	}
 
-	//
-	completedCh := make(chan string, 1024)
+	// create an app context and listen for shutdown
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
 
-	ctx = format2.SetCompletedChannel(ctx, completedCh)
+	go func() {
+		exit := make(chan os.Signal, 1)
+		signal.Notify(exit, os.Interrupt, syscall.SIGTERM)
+		<-exit
+		cancel()
+	}()
 
-	//
+	// create some groups for concurrent processing and control flow
 	eg, ctx := errgroup.WithContext(ctx)
 
-	// start the formatters
-	for name := range formatters {
-		formatter := formatters[name]
-		eg.Go(func() error {
-			return formatter.Run(ctx)
-		})
+	// create a channel for paths to be processed
+	// we use a multiple of batch size here to allow for greater concurrency
+	pathsCh = make(chan string, 10*BatchSize)
+
+	// create a channel for tracking paths that have been processed
+	processedCh = make(chan string, cap(pathsCh))
+
+	// start concurrent processing tasks
+	eg.Go(updateCache(ctx))
+	eg.Go(applyFormatters(ctx))
+	eg.Go(walkFilesystem(ctx))
+
+	// wait for everything to complete
+	return eg.Wait()
+}
+
+func walkFilesystem(ctx context.Context) func() error {
+	return func() error {
+		paths := Cli.Paths
+
+		if len(paths) == 0 && Cli.Stdin {
+			// read in all the paths
+			scanner := bufio.NewScanner(os.Stdin)
+			for scanner.Scan() {
+				paths = append(paths, scanner.Text())
+			}
+		}
+
+		walker, err := walk.New(Cli.Walk, Cli.TreeRoot, paths)
+		if err != nil {
+			return fmt.Errorf("failed to create walker: %w", err)
+		}
+
+		defer close(pathsCh)
+
+		if Cli.NoCache {
+			return walker.Walk(ctx, func(path string, info fs.FileInfo, err error) error {
+				select {
+				case <-ctx.Done():
+					return ctx.Err()
+				default:
+					// ignore symlinks and directories
+					if !(info.IsDir() || info.Mode()&os.ModeSymlink == os.ModeSymlink) {
+						pathsCh <- path
+					}
+					return nil
+				}
+			})
+		}
+
+		if err = cache.ChangeSet(ctx, walker, pathsCh); err != nil {
+			return fmt.Errorf("failed to generate change set: %w", err)
+		}
+		return nil
 	}
+}
 
-	// determine paths to be formatted
-	pathsCh := make(chan string, 1024)
-
-	// update cache as paths are completed
-	eg.Go(func() error {
-		batchSize := 1024
-		batch := make([]string, 0, batchSize)
+func updateCache(ctx context.Context) func() error {
+	return func() error {
+		batch := make([]string, 0, BatchSize)
 
 		var changes int
 
@@ -188,13 +209,13 @@ func (f *Format) Run() error {
 			select {
 			case <-ctx.Done():
 				return ctx.Err()
-			case path, ok := <-completedCh:
+			case path, ok := <-processedCh:
 				if !ok {
 					break LOOP
 				}
 				batch = append(batch, path)
-				if len(batch) == batchSize {
-					if err = processBatch(); err != nil {
+				if len(batch) == BatchSize {
+					if err := processBatch(); err != nil {
 						return err
 					}
 				}
@@ -202,7 +223,7 @@ func (f *Format) Run() error {
 		}
 
 		// final flush
-		if err = processBatch(); err != nil {
+		if err := processBatch(); err != nil {
 			return err
 		}
 
@@ -212,81 +233,84 @@ func (f *Format) Run() error {
 
 		fmt.Printf("%v files changed in %v\n", changes, time.Now().Sub(start))
 		return nil
-	})
+	}
+}
 
-	eg.Go(func() error {
-		// pass paths to each formatter
-		for path := range pathsCh {
-			for _, formatter := range formatters {
-				if formatter.Wants(path) {
-					formatter.Put(path)
+func applyFormatters(ctx context.Context) func() error {
+	fg, ctx := errgroup.WithContext(ctx)
+	batches := make(map[string][]string)
+
+	tryApply := func(key string, path string) {
+		batch, ok := batches[key]
+		if !ok {
+			batch = make([]string, 0, BatchSize)
+		}
+		batch = append(batch, path)
+		batches[key] = batch
+
+		if len(batch) == BatchSize {
+			pipeline := pipelines[key]
+
+			// copy the batch
+			paths := make([]string, len(batch))
+			copy(paths, batch)
+
+			fg.Go(func() error {
+				if err := pipeline.Apply(ctx, paths); err != nil {
+					return err
 				}
-			}
+				for _, path := range paths {
+					processedCh <- path
+				}
+				return nil
+			})
+
+			batches[key] = batch[:0]
 		}
+	}
 
-		// indicate no more paths for each formatter
-		for _, formatter := range formatters {
-			if formatter.Parent() != nil {
-				// this formatter is not a root, it will be closed by a parent
-				continue
-			}
-			formatter.Close()
-		}
+	flushBatches := func() {
+		for key, pipeline := range pipelines {
 
-		// await completion
-		for _, formatter := range formatters {
-			formatter.AwaitCompletion()
-		}
+			batch := batches[key]
+			pipeline := pipeline // capture for closure
 
-		// indicate no more completion events
-		close(completedCh)
-
-		return nil
-	})
-
-	eg.Go(func() (err error) {
-		paths := Cli.Paths
-
-		if len(paths) == 0 && Cli.Stdin {
-			// read in all the paths
-			scanner := bufio.NewScanner(os.Stdin)
-			for scanner.Scan() {
-				paths = append(paths, scanner.Text())
-			}
-		}
-
-		walker, err := walk.New(Cli.Walk, Cli.TreeRoot, paths)
-		if err != nil {
-			return fmt.Errorf("%w: failed to create walker", err)
-		}
-
-		defer close(pathsCh)
-
-		if Cli.NoCache {
-			return walker.Walk(ctx, func(path string, info fs.FileInfo, err error) error {
-				select {
-				case <-ctx.Done():
-					return ctx.Err()
-				default:
-					// ignore symlinks and directories
-					if !(info.IsDir() || info.Mode()&os.ModeSymlink == os.ModeSymlink) {
-						pathsCh <- path
+			if len(batch) > 0 {
+				fg.Go(func() error {
+					if err := pipeline.Apply(ctx, batch); err != nil {
+						return fmt.Errorf("%w: pipeline failure, %s", err, key)
+					}
+					for _, path := range batch {
+						processedCh <- path
 					}
 					return nil
+				})
+			}
+		}
+	}
+
+	return func() error {
+		defer func() {
+			// close processed channel
+			close(processedCh)
+		}()
+
+		for path := range pathsCh {
+			for key, pipeline := range pipelines {
+				if !pipeline.Wants(path) {
+					continue
 				}
-			})
+				tryApply(key, path)
+			}
 		}
 
-		return cache.ChangeSet(ctx, walker, pathsCh)
-	})
+		// flush any partial batches which remain
+		flushBatches()
 
-	// listen for shutdown and call cancel if required
-	go func() {
-		exit := make(chan os.Signal, 1)
-		signal.Notify(exit, os.Interrupt, syscall.SIGTERM)
-		<-exit
-		cancel()
-	}()
-
-	return eg.Wait()
+		// wait for all outstanding formatting tasks to complete
+		if err := fg.Wait(); err != nil {
+			return fmt.Errorf("pipeline processing failure: %w", err)
+		}
+		return nil
+	}
 }
diff --git a/cli/format_test.go b/cli/format_test.go
index 4f03231..9030d09 100644
--- a/cli/format_test.go
+++ b/cli/format_test.go
@@ -41,27 +41,6 @@ func TestAllowMissingFormatter(t *testing.T) {
 	as.NoError(err)
 }
 
-func TestDependencyCycle(t *testing.T) {
-	as := require.New(t)
-
-	tempDir := t.TempDir()
-	configPath := tempDir + "/treefmt.toml"
-
-	test.WriteConfig(t, configPath, config2.Config{
-		Formatters: map[string]*config2.Formatter{
-			"a": {Command: "echo", Before: "b"},
-			"b": {Command: "echo", Before: "c"},
-			"c": {Command: "echo", Before: "a"},
-			"d": {Command: "echo", Before: "e"},
-			"e": {Command: "echo", Before: "f"},
-			"f": {Command: "echo"},
-		},
-	})
-
-	_, err := cmd(t, "--config-file", configPath, "--tree-root", tempDir)
-	as.ErrorContains(err, "formatter cycle detected")
-}
-
 func TestSpecifyingFormatters(t *testing.T) {
 	as := require.New(t)
 
@@ -451,60 +430,6 @@ func TestGitWorktree(t *testing.T) {
 	as.Contains(string(out), fmt.Sprintf("%d files changed", 57))
 }
 
-func TestOrderingFormatters(t *testing.T) {
-	as := require.New(t)
-
-	tempDir := test.TempExamples(t)
-	configPath := path.Join(tempDir, "treefmt.toml")
-
-	// missing child
-	test.WriteConfig(t, configPath, config2.Config{
-		Formatters: map[string]*config2.Formatter{
-			"hs-a": {
-				Command:  "echo",
-				Includes: []string{"*.hs"},
-				Before:   "hs-b",
-			},
-		},
-	})
-
-	out, err := cmd(t, "--config-file", configPath, "--tree-root", tempDir)
-	as.ErrorContains(err, "formatter hs-a is before hs-b but config for hs-b was not found")
-
-	// multiple roots
-	test.WriteConfig(t, configPath, config2.Config{
-		Formatters: map[string]*config2.Formatter{
-			"hs-a": {
-				Command:  "echo",
-				Includes: []string{"*.hs"},
-				Before:   "hs-b",
-			},
-			"hs-b": {
-				Command:  "echo",
-				Includes: []string{"*.hs"},
-				Before:   "hs-c",
-			},
-			"hs-c": {
-				Command:  "echo",
-				Includes: []string{"*.hs"},
-			},
-			"py-a": {
-				Command:  "echo",
-				Includes: []string{"*.py"},
-				Before:   "py-b",
-			},
-			"py-b": {
-				Command:  "echo",
-				Includes: []string{"*.py"},
-			},
-		},
-	})
-
-	out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
-	as.NoError(err)
-	as.Contains(string(out), "8 files changed")
-}
-
 func TestPathsArg(t *testing.T) {
 	as := require.New(t)
 
diff --git a/config/formatter.go b/config/formatter.go
index 34f4983..4bc6a76 100644
--- a/config/formatter.go
+++ b/config/formatter.go
@@ -9,6 +9,6 @@ type Formatter struct {
 	Includes []string
 	// Excludes is an optional list of glob patterns used to exclude certain files from this Formatter.
 	Excludes []string
-	// Before is the name of another formatter which must process a path after this one
-	Before string
+	//
+	Pipeline string
 }
diff --git a/format/formatter.go b/format/formatter.go
index 434addf..532a206 100644
--- a/format/formatter.go
+++ b/format/formatter.go
@@ -24,32 +24,9 @@ type Formatter struct {
 	log        *log.Logger
 	executable string // path to the executable described by Command
 
-	before string
-
-	child  *Formatter
-	parent *Formatter
-
 	// internal compiled versions of Includes and Excludes.
 	includes []glob.Glob
 	excludes []glob.Glob
-
-	// inboxCh is used to accept new paths for formatting.
-	inboxCh chan string
-	// completedCh is used to wait for this formatter to finish all processing.
-	completedCh chan interface{}
-
-	// Entries from inboxCh are batched according to batchSize and stored in batch for processing when the batchSize has
-	// been reached or Close is invoked.
-	batch     []string
-	batchSize int
-}
-
-func (f *Formatter) Before() string {
-	return f.before
-}
-
-func (f *Formatter) ResetBefore() {
-	f.before = ""
 }
 
 // Executable returns the path to the executable defined by Command
@@ -57,15 +34,56 @@ func (f *Formatter) Executable() string {
 	return f.executable
 }
 
+func (f *Formatter) Apply(ctx context.Context, paths []string) error {
+	// only apply if the resultant batch is not empty
+	if len(paths) > 0 {
+		// construct args, starting with config
+		args := f.config.Options
+
+		// append each file path
+		for _, path := range paths {
+			args = append(args, path)
+		}
+
+		// execute
+		start := time.Now()
+		cmd := exec.CommandContext(ctx, f.config.Command, args...)
+
+		if out, err := cmd.CombinedOutput(); err != nil {
+			f.log.Debugf("\n%v", string(out))
+			// todo log output
+			return err
+		}
+
+		f.log.Infof("%v files processed in %v", len(paths), time.Now().Sub(start))
+	}
+
+	return nil
+}
+
+// Wants is used to test if a Formatter wants path based on it's configured Includes and Excludes patterns.
+// Returns true if the Formatter should be applied to path, false otherwise.
+func (f *Formatter) Wants(path string) bool {
+	match := !PathMatches(path, f.excludes) && PathMatches(path, f.includes)
+	if match {
+		f.log.Debugf("match: %v", path)
+	}
+	return match
+}
+
 // NewFormatter is used to create a new Formatter.
-func NewFormatter(name string, config *config.Formatter, globalExcludes []glob.Glob) (*Formatter, error) {
+func NewFormatter(
+	name string,
+	config *config.Formatter,
+	globalExcludes []glob.Glob,
+) (*Formatter, error) {
 	var err error
 
 	f := Formatter{}
-	// capture the name from the config file
+
+	// capture config and the formatter's name
 	f.name = name
 	f.config = config
-	f.before = config.Before
 
 	// test if the formatter is available
 	executable, err := exec.LookPath(config.Command)
@@ -78,10 +96,6 @@ func NewFormatter(name string, config *config.Formatter, globalExcludes []glob.G
 
 	// initialise internal state
 	f.log = log.WithPrefix("format | " + name)
-	f.batchSize = 1024
-	f.batch = make([]string, 0, f.batchSize)
-	f.inboxCh = make(chan string, f.batchSize)
-	f.completedCh = make(chan interface{}, 1)
 
 	f.includes, err = CompileGlobs(config.Includes)
 	if err != nil {
@@ -96,140 +110,3 @@ func NewFormatter(name string, config *config.Formatter, globalExcludes []glob.G
 
 	return &f, nil
 }
-
-func (f *Formatter) SetParent(formatter *Formatter) {
-	f.parent = formatter
-}
-
-func (f *Formatter) Parent() *Formatter {
-	return f.parent
-}
-
-func (f *Formatter) SetChild(formatter *Formatter) {
-	f.child = formatter
-}
-
-// Wants is used to test if a Formatter wants path based on it's configured Includes and Excludes patterns.
-// Returns true if the Formatter should be applied to path, false otherwise.
-func (f *Formatter) Wants(path string) bool {
-	if f.parent != nil {
-		// we don't accept this path directly, our parent will forward it
-		return false
-	}
-	match := !PathMatches(path, f.excludes) && PathMatches(path, f.includes)
-	if match {
-		f.log.Debugf("match: %v", path)
-	}
-	return match
-}
-
-// Put add path into this Formatter's inboxCh for processing.
-func (f *Formatter) Put(path string) {
-	f.inboxCh <- path
-}
-
-// Run is the main processing loop for this Formatter.
-// It accepts a context which is used to lookup certain dependencies and for cancellation.
-func (f *Formatter) Run(ctx context.Context) (err error) {
-	defer func() {
-		if f.child != nil {
-			// indicate no further processing for the child formatter
-			f.child.Close()
-		}
-
-		// indicate this formatter has finished processing
-		f.completedCh <- nil
-	}()
-
-LOOP:
-	// keep processing until ctx has been cancelled or inboxCh has been closed
-	for {
-		select {
-
-		case <-ctx.Done():
-			// ctx has been cancelled
-			err = ctx.Err()
-			break LOOP
-
-		case path, ok := <-f.inboxCh:
-			// check if the inboxCh has been closed
-			if !ok {
-				break LOOP
-			}
-
-			// add path to the current batch
-			f.batch = append(f.batch, path)
-
-			if len(f.batch) == f.batchSize {
-				// drain immediately
-				if err := f.apply(ctx); err != nil {
-					break LOOP
-				}
-			}
-		}
-	}
-
-	// check if LOOP was exited due to an error
-	if err != nil {
-		return
-	}
-
-	// processing any lingering batch
-	return f.apply(ctx)
-}
-
-// apply executes Command against the latest batch of paths.
-// It accepts a context which is used to lookup certain dependencies and for cancellation.
-func (f *Formatter) apply(ctx context.Context) error {
-	// empty check
-	if len(f.batch) == 0 {
-		return nil
-	}
-
-	// construct args, starting with config
-	args := f.config.Options
-
-	// append each file path
-	for _, path := range f.batch {
-		args = append(args, path)
-	}
-
-	// execute
-	start := time.Now()
-	cmd := exec.CommandContext(ctx, f.config.Command, args...)
-
-	if out, err := cmd.CombinedOutput(); err != nil {
-		f.log.Debugf("\n%v", string(out))
-		// todo log output
-		return err
-	}
-
-	f.log.Infof("%v files processed in %v", len(f.batch), time.Now().Sub(start))
-
-	if f.child == nil {
-		// mark each path in this batch as completed
-		for _, path := range f.batch {
-			MarkPathComplete(ctx, path)
-		}
-	} else {
-		// otherwise forward each path onto the next formatter for processing
-		for _, path := range f.batch {
-			f.child.Put(path)
-		}
-	}
-
-	// reset batch
-	f.batch = f.batch[:0]
-
-	return nil
-}
-
-// Close is used to indicate that a Formatter should process any remaining paths and then stop it's processing loop.
-func (f *Formatter) Close() {
-	close(f.inboxCh)
-}
-
-func (f *Formatter) AwaitCompletion() {
-	// todo support a timeout
-	<-f.completedCh
-}
diff --git a/format/pipeline.go b/format/pipeline.go
new file mode 100644
index 0000000..29b94fb
--- /dev/null
+++ b/format/pipeline.go
@@ -0,0 +1,31 @@
+package format
+
+import "context"
+
+type Pipeline struct {
+	sequence []*Formatter
+}
+
+func (p *Pipeline) Add(f *Formatter) {
+	p.sequence = append(p.sequence, f)
+}
+
+func (p *Pipeline) Wants(path string) bool {
+	var match bool
+	for _, f := range p.sequence {
+		match = f.Wants(path)
+		if match {
+			break
+		}
+	}
+	return match
+}
+
+func (p *Pipeline) Apply(ctx context.Context, paths []string) error {
+	for _, f := range p.sequence {
+		if err := f.Apply(ctx, paths); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/test/examples/treefmt.toml b/test/examples/treefmt.toml
index 699665a..0207c04 100644
--- a/test/examples/treefmt.toml
+++ b/test/examples/treefmt.toml
@@ -31,12 +31,12 @@ command = "alejandra"
 includes = ["*.nix"]
 # Act as an example on how to exclude specific files
 excludes = ["examples/nix/sources.nix"]
-# Make this run before deadnix
-# Note this formatter determines the file set for any 'downstream' formatters
-before = "deadnix"
+pipeline = "nix"
 
 [formatter.deadnix]
 command = "deadnix"
+includes = ["*.nix"]
+pipeline = "nix"
 
 [formatter.ruby]
 command = "rufo"

From c71d69051aceee8597725aba473b2ce5a96b407d Mon Sep 17 00:00:00 2001
From: Brian McGee <brian@bmcgee.ie>
Date: Thu, 25 Apr 2024 09:17:51 +0100
Subject: [PATCH 2/6] feat: have each formatter filter paths again if part of a
 pipeline

Signed-off-by: Brian McGee <brian@bmcgee.ie>
---
 format/context.go   | 21 ---------------
 format/formatter.go | 63 +++++++++++++++++++++++++++++++++------------
 format/pipeline.go  |  2 +-
 3 files changed, 47 insertions(+), 39 deletions(-)
 delete mode 100644 format/context.go

diff --git a/format/context.go b/format/context.go
deleted file mode 100644
index d1fb703..0000000
--- a/format/context.go
+++ /dev/null
@@ -1,21 +0,0 @@
-package format
-
-import (
-	"context"
-)
-
-const (
-	completedChKey = "completedCh"
-)
-
-// SetCompletedChannel is used to set a channel for indication processing completion in the provided context.
-func SetCompletedChannel(ctx context.Context, completedCh chan string) context.Context {
-	return context.WithValue(ctx, completedChKey, completedCh)
-}
-
-// MarkPathComplete is used to indicate that all processing has finished for the provided path.
-// This is done by adding the path to the completion channel which should have already been set using
-// SetCompletedChannel.
-func MarkPathComplete(ctx context.Context, path string) {
-	ctx.Value(completedChKey).(chan string) <- path
-}
diff --git a/format/formatter.go b/format/formatter.go
index 532a206..7e2353a 100644
--- a/format/formatter.go
+++ b/format/formatter.go
@@ -27,6 +27,8 @@ type Formatter struct {
 	// internal compiled versions of Includes and Excludes.
 	includes []glob.Glob
 	excludes []glob.Glob
+
+	batch []string
 }
 
 // Executable returns the path to the executable defined by Command
@@ -34,30 +36,53 @@ func (f *Formatter) Executable() string {
 	return f.executable
 }
 
-func (f *Formatter) Apply(ctx context.Context, paths []string) error {
-	// only apply if the resultant batch is not empty
-	if len(paths) > 0 {
-		// construct args, starting with config
-		args := f.config.Options
+func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) error {
+	// construct args, starting with config
+	args := f.config.Options
 
-		// append each file path
+	// If filter is true it indicates we are executing as part of a pipeline.
+	// In such a scenario each formatter must sub filter the paths provided as different formatters might want different
+	// files in a pipeline.
+	if filter {
+		// reset the batch
+		f.batch = f.batch[:]
+
+		// filter paths
 		for _, path := range paths {
-			args = append(args, path)
+			if f.Wants(path) {
+				f.batch = append(f.batch, path)
+			}
 		}
 
-		// execute
-		start := time.Now()
-		cmd := exec.CommandContext(ctx, f.config.Command, args...)
-
-		if out, err := cmd.CombinedOutput(); err != nil {
-			f.log.Debugf("\n%v", string(out))
-			// todo log output
-			return err
+		// exit early if nothing to process
+		if len(f.batch) == 0 {
+			return nil
 		}
 
-		f.log.Infof("%v files processed in %v", len(paths), time.Now().Sub(start))
+		// append paths to the args
+		args = append(args, f.batch...)
+	} else {
+		// exit early if nothing to process
+		if len(paths) == 0 {
+			return nil
+		}
+
+		// append paths to the args
+		args = append(args, paths...)
 	}
 
+	// execute the command
+	start := time.Now()
+	cmd := exec.CommandContext(ctx, f.config.Command, args...)
+
+	if out, err := cmd.CombinedOutput(); err != nil {
+		f.log.Debugf("\n%v", string(out))
+		// todo log output
+		return err
+	}
+
+	f.log.Infof("%v files processed in %v", len(paths), time.Now().Sub(start))
+
 	return nil
 }
 
@@ -95,7 +120,11 @@ func NewFormatter(
 	f.executable = executable
 
 	// initialise internal state
-	f.log = log.WithPrefix("format | " + name)
+	if config.Pipeline == "" {
+		f.log = log.WithPrefix(fmt.Sprintf("format | %s", name))
+	} else {
+		f.log = log.WithPrefix(fmt.Sprintf("format | %s[%s]", config.Pipeline, name))
+	}
 
 	f.includes, err = CompileGlobs(config.Includes)
 	if err != nil {
diff --git a/format/pipeline.go b/format/pipeline.go
index 29b94fb..84dbefc 100644
--- a/format/pipeline.go
+++ b/format/pipeline.go
@@ -23,7 +23,7 @@ func (p *Pipeline) Wants(path string) bool {
 
 func (p *Pipeline) Apply(ctx context.Context, paths []string) error {
 	for _, f := range p.sequence {
-		if err := f.Apply(ctx, paths); err != nil {
+		if err := f.Apply(ctx, paths, len(p.sequence) > 1); err != nil {
 			return err
 		}
 	}

From 6ae0e4f8e44cf44761014ce77a0455182b61fe12 Mon Sep 17 00:00:00 2001
From: Brian McGee <brian@bmcgee.ie>
Date: Thu, 25 Apr 2024 09:38:41 +0100
Subject: [PATCH 3/6] feat: add pipeline priority field

Allows for fine-grained control of execution order.

Signed-off-by: Brian McGee <brian@bmcgee.ie>
---
 config/config_test.go      | 12 ++++++++++++
 config/formatter.go        |  4 +++-
 format/pipeline.go         |  9 ++++++++-
 test/examples/treefmt.toml |  2 ++
 4 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/config/config_test.go b/config/config_test.go
index f50e6c7..cfe67e0 100644
--- a/config/config_test.go
+++ b/config/config_test.go
@@ -59,6 +59,18 @@ func TestReadConfigFile(t *testing.T) {
 	as.Nil(alejandra.Options)
 	as.Equal([]string{"*.nix"}, alejandra.Includes)
 	as.Equal([]string{"examples/nix/sources.nix"}, alejandra.Excludes)
+	as.Equal("nix", alejandra.Pipeline)
+	as.Equal(1, alejandra.Priority)
+
+	// deadnix
+	deadnix, ok := cfg.Formatters["deadnix"]
+	as.True(ok, "deadnix formatter not found")
+	as.Equal("deadnix", deadnix.Command)
+	as.Nil(deadnix.Options)
+	as.Equal([]string{"*.nix"}, deadnix.Includes)
+	as.Nil(deadnix.Excludes)
+	as.Equal("nix", deadnix.Pipeline)
+	as.Equal(2, deadnix.Priority)
 
 	// ruby
 	ruby, ok := cfg.Formatters["ruby"]
diff --git a/config/formatter.go b/config/formatter.go
index 4bc6a76..4fea20b 100644
--- a/config/formatter.go
+++ b/config/formatter.go
@@ -9,6 +9,8 @@ type Formatter struct {
 	Includes []string
 	// Excludes is an optional list of glob patterns used to exclude certain files from this Formatter.
 	Excludes []string
-	//
+	// Indicates this formatter should be executed as part of a group of formatters all sharing the same pipeline key.
 	Pipeline string
+	// Indicates the order of precedence when executing as part of a pipeline.
+	Priority int
 }
diff --git a/format/pipeline.go b/format/pipeline.go
index 84dbefc..4dc8cd5 100644
--- a/format/pipeline.go
+++ b/format/pipeline.go
@@ -1,6 +1,9 @@
 package format
 
-import "context"
+import (
+	"context"
+	"slices"
+)
 
 type Pipeline struct {
 	sequence []*Formatter
@@ -8,6 +11,10 @@ type Pipeline struct {
 
 func (p *Pipeline) Add(f *Formatter) {
 	p.sequence = append(p.sequence, f)
+	// sort by priority in ascending order
+	slices.SortFunc(p.sequence, func(a, b *Formatter) int {
+		return a.config.Priority - b.config.Priority
+	})
 }
 
 func (p *Pipeline) Wants(path string) bool {
diff --git a/test/examples/treefmt.toml b/test/examples/treefmt.toml
index 0207c04..8d3ea62 100644
--- a/test/examples/treefmt.toml
+++ b/test/examples/treefmt.toml
@@ -32,11 +32,13 @@ includes = ["*.nix"]
 # Act as an example on how to exclude specific files
 excludes = ["examples/nix/sources.nix"]
 pipeline = "nix"
+priority = 1
 
 [formatter.deadnix]
 command = "deadnix"
 includes = ["*.nix"]
 pipeline = "nix"
+priority = 2
 
 [formatter.ruby]
 command = "rufo"

From fcce518d5eadc2681ce2bf81b37b6e7864cba4c3 Mon Sep 17 00:00:00 2001
From: Brian McGee <brian@bmcgee.ie>
Date: Thu, 25 Apr 2024 12:16:04 +0100
Subject: [PATCH 4/6] feat: various perf improvements

Signed-off-by: Brian McGee <brian@bmcgee.ie>
---
 cache/cache.go             | 43 ++++++++++++++++++++++++++++----------
 cli/cli.go                 |  2 +-
 cli/format.go              | 25 +++++++++++++++++-----
 cli/format_test.go         | 28 ++++++++++++-------------
 format/formatter.go        | 19 ++++++++++++-----
 nix/devshell.nix           |  1 +
 nix/formatters.nix         |  1 +
 test/examples/nixpkgs.toml | 13 ++++++++++++
 8 files changed, 96 insertions(+), 36 deletions(-)
 create mode 100644 test/examples/nixpkgs.toml

diff --git a/cache/cache.go b/cache/cache.go
index eb56d15..529085d 100644
--- a/cache/cache.go
+++ b/cache/cache.go
@@ -7,6 +7,8 @@ import (
 	"fmt"
 	"io/fs"
 	"os"
+	"path/filepath"
+	"runtime"
 	"time"
 
 	"git.numtide.com/numtide/treefmt/format"
@@ -22,8 +24,6 @@ import (
 const (
 	pathsBucket      = "paths"
 	formattersBucket = "formatters"
-
-	readBatchSize = 1024
 )
 
 // Entry represents a cache entry, indicating the last size and modified time for a file path.
@@ -32,7 +32,11 @@ type Entry struct {
 	Modified time.Time
 }
 
-var db *bolt.DB
+var (
+	db            *bolt.DB
+	ReadBatchSize = 1024 * runtime.NumCPU()
+	logger        *log.Logger
+)
 
 // Open creates an instance of bolt.DB for a given treeRoot path.
 // If clean is true, Open will delete any existing data in the cache.
@@ -40,7 +44,7 @@ var db *bolt.DB
 // The database will be located in `XDG_CACHE_DIR/treefmt/eval-cache/<id>.db`, where <id> is determined by hashing
 // the treeRoot path. This associates a given treeRoot with a given instance of the cache.
 func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter) (err error) {
-	l := log.WithPrefix("cache")
+	logger = log.WithPrefix("cache")
 
 	// determine a unique and consistent db name for the tree root
 	h := sha1.New()
@@ -85,7 +89,7 @@ func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter)
 			}
 
 			clean = clean || entry == nil || !(entry.Size == stat.Size() && entry.Modified == stat.ModTime())
-			l.Debug(
+			logger.Debug(
 				"checking if formatter has changed",
 				"name", name,
 				"clean", clean,
@@ -174,6 +178,12 @@ func putEntry(bucket *bolt.Bucket, path string, entry *Entry) error {
 // ChangeSet is used to walk a filesystem, starting at root, and outputting any new or changed paths using pathsCh.
 // It determines if a path is new or has changed by comparing against cache entries.
 func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) error {
+	start := time.Now()
+
+	defer func() {
+		logger.Infof("finished generating change set in %v", time.Since(start))
+	}()
+
 	var tx *bolt.Tx
 	var bucket *bolt.Bucket
 	var processed int
@@ -185,6 +195,9 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
 		}
 	}()
 
+	// for quick removal of tree root from paths
+	relPathOffset := len(walker.Root()) + 1
+
 	return walker.Walk(ctx, func(path string, info fs.FileInfo, err error) error {
 		select {
 		case <-ctx.Done():
@@ -213,7 +226,8 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
 			bucket = tx.Bucket([]byte(pathsBucket))
 		}
 
-		cached, err := getEntry(bucket, path)
+		relPath := path[relPathOffset:]
+		cached, err := getEntry(bucket, relPath)
 		if err != nil {
 			return err
 		}
@@ -230,13 +244,15 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
 		case <-ctx.Done():
 			return ctx.Err()
 		default:
-			pathsCh <- path
+			pathsCh <- relPath
 		}
 
 		// close the current tx if we have reached the batch size
 		processed += 1
-		if processed == readBatchSize {
-			return tx.Rollback()
+		if processed == ReadBatchSize {
+			err = tx.Rollback()
+			tx = nil
+			return err
 		}
 
 		return nil
@@ -244,7 +260,12 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
 }
 
 // Update is used to record updated cache information for the specified list of paths.
-func Update(paths []string) (int, error) {
+func Update(treeRoot string, paths []string) (int, error) {
+	start := time.Now()
+	defer func() {
+		logger.Infof("finished updating %v paths in %v", len(paths), time.Since(start))
+	}()
+
 	if len(paths) == 0 {
 		return 0, nil
 	}
@@ -260,7 +281,7 @@ func Update(paths []string) (int, error) {
 				return err
 			}
 
-			pathInfo, err := os.Stat(path)
+			pathInfo, err := os.Stat(filepath.Join(treeRoot, path))
 			if err != nil {
 				return err
 			}
diff --git a/cli/cli.go b/cli/cli.go
index edb8caa..0bb1097 100644
--- a/cli/cli.go
+++ b/cli/cli.go
@@ -27,7 +27,7 @@ type Format struct {
 }
 
 func (f *Format) Configure() {
-	log.SetReportTimestamp(false)
+	log.SetReportTimestamp(true)
 
 	if f.Verbosity == 0 {
 		log.SetLevel(log.WarnLevel)
diff --git a/cli/format.go b/cli/format.go
index c248753..873e5a2 100644
--- a/cli/format.go
+++ b/cli/format.go
@@ -8,7 +8,10 @@ import (
 	"io/fs"
 	"os"
 	"os/signal"
+	"path/filepath"
+	"runtime"
 	"slices"
+	"strings"
 	"syscall"
 	"time"
 
@@ -83,7 +86,7 @@ func (f *Format) Run() (err error) {
 
 	// init formatters
 	for name, formatterCfg := range cfg.Formatters {
-		formatter, err := format.NewFormatter(name, formatterCfg, globalExcludes)
+		formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, globalExcludes)
 		if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter {
 			l.Debugf("formatter not found: %v", name)
 			continue
@@ -129,7 +132,7 @@ func (f *Format) Run() (err error) {
 
 	// create a channel for paths to be processed
 	// we use a multiple of batch size here to allow for greater concurrency
-	pathsCh = make(chan string, 10*BatchSize)
+	pathsCh = make(chan string, BatchSize*runtime.NumCPU())
 
 	// create a channel for tracking paths that have been processed
 	processedCh = make(chan string, cap(pathsCh))
@@ -148,10 +151,22 @@ func walkFilesystem(ctx context.Context) func() error {
 		paths := Cli.Paths
 
 		if len(paths) == 0 && Cli.Stdin {
+
+			cwd, err := os.Getwd()
+			if err != nil {
+				return fmt.Errorf("%w: failed to determine current working directory", err)
+			}
+
 			// read in all the paths
 			scanner := bufio.NewScanner(os.Stdin)
 			for scanner.Scan() {
-				paths = append(paths, scanner.Text())
+				path := scanner.Text()
+				if !strings.HasPrefix(path, "/") {
+					// append the cwd
+					path = filepath.Join(cwd, path)
+				}
+
+				paths = append(paths, path)
 			}
 		}
 
@@ -194,7 +209,7 @@ func updateCache(ctx context.Context) func() error {
 			if Cli.NoCache {
 				changes += len(batch)
 			} else {
-				count, err := cache.Update(batch)
+				count, err := cache.Update(Cli.TreeRoot, batch)
 				if err != nil {
 					return err
 				}
@@ -278,7 +293,7 @@ func applyFormatters(ctx context.Context) func() error {
 			if len(batch) > 0 {
 				fg.Go(func() error {
 					if err := pipeline.Apply(ctx, batch); err != nil {
-						return fmt.Errorf("%w: pipeline failure, %s", err, key)
+						return fmt.Errorf("%s failure: %w", key, err)
 					}
 					for _, path := range batch {
 						processedCh <- path
diff --git a/cli/format_test.go b/cli/format_test.go
index 9030d09..9356c97 100644
--- a/cli/format_test.go
+++ b/cli/format_test.go
@@ -108,7 +108,7 @@ func TestIncludesAndExcludes(t *testing.T) {
 	test.WriteConfig(t, configPath, cfg)
 	out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
 	as.NoError(err)
-	as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
 
 	// globally exclude nix files
 	cfg.Global.Excludes = []string{"*.nix"}
@@ -116,7 +116,7 @@ func TestIncludesAndExcludes(t *testing.T) {
 	test.WriteConfig(t, configPath, cfg)
 	out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
 	as.NoError(err)
-	as.Contains(string(out), fmt.Sprintf("%d files changed", 29))
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
 
 	// add haskell files to the global exclude
 	cfg.Global.Excludes = []string{"*.nix", "*.hs"}
@@ -124,7 +124,7 @@ func TestIncludesAndExcludes(t *testing.T) {
 	test.WriteConfig(t, configPath, cfg)
 	out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
 	as.NoError(err)
-	as.Contains(string(out), fmt.Sprintf("%d files changed", 23))
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 24))
 
 	echo := cfg.Formatters["echo"]
 
@@ -134,7 +134,7 @@ func TestIncludesAndExcludes(t *testing.T) {
 	test.WriteConfig(t, configPath, cfg)
 	out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
 	as.NoError(err)
-	as.Contains(string(out), fmt.Sprintf("%d files changed", 21))
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 22))
 
 	// remove go files from the echo formatter
 	echo.Excludes = []string{"*.py", "*.go"}
@@ -142,7 +142,7 @@ func TestIncludesAndExcludes(t *testing.T) {
 	test.WriteConfig(t, configPath, cfg)
 	out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
 	as.NoError(err)
-	as.Contains(string(out), fmt.Sprintf("%d files changed", 20))
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 21))
 
 	// adjust the includes for echo to only include elm files
 	echo.Includes = []string{"*.elm"}
@@ -180,7 +180,7 @@ func TestCache(t *testing.T) {
 	test.WriteConfig(t, configPath, cfg)
 	out, err := cmd(t, "--config-file", configPath, "--tree-root", tempDir)
 	as.NoError(err)
-	as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
 
 	out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
 	as.NoError(err)
@@ -189,7 +189,7 @@ func TestCache(t *testing.T) {
 	// clear cache
 	out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "-c")
 	as.NoError(err)
-	as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
 
 	out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
 	as.NoError(err)
@@ -198,7 +198,7 @@ func TestCache(t *testing.T) {
 	// clear cache
 	out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "-c")
 	as.NoError(err)
-	as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
 
 	out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
 	as.NoError(err)
@@ -207,7 +207,7 @@ func TestCache(t *testing.T) {
 	// no cache
 	out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "--no-cache")
 	as.NoError(err)
-	as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
 }
 
 func TestChangeWorkingDirectory(t *testing.T) {
@@ -241,7 +241,7 @@ func TestChangeWorkingDirectory(t *testing.T) {
 	// this should fail if the working directory hasn't been changed first
 	out, err := cmd(t, "-C", tempDir)
 	as.NoError(err)
-	as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
 }
 
 func TestFailOnChange(t *testing.T) {
@@ -418,16 +418,16 @@ func TestGitWorktree(t *testing.T) {
 	// add everything to the worktree
 	as.NoError(wt.AddGlob("."))
 	as.NoError(err)
-	run(30)
+	run(31)
 
 	// remove python directory
 	as.NoError(wt.RemoveGlob("python/*"))
-	run(27)
+	run(28)
 
 	// walk with filesystem instead of git
 	out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--walk", "filesystem")
 	as.NoError(err)
-	as.Contains(string(out), fmt.Sprintf("%d files changed", 57))
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 59))
 }
 
 func TestPathsArg(t *testing.T) {
@@ -462,7 +462,7 @@ func TestPathsArg(t *testing.T) {
 	// without any path args
 	out, err := cmd(t, "-C", tempDir)
 	as.NoError(err)
-	as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
+	as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
 
 	// specify some explicit paths
 	out, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Foo.hs")
diff --git a/format/formatter.go b/format/formatter.go
index 7e2353a..ae2e759 100644
--- a/format/formatter.go
+++ b/format/formatter.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"os"
 	"os/exec"
 	"time"
 
@@ -23,6 +24,7 @@ type Formatter struct {
 
 	log        *log.Logger
 	executable string // path to the executable described by Command
+	workingDir string
 
 	// internal compiled versions of Includes and Excludes.
 	includes []glob.Glob
@@ -37,6 +39,8 @@ func (f *Formatter) Executable() string {
 }
 
 func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) error {
+	start := time.Now()
+
 	// construct args, starting with config
 	args := f.config.Options
 
@@ -45,7 +49,7 @@ func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) erro
 	// files in a pipeline.
 	if filter {
 		// reset the batch
-		f.batch = f.batch[:]
+		f.batch = f.batch[:0]
 
 		// filter paths
 		for _, path := range paths {
@@ -72,15 +76,18 @@ func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) erro
 	}
 
 	// execute the command
-	start := time.Now()
 	cmd := exec.CommandContext(ctx, f.config.Command, args...)
+	cmd.Dir = f.workingDir
 
 	if out, err := cmd.CombinedOutput(); err != nil {
-		f.log.Debugf("\n%v", string(out))
-		// todo log output
-		return err
+		if len(out) > 0 {
+			_, _ = fmt.Fprintf(os.Stderr, "%s error:\n%s\n", f.name, out)
+		}
+		return fmt.Errorf("%w: formatter %s failed to apply", err, f.name)
 	}
 
+	//
+
 	f.log.Infof("%v files processed in %v", len(paths), time.Now().Sub(start))
 
 	return nil
@@ -99,6 +106,7 @@ func (f *Formatter) Wants(path string) bool {
 // NewFormatter is used to create a new Formatter.
 func NewFormatter(
 	name string,
+	treeRoot string,
 	config *config.Formatter,
 	globalExcludes []glob.Glob,
 ) (*Formatter, error) {
@@ -109,6 +117,7 @@ func NewFormatter(
 	// capture config and the formatter's name
 	f.name = name
 	f.config = config
+	f.workingDir = treeRoot
 
 	// test if the formatter is available
 	executable, err := exec.LookPath(config.Command)
diff --git a/nix/devshell.nix b/nix/devshell.nix
index ee43fdf..864a992 100644
--- a/nix/devshell.nix
+++ b/nix/devshell.nix
@@ -25,6 +25,7 @@
           # golang
           go
           delve
+          graphviz
         ]
         ++
         # include formatters for development and testing
diff --git a/nix/formatters.nix b/nix/formatters.nix
index 9788f7f..5bce9a5 100644
--- a/nix/formatters.nix
+++ b/nix/formatters.nix
@@ -6,6 +6,7 @@ with pkgs; [
   haskellPackages.cabal-fmt
   haskellPackages.ormolu
   mdsh
+  nixpkgs-fmt
   nodePackages.prettier
   python3.pkgs.black
   rufo
diff --git a/test/examples/nixpkgs.toml b/test/examples/nixpkgs.toml
new file mode 100644
index 0000000..2c43b6f
--- /dev/null
+++ b/test/examples/nixpkgs.toml
@@ -0,0 +1,13 @@
+# One CLI to format the code tree - https://git.numtide.com/numtide/treefmt
+
+[formatter.deadnix]
+command = "deadnix"
+includes = ["*.nix"]
+pipeline = "nix"
+priority = 1
+
+[formatter.nixpkgs-fmt]
+command = "nixpkgs-fmt"
+includes = ["*.nix"]
+pipeline = "nix"
+priority = 2
\ No newline at end of file

From 710efbd049dc102337161031a42c5d802ecccdce Mon Sep 17 00:00:00 2001
From: Brian McGee <brian@bmcgee.ie>
Date: Fri, 26 Apr 2024 09:55:09 +0100
Subject: [PATCH 5/6] fix: remember timestamps from logging

Signed-off-by: Brian McGee <brian@bmcgee.ie>
---
 cli/cli.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cli/cli.go b/cli/cli.go
index 0bb1097..edb8caa 100644
--- a/cli/cli.go
+++ b/cli/cli.go
@@ -27,7 +27,7 @@ type Format struct {
 }
 
 func (f *Format) Configure() {
-	log.SetReportTimestamp(true)
+	log.SetReportTimestamp(false)
 
 	if f.Verbosity == 0 {
 		log.SetLevel(log.WarnLevel)

From 40b76b74a0053324d683823ef7ad70f5b29f0dc9 Mon Sep 17 00:00:00 2001
From: Brian McGee <brian@bmcgee.ie>
Date: Fri, 26 Apr 2024 10:27:11 +0100
Subject: [PATCH 6/6] feat: ensure deterministic application of formatters

Signed-off-by: Brian McGee <brian@bmcgee.ie>
---
 cli/format.go       | 13 ++++++++-
 cli/format_test.go  | 64 ++++++++++++++++++++++++++++++++++++++++++++-
 format/formatter.go |  2 +-
 nix/formatters.nix  | 13 +++++++++
 4 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/cli/format.go b/cli/format.go
index 873e5a2..6f0b356 100644
--- a/cli/format.go
+++ b/cli/format.go
@@ -11,6 +11,7 @@ import (
 	"path/filepath"
 	"runtime"
 	"slices"
+	"sort"
 	"strings"
 	"syscall"
 	"time"
@@ -84,8 +85,18 @@ func (f *Format) Run() (err error) {
 		}
 	}
 
+	// sort the formatter names so that, as we construct pipelines, we add formatters in a determinstic fashion. This
+	// ensures a deterministic order even when all priority values are the same e.g. 0
+
+	names := make([]string, 0, len(cfg.Formatters))
+	for name := range cfg.Formatters {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+
 	// init formatters
-	for name, formatterCfg := range cfg.Formatters {
+	for _, name := range names {
+		formatterCfg := cfg.Formatters[name]
 		formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, globalExcludes)
 		if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter {
 			l.Debugf("formatter not found: %v", name)
diff --git a/cli/format_test.go b/cli/format_test.go
index 9356c97..df02288 100644
--- a/cli/format_test.go
+++ b/cli/format_test.go
@@ -1,11 +1,13 @@
 package cli
 
 import (
+	"bufio"
 	"fmt"
 	"os"
 	"os/exec"
 	"path"
 	"path/filepath"
+	"regexp"
 	"testing"
 
 	config2 "git.numtide.com/numtide/treefmt/config"
@@ -23,7 +25,7 @@ import (
 func TestAllowMissingFormatter(t *testing.T) {
 	as := require.New(t)
 
-	tempDir := t.TempDir()
+	tempDir := test.TempExamples(t)
 	configPath := tempDir + "/treefmt.toml"
 
 	test.WriteConfig(t, configPath, config2.Config{
@@ -529,3 +531,63 @@ go/main.go
 	as.NoError(err)
 	as.Contains(string(out), fmt.Sprintf("%d files changed", 3))
 }
+
+func TestDeterministicOrderingInPipeline(t *testing.T) {
+	as := require.New(t)
+
+	tempDir := test.TempExamples(t)
+	configPath := tempDir + "/treefmt.toml"
+
+	test.WriteConfig(t, configPath, config2.Config{
+		Formatters: map[string]*config2.Formatter{
+			// a and b should execute in lexicographical order as they have default priority 0, with c last since it has
+			// priority 1
+			"fmt-a": {
+				Command:  "test-fmt",
+				Options:  []string{"fmt-a"},
+				Includes: []string{"*.py"},
+				Pipeline: "foo",
+			},
+			"fmt-b": {
+				Command:  "test-fmt",
+				Options:  []string{"fmt-b"},
+				Includes: []string{"*.py"},
+				Pipeline: "foo",
+			},
+			"fmt-c": {
+				Command:  "test-fmt",
+				Options:  []string{"fmt-c"},
+				Includes: []string{"*.py"},
+				Pipeline: "foo",
+				Priority: 1,
+			},
+		},
+	})
+
+	_, err := cmd(t, "-C", tempDir)
+	as.NoError(err)
+
+	matcher := regexp.MustCompile("^fmt-(.*)")
+
+	// check each affected file for the sequence of test statements which should be prepended to the end
+	sequence := []string{"fmt-a", "fmt-b", "fmt-c"}
+	paths := []string{"python/main.py", "python/virtualenv_proxy.py"}
+
+	for _, p := range paths {
+		file, err := os.Open(filepath.Join(tempDir, p))
+		as.NoError(err)
+		scanner := bufio.NewScanner(file)
+
+		idx := 0
+
+		for scanner.Scan() {
+			line := scanner.Text()
+			matches := matcher.FindAllString(line, -1)
+			if len(matches) != 1 {
+				continue
+			}
+			as.Equal(sequence[idx], matches[0])
+			idx += 1
+		}
+	}
+}
diff --git a/format/formatter.go b/format/formatter.go
index ae2e759..e2ec9e3 100644
--- a/format/formatter.go
+++ b/format/formatter.go
@@ -93,7 +93,7 @@ func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) erro
 	return nil
 }
 
-// Wants is used to test if a Formatter wants path based on it's configured Includes and Excludes patterns.
+// Wants is used to test if a Formatter wants a path based on it's configured Includes and Excludes patterns.
 // Returns true if the Formatter should be applied to path, false otherwise.
 func (f *Formatter) Wants(path string) bool {
 	match := !PathMatches(path, f.excludes) && PathMatches(path, f.includes)
diff --git a/nix/formatters.nix b/nix/formatters.nix
index 5bce9a5..8454d9d 100644
--- a/nix/formatters.nix
+++ b/nix/formatters.nix
@@ -16,4 +16,17 @@ with pkgs; [
   statix
   deadnix
   terraform
+  # util for unit testing
+  (pkgs.writeShellApplication {
+    name = "test-fmt";
+    text = ''
+      VALUE="$1"
+      shift
+
+      # append value to each file
+      for FILE in "$@"; do
+          echo "$VALUE" >> "$FILE"
+      done
+    '';
+  })
 ]