From fcce518d5eadc2681ce2bf81b37b6e7864cba4c3 Mon Sep 17 00:00:00 2001 From: Brian McGee Date: Thu, 25 Apr 2024 12:16:04 +0100 Subject: [PATCH] feat: various perf improvements Signed-off-by: Brian McGee --- cache/cache.go | 43 ++++++++++++++++++++++++++++---------- cli/cli.go | 2 +- cli/format.go | 25 +++++++++++++++++----- cli/format_test.go | 28 ++++++++++++------------- format/formatter.go | 19 ++++++++++++----- nix/devshell.nix | 1 + nix/formatters.nix | 1 + test/examples/nixpkgs.toml | 13 ++++++++++++ 8 files changed, 96 insertions(+), 36 deletions(-) create mode 100644 test/examples/nixpkgs.toml diff --git a/cache/cache.go b/cache/cache.go index eb56d15..529085d 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -7,6 +7,8 @@ import ( "fmt" "io/fs" "os" + "path/filepath" + "runtime" "time" "git.numtide.com/numtide/treefmt/format" @@ -22,8 +24,6 @@ import ( const ( pathsBucket = "paths" formattersBucket = "formatters" - - readBatchSize = 1024 ) // Entry represents a cache entry, indicating the last size and modified time for a file path. @@ -32,7 +32,11 @@ type Entry struct { Modified time.Time } -var db *bolt.DB +var ( + db *bolt.DB + ReadBatchSize = 1024 * runtime.NumCPU() + logger *log.Logger +) // Open creates an instance of bolt.DB for a given treeRoot path. // If clean is true, Open will delete any existing data in the cache. @@ -40,7 +44,7 @@ var db *bolt.DB // The database will be located in `XDG_CACHE_DIR/treefmt/eval-cache/.db`, where is determined by hashing // the treeRoot path. This associates a given treeRoot with a given instance of the cache. func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter) (err error) { - l := log.WithPrefix("cache") + logger = log.WithPrefix("cache") // determine a unique and consistent db name for the tree root h := sha1.New() @@ -85,7 +89,7 @@ func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter) } clean = clean || entry == nil || !(entry.Size == stat.Size() && entry.Modified == stat.ModTime()) - l.Debug( + logger.Debug( "checking if formatter has changed", "name", name, "clean", clean, @@ -174,6 +178,12 @@ func putEntry(bucket *bolt.Bucket, path string, entry *Entry) error { // ChangeSet is used to walk a filesystem, starting at root, and outputting any new or changed paths using pathsCh. // It determines if a path is new or has changed by comparing against cache entries. func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) error { + start := time.Now() + + defer func() { + logger.Infof("finished generating change set in %v", time.Since(start)) + }() + var tx *bolt.Tx var bucket *bolt.Bucket var processed int @@ -185,6 +195,9 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e } }() + // for quick removal of tree root from paths + relPathOffset := len(walker.Root()) + 1 + return walker.Walk(ctx, func(path string, info fs.FileInfo, err error) error { select { case <-ctx.Done(): @@ -213,7 +226,8 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e bucket = tx.Bucket([]byte(pathsBucket)) } - cached, err := getEntry(bucket, path) + relPath := path[relPathOffset:] + cached, err := getEntry(bucket, relPath) if err != nil { return err } @@ -230,13 +244,15 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e case <-ctx.Done(): return ctx.Err() default: - pathsCh <- path + pathsCh <- relPath } // close the current tx if we have reached the batch size processed += 1 - if processed == readBatchSize { - return tx.Rollback() + if processed == ReadBatchSize { + err = tx.Rollback() + tx = nil + return err } return nil @@ -244,7 +260,12 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e } // Update is used to record updated cache information for the specified list of paths. -func Update(paths []string) (int, error) { +func Update(treeRoot string, paths []string) (int, error) { + start := time.Now() + defer func() { + logger.Infof("finished updating %v paths in %v", len(paths), time.Since(start)) + }() + if len(paths) == 0 { return 0, nil } @@ -260,7 +281,7 @@ func Update(paths []string) (int, error) { return err } - pathInfo, err := os.Stat(path) + pathInfo, err := os.Stat(filepath.Join(treeRoot, path)) if err != nil { return err } diff --git a/cli/cli.go b/cli/cli.go index edb8caa..0bb1097 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -27,7 +27,7 @@ type Format struct { } func (f *Format) Configure() { - log.SetReportTimestamp(false) + log.SetReportTimestamp(true) if f.Verbosity == 0 { log.SetLevel(log.WarnLevel) diff --git a/cli/format.go b/cli/format.go index c248753..873e5a2 100644 --- a/cli/format.go +++ b/cli/format.go @@ -8,7 +8,10 @@ import ( "io/fs" "os" "os/signal" + "path/filepath" + "runtime" "slices" + "strings" "syscall" "time" @@ -83,7 +86,7 @@ func (f *Format) Run() (err error) { // init formatters for name, formatterCfg := range cfg.Formatters { - formatter, err := format.NewFormatter(name, formatterCfg, globalExcludes) + formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, globalExcludes) if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter { l.Debugf("formatter not found: %v", name) continue @@ -129,7 +132,7 @@ func (f *Format) Run() (err error) { // create a channel for paths to be processed // we use a multiple of batch size here to allow for greater concurrency - pathsCh = make(chan string, 10*BatchSize) + pathsCh = make(chan string, BatchSize*runtime.NumCPU()) // create a channel for tracking paths that have been processed processedCh = make(chan string, cap(pathsCh)) @@ -148,10 +151,22 @@ func walkFilesystem(ctx context.Context) func() error { paths := Cli.Paths if len(paths) == 0 && Cli.Stdin { + + cwd, err := os.Getwd() + if err != nil { + return fmt.Errorf("%w: failed to determine current working directory", err) + } + // read in all the paths scanner := bufio.NewScanner(os.Stdin) for scanner.Scan() { - paths = append(paths, scanner.Text()) + path := scanner.Text() + if !strings.HasPrefix(path, "/") { + // append the cwd + path = filepath.Join(cwd, path) + } + + paths = append(paths, path) } } @@ -194,7 +209,7 @@ func updateCache(ctx context.Context) func() error { if Cli.NoCache { changes += len(batch) } else { - count, err := cache.Update(batch) + count, err := cache.Update(Cli.TreeRoot, batch) if err != nil { return err } @@ -278,7 +293,7 @@ func applyFormatters(ctx context.Context) func() error { if len(batch) > 0 { fg.Go(func() error { if err := pipeline.Apply(ctx, batch); err != nil { - return fmt.Errorf("%w: pipeline failure, %s", err, key) + return fmt.Errorf("%s failure: %w", key, err) } for _, path := range batch { processedCh <- path diff --git a/cli/format_test.go b/cli/format_test.go index 9030d09..9356c97 100644 --- a/cli/format_test.go +++ b/cli/format_test.go @@ -108,7 +108,7 @@ func TestIncludesAndExcludes(t *testing.T) { test.WriteConfig(t, configPath, cfg) out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir) as.NoError(err) - as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) + as.Contains(string(out), fmt.Sprintf("%d files changed", 31)) // globally exclude nix files cfg.Global.Excludes = []string{"*.nix"} @@ -116,7 +116,7 @@ func TestIncludesAndExcludes(t *testing.T) { test.WriteConfig(t, configPath, cfg) out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir) as.NoError(err) - as.Contains(string(out), fmt.Sprintf("%d files changed", 29)) + as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) // add haskell files to the global exclude cfg.Global.Excludes = []string{"*.nix", "*.hs"} @@ -124,7 +124,7 @@ func TestIncludesAndExcludes(t *testing.T) { test.WriteConfig(t, configPath, cfg) out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir) as.NoError(err) - as.Contains(string(out), fmt.Sprintf("%d files changed", 23)) + as.Contains(string(out), fmt.Sprintf("%d files changed", 24)) echo := cfg.Formatters["echo"] @@ -134,7 +134,7 @@ func TestIncludesAndExcludes(t *testing.T) { test.WriteConfig(t, configPath, cfg) out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir) as.NoError(err) - as.Contains(string(out), fmt.Sprintf("%d files changed", 21)) + as.Contains(string(out), fmt.Sprintf("%d files changed", 22)) // remove go files from the echo formatter echo.Excludes = []string{"*.py", "*.go"} @@ -142,7 +142,7 @@ func TestIncludesAndExcludes(t *testing.T) { test.WriteConfig(t, configPath, cfg) out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir) as.NoError(err) - as.Contains(string(out), fmt.Sprintf("%d files changed", 20)) + as.Contains(string(out), fmt.Sprintf("%d files changed", 21)) // adjust the includes for echo to only include elm files echo.Includes = []string{"*.elm"} @@ -180,7 +180,7 @@ func TestCache(t *testing.T) { test.WriteConfig(t, configPath, cfg) out, err := cmd(t, "--config-file", configPath, "--tree-root", tempDir) as.NoError(err) - as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) + as.Contains(string(out), fmt.Sprintf("%d files changed", 31)) out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir) as.NoError(err) @@ -189,7 +189,7 @@ func TestCache(t *testing.T) { // clear cache out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "-c") as.NoError(err) - as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) + as.Contains(string(out), fmt.Sprintf("%d files changed", 31)) out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir) as.NoError(err) @@ -198,7 +198,7 @@ func TestCache(t *testing.T) { // clear cache out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "-c") as.NoError(err) - as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) + as.Contains(string(out), fmt.Sprintf("%d files changed", 31)) out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir) as.NoError(err) @@ -207,7 +207,7 @@ func TestCache(t *testing.T) { // no cache out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "--no-cache") as.NoError(err) - as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) + as.Contains(string(out), fmt.Sprintf("%d files changed", 31)) } func TestChangeWorkingDirectory(t *testing.T) { @@ -241,7 +241,7 @@ func TestChangeWorkingDirectory(t *testing.T) { // this should fail if the working directory hasn't been changed first out, err := cmd(t, "-C", tempDir) as.NoError(err) - as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) + as.Contains(string(out), fmt.Sprintf("%d files changed", 31)) } func TestFailOnChange(t *testing.T) { @@ -418,16 +418,16 @@ func TestGitWorktree(t *testing.T) { // add everything to the worktree as.NoError(wt.AddGlob(".")) as.NoError(err) - run(30) + run(31) // remove python directory as.NoError(wt.RemoveGlob("python/*")) - run(27) + run(28) // walk with filesystem instead of git out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--walk", "filesystem") as.NoError(err) - as.Contains(string(out), fmt.Sprintf("%d files changed", 57)) + as.Contains(string(out), fmt.Sprintf("%d files changed", 59)) } func TestPathsArg(t *testing.T) { @@ -462,7 +462,7 @@ func TestPathsArg(t *testing.T) { // without any path args out, err := cmd(t, "-C", tempDir) as.NoError(err) - as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) + as.Contains(string(out), fmt.Sprintf("%d files changed", 31)) // specify some explicit paths out, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Foo.hs") diff --git a/format/formatter.go b/format/formatter.go index 7e2353a..ae2e759 100644 --- a/format/formatter.go +++ b/format/formatter.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "os" "os/exec" "time" @@ -23,6 +24,7 @@ type Formatter struct { log *log.Logger executable string // path to the executable described by Command + workingDir string // internal compiled versions of Includes and Excludes. includes []glob.Glob @@ -37,6 +39,8 @@ func (f *Formatter) Executable() string { } func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) error { + start := time.Now() + // construct args, starting with config args := f.config.Options @@ -45,7 +49,7 @@ func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) erro // files in a pipeline. if filter { // reset the batch - f.batch = f.batch[:] + f.batch = f.batch[:0] // filter paths for _, path := range paths { @@ -72,15 +76,18 @@ func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) erro } // execute the command - start := time.Now() cmd := exec.CommandContext(ctx, f.config.Command, args...) + cmd.Dir = f.workingDir if out, err := cmd.CombinedOutput(); err != nil { - f.log.Debugf("\n%v", string(out)) - // todo log output - return err + if len(out) > 0 { + _, _ = fmt.Fprintf(os.Stderr, "%s error:\n%s\n", f.name, out) + } + return fmt.Errorf("%w: formatter %s failed to apply", err, f.name) } + // + f.log.Infof("%v files processed in %v", len(paths), time.Now().Sub(start)) return nil @@ -99,6 +106,7 @@ func (f *Formatter) Wants(path string) bool { // NewFormatter is used to create a new Formatter. func NewFormatter( name string, + treeRoot string, config *config.Formatter, globalExcludes []glob.Glob, ) (*Formatter, error) { @@ -109,6 +117,7 @@ func NewFormatter( // capture config and the formatter's name f.name = name f.config = config + f.workingDir = treeRoot // test if the formatter is available executable, err := exec.LookPath(config.Command) diff --git a/nix/devshell.nix b/nix/devshell.nix index ee43fdf..864a992 100644 --- a/nix/devshell.nix +++ b/nix/devshell.nix @@ -25,6 +25,7 @@ # golang go delve + graphviz ] ++ # include formatters for development and testing diff --git a/nix/formatters.nix b/nix/formatters.nix index 9788f7f..5bce9a5 100644 --- a/nix/formatters.nix +++ b/nix/formatters.nix @@ -6,6 +6,7 @@ with pkgs; [ haskellPackages.cabal-fmt haskellPackages.ormolu mdsh + nixpkgs-fmt nodePackages.prettier python3.pkgs.black rufo diff --git a/test/examples/nixpkgs.toml b/test/examples/nixpkgs.toml new file mode 100644 index 0000000..2c43b6f --- /dev/null +++ b/test/examples/nixpkgs.toml @@ -0,0 +1,13 @@ +# One CLI to format the code tree - https://git.numtide.com/numtide/treefmt + +[formatter.deadnix] +command = "deadnix" +includes = ["*.nix"] +pipeline = "nix" +priority = 1 + +[formatter.nixpkgs-fmt] +command = "nixpkgs-fmt" +includes = ["*.nix"] +pipeline = "nix" +priority = 2 \ No newline at end of file