feat: various perf improvements

Signed-off-by: Brian McGee <brian@bmcgee.ie>
This commit is contained in:
Brian McGee 2024-04-25 12:16:04 +01:00
parent 6ae0e4f8e4
commit fcce518d5e
Signed by: brianmcgee
GPG Key ID: D49016E76AD1E8C0
8 changed files with 96 additions and 36 deletions

43
cache/cache.go vendored
View File

@ -7,6 +7,8 @@ import (
"fmt" "fmt"
"io/fs" "io/fs"
"os" "os"
"path/filepath"
"runtime"
"time" "time"
"git.numtide.com/numtide/treefmt/format" "git.numtide.com/numtide/treefmt/format"
@ -22,8 +24,6 @@ import (
const ( const (
pathsBucket = "paths" pathsBucket = "paths"
formattersBucket = "formatters" formattersBucket = "formatters"
readBatchSize = 1024
) )
// Entry represents a cache entry, indicating the last size and modified time for a file path. // Entry represents a cache entry, indicating the last size and modified time for a file path.
@ -32,7 +32,11 @@ type Entry struct {
Modified time.Time Modified time.Time
} }
var db *bolt.DB var (
db *bolt.DB
ReadBatchSize = 1024 * runtime.NumCPU()
logger *log.Logger
)
// Open creates an instance of bolt.DB for a given treeRoot path. // Open creates an instance of bolt.DB for a given treeRoot path.
// If clean is true, Open will delete any existing data in the cache. // If clean is true, Open will delete any existing data in the cache.
@ -40,7 +44,7 @@ var db *bolt.DB
// The database will be located in `XDG_CACHE_DIR/treefmt/eval-cache/<id>.db`, where <id> is determined by hashing // The database will be located in `XDG_CACHE_DIR/treefmt/eval-cache/<id>.db`, where <id> is determined by hashing
// the treeRoot path. This associates a given treeRoot with a given instance of the cache. // the treeRoot path. This associates a given treeRoot with a given instance of the cache.
func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter) (err error) { func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter) (err error) {
l := log.WithPrefix("cache") logger = log.WithPrefix("cache")
// determine a unique and consistent db name for the tree root // determine a unique and consistent db name for the tree root
h := sha1.New() h := sha1.New()
@ -85,7 +89,7 @@ func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter)
} }
clean = clean || entry == nil || !(entry.Size == stat.Size() && entry.Modified == stat.ModTime()) clean = clean || entry == nil || !(entry.Size == stat.Size() && entry.Modified == stat.ModTime())
l.Debug( logger.Debug(
"checking if formatter has changed", "checking if formatter has changed",
"name", name, "name", name,
"clean", clean, "clean", clean,
@ -174,6 +178,12 @@ func putEntry(bucket *bolt.Bucket, path string, entry *Entry) error {
// ChangeSet is used to walk a filesystem, starting at root, and outputting any new or changed paths using pathsCh. // ChangeSet is used to walk a filesystem, starting at root, and outputting any new or changed paths using pathsCh.
// It determines if a path is new or has changed by comparing against cache entries. // It determines if a path is new or has changed by comparing against cache entries.
func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) error { func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) error {
start := time.Now()
defer func() {
logger.Infof("finished generating change set in %v", time.Since(start))
}()
var tx *bolt.Tx var tx *bolt.Tx
var bucket *bolt.Bucket var bucket *bolt.Bucket
var processed int var processed int
@ -185,6 +195,9 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
} }
}() }()
// for quick removal of tree root from paths
relPathOffset := len(walker.Root()) + 1
return walker.Walk(ctx, func(path string, info fs.FileInfo, err error) error { return walker.Walk(ctx, func(path string, info fs.FileInfo, err error) error {
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -213,7 +226,8 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
bucket = tx.Bucket([]byte(pathsBucket)) bucket = tx.Bucket([]byte(pathsBucket))
} }
cached, err := getEntry(bucket, path) relPath := path[relPathOffset:]
cached, err := getEntry(bucket, relPath)
if err != nil { if err != nil {
return err return err
} }
@ -230,13 +244,15 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
case <-ctx.Done(): case <-ctx.Done():
return ctx.Err() return ctx.Err()
default: default:
pathsCh <- path pathsCh <- relPath
} }
// close the current tx if we have reached the batch size // close the current tx if we have reached the batch size
processed += 1 processed += 1
if processed == readBatchSize { if processed == ReadBatchSize {
return tx.Rollback() err = tx.Rollback()
tx = nil
return err
} }
return nil return nil
@ -244,7 +260,12 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
} }
// Update is used to record updated cache information for the specified list of paths. // Update is used to record updated cache information for the specified list of paths.
func Update(paths []string) (int, error) { func Update(treeRoot string, paths []string) (int, error) {
start := time.Now()
defer func() {
logger.Infof("finished updating %v paths in %v", len(paths), time.Since(start))
}()
if len(paths) == 0 { if len(paths) == 0 {
return 0, nil return 0, nil
} }
@ -260,7 +281,7 @@ func Update(paths []string) (int, error) {
return err return err
} }
pathInfo, err := os.Stat(path) pathInfo, err := os.Stat(filepath.Join(treeRoot, path))
if err != nil { if err != nil {
return err return err
} }

View File

@ -27,7 +27,7 @@ type Format struct {
} }
func (f *Format) Configure() { func (f *Format) Configure() {
log.SetReportTimestamp(false) log.SetReportTimestamp(true)
if f.Verbosity == 0 { if f.Verbosity == 0 {
log.SetLevel(log.WarnLevel) log.SetLevel(log.WarnLevel)

View File

@ -8,7 +8,10 @@ import (
"io/fs" "io/fs"
"os" "os"
"os/signal" "os/signal"
"path/filepath"
"runtime"
"slices" "slices"
"strings"
"syscall" "syscall"
"time" "time"
@ -83,7 +86,7 @@ func (f *Format) Run() (err error) {
// init formatters // init formatters
for name, formatterCfg := range cfg.Formatters { for name, formatterCfg := range cfg.Formatters {
formatter, err := format.NewFormatter(name, formatterCfg, globalExcludes) formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, globalExcludes)
if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter { if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter {
l.Debugf("formatter not found: %v", name) l.Debugf("formatter not found: %v", name)
continue continue
@ -129,7 +132,7 @@ func (f *Format) Run() (err error) {
// create a channel for paths to be processed // create a channel for paths to be processed
// we use a multiple of batch size here to allow for greater concurrency // we use a multiple of batch size here to allow for greater concurrency
pathsCh = make(chan string, 10*BatchSize) pathsCh = make(chan string, BatchSize*runtime.NumCPU())
// create a channel for tracking paths that have been processed // create a channel for tracking paths that have been processed
processedCh = make(chan string, cap(pathsCh)) processedCh = make(chan string, cap(pathsCh))
@ -148,10 +151,22 @@ func walkFilesystem(ctx context.Context) func() error {
paths := Cli.Paths paths := Cli.Paths
if len(paths) == 0 && Cli.Stdin { if len(paths) == 0 && Cli.Stdin {
cwd, err := os.Getwd()
if err != nil {
return fmt.Errorf("%w: failed to determine current working directory", err)
}
// read in all the paths // read in all the paths
scanner := bufio.NewScanner(os.Stdin) scanner := bufio.NewScanner(os.Stdin)
for scanner.Scan() { for scanner.Scan() {
paths = append(paths, scanner.Text()) path := scanner.Text()
if !strings.HasPrefix(path, "/") {
// relative path: resolve it against the cwd
path = filepath.Join(cwd, path)
}
paths = append(paths, path)
} }
} }
@ -194,7 +209,7 @@ func updateCache(ctx context.Context) func() error {
if Cli.NoCache { if Cli.NoCache {
changes += len(batch) changes += len(batch)
} else { } else {
count, err := cache.Update(batch) count, err := cache.Update(Cli.TreeRoot, batch)
if err != nil { if err != nil {
return err return err
} }
@ -278,7 +293,7 @@ func applyFormatters(ctx context.Context) func() error {
if len(batch) > 0 { if len(batch) > 0 {
fg.Go(func() error { fg.Go(func() error {
if err := pipeline.Apply(ctx, batch); err != nil { if err := pipeline.Apply(ctx, batch); err != nil {
return fmt.Errorf("%w: pipeline failure, %s", err, key) return fmt.Errorf("%s failure: %w", key, err)
} }
for _, path := range batch { for _, path := range batch {
processedCh <- path processedCh <- path

View File

@ -108,7 +108,7 @@ func TestIncludesAndExcludes(t *testing.T) {
test.WriteConfig(t, configPath, cfg) test.WriteConfig(t, configPath, cfg)
out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir) out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err) as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
// globally exclude nix files // globally exclude nix files
cfg.Global.Excludes = []string{"*.nix"} cfg.Global.Excludes = []string{"*.nix"}
@ -116,7 +116,7 @@ func TestIncludesAndExcludes(t *testing.T) {
test.WriteConfig(t, configPath, cfg) test.WriteConfig(t, configPath, cfg)
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir) out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err) as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 29)) as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
// add haskell files to the global exclude // add haskell files to the global exclude
cfg.Global.Excludes = []string{"*.nix", "*.hs"} cfg.Global.Excludes = []string{"*.nix", "*.hs"}
@ -124,7 +124,7 @@ func TestIncludesAndExcludes(t *testing.T) {
test.WriteConfig(t, configPath, cfg) test.WriteConfig(t, configPath, cfg)
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir) out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err) as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 23)) as.Contains(string(out), fmt.Sprintf("%d files changed", 24))
echo := cfg.Formatters["echo"] echo := cfg.Formatters["echo"]
@ -134,7 +134,7 @@ func TestIncludesAndExcludes(t *testing.T) {
test.WriteConfig(t, configPath, cfg) test.WriteConfig(t, configPath, cfg)
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir) out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err) as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 21)) as.Contains(string(out), fmt.Sprintf("%d files changed", 22))
// remove go files from the echo formatter // remove go files from the echo formatter
echo.Excludes = []string{"*.py", "*.go"} echo.Excludes = []string{"*.py", "*.go"}
@ -142,7 +142,7 @@ func TestIncludesAndExcludes(t *testing.T) {
test.WriteConfig(t, configPath, cfg) test.WriteConfig(t, configPath, cfg)
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir) out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err) as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 20)) as.Contains(string(out), fmt.Sprintf("%d files changed", 21))
// adjust the includes for echo to only include elm files // adjust the includes for echo to only include elm files
echo.Includes = []string{"*.elm"} echo.Includes = []string{"*.elm"}
@ -180,7 +180,7 @@ func TestCache(t *testing.T) {
test.WriteConfig(t, configPath, cfg) test.WriteConfig(t, configPath, cfg)
out, err := cmd(t, "--config-file", configPath, "--tree-root", tempDir) out, err := cmd(t, "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err) as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir) out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err) as.NoError(err)
@ -189,7 +189,7 @@ func TestCache(t *testing.T) {
// clear cache // clear cache
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "-c") out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "-c")
as.NoError(err) as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir) out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err) as.NoError(err)
@ -198,7 +198,7 @@ func TestCache(t *testing.T) {
// clear cache // clear cache
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "-c") out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "-c")
as.NoError(err) as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir) out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
as.NoError(err) as.NoError(err)
@ -207,7 +207,7 @@ func TestCache(t *testing.T) {
// no cache // no cache
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "--no-cache") out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "--no-cache")
as.NoError(err) as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
} }
func TestChangeWorkingDirectory(t *testing.T) { func TestChangeWorkingDirectory(t *testing.T) {
@ -241,7 +241,7 @@ func TestChangeWorkingDirectory(t *testing.T) {
// this should fail if the working directory hasn't been changed first // this should fail if the working directory hasn't been changed first
out, err := cmd(t, "-C", tempDir) out, err := cmd(t, "-C", tempDir)
as.NoError(err) as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
} }
func TestFailOnChange(t *testing.T) { func TestFailOnChange(t *testing.T) {
@ -418,16 +418,16 @@ func TestGitWorktree(t *testing.T) {
// add everything to the worktree // add everything to the worktree
as.NoError(wt.AddGlob(".")) as.NoError(wt.AddGlob("."))
as.NoError(err) as.NoError(err)
run(30) run(31)
// remove python directory // remove python directory
as.NoError(wt.RemoveGlob("python/*")) as.NoError(wt.RemoveGlob("python/*"))
run(27) run(28)
// walk with filesystem instead of git // walk with filesystem instead of git
out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--walk", "filesystem") out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--walk", "filesystem")
as.NoError(err) as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 57)) as.Contains(string(out), fmt.Sprintf("%d files changed", 59))
} }
func TestPathsArg(t *testing.T) { func TestPathsArg(t *testing.T) {
@ -462,7 +462,7 @@ func TestPathsArg(t *testing.T) {
// without any path args // without any path args
out, err := cmd(t, "-C", tempDir) out, err := cmd(t, "-C", tempDir)
as.NoError(err) as.NoError(err)
as.Contains(string(out), fmt.Sprintf("%d files changed", 30)) as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
// specify some explicit paths // specify some explicit paths
out, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Foo.hs") out, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Foo.hs")

View File

@ -4,6 +4,7 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"os"
"os/exec" "os/exec"
"time" "time"
@ -23,6 +24,7 @@ type Formatter struct {
log *log.Logger log *log.Logger
executable string // path to the executable described by Command executable string // path to the executable described by Command
workingDir string
// internal compiled versions of Includes and Excludes. // internal compiled versions of Includes and Excludes.
includes []glob.Glob includes []glob.Glob
@ -37,6 +39,8 @@ func (f *Formatter) Executable() string {
} }
func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) error { func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) error {
start := time.Now()
// construct args, starting with config // construct args, starting with config
args := f.config.Options args := f.config.Options
@ -45,7 +49,7 @@ func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) erro
// files in a pipeline. // files in a pipeline.
if filter { if filter {
// reset the batch // reset the batch
f.batch = f.batch[:] f.batch = f.batch[:0]
// filter paths // filter paths
for _, path := range paths { for _, path := range paths {
@ -72,15 +76,18 @@ func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) erro
} }
// execute the command // execute the command
start := time.Now()
cmd := exec.CommandContext(ctx, f.config.Command, args...) cmd := exec.CommandContext(ctx, f.config.Command, args...)
cmd.Dir = f.workingDir
if out, err := cmd.CombinedOutput(); err != nil { if out, err := cmd.CombinedOutput(); err != nil {
f.log.Debugf("\n%v", string(out)) if len(out) > 0 {
// todo log output _, _ = fmt.Fprintf(os.Stderr, "%s error:\n%s\n", f.name, out)
return err }
return fmt.Errorf("%w: formatter %s failed to apply", err, f.name)
} }
// report how many paths were passed in and how long the call took
f.log.Infof("%v files processed in %v", len(paths), time.Now().Sub(start)) f.log.Infof("%v files processed in %v", len(paths), time.Now().Sub(start))
return nil return nil
@ -99,6 +106,7 @@ func (f *Formatter) Wants(path string) bool {
// NewFormatter is used to create a new Formatter. // NewFormatter is used to create a new Formatter.
func NewFormatter( func NewFormatter(
name string, name string,
treeRoot string,
config *config.Formatter, config *config.Formatter,
globalExcludes []glob.Glob, globalExcludes []glob.Glob,
) (*Formatter, error) { ) (*Formatter, error) {
@ -109,6 +117,7 @@ func NewFormatter(
// capture config and the formatter's name // capture config and the formatter's name
f.name = name f.name = name
f.config = config f.config = config
f.workingDir = treeRoot
// test if the formatter is available // test if the formatter is available
executable, err := exec.LookPath(config.Command) executable, err := exec.LookPath(config.Command)

View File

@ -25,6 +25,7 @@
# golang # golang
go go
delve delve
graphviz
] ]
++ ++
# include formatters for development and testing # include formatters for development and testing

View File

@ -6,6 +6,7 @@ with pkgs; [
haskellPackages.cabal-fmt haskellPackages.cabal-fmt
haskellPackages.ormolu haskellPackages.ormolu
mdsh mdsh
nixpkgs-fmt
nodePackages.prettier nodePackages.prettier
python3.pkgs.black python3.pkgs.black
rufo rufo

View File

@ -0,0 +1,13 @@
# One CLI to format the code tree - https://git.numtide.com/numtide/treefmt
#
# Two formatters are declared below. Both match "*.nix" files and both are
# assigned to the same pipeline ("nix"), each with a distinct priority.
# NOTE(review): presumably the priority decides the order in which formatters
# of one pipeline run (deadnix = 1, then nixpkgs-fmt = 2) — confirm against
# the treefmt documentation.
# Runs the `deadnix` executable on files matching the includes globs.
[formatter.deadnix]
command = "deadnix"
includes = ["*.nix"]
pipeline = "nix"
priority = 1
# Runs the `nixpkgs-fmt` executable on the same set of "*.nix" files.
[formatter.nixpkgs-fmt]
command = "nixpkgs-fmt"
includes = ["*.nix"]
pipeline = "nix"
priority = 2