// treefmt/cli/format.go

package cli

import (
	"bufio"
	"context"
	"errors"
	"fmt"
	"os"
	"os/signal"
	"path/filepath"
	"runtime"
	"slices"
	"sort"
	"strings"
	"syscall"

	"git.numtide.com/numtide/treefmt/cache"
	"git.numtide.com/numtide/treefmt/config"
	"git.numtide.com/numtide/treefmt/format"
	"git.numtide.com/numtide/treefmt/stats"
	"git.numtide.com/numtide/treefmt/walk"
	"github.com/charmbracelet/log"
	"github.com/gobwas/glob"
	"golang.org/x/sync/errgroup"
)

const (
	// BatchSize is the maximum number of files passed to a pipeline or the cache in one batch.
	BatchSize = 1024
)

var (
	// globalExcludes are glob patterns from the global config which exclude files from all formatters.
	globalExcludes []glob.Glob

	// formatters and pipelines are constructed from the config, keyed by name.
	formatters map[string]*format.Formatter
	pipelines  map[string]*format.Pipeline

	// filesCh receives files from the filesystem walk; processedCh receives files after formatting.
	filesCh     chan *walk.File
	processedCh chan *walk.File

	ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled")
)
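
// Run is the entry point for the format command. It reads the config, constructs formatters and
// pipelines, opens the cache, and then runs the walk, format and cache-update stages concurrently,
// returning the first error encountered.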
func (f *Format) Run() (err error) {
	stats.Init()

	Cli.Configure()

	l := log.WithPrefix("format")

	defer func() {
		if err := cache.Close(); err != nil {
			l.Errorf("failed to close cache: %v", err)
		}
	}()

	// read config
	cfg, err := config.ReadFile(Cli.ConfigFile)
	if err != nil {
		return fmt.Errorf("%w: failed to read config file", err)
	}

	// compile the global exclude globs
	if globalExcludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil {
		return fmt.Errorf("%w: failed to compile global globs", err)
	}

	pipelines = make(map[string]*format.Pipeline)
	formatters = make(map[string]*format.Formatter)

	// filter formatters
	if len(Cli.Formatters) > 0 {
		// first check the cli formatter list is valid
		for _, name := range Cli.Formatters {
			_, ok := cfg.Formatters[name]
			if !ok {
				return fmt.Errorf("formatter not found in config: %v", name)
			}
		}

		// next we remove any formatter configs that were not specified
		for name := range cfg.Formatters {
			if !slices.Contains(Cli.Formatters, name) {
				delete(cfg.Formatters, name)
			}
		}
	}

	// sort the formatter names so that, as we construct pipelines, we add formatters in a deterministic
	// fashion. This ensures a deterministic order even when all priority values are the same, e.g. 0.
	names := make([]string, 0, len(cfg.Formatters))
	for name := range cfg.Formatters {
		names = append(names, name)
	}
	sort.Strings(names)

	// init formatters
	for _, name := range names {
		formatterCfg := cfg.Formatters[name]
		formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, globalExcludes)
		if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter {
			l.Debugf("formatter not found: %v", name)
			continue
		} else if err != nil {
			return fmt.Errorf("%w: failed to initialise formatter: %v", err, name)
		}

		formatters[name] = formatter

		// a formatter without an explicit pipeline gets a singleton pipeline of its own;
		// otherwise formatters are grouped under a shared pipeline key
		if formatterCfg.Pipeline == "" {
			pipeline := format.Pipeline{}
			pipeline.Add(formatter)
			pipelines[name] = &pipeline
		} else {
			key := fmt.Sprintf("p:%s", formatterCfg.Pipeline)
			pipeline, ok := pipelines[key]
			if !ok {
				pipeline = &format.Pipeline{}
				pipelines[key] = pipeline
			}
			pipeline.Add(formatter)
		}
	}

	// open the cache
	if err = cache.Open(Cli.TreeRoot, Cli.ClearCache, formatters); err != nil {
		return err
	}

	// create an app context and listen for shutdown
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	go func() {
		// cancel the context on SIGINT or SIGTERM
		exit := make(chan os.Signal, 1)
		signal.Notify(exit, os.Interrupt, syscall.SIGTERM)
		<-exit
		cancel()
	}()

	// create a group for the concurrent processing tasks, tied to the app context
	eg, ctx := errgroup.WithContext(ctx)

	// create a channel for paths to be processed
	// we use a multiple of batch size here to allow for greater concurrency
	filesCh = make(chan *walk.File, BatchSize*runtime.NumCPU())

	// create a channel for tracking paths that have been processed
	processedCh = make(chan *walk.File, cap(filesCh))

	// start concurrent processing tasks
	eg.Go(updateCache(ctx))
	eg.Go(applyFormatters(ctx))
	eg.Go(walkFilesystem(ctx))

	// wait for everything to complete
	return eg.Wait()
}
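
// walkFilesystem returns a task that traverses the tree root, or the paths provided via CLI args or
// stdin, and emits files onto filesCh. With Cli.NoCache set it emits every regular file it encounters;
// otherwise it delegates to cache.ChangeSet, which emits only files that have changed since the last run.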
func walkFilesystem(ctx context.Context) func() error {
	return func() error {
		paths := Cli.Paths

		if len(paths) == 0 && Cli.Stdin {
			cwd, err := os.Getwd()
			if err != nil {
				return fmt.Errorf("%w: failed to determine current working directory", err)
			}

			// read in all the paths
			scanner := bufio.NewScanner(os.Stdin)
			for scanner.Scan() {
				path := scanner.Text()
				if !strings.HasPrefix(path, "/") {
					// append the cwd
					path = filepath.Join(cwd, path)
				}
				paths = append(paths, path)
			}
		}

		walker, err := walk.New(Cli.Walk, Cli.TreeRoot, paths)
		if err != nil {
			return fmt.Errorf("failed to create walker: %w", err)
		}

		defer close(filesCh)

		if Cli.NoCache {
			return walker.Walk(ctx, func(file *walk.File, err error) error {
				select {
				case <-ctx.Done():
					return ctx.Err()
				default:
					// ignore symlinks and directories
					if !(file.Info.IsDir() || file.Info.Mode()&os.ModeSymlink == os.ModeSymlink) {
						stats.Add(stats.Traversed, 1)
						stats.Add(stats.Emitted, 1)
						filesCh <- file
					}
					return nil
				}
			})
		}

		if err = cache.ChangeSet(ctx, walker, filesCh); err != nil {
			return fmt.Errorf("failed to generate change set: %w", err)
		}
		return nil
	}
}
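
// updateCache returns a task that drains processedCh, writing entries to the cache in batches of
// BatchSize. Once the channel is closed it flushes any remaining batch, enforces --fail-on-change,
// and prints the run statistics.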
func updateCache(ctx context.Context) func() error {
	return func() error {
		batch := make([]*walk.File, 0, BatchSize)

		processBatch := func() error {
			if err := cache.Update(batch); err != nil {
				return err
			}
			batch = batch[:0]
			return nil
		}

	LOOP:
		for {
			select {
			case <-ctx.Done():
				return ctx.Err()
			case path, ok := <-processedCh:
				if !ok {
					break LOOP
				}
				batch = append(batch, path)
				if len(batch) == BatchSize {
					if err := processBatch(); err != nil {
						return err
					}
				}
			}
		}

		// final flush
		if err := processBatch(); err != nil {
			return err
		}

		if Cli.FailOnChange && stats.Value(stats.Formatted) != 0 {
			return ErrFailOnChange
		}

		stats.Print()
		return nil
	}
}
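
// applyFormatters returns a task that reads files from filesCh, batches them per matching pipeline,
// and applies each pipeline concurrently once its batch is full. Files which match no pipeline are
// passed straight through to processedCh; partial batches are flushed once filesCh is closed.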
func applyFormatters(ctx context.Context) func() error {
	// create a sub-group for the individual formatting tasks
	fg, ctx := errgroup.WithContext(ctx)

	// pending files, batched per pipeline key
	batches := make(map[string][]*walk.File)

	tryApply := func(key string, file *walk.File) {
		batch, ok := batches[key]
		if !ok {
			batch = make([]*walk.File, 0, BatchSize)
		}
		batch = append(batch, file)
		batches[key] = batch

		// once a batch is full, dispatch it to its pipeline and reset it
		if len(batch) == BatchSize {
			pipeline := pipelines[key]

			// copy the batch
			files := make([]*walk.File, len(batch))
			copy(files, batch)

			fg.Go(func() error {
				if err := pipeline.Apply(ctx, files); err != nil {
					return err
				}
				for _, path := range files {
					processedCh <- path
				}
				return nil
			})

			batches[key] = batch[:0]
		}
	}

	flushBatches := func() {
		for key, pipeline := range pipelines {
			batch := batches[key]
			pipeline := pipeline // capture for closure

			if len(batch) > 0 {
				fg.Go(func() error {
					if err := pipeline.Apply(ctx, batch); err != nil {
						return fmt.Errorf("%s failure: %w", key, err)
					}
					for _, path := range batch {
						processedCh <- path
					}
					return nil
				})
			}
		}
	}

	return func() error {
		defer func() {
			// close processed channel
			close(processedCh)
		}()

		for file := range filesCh {
			var matched bool
			for key, pipeline := range pipelines {
				if !pipeline.Wants(file) {
					continue
				}
				matched = true
				tryApply(key, file)
			}
			if matched {
				stats.Add(stats.Matched, 1)
			} else {
				// no pipeline wants this file, mark it as processed straight away
				processedCh <- file
			}
		}

		// flush any partial batches which remain
		flushBatches()

		// wait for all outstanding formatting tasks to complete
		if err := fg.Wait(); err != nil {
			return fmt.Errorf("pipeline processing failure: %w", err)
		}
		return nil
	}
}