diff --git a/contrib/datadog/go.mod b/contrib/datadog/go.mod index d6115b803..c62d5eb4c 100644 --- a/contrib/datadog/go.mod +++ b/contrib/datadog/go.mod @@ -39,7 +39,7 @@ require ( github.com/secure-systems-lab/go-securesystemslib v0.7.0 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/tinylib/msgp v1.1.8 // indirect - go.temporal.io/api v1.46.0 // indirect + go.temporal.io/api v1.48.0 // indirect go.uber.org/atomic v1.11.0 // indirect go4.org/intern v0.0.0-20230525184215-6c62f75575cb // indirect go4.org/unsafe/assume-no-moving-gc v0.0.0-20230525183740-e7c30c78aeb2 // indirect diff --git a/contrib/datadog/go.sum b/contrib/datadog/go.sum index 9b8ad87ff..7707c5e83 100644 --- a/contrib/datadog/go.sum +++ b/contrib/datadog/go.sum @@ -134,8 +134,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.temporal.io/api v1.46.0 h1:O1efPDB6O2B8uIeCDIa+3VZC7tZMvYsMZYQapSbHvCg= -go.temporal.io/api v1.46.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.48.0 h1:qk8c2QkB4RRjBw2yB82hUulbhHxRIYnem+u2mvlbUf8= +go.temporal.io/api v1.48.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= diff --git a/contrib/envconfig/go.mod b/contrib/envconfig/go.mod index 3a7552de8..e179c5f76 100644 --- a/contrib/envconfig/go.mod +++ b/contrib/envconfig/go.mod @@ -22,7 +22,7 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect github.com/robfig/cron v1.2.0 // indirect github.com/stretchr/objx v0.5.2 // indirect - go.temporal.io/api v1.46.0 // indirect + go.temporal.io/api v1.48.0 // indirect golang.org/x/net v0.36.0 // indirect golang.org/x/sync v0.11.0 // indirect golang.org/x/sys v0.30.0 // indirect diff --git a/contrib/envconfig/go.sum b/contrib/envconfig/go.sum index 7fc32e07d..e16cd419f 100644 --- a/contrib/envconfig/go.sum +++ b/contrib/envconfig/go.sum @@ -73,8 +73,8 @@ github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -go.temporal.io/api v1.46.0 h1:O1efPDB6O2B8uIeCDIa+3VZC7tZMvYsMZYQapSbHvCg= -go.temporal.io/api v1.46.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.48.0 h1:qk8c2QkB4RRjBw2yB82hUulbhHxRIYnem+u2mvlbUf8= +go.temporal.io/api v1.48.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= diff --git a/contrib/opentelemetry/go.mod b/contrib/opentelemetry/go.mod index 4e18950eb..471de5bf7 100644 --- a/contrib/opentelemetry/go.mod +++ b/contrib/opentelemetry/go.mod @@ -32,7 +32,7 @@ require ( github.com/stretchr/objx v0.5.2 // indirect go.opentelemetry.io/otel/metric v1.27.0 go.opentelemetry.io/otel/sdk/metric v1.27.0 - go.temporal.io/api v1.46.0 // indirect + go.temporal.io/api v1.48.0 // indirect golang.org/x/net v0.36.0 // indirect golang.org/x/sys v0.30.0 // indirect golang.org/x/text v0.22.0 // indirect diff --git a/contrib/opentelemetry/go.sum b/contrib/opentelemetry/go.sum index 58c0ddda6..0fb072cc8 100644 --- a/contrib/opentelemetry/go.sum +++ b/contrib/opentelemetry/go.sum @@ -86,8 +86,8 @@ go.opentelemetry.io/otel/sdk/metric v1.27.0 h1:5uGNOlpXi+Hbo/DRoI31BSb1v+OGcpv2N go.opentelemetry.io/otel/sdk/metric v1.27.0/go.mod h1:we7jJVrYN2kh3mVBlswtPU22K0SA+769l93J6bsyvqw= go.opentelemetry.io/otel/trace v1.27.0 h1:IqYb813p7cmbHk0a5y6pD5JPakbVfftRXABGt5/Rscw= go.opentelemetry.io/otel/trace v1.27.0/go.mod h1:6RiD1hkAprV4/q+yd2ln1HG9GoPx39SuvvstaLBl+l4= -go.temporal.io/api v1.46.0 h1:O1efPDB6O2B8uIeCDIa+3VZC7tZMvYsMZYQapSbHvCg= -go.temporal.io/api v1.46.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.48.0 h1:qk8c2QkB4RRjBw2yB82hUulbhHxRIYnem+u2mvlbUf8= +go.temporal.io/api v1.48.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= diff --git a/contrib/opentracing/go.mod b/contrib/opentracing/go.mod index fdcb6989f..0a647208a 100644 --- a/contrib/opentracing/go.mod +++ b/contrib/opentracing/go.mod @@ -22,7 +22,7 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect github.com/robfig/cron v1.2.0 // indirect github.com/stretchr/objx v0.5.2 // indirect - go.temporal.io/api v1.46.0 // indirect + go.temporal.io/api v1.48.0 // indirect golang.org/x/net v0.36.0 // indirect golang.org/x/sync v0.11.0 // indirect golang.org/x/sys v0.30.0 // indirect diff --git a/contrib/opentracing/go.sum b/contrib/opentracing/go.sum index f7e437a72..393a47e17 100644 --- a/contrib/opentracing/go.sum +++ b/contrib/opentracing/go.sum @@ -73,8 +73,8 @@ github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -go.temporal.io/api v1.46.0 h1:O1efPDB6O2B8uIeCDIa+3VZC7tZMvYsMZYQapSbHvCg= -go.temporal.io/api v1.46.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.48.0 h1:qk8c2QkB4RRjBw2yB82hUulbhHxRIYnem+u2mvlbUf8= +go.temporal.io/api v1.48.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= diff --git a/contrib/resourcetuner/go.mod b/contrib/resourcetuner/go.mod index 489392f83..946771607 100644 --- a/contrib/resourcetuner/go.mod +++ b/contrib/resourcetuner/go.mod @@ -36,7 +36,7 @@ require ( github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/tklauser/numcpus v0.6.1 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect - go.temporal.io/api v1.46.0 // indirect + go.temporal.io/api v1.48.0 // indirect golang.org/x/exp v0.0.0-20231127185646-65229373498e // indirect golang.org/x/net v0.36.0 // indirect golang.org/x/sync v0.11.0 // indirect diff --git a/contrib/resourcetuner/go.sum b/contrib/resourcetuner/go.sum index 1db988427..db08ef36e 100644 --- a/contrib/resourcetuner/go.sum +++ b/contrib/resourcetuner/go.sum @@ -106,8 +106,8 @@ github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.einride.tech/pid v0.1.3 h1:yWAKSmD2Z10jxd4gYFhOjbBNqXeIQwAtnCO/XKCT7sQ= go.einride.tech/pid v0.1.3/go.mod h1:33JSUbKrH/4v8DZf/0K8IC8Enjd92wB2birp+bCYQso= -go.temporal.io/api v1.46.0 h1:O1efPDB6O2B8uIeCDIa+3VZC7tZMvYsMZYQapSbHvCg= -go.temporal.io/api v1.46.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.48.0 h1:qk8c2QkB4RRjBw2yB82hUulbhHxRIYnem+u2mvlbUf8= +go.temporal.io/api v1.48.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA= diff --git a/contrib/tally/go.mod b/contrib/tally/go.mod index f6d677680..83da008cf 100644 --- a/contrib/tally/go.mod +++ b/contrib/tally/go.mod @@ -23,7 +23,7 @@ require ( github.com/robfig/cron v1.2.0 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/twmb/murmur3 v1.1.5 // indirect - go.temporal.io/api v1.46.0 // indirect + go.temporal.io/api v1.48.0 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/net v0.36.0 // indirect golang.org/x/sync v0.11.0 // indirect diff --git a/contrib/tally/go.sum b/contrib/tally/go.sum index c17e414d4..36988b47f 100644 --- a/contrib/tally/go.sum +++ b/contrib/tally/go.sum @@ -138,8 +138,8 @@ github.com/uber-go/tally/v4 v4.1.1/go.mod h1:aXeSTDMl4tNosyf6rdU8jlgScHyjEGGtfJ/ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -go.temporal.io/api v1.46.0 h1:O1efPDB6O2B8uIeCDIa+3VZC7tZMvYsMZYQapSbHvCg= -go.temporal.io/api v1.46.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.48.0 h1:qk8c2QkB4RRjBw2yB82hUulbhHxRIYnem+u2mvlbUf8= +go.temporal.io/api v1.48.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= diff --git a/go.mod b/go.mod index b8a529a03..573c81bb3 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/nexus-rpc/sdk-go v0.3.0 github.com/robfig/cron v1.2.0 github.com/stretchr/testify v1.10.0 - go.temporal.io/api v1.46.0 + go.temporal.io/api v1.48.0 golang.org/x/sync v0.11.0 golang.org/x/sys v0.30.0 golang.org/x/time v0.3.0 diff --git a/go.sum b/go.sum index 0fc8b7de1..5f87472ef 100644 --- a/go.sum +++ b/go.sum @@ -71,8 +71,8 @@ github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -go.temporal.io/api v1.46.0 h1:O1efPDB6O2B8uIeCDIa+3VZC7tZMvYsMZYQapSbHvCg= -go.temporal.io/api v1.46.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.48.0 h1:qk8c2QkB4RRjBw2yB82hUulbhHxRIYnem+u2mvlbUf8= +go.temporal.io/api v1.48.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= diff --git a/internal/cmd/build/go.mod b/internal/cmd/build/go.mod index 1bcdab7e1..4fd80ecec 100644 --- a/internal/cmd/build/go.mod +++ b/internal/cmd/build/go.mod @@ -24,7 +24,7 @@ require ( github.com/robfig/cron v1.2.0 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/stretchr/testify v1.10.0 // indirect - go.temporal.io/api v1.46.0 // indirect + go.temporal.io/api v1.48.0 // indirect golang.org/x/exp/typeparams v0.0.0-20250210185358-939b2ce775ac // indirect golang.org/x/mod v0.23.0 // indirect golang.org/x/net v0.36.0 // indirect diff --git a/internal/cmd/build/go.sum b/internal/cmd/build/go.sum index 6581a7339..5ca7a1904 100644 --- a/internal/cmd/build/go.sum +++ b/internal/cmd/build/go.sum @@ -89,8 +89,8 @@ go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiy go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ= go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM= go.opentelemetry.io/otel/trace v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8= -go.temporal.io/api v1.46.0 h1:O1efPDB6O2B8uIeCDIa+3VZC7tZMvYsMZYQapSbHvCg= -go.temporal.io/api v1.46.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.48.0 h1:qk8c2QkB4RRjBw2yB82hUulbhHxRIYnem+u2mvlbUf8= +go.temporal.io/api v1.48.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= diff --git a/internal/error.go b/internal/error.go index 9282fe6c7..953dfc291 100644 --- a/internal/error.go +++ b/internal/error.go @@ -137,6 +137,8 @@ type ( // // NOTE: This option is supported by Temporal Server >= v1.24.2 older version will ignore this value. NextRetryDelay time.Duration + // Category of the error. Maps to logging/metrics behaviours. + Category ApplicationErrorCategory } // ApplicationError returned from activity implementations with message and optional details. @@ -150,6 +152,7 @@ type ( cause error details converter.EncodedValues nextRetryDelay time.Duration + category ApplicationErrorCategory } // TimeoutError returned when activity or child workflow timed out. @@ -380,6 +383,23 @@ var ( ErrMissingWorkflowID = errors.New("workflow ID is unset for Nexus operation") ) +// ApplicationErrorCategory sets the category of the error. The category of the error +// maps to logging/metrics behaviours. +// +// Exposed as: [go.temporal.io/sdk/temporal.ApplicationErrorCategory] +type ApplicationErrorCategory int + +const ( + // ApplicationErrorCategoryUnspecified represents an error with an unspecified category. + // + // Exposed as: [go.temporal.io/sdk/temporal.ApplicationErrorCategoryUnspecified] + ApplicationErrorCategoryUnspecified ApplicationErrorCategory = iota + // ApplicationErrorCategoryBenign indicates an error that is expected under normal operation and should not trigger alerts. + // + // Exposed as: [go.temporal.io/sdk/temporal.ApplicationErrorCategoryBenign] + ApplicationErrorCategoryBenign +) + // NewApplicationError create new instance of *ApplicationError with message, type, and optional details. func NewApplicationError(msg string, errType string, nonRetryable bool, cause error, details ...interface{}) error { return NewApplicationErrorWithOptions( @@ -397,6 +417,7 @@ func NewApplicationErrorWithOptions(msg string, errType string, options Applicat cause: options.Cause, nonRetryable: options.NonRetryable, nextRetryDelay: options.NextRetryDelay, + category: options.Category, } // When return error to user, use EncodedValues as details and data is ready to be decoded by calling Get details := options.Details @@ -661,6 +682,11 @@ func (e *ApplicationError) Unwrap() error { // a zero value means to use the activities retry policy. func (e *ApplicationError) NextRetryDelay() time.Duration { return e.nextRetryDelay } +// Category returns the ApplicationErrorCategory of the error. +func (e *ApplicationError) Category() ApplicationErrorCategory { + return e.category +} + // Error from error interface func (e *TimeoutError) Error() string { msg := fmt.Sprintf("%s (type: %s)", e.message(), e.timeoutType) @@ -1029,3 +1055,16 @@ func getErrType(err error) string { return t.Name() } + +func isBenignApplicationError(err error) bool { + appError, _ := err.(*ApplicationError) + return appError != nil && appError.Category() == ApplicationErrorCategoryBenign +} + +func isBenignProtoApplicationFailure(failure *failurepb.Failure) bool { + if failure == nil { + return false + } + appFailureInfo := failure.GetApplicationFailureInfo() + return appFailureInfo != nil && appFailureInfo.GetCategory() == enumspb.APPLICATION_ERROR_CATEGORY_BENIGN +} diff --git a/internal/error_test.go b/internal/error_test.go index 8a69ea4e0..e6bab289a 100644 --- a/internal/error_test.go +++ b/internal/error_test.go @@ -723,6 +723,7 @@ func Test_convertErrorToFailure_ApplicationErrorWithExtraRequests(t *testing.T) NonRetryable: true, Cause: errors.New("cause error"), Details: []interface{}{"details", 2208}, + Category: ApplicationErrorCategoryBenign, }, ) f := fc.ErrorToFailure(err) @@ -734,15 +735,27 @@ func Test_convertErrorToFailure_ApplicationErrorWithExtraRequests(t *testing.T) require.Equal("cause error", f.GetCause().GetMessage()) require.Equal("", f.GetCause().GetApplicationFailureInfo().GetType()) require.Nil(f.GetCause().GetCause()) + require.Equal(enumspb.APPLICATION_ERROR_CATEGORY_BENIGN, f.GetApplicationFailureInfo().GetCategory()) err2 := fc.FailureToError(f) var applicationErr *ApplicationError require.True(errors.As(err2, &applicationErr)) require.Equal("message (type: customType, retryable: false): cause error", applicationErr.Error()) + require.Equal(ApplicationErrorCategoryBenign, applicationErr.Category()) err2 = errors.Unwrap(err2) require.True(errors.As(err2, &applicationErr)) require.Equal("cause error", applicationErr.Error()) + + err = NewApplicationErrorWithOptions( + "another message", + "another customType", + ApplicationErrorOptions{ + Category: ApplicationErrorCategoryUnspecified, + }, + ) + f = fc.ErrorToFailure(err) + require.Equal(enumspb.APPLICATION_ERROR_CATEGORY_UNSPECIFIED, f.GetApplicationFailureInfo().GetCategory()) } func Test_convertErrorToFailure_EncodeMessage(t *testing.T) { @@ -1104,6 +1117,7 @@ func Test_convertFailureToError_ApplicationFailure(t *testing.T) { Type: "MyCoolType", NonRetryable: true, Details: details, + Category: enumspb.APPLICATION_ERROR_CATEGORY_BENIGN, }}, Cause: &failurepb.Failure{ Message: "cause message", @@ -1120,6 +1134,7 @@ func Test_convertFailureToError_ApplicationFailure(t *testing.T) { require.Equal("message (type: MyCoolType, retryable: false): cause message (type: UnknownType, retryable: true)", applicationErr.Error()) require.Equal("MyCoolType", applicationErr.Type()) require.Equal(true, applicationErr.NonRetryable()) + require.Equal(ApplicationErrorCategoryBenign, applicationErr.Category()) var str string var n int require.NoError(applicationErr.Details(&str, &n)) @@ -1149,8 +1164,9 @@ func Test_convertFailureToError_ApplicationFailure(t *testing.T) { f = &failurepb.Failure{ Message: "message", FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{ - Type: "CoolError", - Details: details, + Type: "CoolError", + Details: details, + Category: enumspb.APPLICATION_ERROR_CATEGORY_UNSPECIFIED, }}, } @@ -1160,6 +1176,7 @@ func Test_convertFailureToError_ApplicationFailure(t *testing.T) { require.Equal("message (type: CoolError, retryable: true)", coolErr.Error()) require.Equal("CoolError", coolErr.Type()) require.Equal(false, coolErr.NonRetryable()) + require.Equal(ApplicationErrorCategoryUnspecified, coolErr.Category()) } func Test_convertFailureToError_CanceledFailure(t *testing.T) { diff --git a/internal/failure_converter.go b/internal/failure_converter.go index fcc2f687b..da3686c1d 100644 --- a/internal/failure_converter.go +++ b/internal/failure_converter.go @@ -115,6 +115,7 @@ func (dfc *DefaultFailureConverter) ErrorToFailure(err error) *failurepb.Failure NonRetryable: err.NonRetryable(), Details: convertErrDetailsToPayloads(err.details, dfc.dataConverter), NextRetryDelay: delay, + Category: enumspb.ApplicationErrorCategory(err.Category()), } failure.FailureInfo = &failurepb.Failure_ApplicationFailureInfo{ApplicationFailureInfo: failureInfo} case *CanceledError: @@ -250,6 +251,7 @@ func (dfc *DefaultFailureConverter) FailureToError(failure *failurepb.Failure) e Cause: dfc.FailureToError(failure.GetCause()), Details: []interface{}{details}, NextRetryDelay: nextRetryDelay, + Category: ApplicationErrorCategory(applicationFailureInfo.GetCategory()), }, ) } diff --git a/internal/internal_task_handlers.go b/internal/internal_task_handlers.go index 41f60c95e..d1ac651a8 100644 --- a/internal/internal_task_handlers.go +++ b/internal/internal_task_handlers.go @@ -1894,7 +1894,9 @@ func (wth *workflowTaskHandlerImpl) completeWorkflow( }} } else if workflowContext.err != nil { // Workflow failures - metricsHandler.Counter(metrics.WorkflowFailedCounter).Inc(1) + if !isBenignApplicationError(workflowContext.err) { + metricsHandler.Counter(metrics.WorkflowFailedCounter).Inc(1) + } closeCommand = createNewCommand(enumspb.COMMAND_TYPE_FAIL_WORKFLOW_EXECUTION) failure := wth.failureConverter.ErrorToFailure(workflowContext.err) closeCommand.Attributes = &commandpb.Command_FailWorkflowExecutionCommandAttributes{FailWorkflowExecutionCommandAttributes: &commandpb.FailWorkflowExecutionCommandAttributes{ @@ -2311,7 +2313,11 @@ func (ath *activityTaskHandlerImpl) Execute(taskQueue string, t *workflowservice return nil, ctx.Err() } if err != nil && err != ErrActivityResultPending { - ath.logger.Error("Activity error.", + logFunc := ath.logger.Error // Default to Error + if isBenignApplicationError(err) { + logFunc = ath.logger.Debug // Downgrade to Debug for benign application errors + } + logFunc("Activity error.", tagWorkflowID, t.WorkflowExecution.GetWorkflowId(), tagRunID, t.WorkflowExecution.GetRunId(), tagActivityType, activityType, diff --git a/internal/internal_task_pollers.go b/internal/internal_task_pollers.go index b554561fc..023c1939c 100644 --- a/internal/internal_task_pollers.go +++ b/internal/internal_task_pollers.go @@ -705,7 +705,7 @@ func (lath *localActivityTaskHandler) executeLocalActivityTask(task *localActivi metricsHandler.Counter(metrics.LocalActivityErrorCounter).Inc(1) err = newPanicError(p, st) } - if err != nil { + if err != nil && !isBenignApplicationError(err) { metricsHandler.Counter(metrics.LocalActivityFailedCounter).Inc(1) metricsHandler.Counter(metrics.LocalActivityExecutionFailedCounter).Inc(1) } @@ -1104,8 +1104,10 @@ func (atp *activityTaskPoller) ProcessTask(task interface{}) error { return err } // in case if activity execution failed, request should be of type RespondActivityTaskFailedRequest - if _, ok := request.(*workflowservice.RespondActivityTaskFailedRequest); ok { - activityMetricsHandler.Counter(metrics.ActivityExecutionFailedCounter).Inc(1) + if req, ok := request.(*workflowservice.RespondActivityTaskFailedRequest); ok { + if !isBenignProtoApplicationFailure(req.Failure) { + activityMetricsHandler.Counter(metrics.ActivityExecutionFailedCounter).Inc(1) + } } activityMetricsHandler.Timer(metrics.ActivityExecutionLatency).Record(time.Since(executionStartTime)) diff --git a/temporal/error.go b/temporal/error.go index 736b47701..2ef1e3e80 100644 --- a/temporal/error.go +++ b/temporal/error.go @@ -273,3 +273,14 @@ func NewTimeoutError(timeoutType enumspb.TimeoutType, lastErr error, details ... func NewHeartbeatTimeoutError(details ...interface{}) error { return internal.NewHeartbeatTimeoutError(details...) } + +// ApplicationErrorCategory sets the category of the error. The category of the error +// maps to logging/metrics SDK behaviours, does not impact server-side logging/metrics. +type ApplicationErrorCategory = internal.ApplicationErrorCategory + +const ( + // ApplicationErrorCategoryUnspecified represents an error with an unspecified category. + ApplicationErrorCategoryUnspecified = internal.ApplicationErrorCategoryUnspecified + // ApplicationErrorCategoryBenign indicates an error that is expected under normal operation and should not trigger alerts. + ApplicationErrorCategoryBenign = internal.ApplicationErrorCategoryBenign +) diff --git a/test/go.mod b/test/go.mod index f73880888..308d7de33 100644 --- a/test/go.mod +++ b/test/go.mod @@ -14,7 +14,7 @@ require ( go.opentelemetry.io/otel v1.28.0 go.opentelemetry.io/otel/sdk v1.28.0 go.opentelemetry.io/otel/trace v1.28.0 - go.temporal.io/api v1.46.0 + go.temporal.io/api v1.48.0 go.temporal.io/sdk v1.29.1 go.temporal.io/sdk/contrib/opentelemetry v0.0.0-00010101000000-000000000000 go.temporal.io/sdk/contrib/opentracing v0.0.0-00010101000000-000000000000 diff --git a/test/go.sum b/test/go.sum index d8db3e3ba..e05159777 100644 --- a/test/go.sum +++ b/test/go.sum @@ -190,8 +190,8 @@ go.opentelemetry.io/otel/sdk/metric v1.27.0 h1:5uGNOlpXi+Hbo/DRoI31BSb1v+OGcpv2N go.opentelemetry.io/otel/sdk/metric v1.27.0/go.mod h1:we7jJVrYN2kh3mVBlswtPU22K0SA+769l93J6bsyvqw= go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= -go.temporal.io/api v1.46.0 h1:O1efPDB6O2B8uIeCDIa+3VZC7tZMvYsMZYQapSbHvCg= -go.temporal.io/api v1.46.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.48.0 h1:qk8c2QkB4RRjBw2yB82hUulbhHxRIYnem+u2mvlbUf8= +go.temporal.io/api v1.48.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= diff --git a/test/integration_test.go b/test/integration_test.go index 9c680e5f2..7483ce53f 100644 --- a/test/integration_test.go +++ b/test/integration_test.go @@ -32,6 +32,7 @@ import ( "math" "math/rand" "os" + "slices" "strconv" "strings" "sync" @@ -7522,3 +7523,198 @@ func (ts *IntegrationTestSuite) TestRawValueQueryMetadata() { ts.Equal("Basic", metadata.Definition.Type) ts.Equal(3, len(metadata.Definition.QueryDefinitions)) } + +func (ts *IntegrationTestSuite) TestWorkflowTaskFailureMetric_BenignHandling() { + wfWithApplicationErr := func(ctx workflow.Context, isBenign bool) error { + if !isBenign { + return temporal.NewApplicationError("Non-benign failure", "", false, nil) + } + return temporal.NewApplicationErrorWithOptions( + "Benign failure", + "", + temporal.ApplicationErrorOptions{ + Category: temporal.ApplicationErrorCategoryBenign, + }, + ) + } + + ts.worker.RegisterWorkflow(wfWithApplicationErr) + currCount := ts.metricCount(metrics.WorkflowFailedCounter) + + runNonBenign, err := ts.client.ExecuteWorkflow( + context.Background(), + ts.startWorkflowOptions("test-non-benign-failure-metric"), + wfWithApplicationErr, + false, + ) + ts.NoError(err) + // Wait for completion + err = runNonBenign.Get(context.Background(), nil) + // Expect a non-benign application error. + ts.Error(err) + var appErr *temporal.ApplicationError + ts.True(errors.As(err, &appErr)) + ts.False(appErr.Category() == temporal.ApplicationErrorCategoryBenign) + ts.Equal("Non-benign failure", appErr.Error()) + + // Expect initial count to have incremented because the workflow failed with non-benign err. + currCount++ + ts.assertMetricCount(metrics.WorkflowFailedCounter, currCount) + + runBenign, err := ts.client.ExecuteWorkflow( + context.Background(), + ts.startWorkflowOptions("test-benign-failure-metric"), + wfWithApplicationErr, + true, + ) + ts.NoError(err) + // Wait for completion + err = runBenign.Get(context.Background(), nil) + // Expect a benign application error. + ts.Error(err) + ts.True(errors.As(err, &appErr)) + ts.True(appErr.Category() == temporal.ApplicationErrorCategoryBenign) + // Expect count to not have incremented because the workflow failed with benign err. + ts.assertMetricCount(metrics.WorkflowFailedCounter, currCount) +} + +func (ts *IntegrationTestSuite) TestActivityFailureMetric_BenignHandling() { + actWithAppErr := func(ctx context.Context, isBenign bool) error { + if isBenign { + return temporal.NewApplicationErrorWithOptions("Benign act failure", "", + temporal.ApplicationErrorOptions{Category: temporal.ApplicationErrorCategoryBenign}) + } + return temporal.NewApplicationError("Non-benign act failure", "", false, nil) + } + + wfWithAppErrActivity := func(ctx workflow.Context, isBenign bool) error { + ctx = workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ + StartToCloseTimeout: 3 * time.Second, + // Don't retry + RetryPolicy: &temporal.RetryPolicy{MaximumAttempts: 1}, + }) + return workflow.ExecuteActivity(ctx, actWithAppErr, isBenign).Get(ctx, nil) + } + + // Configure client/worker with logger, capture logs + logger := ilog.NewMemoryLogger() + c, err := client.Dial(client.Options{ + HostPort: ts.config.ServiceAddr, + Namespace: ts.config.Namespace, + Logger: logger, + ConnectionOptions: client.ConnectionOptions{TLS: ts.config.TLS}, + MetricsHandler: ts.metricsHandler, + }) + ts.NoError(err) + defer c.Close() + + testWorker := worker.New(c, ts.taskQueueName, worker.Options{}) + testWorker.RegisterActivity(actWithAppErr) + testWorker.RegisterWorkflow(wfWithAppErrActivity) + err = testWorker.Start() + ts.NoError(err) + defer testWorker.Stop() + + var appErr *temporal.ApplicationError + currCount := ts.metricCount(metrics.ActivityExecutionFailedCounter) + + runNonBenign, err := c.ExecuteWorkflow( + context.Background(), + ts.startWorkflowOptions(ts.T().Name()+"-non-benign-err"), + wfWithAppErrActivity, + false, + ) + ts.NoError(err) + // Wait for completion + err = runNonBenign.Get(context.Background(), nil) + ts.Error(err) + ts.True(errors.As(err, &appErr)) + // Expect non-benign error + ts.False(appErr.Category() == temporal.ApplicationErrorCategoryBenign) + // Expect warn log for activity failure + ts.True(slices.ContainsFunc(logger.Lines(), func(line string) bool { + return strings.Contains(line, "ERROR") && strings.Contains(line, "Activity error.") + })) + + // Expect initial count to have incremented because the activity failed with non-benign err. + currCount++ + ts.assertMetricCount(metrics.ActivityExecutionFailedCounter, currCount) + + runBenign, err := c.ExecuteWorkflow( + context.Background(), + ts.startWorkflowOptions(ts.T().Name()+"-benign-err"), + wfWithAppErrActivity, true, + ) + ts.NoError(err) + // Wait for completion + err = runBenign.Get(context.Background(), nil) + ts.Error(err) + ts.True(errors.As(err, &appErr)) + // Expect benign error + ts.True(appErr.Category() == temporal.ApplicationErrorCategoryBenign) + // Expect debug log for activity failure + ts.True(slices.ContainsFunc(logger.Lines(), func(line string) bool { + return strings.Contains(line, "DEBUG") && strings.Contains(line, "Activity error.") + })) + + // Expect count to not have incremented because the activity failed with benign err. + ts.assertMetricCount(metrics.ActivityExecutionFailedCounter, currCount) +} + +func (ts *IntegrationTestSuite) TestLocalActivityFailureMetric_BenignHandling() { + localActWithAppErr := func(ctx context.Context, isBenign bool) error { + if isBenign { + return temporal.NewApplicationErrorWithOptions("Benign local act failure", "", + temporal.ApplicationErrorOptions{Category: temporal.ApplicationErrorCategoryBenign}) + } + return temporal.NewApplicationError("Non-benign local act failure", "", false, nil) + } + + wfWithLocalActAppErr := func(ctx workflow.Context, isBenign bool) error { + ctx = workflow.WithLocalActivityOptions(ctx, workflow.LocalActivityOptions{ + ScheduleToCloseTimeout: 3 * time.Second, + // Don't retry + RetryPolicy: &temporal.RetryPolicy{MaximumAttempts: 1}, + }) + return workflow.ExecuteLocalActivity(ctx, localActWithAppErr, isBenign).Get(ctx, nil) + } + + ts.worker.RegisterActivity(localActWithAppErr) + ts.worker.RegisterWorkflow(wfWithLocalActAppErr) + + var appErr *temporal.ApplicationError + currCount := ts.metricCount(metrics.LocalActivityExecutionFailedCounter) + + runNonBenign, err := ts.client.ExecuteWorkflow( + context.Background(), + ts.startWorkflowOptions(ts.T().Name()+"-non-benign-err"), + wfWithLocalActAppErr, false, + ) + ts.NoError(err) + // Wait for completion + err = runNonBenign.Get(context.Background(), nil) + ts.Error(err) + ts.True(errors.As(err, &appErr)) + // Expect non-benign error + ts.False(appErr.Category() == temporal.ApplicationErrorCategoryBenign) + + // Expect initial count to have incremented because the activity failed with non-benign err. + currCount++ + ts.assertMetricCount(metrics.LocalActivityExecutionFailedCounter, currCount) + + runBenign, err := ts.client.ExecuteWorkflow( + context.Background(), + ts.startWorkflowOptions(ts.T().Name()+"-benign-err"), + wfWithLocalActAppErr, true, + ) + ts.NoError(err) + // Wait for completion + err = runBenign.Get(context.Background(), nil) + ts.Error(err) + ts.True(errors.As(err, &appErr)) + // Expect benign error + ts.True(appErr.Category() == temporal.ApplicationErrorCategoryBenign) + + // Expect count to remain unchanged + ts.assertMetricCount(metrics.LocalActivityExecutionFailedCounter, currCount) +}