From 53be2a2d3b384839b00bf6282bb2bf897098f84b Mon Sep 17 00:00:00 2001
From: Robert Fratto
Date: Mon, 30 Sep 2019 10:46:46 -0400
Subject: [PATCH 01/11] Add incremental chunk transfers

This commit introduces the pieces that make up a new "incremental chunk transfer" feature:

- pkg/ring: incremental transfer management

  Adds management of incremental transfers between ingesters when a lifecycler joins or leaves a ring. The implementation of the IncrementalTransferer interface is left to a future commit.

  The LifecyclerConfig has been updated with JoinIncrementalTransfer and LeaveIncrementalTransfer, available as join_incremental_transfer and leave_incremental_transfer in the YAML config, and as join-incremental-transfer and leave-incremental-transfer on the command line.

  When JoinIncrementalTransfer is used, the lifecycler joins the ring immediately. Tokens are inserted into the ring one by one, first in the JOINING state and then in the ACTIVE state, after requesting chunks for the token ranges they should own from neighboring ingesters in the ring.

  When LeaveIncrementalTransfer is used, the lifecycler incrementally moves tokens into the LEAVING state after sending the affected ranges to the neighboring ingesters that should now own the data. Enabling LeaveIncrementalTransfer disables the handoff process; any data that could not be transferred is flushed at the end.

- pkg/distributor: push shard token to ingesters

  Makes ingesters aware of the shard token used by distributors to send traffic to them. This is a requirement for incremental transfers, where the shard token is used to determine which memory series need to be moved. It assumes that all distributors use the same sharding mechanism and always use the same token for a given series. If a memory series is appended to with a different token than the one it was created with, a warning is logged and the new token is used.

- pkg/ingester: implement IncrementalTransferer interface

  Implements the IncrementalTransferer interface used by lifecyclers to move memory series around the ring as ingesters join and leave.

- pkg/ring: add TokenChecker

  Introduces a TokenChecker component that runs in the background and reports metrics on unexpected tokens pushed to ingesters. It supports checking on an interval, when a new stream is pushed, when an existing stream is appended to, and when a stream is transferred.
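For concreteness, a minimal sketch of how the new options might be wired up. The JoinIncrementalTransfer/LeaveIncrementalTransfer fields and the extra nil argument to NewLifecycler are taken from this patch; the zero-value config and the rest of the wiring are illustrative assumptions and would need real KV-store setup to run against a ring.

```go
package main

import (
	"log"

	"github.com/cortexproject/cortex/pkg/ring"
)

func main() {
	// The two fields added by this patch; they map to the
	// join_incremental_transfer / leave_incremental_transfer YAML options
	// and the -ingester.{join,leave}-incremental-transfer flags.
	var cfg ring.LifecyclerConfig
	cfg.JoinIncrementalTransfer = true
	cfg.LeaveIncrementalTransfer = true

	// NewLifecycler now takes an extra argument for the incremental
	// transferer; callers that have none pass nil, mirroring the
	// distributor change in pkg/distributor/distributor.go below.
	lc, err := ring.NewLifecycler(cfg, nil, nil, "distributor", ring.DistributorRingKey, true)
	if err != nil {
		log.Fatalln(err)
	}
	_ = lc
}
```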
Signed-off-by: Robert Fratto --- Makefile | 2 +- docs/configuration/arguments.md | 12 + pkg/distributor/distributor.go | 7 +- pkg/ingester/client/cortex.pb.go | 1247 +++++++++++++++++++++++--- pkg/ingester/client/cortex.proto | 38 +- pkg/ingester/incremental_transfer.go | 260 ++++++ pkg/ingester/ingester.go | 140 ++- pkg/ingester/ingester_test.go | 16 +- pkg/ingester/ingester_v2.go | 17 +- pkg/ingester/lifecycle_test.go | 1 + pkg/ingester/metrics.go | 21 + pkg/ingester/query_test.go | 2 +- pkg/ingester/series.go | 6 +- pkg/ingester/transfer.go | 248 +++-- pkg/ingester/transfer_test.go | 466 ++++++++++ pkg/ingester/user_state.go | 32 +- pkg/ingester/user_state_test.go | 3 + pkg/ingester/wal.go | 4 +- pkg/ring/incremental_transfer.go | 271 ++++++ pkg/ring/lifecycler.go | 339 ++++++- pkg/ring/lifecycler_test.go | 504 ++++++++++- pkg/ring/model.go | 82 +- pkg/ring/ring.go | 24 +- pkg/ring/ring.pb.go | 347 ++++++- pkg/ring/ring.proto | 19 +- pkg/ring/ring_test.go | 282 +++++- pkg/ring/token_checker.go | 190 ++++ pkg/ring/token_checker_test.go | 131 +++ pkg/ring/token_navigator.go | 254 ++++++ pkg/ring/token_navigator_test.go | 226 +++++ pkg/ring/util.go | 17 + pkg/ruler/ruler.go | 2 +- pkg/util/test/poll.go | 2 +- 33 files changed, 4833 insertions(+), 379 deletions(-) create mode 100644 pkg/ingester/incremental_transfer.go create mode 100644 pkg/ring/incremental_transfer.go create mode 100644 pkg/ring/token_checker.go create mode 100644 pkg/ring/token_checker_test.go create mode 100644 pkg/ring/token_navigator.go create mode 100644 pkg/ring/token_navigator_test.go diff --git a/Makefile b/Makefile index 3b89c3f9306..b65fb6a9c03 100644 --- a/Makefile +++ b/Makefile @@ -118,7 +118,7 @@ $(EXES): protos: $(PROTO_GOS) %.pb.go: - protoc -I $(GOPATH)/src:./vendor:./$(@D) --gogoslick_out=plugins=grpc,Mgoogle/protobuf/any.proto=github.com/gogo/protobuf/types,:./$(@D) ./$(patsubst %.pb.go,%.proto,$@) + protoc -I $(GOPATH)/src:./vendor:./$(@D):. --gogoslick_out=paths=source_relative,plugins=grpc,Mgoogle/protobuf/any.proto=github.com/gogo/protobuf/types,:./$(@D) ./$(patsubst %.pb.go,%.proto,$@) lint: misspell -error docs diff --git a/docs/configuration/arguments.md b/docs/configuration/arguments.md index 737a52f727d..7475b28f044 100644 --- a/docs/configuration/arguments.md +++ b/docs/configuration/arguments.md @@ -274,6 +274,18 @@ It also talks to a KVStore and has it's own copies of the same flags used by the How many times a LEAVING ingester tries to find a PENDING ingester during the [hand-over process](../guides/ingester-handover.md). Each attempt takes a second or so. Negative value or zero disables hand-over process completely. (default 10) +- `-ingester.join-incremental-transfer` + + Enables incremental transfer of chunks when joining the ring. When enabled, a joining ingester will insert its tokens into the ring one at a time. For each token that is inserted, the ingester will request chunks from its peers. The mechanism used to determine what chunks an ingester requests is based on the inserted token and its position in the ring. + + When this flag is enabled, the hand-over process is disabled and the ingester.join-after flag is ignored. + +- `-ingester.leave-incremental-transfer` + + Enables incremental transfer of chunks when leaving the ring. When enabled, a leaving ingester will remove its tokens from the ring one at a time. For each token that is removed, the ingester will send chunks to its peers. 
The mechanism used to determine what chunks an ingester sends is based on the leaving token and its position in the ring. + + When this flag is enabled, the hand-over process is disabled. Chunks will still be flushed for any data that could not be transferred. + - `-ingester.normalise-tokens` Deprecated. New ingesters always write "normalised" tokens to the ring. Normalised tokens consume less memory to encode and decode; as the ring is unmarshalled regularly, this significantly reduces memory usage of anything that watches the ring. diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index d8d77d81051..8b9fb28ff16 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -188,7 +188,7 @@ func New(cfg Config, clientConfig ingester_client.Config, limits *validation.Ove if !canJoinDistributorsRing { ingestionRateStrategy = newInfiniteIngestionRateStrategy() } else if limits.IngestionRateStrategy() == validation.GlobalIngestionRateStrategy { - distributorsRing, err = ring.NewLifecycler(cfg.DistributorRing.ToLifecyclerConfig(), nil, "distributor", ring.DistributorRingKey, true) + distributorsRing, err = ring.NewLifecycler(cfg.DistributorRing.ToLifecyclerConfig(), nil, nil, "distributor", ring.DistributorRingKey, true) if err != nil { return nil, err } @@ -288,7 +288,7 @@ func (d *Distributor) checkSample(ctx context.Context, userID, cluster, replica // Validates a single series from a write request. Will remove labels if // any are configured to be dropped for the user ID. // Returns the validated series with it's labels/samples, and any error. -func (d *Distributor) validateSeries(ts ingester_client.PreallocTimeseries, userID string) (client.PreallocTimeseries, error) { +func (d *Distributor) validateSeries(ts ingester_client.PreallocTimeseries, userID string, token uint32) (client.PreallocTimeseries, error) { labelsHistogram.Observe(float64(len(ts.Labels))) if err := validation.ValidateLabels(d.limits, userID, ts.Labels); err != nil { return emptyPreallocSeries, err } @@ -307,6 +307,7 @@ func (d *Distributor) validateSeries(ts ingester_client.PreallocTimeseries, user TimeSeries: &client.TimeSeries{ Labels: ts.Labels, Samples: samples, + Token: token, }, }, nil @@ -384,7 +385,7 @@ func (d *Distributor) Push(ctx context.Context, req *client.WriteRequest) (*clie return nil, err } - validatedSeries, err := d.validateSeries(ts, userID, key) // Errors in validation are considered non-fatal, as one series in a request may contain // invalid data but all the remaining series could be perfectly valid.
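The distributor change above threads the shard key (`key`) through validateSeries and onto TimeSeries.Token. The property the commit message depends on is that this token is a pure function of the series, so every distributor computes the same value for it. The sketch below illustrates that invariant with an assumed fnv-based hash; it is not the distributor's actual sharding code.

```go
package main

import (
	"fmt"
	"hash/fnv"
	"sort"
)

// shardToken sketches the determinism requirement from this patch: every
// distributor must derive the same uint32 token for a given (user, series)
// pair, since ingesters use that token to decide which memory series move
// during incremental transfers. The concrete hash here is illustrative only.
func shardToken(userID string, labels map[string]string) uint32 {
	h := fnv.New32a()
	h.Write([]byte(userID))

	// Hash labels in a stable order so the token is reproducible across
	// distributors regardless of map iteration order.
	names := make([]string, 0, len(labels))
	for name := range labels {
		names = append(names, name)
	}
	sort.Strings(names)
	for _, name := range names {
		h.Write([]byte(name))
		h.Write([]byte(labels[name]))
	}
	return h.Sum32()
}

func main() {
	series := map[string]string{"__name__": "up", "job": "api"}
	// Any two distributors must agree on this value; per the commit
	// message, an ingester logs a warning and adopts the new token if a
	// series is ever appended to with a different one.
	fmt.Println(shardToken("user-1", series))
}
```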
diff --git a/pkg/ingester/client/cortex.pb.go b/pkg/ingester/client/cortex.pb.go index c97908a367c..adcc8daacaf 100644 --- a/pkg/ingester/client/cortex.pb.go +++ b/pkg/ingester/client/cortex.pb.go @@ -8,6 +8,7 @@ import ( context "context" encoding_binary "encoding/binary" fmt "fmt" + ring "github.com/cortexproject/cortex/pkg/ring" _ "github.com/gogo/protobuf/gogoproto" proto "github.com/gogo/protobuf/proto" grpc "google.golang.org/grpc" @@ -855,6 +856,7 @@ type TimeSeriesChunk struct { UserId string `protobuf:"bytes,2,opt,name=user_id,json=userId,proto3" json:"user_id,omitempty"` Labels []LabelAdapter `protobuf:"bytes,3,rep,name=labels,proto3,customtype=LabelAdapter" json:"labels"` Chunks []Chunk `protobuf:"bytes,4,rep,name=chunks,proto3" json:"chunks"` + Token uint32 `protobuf:"varint,5,opt,name=token,proto3" json:"token,omitempty"` } func (m *TimeSeriesChunk) Reset() { *m = TimeSeriesChunk{} } @@ -910,6 +912,13 @@ func (m *TimeSeriesChunk) GetChunks() []Chunk { return nil } +func (m *TimeSeriesChunk) GetToken() uint32 { + if m != nil { + return m.Token + } + return 0 +} + type Chunk struct { StartTimestampMs int64 `protobuf:"varint,1,opt,name=start_timestamp_ms,json=startTimestampMs,proto3" json:"start_timestamp_ms,omitempty"` EndTimestampMs int64 `protobuf:"varint,2,opt,name=end_timestamp_ms,json=endTimestampMs,proto3" json:"end_timestamp_ms,omitempty"` @@ -1016,6 +1025,8 @@ type TimeSeries struct { Labels []LabelAdapter `protobuf:"bytes,1,rep,name=labels,proto3,customtype=LabelAdapter" json:"labels"` // Sorted by time, oldest sample first. Samples []Sample `protobuf:"bytes,2,rep,name=samples,proto3" json:"samples"` + // Token used by the distributor. + Token uint32 `protobuf:"varint,3,opt,name=token,proto3" json:"token,omitempty"` } func (m *TimeSeries) Reset() { *m = TimeSeries{} } @@ -1057,6 +1068,152 @@ func (m *TimeSeries) GetSamples() []Sample { return nil } +func (m *TimeSeries) GetToken() uint32 { + if m != nil { + return m.Token + } + return 0 +} + +type GetChunksRequest struct { + FromIngesterId string `protobuf:"bytes,1,opt,name=from_ingester_id,json=fromIngesterId,proto3" json:"from_ingester_id,omitempty"` + Ranges []ring.TokenRange `protobuf:"bytes,2,rep,name=ranges,proto3" json:"ranges"` + // If true, remove the chunks from the server's memory when the + // send completes. 
+ Move bool `protobuf:"varint,3,opt,name=move,proto3" json:"move,omitempty"` +} + +func (m *GetChunksRequest) Reset() { *m = GetChunksRequest{} } +func (*GetChunksRequest) ProtoMessage() {} +func (*GetChunksRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_893a47d0a749d749, []int{21} +} +func (m *GetChunksRequest) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *GetChunksRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_GetChunksRequest.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *GetChunksRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_GetChunksRequest.Merge(m, src) +} +func (m *GetChunksRequest) XXX_Size() int { + return m.Size() +} +func (m *GetChunksRequest) XXX_DiscardUnknown() { + xxx_messageInfo_GetChunksRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_GetChunksRequest proto.InternalMessageInfo + +func (m *GetChunksRequest) GetFromIngesterId() string { + if m != nil { + return m.FromIngesterId + } + return "" +} + +func (m *GetChunksRequest) GetRanges() []ring.TokenRange { + if m != nil { + return m.Ranges + } + return nil +} + +func (m *GetChunksRequest) GetMove() bool { + if m != nil { + return m.Move + } + return false +} + +type UnblockRangesRequest struct { + Ranges []ring.TokenRange `protobuf:"bytes,2,rep,name=ranges,proto3" json:"ranges"` +} + +func (m *UnblockRangesRequest) Reset() { *m = UnblockRangesRequest{} } +func (*UnblockRangesRequest) ProtoMessage() {} +func (*UnblockRangesRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_893a47d0a749d749, []int{22} +} +func (m *UnblockRangesRequest) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *UnblockRangesRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_UnblockRangesRequest.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *UnblockRangesRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_UnblockRangesRequest.Merge(m, src) +} +func (m *UnblockRangesRequest) XXX_Size() int { + return m.Size() +} +func (m *UnblockRangesRequest) XXX_DiscardUnknown() { + xxx_messageInfo_UnblockRangesRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_UnblockRangesRequest proto.InternalMessageInfo + +func (m *UnblockRangesRequest) GetRanges() []ring.TokenRange { + if m != nil { + return m.Ranges + } + return nil +} + +type UnblockRangesResponse struct { +} + +func (m *UnblockRangesResponse) Reset() { *m = UnblockRangesResponse{} } +func (*UnblockRangesResponse) ProtoMessage() {} +func (*UnblockRangesResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_893a47d0a749d749, []int{23} +} +func (m *UnblockRangesResponse) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *UnblockRangesResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_UnblockRangesResponse.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *UnblockRangesResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_UnblockRangesResponse.Merge(m, src) +} +func (m *UnblockRangesResponse) XXX_Size() int { + return m.Size() +} +func (m *UnblockRangesResponse) 
XXX_DiscardUnknown() { + xxx_messageInfo_UnblockRangesResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_UnblockRangesResponse proto.InternalMessageInfo + type LabelPair struct { Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` Value []byte `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"` @@ -1065,7 +1222,7 @@ type LabelPair struct { func (m *LabelPair) Reset() { *m = LabelPair{} } func (*LabelPair) ProtoMessage() {} func (*LabelPair) Descriptor() ([]byte, []int) { - return fileDescriptor_893a47d0a749d749, []int{21} + return fileDescriptor_893a47d0a749d749, []int{24} } func (m *LabelPair) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1116,7 +1273,7 @@ type Sample struct { func (m *Sample) Reset() { *m = Sample{} } func (*Sample) ProtoMessage() {} func (*Sample) Descriptor() ([]byte, []int) { - return fileDescriptor_893a47d0a749d749, []int{22} + return fileDescriptor_893a47d0a749d749, []int{25} } func (m *Sample) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1166,7 +1323,7 @@ type LabelMatchers struct { func (m *LabelMatchers) Reset() { *m = LabelMatchers{} } func (*LabelMatchers) ProtoMessage() {} func (*LabelMatchers) Descriptor() ([]byte, []int) { - return fileDescriptor_893a47d0a749d749, []int{23} + return fileDescriptor_893a47d0a749d749, []int{26} } func (m *LabelMatchers) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1209,7 +1366,7 @@ type Metric struct { func (m *Metric) Reset() { *m = Metric{} } func (*Metric) ProtoMessage() {} func (*Metric) Descriptor() ([]byte, []int) { - return fileDescriptor_893a47d0a749d749, []int{24} + return fileDescriptor_893a47d0a749d749, []int{27} } func (m *Metric) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1247,7 +1404,7 @@ type LabelMatcher struct { func (m *LabelMatcher) Reset() { *m = LabelMatcher{} } func (*LabelMatcher) ProtoMessage() {} func (*LabelMatcher) Descriptor() ([]byte, []int) { - return fileDescriptor_893a47d0a749d749, []int{25} + return fileDescriptor_893a47d0a749d749, []int{28} } func (m *LabelMatcher) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1307,7 +1464,7 @@ type TimeSeriesFile struct { func (m *TimeSeriesFile) Reset() { *m = TimeSeriesFile{} } func (*TimeSeriesFile) ProtoMessage() {} func (*TimeSeriesFile) Descriptor() ([]byte, []int) { - return fileDescriptor_893a47d0a749d749, []int{26} + return fileDescriptor_893a47d0a749d749, []int{29} } func (m *TimeSeriesFile) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1370,7 +1527,7 @@ type TransferTSDBResponse struct { func (m *TransferTSDBResponse) Reset() { *m = TransferTSDBResponse{} } func (*TransferTSDBResponse) ProtoMessage() {} func (*TransferTSDBResponse) Descriptor() ([]byte, []int) { - return fileDescriptor_893a47d0a749d749, []int{27} + return fileDescriptor_893a47d0a749d749, []int{30} } func (m *TransferTSDBResponse) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1423,6 +1580,9 @@ func init() { proto.RegisterType((*Chunk)(nil), "cortex.Chunk") proto.RegisterType((*TransferChunksResponse)(nil), "cortex.TransferChunksResponse") proto.RegisterType((*TimeSeries)(nil), "cortex.TimeSeries") + proto.RegisterType((*GetChunksRequest)(nil), "cortex.GetChunksRequest") + proto.RegisterType((*UnblockRangesRequest)(nil), "cortex.UnblockRangesRequest") + proto.RegisterType((*UnblockRangesResponse)(nil), "cortex.UnblockRangesResponse") proto.RegisterType((*LabelPair)(nil), "cortex.LabelPair") proto.RegisterType((*Sample)(nil), "cortex.Sample") 
proto.RegisterType((*LabelMatchers)(nil), "cortex.LabelMatchers") @@ -1435,87 +1595,96 @@ func init() { func init() { proto.RegisterFile("cortex.proto", fileDescriptor_893a47d0a749d749) } var fileDescriptor_893a47d0a749d749 = []byte{ - // 1266 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0x57, 0x4d, 0x6f, 0x1b, 0xc5, - 0x1b, 0xdf, 0x8d, 0x5f, 0x12, 0x3f, 0xde, 0xb8, 0xce, 0x24, 0x6d, 0xd3, 0xed, 0xff, 0xbf, 0x29, - 0x23, 0xb5, 0x44, 0x40, 0xdd, 0x92, 0xaa, 0xd0, 0x03, 0x55, 0xe5, 0xb4, 0x69, 0x6b, 0x94, 0xa4, - 0xe9, 0xd8, 0x05, 0x84, 0x84, 0xac, 0x8d, 0x3d, 0x49, 0x56, 0xec, 0x8b, 0xbb, 0x33, 0x8b, 0xe8, - 0x01, 0x09, 0x89, 0x0f, 0x00, 0x47, 0x3e, 0x02, 0x67, 0x2e, 0x70, 0xe6, 0xd4, 0x63, 0x8f, 0x15, - 0x87, 0x8a, 0x3a, 0x17, 0x8e, 0xfd, 0x08, 0x68, 0x67, 0x66, 0xd7, 0xbb, 0xee, 0x5a, 0x44, 0x40, - 0x6f, 0x3b, 0xcf, 0xf3, 0x9b, 0xdf, 0x3c, 0xaf, 0x33, 0xcf, 0x82, 0x31, 0x08, 0x42, 0x4e, 0xbf, - 0x6e, 0x8d, 0xc2, 0x80, 0x07, 0xa8, 0x2a, 0x57, 0xe6, 0xe5, 0x43, 0x87, 0x1f, 0x45, 0xfb, 0xad, - 0x41, 0xe0, 0x5d, 0x39, 0x0c, 0x0e, 0x83, 0x2b, 0x42, 0xbd, 0x1f, 0x1d, 0x88, 0x95, 0x58, 0x88, - 0x2f, 0xb9, 0x0d, 0xff, 0xaa, 0x83, 0xf1, 0x69, 0xe8, 0x70, 0x4a, 0xe8, 0xe3, 0x88, 0x32, 0x8e, - 0x76, 0x01, 0xb8, 0xe3, 0x51, 0x46, 0x43, 0x87, 0xb2, 0x55, 0xfd, 0x42, 0x69, 0xbd, 0xbe, 0x81, - 0x5a, 0xea, 0xa8, 0x9e, 0xe3, 0xd1, 0xae, 0xd0, 0x6c, 0x9a, 0x4f, 0x5f, 0xac, 0x69, 0xbf, 0xbf, - 0x58, 0x43, 0x7b, 0x21, 0xb5, 0x5d, 0x37, 0x18, 0xf4, 0xd2, 0x5d, 0x24, 0xc3, 0x80, 0x3e, 0x84, - 0x6a, 0x37, 0x88, 0xc2, 0x01, 0x5d, 0x9d, 0xbb, 0xa0, 0xaf, 0x37, 0x36, 0xd6, 0x12, 0xae, 0xec, - 0xa9, 0x2d, 0x09, 0xd9, 0xf2, 0x23, 0x8f, 0x54, 0x99, 0xf8, 0xc6, 0x6b, 0x00, 0x13, 0x29, 0x9a, - 0x87, 0x52, 0x7b, 0xaf, 0xd3, 0xd4, 0xd0, 0x02, 0x94, 0xc9, 0xa3, 0xed, 0xad, 0xa6, 0x8e, 0x4f, - 0xc1, 0xa2, 0xe2, 0x60, 0xa3, 0xc0, 0x67, 0x14, 0xdf, 0x84, 0x3a, 0xa1, 0xf6, 0x30, 0xf1, 0xa4, - 0x05, 0xf3, 0x8f, 0xa3, 0xac, 0x1b, 0x2b, 0xc9, 0xd1, 0x0f, 0x23, 0x1a, 0x3e, 0x51, 0x30, 0x92, - 0x80, 0xf0, 0x2d, 0x30, 0xe4, 0x76, 0x49, 0x87, 0xae, 0xc0, 0x7c, 0x48, 0x59, 0xe4, 0xf2, 0x64, - 0xff, 0xe9, 0xa9, 0xfd, 0x12, 0x47, 0x12, 0x14, 0xfe, 0x51, 0x07, 0x23, 0x4b, 0x8d, 0xde, 0x03, - 0xc4, 0xb8, 0x1d, 0xf2, 0xbe, 0x88, 0x07, 0xb7, 0xbd, 0x51, 0xdf, 0x8b, 0xc9, 0xf4, 0xf5, 0x12, - 0x69, 0x0a, 0x4d, 0x2f, 0x51, 0xec, 0x30, 0xb4, 0x0e, 0x4d, 0xea, 0x0f, 0xf3, 0xd8, 0x39, 0x81, - 0x6d, 0x50, 0x7f, 0x98, 0x45, 0x5e, 0x85, 0x05, 0xcf, 0xe6, 0x83, 0x23, 0x1a, 0xb2, 0xd5, 0x52, - 0xde, 0xb5, 0x6d, 0x7b, 0x9f, 0xba, 0x3b, 0x52, 0x49, 0x52, 0x14, 0xee, 0xc0, 0x62, 0xce, 0x68, - 0x74, 0xe3, 0x84, 0x69, 0x2e, 0xc7, 0x69, 0xce, 0x26, 0x14, 0xf7, 0x60, 0x59, 0x50, 0x75, 0x79, - 0x48, 0x6d, 0x2f, 0x25, 0xbc, 0x59, 0x40, 0x78, 0xf6, 0x75, 0xc2, 0xdb, 0x47, 0x91, 0xff, 0x65, - 0x01, 0xeb, 0x35, 0x40, 0xc2, 0xf4, 0x4f, 0x6c, 0x37, 0xa2, 0x2c, 0x09, 0xe0, 0xff, 0x01, 0xdc, - 0x58, 0xda, 0xf7, 0x6d, 0x8f, 0x8a, 0xc0, 0xd5, 0x48, 0x4d, 0x48, 0x76, 0x6d, 0x8f, 0xe2, 0x1b, - 0xb0, 0x9c, 0xdb, 0xa4, 0x4c, 0x79, 0x0b, 0x0c, 0xb9, 0xeb, 0x2b, 0x21, 0x17, 0xc6, 0xd4, 0x48, - 0xdd, 0x9d, 0x40, 0xf1, 0x32, 0x2c, 0x6d, 0x27, 0x34, 0xc9, 0x69, 0xf8, 0xba, 0xb2, 0x41, 0x09, - 0x15, 0xdb, 0x1a, 0xd4, 0x27, 0x36, 0x24, 0x64, 0x90, 0x1a, 0xc1, 0x30, 0x82, 0xe6, 0x23, 0x46, - 0xc3, 0x2e, 0xb7, 0x79, 0x4a, 0xf5, 0x8b, 0x0e, 0x4b, 0x19, 0xa1, 0xa2, 0xba, 0x08, 0x0d, 0xc7, - 0x3f, 0xa4, 0x8c, 0x3b, 0x81, 0xdf, 0x0f, 0x6d, 0x2e, 0x5d, 0xd2, 0xc9, 0x62, 0x2a, 0x25, 0x36, - 0xa7, 0xb1, 0xd7, 0x7e, 0xe4, 0xf5, 
0x55, 0x28, 0xe3, 0x12, 0x28, 0x93, 0x9a, 0x1f, 0x79, 0x32, - 0x82, 0x71, 0x55, 0xd9, 0x23, 0xa7, 0x3f, 0xc5, 0x54, 0x12, 0x4c, 0x4d, 0x7b, 0xe4, 0x74, 0x72, - 0x64, 0x2d, 0x58, 0x0e, 0x23, 0x97, 0x4e, 0xc3, 0xcb, 0x02, 0xbe, 0x14, 0xab, 0x72, 0x78, 0xfc, - 0x05, 0x2c, 0xc7, 0x86, 0x77, 0xee, 0xe4, 0x4d, 0x3f, 0x0b, 0xf3, 0x11, 0xa3, 0x61, 0xdf, 0x19, - 0xaa, 0x34, 0x54, 0xe3, 0x65, 0x67, 0x88, 0x2e, 0x43, 0x79, 0x68, 0x73, 0x5b, 0x98, 0x59, 0xdf, - 0x38, 0x97, 0x64, 0xfc, 0x35, 0xe7, 0x89, 0x80, 0xe1, 0x7b, 0x80, 0x62, 0x15, 0xcb, 0xb3, 0xbf, - 0x0f, 0x15, 0x16, 0x0b, 0x54, 0xdd, 0x9c, 0xcf, 0xb2, 0x4c, 0x59, 0x42, 0x24, 0x12, 0xff, 0xac, - 0x83, 0xb5, 0x43, 0x79, 0xe8, 0x0c, 0xd8, 0xdd, 0x20, 0xcc, 0x96, 0x3d, 0x7b, 0xd3, 0xed, 0x77, - 0x03, 0x8c, 0xa4, 0xb1, 0xfa, 0x8c, 0x72, 0xd5, 0x82, 0xa7, 0x8b, 0x5a, 0x90, 0x91, 0x7a, 0x02, - 0xed, 0x52, 0x8e, 0x3b, 0xb0, 0x36, 0xd3, 0x66, 0x15, 0x8a, 0x4b, 0x50, 0xf5, 0x04, 0x44, 0xc5, - 0xa2, 0x91, 0xd0, 0xca, 0x8d, 0x44, 0x69, 0xf1, 0x6f, 0x3a, 0x9c, 0x9a, 0x6a, 0xab, 0xd8, 0x85, - 0x83, 0x30, 0xf0, 0x54, 0xae, 0xb3, 0xd9, 0x6a, 0xc4, 0xf2, 0x8e, 0x12, 0x77, 0x86, 0xd9, 0x74, - 0xce, 0xe5, 0xd2, 0x79, 0x0b, 0xaa, 0xa2, 0xb4, 0x93, 0x8b, 0x65, 0x29, 0xe7, 0xd5, 0x9e, 0xed, - 0x84, 0x9b, 0x2b, 0xea, 0xe6, 0x37, 0x84, 0xa8, 0x3d, 0xb4, 0x47, 0x9c, 0x86, 0x44, 0x6d, 0x43, - 0xef, 0x42, 0x75, 0x10, 0x1b, 0xc3, 0x56, 0xcb, 0x82, 0x60, 0x31, 0x21, 0xc8, 0x76, 0xbe, 0x82, - 0xe0, 0xef, 0x75, 0xa8, 0x48, 0xd3, 0xdf, 0x54, 0xae, 0x4c, 0x58, 0xa0, 0xfe, 0x20, 0x18, 0x3a, - 0xfe, 0xa1, 0x68, 0x91, 0x0a, 0x49, 0xd7, 0x08, 0xa9, 0xd2, 0x8d, 0x7b, 0xc1, 0x50, 0xf5, 0xb9, - 0x0a, 0x67, 0x7a, 0xa1, 0xed, 0xb3, 0x03, 0x1a, 0x0a, 0xc3, 0xd2, 0xc4, 0xe0, 0x6f, 0x00, 0x26, - 0xf1, 0xce, 0xc4, 0x49, 0xff, 0x67, 0x71, 0x6a, 0xc1, 0x3c, 0xb3, 0xbd, 0x91, 0x2b, 0x3a, 0x3c, - 0x97, 0xe8, 0xae, 0x10, 0xab, 0x48, 0x25, 0x20, 0x7c, 0x1d, 0x6a, 0x29, 0x75, 0x6c, 0x79, 0x7a, - 0x23, 0x1a, 0x44, 0x7c, 0xa3, 0x15, 0xa8, 0x88, 0xfb, 0x4e, 0x04, 0xc2, 0x20, 0x72, 0x81, 0xdb, - 0x50, 0x95, 0x7c, 0x13, 0xbd, 0xbc, 0x73, 0xe4, 0x22, 0xbe, 0x2b, 0x0b, 0xa2, 0x58, 0xe7, 0x93, - 0x10, 0xe2, 0x36, 0x2c, 0xe6, 0x4a, 0x35, 0xf7, 0xfc, 0xe8, 0x27, 0x7c, 0x7e, 0xaa, 0xb2, 0x7c, - 0xff, 0x75, 0xdc, 0x70, 0x1f, 0x8c, 0xec, 0x21, 0xe8, 0x22, 0x94, 0xf9, 0x93, 0x91, 0xf4, 0xaa, - 0x31, 0xa1, 0x13, 0xea, 0xde, 0x93, 0x11, 0x25, 0x42, 0x9d, 0x46, 0x4c, 0x56, 0xfb, 0x54, 0xc4, - 0x4a, 0x42, 0xa8, 0x22, 0xf6, 0x9d, 0x0e, 0x8d, 0x49, 0xa2, 0xef, 0x3a, 0x2e, 0xfd, 0x2f, 0xfa, - 0xca, 0x84, 0x85, 0x03, 0xc7, 0xa5, 0xc2, 0x06, 0x79, 0x5c, 0xba, 0x2e, 0xac, 0xc3, 0x33, 0xb0, - 0x92, 0xd4, 0x61, 0xaf, 0x7b, 0x67, 0x33, 0xa9, 0xc2, 0x77, 0x3e, 0x86, 0x5a, 0xea, 0x1a, 0xaa, - 0x41, 0x65, 0xeb, 0xe1, 0xa3, 0xf6, 0x76, 0x53, 0x43, 0x8b, 0x50, 0xdb, 0x7d, 0xd0, 0xeb, 0xcb, - 0xa5, 0x8e, 0x4e, 0x41, 0x9d, 0x6c, 0xdd, 0xdb, 0xfa, 0xac, 0xbf, 0xd3, 0xee, 0xdd, 0xbe, 0xdf, - 0x9c, 0x43, 0x08, 0x1a, 0x52, 0xb0, 0xfb, 0x40, 0xc9, 0x4a, 0x1b, 0xc7, 0x15, 0x58, 0x48, 0x6c, - 0x47, 0xd7, 0xa1, 0xbc, 0x17, 0xb1, 0x23, 0xb4, 0x52, 0x34, 0x9f, 0x99, 0xa7, 0xa7, 0xa4, 0xaa, - 0x27, 0x34, 0xf4, 0x01, 0x54, 0xc4, 0x34, 0x80, 0x0a, 0x87, 0x2b, 0xb3, 0x78, 0x64, 0xc2, 0x1a, - 0xba, 0x03, 0xf5, 0xcc, 0x14, 0x31, 0x63, 0xf7, 0xf9, 0x9c, 0x34, 0x3f, 0x70, 0x60, 0xed, 0xaa, - 0x8e, 0xee, 0x43, 0x3d, 0x33, 0x00, 0x20, 0x33, 0x57, 0x4c, 0xb9, 0x51, 0x62, 0xc2, 0x55, 0x30, - 0x31, 0x60, 0x0d, 0x6d, 0x01, 0x4c, 0xde, 0x7e, 0x74, 0x2e, 0x07, 0xce, 0x0e, 0x09, 0xa6, 0x59, - 0xa4, 0x4a, 0x69, 0x36, 0xa1, 0x96, 0xbe, 0x7c, 0x68, 0xb5, 
0xe0, 0x31, 0x94, 0x24, 0xb3, 0x9f, - 0x49, 0xac, 0xa1, 0xbb, 0x60, 0xb4, 0x5d, 0xf7, 0x24, 0x34, 0x66, 0x56, 0xc3, 0xa6, 0x79, 0x5c, - 0x38, 0x3b, 0xe3, 0xb1, 0x41, 0x97, 0xf2, 0x8f, 0xca, 0xac, 0x17, 0xd4, 0x7c, 0xfb, 0x6f, 0x71, - 0xe9, 0x69, 0x3b, 0xd0, 0xc8, 0x5f, 0x9c, 0x68, 0xd6, 0xf4, 0x67, 0x5a, 0xa9, 0xa2, 0xf8, 0xa6, - 0xd5, 0xd6, 0xe3, 0xcc, 0x1a, 0xd9, 0xfa, 0x47, 0x67, 0x5e, 0x27, 0x8b, 0x5b, 0xd3, 0xfc, 0xdf, - 0x34, 0x57, 0xb6, 0x5b, 0x62, 0xa6, 0xcd, 0x8f, 0x9e, 0xbd, 0xb4, 0xb4, 0xe7, 0x2f, 0x2d, 0xed, - 0xd5, 0x4b, 0x4b, 0xff, 0x76, 0x6c, 0xe9, 0x3f, 0x8d, 0x2d, 0xfd, 0xe9, 0xd8, 0xd2, 0x9f, 0x8d, - 0x2d, 0xfd, 0x8f, 0xb1, 0xa5, 0xff, 0x39, 0xb6, 0xb4, 0x57, 0x63, 0x4b, 0xff, 0xe1, 0xd8, 0xd2, - 0x9e, 0x1d, 0x5b, 0xda, 0xf3, 0x63, 0x4b, 0xfb, 0xbc, 0x3a, 0x70, 0x1d, 0xea, 0xf3, 0xfd, 0xaa, - 0xf8, 0x4d, 0xba, 0xf6, 0x57, 0x00, 0x00, 0x00, 0xff, 0xff, 0x1a, 0x9e, 0x33, 0x55, 0x6d, 0x0d, - 0x00, 0x00, + // 1411 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0x57, 0xcd, 0x6e, 0xdb, 0xc6, + 0x16, 0x26, 0x2d, 0x59, 0xb6, 0x8e, 0x7e, 0x2c, 0x8f, 0xed, 0xd8, 0x61, 0x6e, 0xe8, 0xdc, 0x01, + 0x92, 0x6b, 0xdc, 0x7b, 0x23, 0xa7, 0x0e, 0xd2, 0x66, 0xd1, 0x20, 0x90, 0x13, 0x3b, 0x51, 0x61, + 0x3b, 0xce, 0x48, 0x6e, 0x8b, 0x02, 0x85, 0x40, 0x4b, 0x63, 0x9b, 0x30, 0x7f, 0x14, 0xce, 0x30, + 0x68, 0x76, 0x01, 0xfa, 0x00, 0x2d, 0xd0, 0x4d, 0x1f, 0xa1, 0xeb, 0x6e, 0xda, 0x47, 0xc8, 0x32, + 0xe8, 0x2a, 0xe8, 0x22, 0x68, 0x94, 0x4d, 0x97, 0x79, 0x84, 0x82, 0x33, 0x43, 0x8a, 0x54, 0x24, + 0xd4, 0x6d, 0x9a, 0x8d, 0xc0, 0x39, 0x3f, 0xdf, 0x9c, 0xdf, 0x39, 0x47, 0x50, 0xee, 0xfa, 0x01, + 0xa7, 0x5f, 0xd5, 0xfb, 0x81, 0xcf, 0x7d, 0x54, 0x90, 0x27, 0xe3, 0xea, 0xb1, 0xcd, 0x4f, 0xc2, + 0xc3, 0x7a, 0xd7, 0x77, 0xd7, 0x8f, 0xfd, 0x63, 0x7f, 0x5d, 0xb0, 0x0f, 0xc3, 0x23, 0x71, 0x12, + 0x07, 0xf1, 0x25, 0xd5, 0x8c, 0x85, 0xfe, 0xe9, 0xf1, 0x7a, 0x60, 0x7b, 0xf2, 0x47, 0x12, 0xf1, + 0xcf, 0x3a, 0x94, 0x3f, 0x0b, 0x6c, 0x4e, 0x09, 0x7d, 0x14, 0x52, 0xc6, 0xd1, 0x1e, 0x00, 0xb7, + 0x5d, 0xca, 0x68, 0x60, 0x53, 0xb6, 0xa2, 0x5f, 0xca, 0xad, 0x95, 0x36, 0x50, 0x5d, 0xdd, 0xdf, + 0xb6, 0x5d, 0xda, 0x12, 0x9c, 0x4d, 0xe3, 0xd9, 0xcb, 0x55, 0xed, 0xd7, 0x97, 0xab, 0x68, 0x3f, + 0xa0, 0x96, 0xe3, 0xf8, 0xdd, 0x76, 0xa2, 0x45, 0x52, 0x08, 0xe8, 0x23, 0x28, 0xb4, 0xfc, 0x30, + 0xe8, 0xd2, 0x95, 0xa9, 0x4b, 0xfa, 0x5a, 0x75, 0x63, 0x35, 0xc6, 0x4a, 0xdf, 0x5a, 0x97, 0x22, + 0x5b, 0x5e, 0xe8, 0x92, 0x02, 0x13, 0xdf, 0x78, 0x15, 0x60, 0x48, 0x45, 0x33, 0x90, 0x6b, 0xec, + 0x37, 0x6b, 0x1a, 0x9a, 0x85, 0x3c, 0x39, 0xd8, 0xd9, 0xaa, 0xe9, 0x78, 0x0e, 0x2a, 0x0a, 0x83, + 0xf5, 0x7d, 0x8f, 0x51, 0x7c, 0x0b, 0x4a, 0x84, 0x5a, 0xbd, 0xd8, 0x93, 0x3a, 0xcc, 0x3c, 0x0a, + 0xd3, 0x6e, 0x2c, 0xc6, 0x57, 0x3f, 0x0c, 0x69, 0xf0, 0x44, 0x89, 0x91, 0x58, 0x08, 0xdf, 0x86, + 0xb2, 0x54, 0x97, 0x70, 0x68, 0x1d, 0x66, 0x02, 0xca, 0x42, 0x87, 0xc7, 0xfa, 0x4b, 0x23, 0xfa, + 0x52, 0x8e, 0xc4, 0x52, 0xf8, 0x7b, 0x1d, 0xca, 0x69, 0x68, 0xf4, 0x7f, 0x40, 0x8c, 0x5b, 0x01, + 0xef, 0x88, 0x78, 0x70, 0xcb, 0xed, 0x77, 0xdc, 0x08, 0x4c, 0x5f, 0xcb, 0x91, 0x9a, 0xe0, 0xb4, + 0x63, 0xc6, 0x2e, 0x43, 0x6b, 0x50, 0xa3, 0x5e, 0x2f, 0x2b, 0x3b, 0x25, 0x64, 0xab, 0xd4, 0xeb, + 0xa5, 0x25, 0xaf, 0xc1, 0xac, 0x6b, 0xf1, 0xee, 0x09, 0x0d, 0xd8, 0x4a, 0x2e, 0xeb, 0xda, 0x8e, + 0x75, 0x48, 0x9d, 0x5d, 0xc9, 0x24, 0x89, 0x14, 0x6e, 0x42, 0x25, 0x63, 0x34, 0xba, 0x79, 0xc6, + 0x34, 0xe7, 0xa3, 0x34, 0xa7, 0x13, 0x8a, 0xdb, 0xb0, 0x20, 0xa0, 0x5a, 0x3c, 0xa0, 0x96, 0x9b, + 0x00, 0xde, 0x1a, 0x03, 
0xb8, 0xfc, 0x36, 0xe0, 0x9d, 0x93, 0xd0, 0x3b, 0x1d, 0x83, 0x7a, 0x1d, + 0x90, 0x30, 0xfd, 0x53, 0xcb, 0x09, 0x29, 0x8b, 0x03, 0x78, 0x11, 0xc0, 0x89, 0xa8, 0x1d, 0xcf, + 0x72, 0xa9, 0x08, 0x5c, 0x91, 0x14, 0x05, 0x65, 0xcf, 0x72, 0x29, 0xbe, 0x09, 0x0b, 0x19, 0x25, + 0x65, 0xca, 0xbf, 0xa1, 0x2c, 0xb5, 0x1e, 0x0b, 0xba, 0x30, 0xa6, 0x48, 0x4a, 0xce, 0x50, 0x14, + 0x2f, 0xc0, 0xfc, 0x4e, 0x0c, 0x13, 0xdf, 0x86, 0x6f, 0x28, 0x1b, 0x14, 0x51, 0xa1, 0xad, 0x42, + 0x69, 0x68, 0x43, 0x0c, 0x06, 0x89, 0x11, 0x0c, 0x23, 0xa8, 0x1d, 0x30, 0x1a, 0xb4, 0xb8, 0xc5, + 0x13, 0xa8, 0x9f, 0x74, 0x98, 0x4f, 0x11, 0x15, 0xd4, 0x65, 0xa8, 0xda, 0xde, 0x31, 0x65, 0xdc, + 0xf6, 0xbd, 0x4e, 0x60, 0x71, 0xe9, 0x92, 0x4e, 0x2a, 0x09, 0x95, 0x58, 0x9c, 0x46, 0x5e, 0x7b, + 0xa1, 0xdb, 0x51, 0xa1, 0x8c, 0x4a, 0x20, 0x4f, 0x8a, 0x5e, 0xe8, 0xca, 0x08, 0x46, 0x55, 0x65, + 0xf5, 0xed, 0xce, 0x08, 0x52, 0x4e, 0x20, 0xd5, 0xac, 0xbe, 0xdd, 0xcc, 0x80, 0xd5, 0x61, 0x21, + 0x08, 0x1d, 0x3a, 0x2a, 0x9e, 0x17, 0xe2, 0xf3, 0x11, 0x2b, 0x23, 0x8f, 0xbf, 0x84, 0x85, 0xc8, + 0xf0, 0xe6, 0xdd, 0xac, 0xe9, 0xcb, 0x30, 0x13, 0x32, 0x1a, 0x74, 0xec, 0x9e, 0x4a, 0x43, 0x21, + 0x3a, 0x36, 0x7b, 0xe8, 0x2a, 0xe4, 0x7b, 0x16, 0xb7, 0x84, 0x99, 0xa5, 0x8d, 0xf3, 0x71, 0xc6, + 0xdf, 0x72, 0x9e, 0x08, 0x31, 0x7c, 0x0f, 0x50, 0xc4, 0x62, 0x59, 0xf4, 0x0f, 0x60, 0x9a, 0x45, + 0x04, 0x55, 0x37, 0x17, 0xd2, 0x28, 0x23, 0x96, 0x10, 0x29, 0x89, 0x7f, 0xd4, 0xc1, 0xdc, 0xa5, + 0x3c, 0xb0, 0xbb, 0x6c, 0xdb, 0x0f, 0xd2, 0x65, 0xcf, 0xde, 0x77, 0xfb, 0xdd, 0x84, 0x72, 0xdc, + 0x58, 0x1d, 0x46, 0xb9, 0x6a, 0xc1, 0xa5, 0x71, 0x2d, 0xc8, 0x48, 0x29, 0x16, 0x6d, 0x51, 0x8e, + 0x9b, 0xb0, 0x3a, 0xd1, 0x66, 0x15, 0x8a, 0x2b, 0x50, 0x70, 0x85, 0x88, 0x8a, 0x45, 0x35, 0x86, + 0x95, 0x8a, 0x44, 0x71, 0xf1, 0x0b, 0x1d, 0xe6, 0x46, 0xda, 0x2a, 0x72, 0xe1, 0x28, 0xf0, 0x5d, + 0x95, 0xeb, 0x74, 0xb6, 0xaa, 0x11, 0xbd, 0xa9, 0xc8, 0xcd, 0x5e, 0x3a, 0x9d, 0x53, 0x99, 0x74, + 0xde, 0x86, 0x82, 0x28, 0xed, 0xf8, 0x61, 0x99, 0xcf, 0x78, 0xb5, 0x6f, 0xd9, 0xc1, 0xe6, 0xa2, + 0x7a, 0xf9, 0xcb, 0x82, 0xd4, 0xe8, 0x59, 0x7d, 0x4e, 0x03, 0xa2, 0xd4, 0xd0, 0xff, 0xa0, 0xd0, + 0x8d, 0x8c, 0x61, 0x2b, 0x79, 0x01, 0x50, 0x89, 0x01, 0xd2, 0x9d, 0xaf, 0x44, 0xd0, 0x22, 0x4c, + 0x73, 0xff, 0x94, 0x7a, 0x2b, 0xd3, 0x97, 0xf4, 0xb5, 0x0a, 0x91, 0x07, 0xfc, 0x8d, 0x0e, 0xd3, + 0xd2, 0xa1, 0xf7, 0x95, 0x41, 0x03, 0x66, 0xa9, 0xd7, 0xf5, 0x7b, 0xb6, 0x77, 0x2c, 0x1a, 0x67, + 0x9a, 0x24, 0x67, 0x84, 0x54, 0x41, 0x47, 0x1d, 0x52, 0x56, 0x55, 0xbb, 0x02, 0xe7, 0xda, 0x81, + 0xe5, 0xb1, 0x23, 0x1a, 0x08, 0xc3, 0x92, 0x74, 0xe1, 0xef, 0x74, 0x80, 0x61, 0x1a, 0x52, 0xe1, + 0xd3, 0xff, 0x5e, 0xf8, 0xea, 0x30, 0xc3, 0x2c, 0xb7, 0xef, 0x88, 0xc6, 0xcf, 0xe4, 0xbf, 0x25, + 0xc8, 0x2a, 0x80, 0xb1, 0xd0, 0x30, 0x82, 0xb9, 0x74, 0x04, 0x9f, 0xea, 0x50, 0xbb, 0x47, 0x79, + 0x6c, 0xab, 0x6c, 0x87, 0xb3, 0x57, 0x47, 0x1d, 0x0a, 0x81, 0x15, 0x1d, 0x95, 0x0d, 0xb5, 0xba, + 0xd8, 0x18, 0xda, 0x11, 0x36, 0x89, 0x18, 0x71, 0x1a, 0xa5, 0x54, 0x14, 0x32, 0xd7, 0x7f, 0x2c, + 0xdf, 0xa0, 0x59, 0x22, 0xbe, 0xf1, 0x36, 0x2c, 0x1e, 0x78, 0x87, 0x8e, 0xdf, 0x3d, 0x15, 0x1a, + 0x6c, 0x38, 0x95, 0xff, 0x12, 0x36, 0x5e, 0x86, 0xa5, 0x11, 0x1c, 0x15, 0xf9, 0x1b, 0x50, 0x4c, + 0x82, 0x1a, 0x59, 0x90, 0x8c, 0x88, 0x32, 0x11, 0xdf, 0x51, 0x68, 0xc4, 0x00, 0x10, 0x35, 0x50, + 0x26, 0xf2, 0x80, 0x1b, 0x50, 0x90, 0x91, 0x1c, 0xf2, 0xe5, 0x23, 0x2c, 0x0f, 0xd1, 0xf0, 0x18, + 0x53, 0x40, 0x25, 0x3e, 0xac, 0x1e, 0xdc, 0x80, 0x4a, 0xa6, 0x77, 0x33, 0xf3, 0x58, 0x3f, 0xe3, + 0x3c, 0x2e, 0xc8, 0x7e, 0x7e, 0xe7, 0x8a, 0xc1, 
0x1d, 0x28, 0xa7, 0x2f, 0x41, 0x97, 0x21, 0xcf, + 0x9f, 0xf4, 0xa5, 0x57, 0xd5, 0x21, 0x9c, 0x60, 0xb7, 0x9f, 0xf4, 0x29, 0x11, 0xec, 0x24, 0x62, + 0xb2, 0xfd, 0x47, 0x22, 0x96, 0x13, 0x44, 0x15, 0xb1, 0xaf, 0x75, 0xa8, 0x0e, 0x4b, 0x7c, 0xdb, + 0x76, 0xe8, 0x3f, 0xf1, 0xd0, 0x18, 0x30, 0x7b, 0x64, 0x3b, 0x54, 0xd8, 0x20, 0xaf, 0x4b, 0xce, + 0x63, 0x5b, 0xf0, 0x1c, 0x2c, 0xc6, 0x2d, 0xd8, 0x6e, 0xdd, 0xdd, 0x8c, 0xcb, 0xe0, 0xbf, 0x9f, + 0x40, 0x31, 0x71, 0x0d, 0x15, 0x61, 0x7a, 0xeb, 0xe1, 0x41, 0x63, 0xa7, 0xa6, 0xa1, 0x0a, 0x14, + 0xf7, 0x1e, 0xb4, 0x3b, 0xf2, 0xa8, 0xa3, 0x39, 0x28, 0x91, 0xad, 0x7b, 0x5b, 0x9f, 0x77, 0x76, + 0x1b, 0xed, 0x3b, 0xf7, 0x6b, 0x53, 0x08, 0x41, 0x55, 0x12, 0xf6, 0x1e, 0x28, 0x5a, 0x6e, 0xe3, + 0x97, 0x19, 0x98, 0x8d, 0x6d, 0x47, 0x37, 0x20, 0xbf, 0x1f, 0xb2, 0x13, 0xb4, 0x38, 0x6e, 0x61, + 0x35, 0x96, 0x46, 0xa8, 0xaa, 0x28, 0x35, 0xf4, 0x21, 0x4c, 0x8b, 0xf5, 0x08, 0x8d, 0xdd, 0x36, + 0x8d, 0xf1, 0x3b, 0x24, 0xd6, 0xd0, 0x5d, 0x28, 0xa5, 0xd6, 0xaa, 0x09, 0xda, 0x17, 0x32, 0xd4, + 0xec, 0x06, 0x86, 0xb5, 0x6b, 0x3a, 0xba, 0x0f, 0xa5, 0xd4, 0x46, 0x84, 0x8c, 0x4c, 0x31, 0x65, + 0x76, 0xab, 0x21, 0xd6, 0x98, 0x15, 0x0a, 0x6b, 0x68, 0x0b, 0x60, 0xb8, 0x0c, 0xa1, 0xf3, 0x19, + 0xe1, 0xf4, 0xd6, 0x64, 0x18, 0xe3, 0x58, 0x09, 0xcc, 0x26, 0x14, 0x93, 0x55, 0x00, 0xad, 0x8c, + 0xd9, 0x0e, 0x24, 0xc8, 0xe4, 0xbd, 0x01, 0x6b, 0x68, 0x1b, 0xca, 0x0d, 0xc7, 0x39, 0x0b, 0x8c, + 0x91, 0xe6, 0xb0, 0x51, 0x1c, 0x07, 0x96, 0x27, 0x4c, 0x5f, 0x74, 0x25, 0x3b, 0x65, 0x27, 0xad, + 0x14, 0xc6, 0x7f, 0xfe, 0x54, 0x2e, 0xb9, 0x6d, 0x17, 0xaa, 0xd9, 0x99, 0x81, 0x26, 0xad, 0xc3, + 0x86, 0x99, 0x30, 0xc6, 0x0f, 0x19, 0x6d, 0x2d, 0xca, 0x6c, 0x39, 0x5d, 0xff, 0xe8, 0xdc, 0xdb, + 0x60, 0x51, 0x6b, 0x1a, 0xff, 0x1a, 0xc5, 0x4a, 0x77, 0x8b, 0x40, 0x6a, 0x0d, 0x3b, 0x49, 0xde, + 0xd3, 0x0a, 0x0f, 0x19, 0xe5, 0xef, 0x6a, 0xde, 0x5c, 0x32, 0x70, 0x14, 0x5e, 0x92, 0xa6, 0xd1, + 0x49, 0x64, 0x4c, 0xba, 0x49, 0x94, 0xf0, 0x1e, 0x54, 0x32, 0x0f, 0x3e, 0x4a, 0x3c, 0x1a, 0x37, + 0x4f, 0x8c, 0x8b, 0x13, 0xb8, 0xb1, 0x6d, 0x9b, 0x1f, 0x3f, 0x7f, 0x65, 0x6a, 0x2f, 0x5e, 0x99, + 0xda, 0x9b, 0x57, 0xa6, 0xfe, 0x74, 0x60, 0xea, 0x3f, 0x0c, 0x4c, 0xfd, 0xd9, 0xc0, 0xd4, 0x9f, + 0x0f, 0x4c, 0xfd, 0xb7, 0x81, 0xa9, 0xff, 0x3e, 0x30, 0xb5, 0x37, 0x03, 0x53, 0xff, 0xf6, 0xb5, + 0xa9, 0x3d, 0x7f, 0x6d, 0x6a, 0x2f, 0x5e, 0x9b, 0xda, 0x17, 0x85, 0xae, 0x63, 0x53, 0x8f, 0x1f, + 0x16, 0xc4, 0xdf, 0xe4, 0xeb, 0x7f, 0x04, 0x00, 0x00, 0xff, 0xff, 0x7f, 0x9b, 0x95, 0xff, 0x82, + 0x0f, 0x00, 0x00, } func (x MatchType) String() string { @@ -2054,6 +2223,9 @@ func (this *TimeSeriesChunk) Equal(that interface{}) bool { return false } } + if this.Token != that1.Token { + return false + } return true } func (this *Chunk) Equal(that interface{}) bool { @@ -2145,6 +2317,94 @@ func (this *TimeSeries) Equal(that interface{}) bool { return false } } + if this.Token != that1.Token { + return false + } + return true +} +func (this *GetChunksRequest) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + that1, ok := that.(*GetChunksRequest) + if !ok { + that2, ok := that.(GetChunksRequest) + if ok { + that1 = &that2 + } else { + return false + } + } + if that1 == nil { + return this == nil + } else if this == nil { + return false + } + if this.FromIngesterId != that1.FromIngesterId { + return false + } + if len(this.Ranges) != len(that1.Ranges) { + return false + } + for i := range this.Ranges { + if !this.Ranges[i].Equal(&that1.Ranges[i]) { + return false + } + } + if this.Move != that1.Move { + 
return false + } + return true +} +func (this *UnblockRangesRequest) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + that1, ok := that.(*UnblockRangesRequest) + if !ok { + that2, ok := that.(UnblockRangesRequest) + if ok { + that1 = &that2 + } else { + return false + } + } + if that1 == nil { + return this == nil + } else if this == nil { + return false + } + if len(this.Ranges) != len(that1.Ranges) { + return false + } + for i := range this.Ranges { + if !this.Ranges[i].Equal(&that1.Ranges[i]) { + return false + } + } + return true +} +func (this *UnblockRangesResponse) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + that1, ok := that.(*UnblockRangesResponse) + if !ok { + that2, ok := that.(UnblockRangesResponse) + if ok { + that1 = &that2 + } else { + return false + } + } + if that1 == nil { + return this == nil + } else if this == nil { + return false + } return true } func (this *LabelPair) Equal(that interface{}) bool { @@ -2549,7 +2809,7 @@ func (this *TimeSeriesChunk) GoString() string { if this == nil { return "nil" } - s := make([]string, 0, 8) + s := make([]string, 0, 9) s = append(s, "&client.TimeSeriesChunk{") s = append(s, "FromIngesterId: "+fmt.Sprintf("%#v", this.FromIngesterId)+",\n") s = append(s, "UserId: "+fmt.Sprintf("%#v", this.UserId)+",\n") @@ -2561,6 +2821,7 @@ func (this *TimeSeriesChunk) GoString() string { } s = append(s, "Chunks: "+fmt.Sprintf("%#v", vs)+",\n") } + s = append(s, "Token: "+fmt.Sprintf("%#v", this.Token)+",\n") s = append(s, "}") return strings.Join(s, "") } @@ -2590,7 +2851,7 @@ func (this *TimeSeries) GoString() string { if this == nil { return "nil" } - s := make([]string, 0, 6) + s := make([]string, 0, 7) s = append(s, "&client.TimeSeries{") s = append(s, "Labels: "+fmt.Sprintf("%#v", this.Labels)+",\n") if this.Samples != nil { @@ -2600,6 +2861,50 @@ func (this *TimeSeries) GoString() string { } s = append(s, "Samples: "+fmt.Sprintf("%#v", vs)+",\n") } + s = append(s, "Token: "+fmt.Sprintf("%#v", this.Token)+",\n") + s = append(s, "}") + return strings.Join(s, "") +} +func (this *GetChunksRequest) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 7) + s = append(s, "&client.GetChunksRequest{") + s = append(s, "FromIngesterId: "+fmt.Sprintf("%#v", this.FromIngesterId)+",\n") + if this.Ranges != nil { + vs := make([]*ring.TokenRange, len(this.Ranges)) + for i := range vs { + vs[i] = &this.Ranges[i] + } + s = append(s, "Ranges: "+fmt.Sprintf("%#v", vs)+",\n") + } + s = append(s, "Move: "+fmt.Sprintf("%#v", this.Move)+",\n") + s = append(s, "}") + return strings.Join(s, "") +} +func (this *UnblockRangesRequest) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 5) + s = append(s, "&client.UnblockRangesRequest{") + if this.Ranges != nil { + vs := make([]*ring.TokenRange, len(this.Ranges)) + for i := range vs { + vs[i] = &this.Ranges[i] + } + s = append(s, "Ranges: "+fmt.Sprintf("%#v", vs)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *UnblockRangesResponse) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 4) + s = append(s, "&client.UnblockRangesResponse{") s = append(s, "}") return strings.Join(s, "") } @@ -2710,10 +3015,22 @@ type IngesterClient interface { UserStats(ctx context.Context, in *UserStatsRequest, opts ...grpc.CallOption) (*UserStatsResponse, error) AllUserStats(ctx context.Context, in *UserStatsRequest, opts ...grpc.CallOption) 
(*UsersStatsResponse, error) MetricsForLabelMatchers(ctx context.Context, in *MetricsForLabelMatchersRequest, opts ...grpc.CallOption) (*MetricsForLabelMatchersResponse, error) - // TransferChunks allows leaving ingester (client) to stream chunks directly to joining ingesters (server). + // TransferChunks allows leaving ingester (client) to stream chunks directly + // to joining ingesters (server). Called during the hand-off process, and is a + // full transfer from the client. TransferChunks(ctx context.Context, opts ...grpc.CallOption) (Ingester_TransferChunksClient, error) // TransferTSDB transfers all files of a tsdb to a joining ingester TransferTSDB(ctx context.Context, opts ...grpc.CallOption) (Ingester_TransferTSDBClient, error) + // TransferChunksSubset is invoked by a leaving ingester (client), streaming + // a subset of its chunks directly to an existing ingester. + TransferChunksSubset(ctx context.Context, opts ...grpc.CallOption) (Ingester_TransferChunksSubsetClient, error) + // GetChunksSubset is invoked by a joining ingester (client). A subset of the + // serving ingester's chunks will be sent as a stream. + GetChunksSubset(ctx context.Context, in *GetChunksRequest, opts ...grpc.CallOption) (Ingester_GetChunksSubsetClient, error) + // UnblockRanges is invoked by a joining ingester after it has successfully + // joined the ring. When invoked, it is safe to forcefully remove blocked + // ranges without waiting for them to time out. + UnblockRanges(ctx context.Context, in *UnblockRangesRequest, opts ...grpc.CallOption) (*UnblockRangesResponse, error) } type ingesterClient struct { @@ -2887,37 +3204,124 @@ func (x *ingesterTransferTSDBClient) CloseAndRecv() (*TransferTSDBResponse, erro return m, nil } -// IngesterServer is the server API for Ingester service. -type IngesterServer interface { - Push(context.Context, *WriteRequest) (*WriteResponse, error) - Query(context.Context, *QueryRequest) (*QueryResponse, error) - QueryStream(*QueryRequest, Ingester_QueryStreamServer) error - LabelValues(context.Context, *LabelValuesRequest) (*LabelValuesResponse, error) - LabelNames(context.Context, *LabelNamesRequest) (*LabelNamesResponse, error) - UserStats(context.Context, *UserStatsRequest) (*UserStatsResponse, error) - AllUserStats(context.Context, *UserStatsRequest) (*UsersStatsResponse, error) - MetricsForLabelMatchers(context.Context, *MetricsForLabelMatchersRequest) (*MetricsForLabelMatchersResponse, error) - // TransferChunks allows leaving ingester (client) to stream chunks directly to joining ingesters (server). - TransferChunks(Ingester_TransferChunksServer) error - // TransferTSDB transfers all files of a tsdb to a joining ingester - TransferTSDB(Ingester_TransferTSDBServer) error +func (c *ingesterClient) TransferChunksSubset(ctx context.Context, opts ...grpc.CallOption) (Ingester_TransferChunksSubsetClient, error) { + stream, err := c.cc.NewStream(ctx, &_Ingester_serviceDesc.Streams[3], "/cortex.Ingester/TransferChunksSubset", opts...) 
+ if err != nil { + return nil, err + } + x := &ingesterTransferChunksSubsetClient{stream} + return x, nil } -func RegisterIngesterServer(s *grpc.Server, srv IngesterServer) { - s.RegisterService(&_Ingester_serviceDesc, srv) +type Ingester_TransferChunksSubsetClient interface { + Send(*TimeSeriesChunk) error + CloseAndRecv() (*TransferChunksResponse, error) + grpc.ClientStream } -func _Ingester_Push_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(WriteRequest) - if err := dec(in); err != nil { +type ingesterTransferChunksSubsetClient struct { + grpc.ClientStream +} + +func (x *ingesterTransferChunksSubsetClient) Send(m *TimeSeriesChunk) error { + return x.ClientStream.SendMsg(m) +} + +func (x *ingesterTransferChunksSubsetClient) CloseAndRecv() (*TransferChunksResponse, error) { + if err := x.ClientStream.CloseSend(); err != nil { return nil, err } - if interceptor == nil { - return srv.(IngesterServer).Push(ctx, in) + m := new(TransferChunksResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/cortex.Ingester/Push", + return m, nil +} + +func (c *ingesterClient) GetChunksSubset(ctx context.Context, in *GetChunksRequest, opts ...grpc.CallOption) (Ingester_GetChunksSubsetClient, error) { + stream, err := c.cc.NewStream(ctx, &_Ingester_serviceDesc.Streams[4], "/cortex.Ingester/GetChunksSubset", opts...) + if err != nil { + return nil, err + } + x := &ingesterGetChunksSubsetClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type Ingester_GetChunksSubsetClient interface { + Recv() (*TimeSeriesChunk, error) + grpc.ClientStream +} + +type ingesterGetChunksSubsetClient struct { + grpc.ClientStream +} + +func (x *ingesterGetChunksSubsetClient) Recv() (*TimeSeriesChunk, error) { + m := new(TimeSeriesChunk) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *ingesterClient) UnblockRanges(ctx context.Context, in *UnblockRangesRequest, opts ...grpc.CallOption) (*UnblockRangesResponse, error) { + out := new(UnblockRangesResponse) + err := c.cc.Invoke(ctx, "/cortex.Ingester/UnblockRanges", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +// IngesterServer is the server API for Ingester service. +type IngesterServer interface { + Push(context.Context, *WriteRequest) (*WriteResponse, error) + Query(context.Context, *QueryRequest) (*QueryResponse, error) + QueryStream(*QueryRequest, Ingester_QueryStreamServer) error + LabelValues(context.Context, *LabelValuesRequest) (*LabelValuesResponse, error) + LabelNames(context.Context, *LabelNamesRequest) (*LabelNamesResponse, error) + UserStats(context.Context, *UserStatsRequest) (*UserStatsResponse, error) + AllUserStats(context.Context, *UserStatsRequest) (*UsersStatsResponse, error) + MetricsForLabelMatchers(context.Context, *MetricsForLabelMatchersRequest) (*MetricsForLabelMatchersResponse, error) + // TransferChunks allows leaving ingester (client) to stream chunks directly + // to joining ingesters (server). Called during the hand-off process, and is a + // full transfer from the client. 
+ TransferChunks(Ingester_TransferChunksServer) error + // TransferTSDB transfers all files of a tsdb to a joining ingester + TransferTSDB(Ingester_TransferTSDBServer) error + // TransferChunksSubset is invoked by a leaving ingester (client), streaming + // a subset of its chunks directly to an existing ingester. + TransferChunksSubset(Ingester_TransferChunksSubsetServer) error + // GetChunksSubset is invoked by a joining ingester (client). A subset of the + // serving ingester's chunks will be sent as a stream. + GetChunksSubset(*GetChunksRequest, Ingester_GetChunksSubsetServer) error + // UnblockRanges is invoked by a joining ingester after it has successfully + // joined the ring. When invoked, it is safe to forcefully remove blocked + // ranges without waiting for them to time out. + UnblockRanges(context.Context, *UnblockRangesRequest) (*UnblockRangesResponse, error) +} + +func RegisterIngesterServer(s *grpc.Server, srv IngesterServer) { + s.RegisterService(&_Ingester_serviceDesc, srv) +} + +func _Ingester_Push_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(WriteRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(IngesterServer).Push(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/cortex.Ingester/Push", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(IngesterServer).Push(ctx, req.(*WriteRequest)) @@ -3106,6 +3510,71 @@ func (x *ingesterTransferTSDBServer) Recv() (*TimeSeriesFile, error) { return m, nil } +func _Ingester_TransferChunksSubset_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(IngesterServer).TransferChunksSubset(&ingesterTransferChunksSubsetServer{stream}) +} + +type Ingester_TransferChunksSubsetServer interface { + SendAndClose(*TransferChunksResponse) error + Recv() (*TimeSeriesChunk, error) + grpc.ServerStream +} + +type ingesterTransferChunksSubsetServer struct { + grpc.ServerStream +} + +func (x *ingesterTransferChunksSubsetServer) SendAndClose(m *TransferChunksResponse) error { + return x.ServerStream.SendMsg(m) +} + +func (x *ingesterTransferChunksSubsetServer) Recv() (*TimeSeriesChunk, error) { + m := new(TimeSeriesChunk) + if err := x.ServerStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func _Ingester_GetChunksSubset_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(GetChunksRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(IngesterServer).GetChunksSubset(m, &ingesterGetChunksSubsetServer{stream}) +} + +type Ingester_GetChunksSubsetServer interface { + Send(*TimeSeriesChunk) error + grpc.ServerStream +} + +type ingesterGetChunksSubsetServer struct { + grpc.ServerStream +} + +func (x *ingesterGetChunksSubsetServer) Send(m *TimeSeriesChunk) error { + return x.ServerStream.SendMsg(m) +} + +func _Ingester_UnblockRanges_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(UnblockRangesRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(IngesterServer).UnblockRanges(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/cortex.Ingester/UnblockRanges", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return 
srv.(IngesterServer).UnblockRanges(ctx, req.(*UnblockRangesRequest)) + } + return interceptor(ctx, in, info, handler) +} + var _Ingester_serviceDesc = grpc.ServiceDesc{ ServiceName: "cortex.Ingester", HandlerType: (*IngesterServer)(nil), @@ -3138,6 +3607,10 @@ var _Ingester_serviceDesc = grpc.ServiceDesc{ MethodName: "MetricsForLabelMatchers", Handler: _Ingester_MetricsForLabelMatchers_Handler, }, + { + MethodName: "UnblockRanges", + Handler: _Ingester_UnblockRanges_Handler, + }, }, Streams: []grpc.StreamDesc{ { @@ -3155,6 +3628,16 @@ var _Ingester_serviceDesc = grpc.ServiceDesc{ Handler: _Ingester_TransferTSDB_Handler, ClientStreams: true, }, + { + StreamName: "TransferChunksSubset", + Handler: _Ingester_TransferChunksSubset_Handler, + ClientStreams: true, + }, + { + StreamName: "GetChunksSubset", + Handler: _Ingester_GetChunksSubset_Handler, + ServerStreams: true, + }, }, Metadata: "cortex.proto", } @@ -3724,6 +4207,11 @@ func (m *TimeSeriesChunk) MarshalTo(dAtA []byte) (int, error) { i += n } } + if m.Token != 0 { + dAtA[i] = 0x28 + i++ + i = encodeVarintCortex(dAtA, i, uint64(m.Token)) + } return i, nil } @@ -3823,6 +4311,105 @@ func (m *TimeSeries) MarshalTo(dAtA []byte) (int, error) { i += n } } + if m.Token != 0 { + dAtA[i] = 0x18 + i++ + i = encodeVarintCortex(dAtA, i, uint64(m.Token)) + } + return i, nil +} + +func (m *GetChunksRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *GetChunksRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.FromIngesterId) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintCortex(dAtA, i, uint64(len(m.FromIngesterId))) + i += copy(dAtA[i:], m.FromIngesterId) + } + if len(m.Ranges) > 0 { + for _, msg := range m.Ranges { + dAtA[i] = 0x12 + i++ + i = encodeVarintCortex(dAtA, i, uint64(msg.Size())) + n, err := msg.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n + } + } + if m.Move { + dAtA[i] = 0x18 + i++ + if m.Move { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i++ + } + return i, nil +} + +func (m *UnblockRangesRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *UnblockRangesRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.Ranges) > 0 { + for _, msg := range m.Ranges { + dAtA[i] = 0x12 + i++ + i = encodeVarintCortex(dAtA, i, uint64(msg.Size())) + n, err := msg.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n + } + } + return i, nil +} + +func (m *UnblockRangesResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *UnblockRangesResponse) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l return i, nil } @@ -4333,6 +4920,9 @@ func (m *TimeSeriesChunk) Size() (n int) { n += 1 + l + sovCortex(uint64(l)) } } + if m.Token != 0 { + n += 1 + sovCortex(uint64(m.Token)) + } return n } @@ -4385,6 +4975,55 @@ func (m *TimeSeries) Size() (n int) { n += 1 + l + sovCortex(uint64(l)) } } + if m.Token != 0 { + n += 1 + sovCortex(uint64(m.Token)) + } + return n +} + +func (m *GetChunksRequest) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = 
len(m.FromIngesterId) + if l > 0 { + n += 1 + l + sovCortex(uint64(l)) + } + if len(m.Ranges) > 0 { + for _, e := range m.Ranges { + l = e.Size() + n += 1 + l + sovCortex(uint64(l)) + } + } + if m.Move { + n += 2 + } + return n +} + +func (m *UnblockRangesRequest) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if len(m.Ranges) > 0 { + for _, e := range m.Ranges { + l = e.Size() + n += 1 + l + sovCortex(uint64(l)) + } + } + return n +} + +func (m *UnblockRangesResponse) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l return n } @@ -4702,6 +5341,7 @@ func (this *TimeSeriesChunk) String() string { `UserId:` + fmt.Sprintf("%v", this.UserId) + `,`, `Labels:` + fmt.Sprintf("%v", this.Labels) + `,`, `Chunks:` + strings.Replace(strings.Replace(fmt.Sprintf("%v", this.Chunks), "Chunk", "Chunk", 1), `&`, ``, 1) + `,`, + `Token:` + fmt.Sprintf("%v", this.Token) + `,`, `}`, }, "") return s @@ -4735,48 +5375,80 @@ func (this *TimeSeries) String() string { s := strings.Join([]string{`&TimeSeries{`, `Labels:` + fmt.Sprintf("%v", this.Labels) + `,`, `Samples:` + strings.Replace(strings.Replace(fmt.Sprintf("%v", this.Samples), "Sample", "Sample", 1), `&`, ``, 1) + `,`, + `Token:` + fmt.Sprintf("%v", this.Token) + `,`, `}`, }, "") return s } -func (this *LabelPair) String() string { +func (this *GetChunksRequest) String() string { if this == nil { return "nil" } - s := strings.Join([]string{`&LabelPair{`, - `Name:` + fmt.Sprintf("%v", this.Name) + `,`, - `Value:` + fmt.Sprintf("%v", this.Value) + `,`, + s := strings.Join([]string{`&GetChunksRequest{`, + `FromIngesterId:` + fmt.Sprintf("%v", this.FromIngesterId) + `,`, + `Ranges:` + strings.Replace(strings.Replace(fmt.Sprintf("%v", this.Ranges), "TokenRange", "ring.TokenRange", 1), `&`, ``, 1) + `,`, + `Move:` + fmt.Sprintf("%v", this.Move) + `,`, `}`, }, "") return s } -func (this *Sample) String() string { +func (this *UnblockRangesRequest) String() string { if this == nil { return "nil" } - s := strings.Join([]string{`&Sample{`, - `Value:` + fmt.Sprintf("%v", this.Value) + `,`, - `TimestampMs:` + fmt.Sprintf("%v", this.TimestampMs) + `,`, + s := strings.Join([]string{`&UnblockRangesRequest{`, + `Ranges:` + strings.Replace(strings.Replace(fmt.Sprintf("%v", this.Ranges), "TokenRange", "ring.TokenRange", 1), `&`, ``, 1) + `,`, `}`, }, "") return s } -func (this *LabelMatchers) String() string { +func (this *UnblockRangesResponse) String() string { if this == nil { return "nil" } - s := strings.Join([]string{`&LabelMatchers{`, - `Matchers:` + strings.Replace(fmt.Sprintf("%v", this.Matchers), "LabelMatcher", "LabelMatcher", 1) + `,`, + s := strings.Join([]string{`&UnblockRangesResponse{`, `}`, }, "") return s } -func (this *Metric) String() string { +func (this *LabelPair) String() string { if this == nil { return "nil" } - s := strings.Join([]string{`&Metric{`, - `Labels:` + fmt.Sprintf("%v", this.Labels) + `,`, + s := strings.Join([]string{`&LabelPair{`, + `Name:` + fmt.Sprintf("%v", this.Name) + `,`, + `Value:` + fmt.Sprintf("%v", this.Value) + `,`, + `}`, + }, "") + return s +} +func (this *Sample) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&Sample{`, + `Value:` + fmt.Sprintf("%v", this.Value) + `,`, + `TimestampMs:` + fmt.Sprintf("%v", this.TimestampMs) + `,`, + `}`, + }, "") + return s +} +func (this *LabelMatchers) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&LabelMatchers{`, + `Matchers:` + strings.Replace(fmt.Sprintf("%v", 
this.Matchers), "LabelMatcher", "LabelMatcher", 1) + `,`, + `}`, + }, "") + return s +} +func (this *Metric) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&Metric{`, + `Labels:` + fmt.Sprintf("%v", this.Labels) + `,`, `}`, }, "") return s @@ -6502,6 +7174,25 @@ func (m *TimeSeriesChunk) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Token", wireType) + } + m.Token = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCortex + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Token |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } default: iNdEx = preIndex skippy, err := skipCortex(dAtA[iNdEx:]) @@ -6820,6 +7511,304 @@ func (m *TimeSeries) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Token", wireType) + } + m.Token = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCortex + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Token |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipCortex(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthCortex + } + if (iNdEx + skippy) < 0 { + return ErrInvalidLengthCortex + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *GetChunksRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCortex + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: GetChunksRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: GetChunksRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field FromIngesterId", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCortex + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthCortex + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthCortex + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.FromIngesterId = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Ranges", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCortex + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthCortex + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return 
ErrInvalidLengthCortex + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Ranges = append(m.Ranges, ring.TokenRange{}) + if err := m.Ranges[len(m.Ranges)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Move", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCortex + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.Move = bool(v != 0) + default: + iNdEx = preIndex + skippy, err := skipCortex(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthCortex + } + if (iNdEx + skippy) < 0 { + return ErrInvalidLengthCortex + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *UnblockRangesRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCortex + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: UnblockRangesRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: UnblockRangesRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Ranges", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCortex + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthCortex + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthCortex + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Ranges = append(m.Ranges, ring.TokenRange{}) + if err := m.Ranges[len(m.Ranges)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipCortex(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthCortex + } + if (iNdEx + skippy) < 0 { + return ErrInvalidLengthCortex + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *UnblockRangesResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCortex + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: UnblockRangesResponse: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: UnblockRangesResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { default: iNdEx = preIndex 
skippy, err := skipCortex(dAtA[iNdEx:]) diff --git a/pkg/ingester/client/cortex.proto b/pkg/ingester/client/cortex.proto index f1c83305d2a..b4714dca18c 100644 --- a/pkg/ingester/client/cortex.proto +++ b/pkg/ingester/client/cortex.proto @@ -5,6 +5,7 @@ package cortex; option go_package = "client"; import "github.com/gogo/protobuf/gogoproto/gogo.proto"; +import "pkg/ring/ring.proto"; option (gogoproto.marshaler_all) = true; option (gogoproto.unmarshaler_all) = true; @@ -20,11 +21,26 @@ service Ingester { rpc AllUserStats(UserStatsRequest) returns (UsersStatsResponse) {}; rpc MetricsForLabelMatchers(MetricsForLabelMatchersRequest) returns (MetricsForLabelMatchersResponse) {}; - // TransferChunks allows leaving ingester (client) to stream chunks directly to joining ingesters (server). + // TransferChunks allows leaving ingester (client) to stream chunks directly + // to joining ingesters (server). Called during the hand-off process, and is a + // full transfer from the client. rpc TransferChunks(stream TimeSeriesChunk) returns (TransferChunksResponse) {}; // TransferTSDB transfers all files of a tsdb to a joining ingester rpc TransferTSDB(stream TimeSeriesFile) returns (TransferTSDBResponse) {}; + + // TransferChunksSubset is invoked by a leaving ingester (client), streaming + // a subset of its chunks directly to an existing ingester. + rpc TransferChunksSubset(stream TimeSeriesChunk) returns (TransferChunksResponse) {}; + + // GetChunksSubset is invoked by a joining ingester (client). A subset of the + // serving ingester's chunks will be sent as a stream. + rpc GetChunksSubset(GetChunksRequest) returns (stream TimeSeriesChunk) {}; + + // UnblockRanges is invoked by a joining ingester after it has successfully + // joined the ring. When invoked, it is safe to forcefully remove blocked + // ranges without waiting for them to time out. + rpc UnblockRanges(UnblockRangesRequest) returns (UnblockRangesResponse) {}; } message WriteRequest { @@ -109,6 +125,7 @@ message TimeSeriesChunk { string user_id = 2; repeated LabelPair labels = 3 [(gogoproto.nullable) = false, (gogoproto.customtype) = "LabelAdapter"]; repeated Chunk chunks = 4 [(gogoproto.nullable) = false]; + uint32 token = 5; } message Chunk { @@ -125,6 +142,25 @@ message TimeSeries { repeated LabelPair labels = 1 [(gogoproto.nullable) = false, (gogoproto.customtype) = "LabelAdapter"]; // Sorted by time, oldest sample first. repeated Sample samples = 2 [(gogoproto.nullable) = false]; + + // Token used by the distributor. + uint32 token = 3; +} + +message GetChunksRequest { + string from_ingester_id = 1; + repeated ring.TokenRange ranges = 2 [(gogoproto.nullable) = false]; + + // If true, remove the chunks from the server's memory when the + // send completes. 
+ bool move = 3; +} + +message UnblockRangesRequest { + repeated ring.TokenRange ranges = 2 [(gogoproto.nullable) = false]; +} + +message UnblockRangesResponse { } message LabelPair { diff --git a/pkg/ingester/incremental_transfer.go b/pkg/ingester/incremental_transfer.go new file mode 100644 index 00000000000..99dd2ec3092 --- /dev/null +++ b/pkg/ingester/incremental_transfer.go @@ -0,0 +1,260 @@ +package ingester + +import ( + "fmt" + "time" + + "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/util" + "github.com/go-kit/kit/log/level" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/weaveworks/common/user" + "golang.org/x/net/context" +) + +var ( + blockedRanges = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "cortex_ingester_blocked_ranges", + Help: "The current number of ranges that will not accept writes by this ingester.", + }) +) + +func init() { + prometheus.MustRegister(blockedRanges) +} + +// TransferChunksSubset accepts chunks from a client and moves them into the local Ingester. +func (i *Ingester) TransferChunksSubset(stream client.Ingester_TransferChunksSubsetServer) error { + i.userStatesMtx.Lock() + defer i.userStatesMtx.Unlock() + + fromIngesterID, seriesReceived, err := i.acceptChunksFromStream(acceptChunksOptions{ + States: i.userStates, + Stream: stream, + }) + if err != nil { + return err + } + + if err := stream.SendAndClose(&client.TransferChunksResponse{}); err != nil { + level.Error(util.Logger).Log("msg", "Error closing SendChunks stream", "from_ingester", fromIngesterID, "err", err) + return err + } + + // Target ingester may not have had any streams to send. + if seriesReceived == 0 { + return nil + } else if fromIngesterID == "" { + level.Error(util.Logger).Log("msg", "received TransferChunks request with no ID from ingester") + return fmt.Errorf("no ingester id") + } + + return nil +} + +// GetChunksSubset accepts a get request from a client and sends all chunks from the serving ingester +// that fall within the given range to the client. +func (i *Ingester) GetChunksSubset(req *client.GetChunksRequest, stream client.Ingester_GetChunksSubsetServer) error { + userStatesCopy := i.userStates.cp() + if len(userStatesCopy) == 0 { + level.Info(util.Logger).Log("msg", "nothing to transfer") + return nil + } + + // Block the ranges locally so we don't receive traffic for it anymore. The caller should decide + // when ranges should be unblocked: we should continue to reject writes for as long as we may + // receive them. When the joining token has been completely inserted into the ring, it will + // be safe to remove the blocks. + i.BlockRanges(req.Ranges) + + return i.pushChunksToStream(pushChunksOptions{ + States: userStatesCopy, + Stream: stream, + MarkAsFlushed: req.Move, + + IncludeFunc: func(pair fingerprintSeriesPair) bool { + return inAnyRange(pair.series.token, req.Ranges) + }, + }) +} + +// BlockRanges configures a range of token values to be blocked. When a range +// is blocked, the Ingester will no longer accept pushes for any streams whose +// token falls within the blocked ranges. Unblocking the range re-enables those +// pushes. +// +// Blocked ranges will automatically be unblocked after the RangeBlockPeriod +// configuration variable. 
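+// RangeBlockPeriod defaults to one minute, via the -ingester.range-block-period flag.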
+// This acts as a failsafe to prevent blocked ranges sitting around forever if
+// a joining ingester crashes, as writes will continue to go to us and get
+// rejected for as long as the blocked range exists.
+func (i *Ingester) BlockRanges(ranges []ring.TokenRange) {
+	i.blockedTokenMtx.Lock()
+	defer i.blockedTokenMtx.Unlock()
+
+	for _, rg := range ranges {
+		if exist := i.blockedRanges[rg]; exist {
+			continue
+		}
+		i.blockedRanges[rg] = true
+		blockedRanges.Inc()
+	}
+
+	go func() {
+		<-time.After(i.cfg.RangeBlockPeriod)
+		i.UnblockRanges(context.Background(), &client.UnblockRangesRequest{Ranges: ranges})
+	}()
+}
+
+// UnblockRanges manually removes blocks for the provided ranges.
+func (i *Ingester) UnblockRanges(ctx context.Context, in *client.UnblockRangesRequest) (*client.UnblockRangesResponse, error) {
+	i.blockedTokenMtx.Lock()
+	defer i.blockedTokenMtx.Unlock()
+
+	for _, rg := range in.Ranges {
+		if exist := i.blockedRanges[rg]; !exist {
+			level.Debug(util.Logger).Log("msg", "token range not blocked", "from", rg.From, "to", rg.To)
+			continue
+		}
+
+		delete(i.blockedRanges, rg)
+		blockedRanges.Dec()
+	}
+
+	return &client.UnblockRangesResponse{}, nil
+}
+
+// SendChunkRanges connects to the ingester at targetAddr and sends all chunks
+// for streams whose token falls within any of the specified ranges.
+func (i *Ingester) SendChunkRanges(ctx context.Context, ranges []ring.TokenRange, targetAddr string) error {
+	// Block the ranges locally so we don't receive traffic for them anymore.
+	i.BlockRanges(ranges)
+
+	userStatesCopy := i.userStates.cp()
+	if len(userStatesCopy) == 0 {
+		level.Info(util.Logger).Log("msg", "nothing to transfer")
+		return nil
+	}
+
+	c, err := i.cfg.ingesterClientFactory(targetAddr, i.clientConfig)
+	if err != nil {
+		return err
+	}
+	defer c.Close()
+
+	ctx = user.InjectOrgID(ctx, fakeOrgID)
+	stream, err := c.TransferChunksSubset(ctx)
+	if err != nil {
+		return errors.Wrap(err, "SendChunks")
+	}
+
+	err = i.pushChunksToStream(pushChunksOptions{
+		States:        userStatesCopy,
+		Stream:        stream,
+		MarkAsFlushed: true,
+
+		IncludeFunc: func(pair fingerprintSeriesPair) bool {
+			return inAnyRange(pair.series.token, ranges)
+		},
+	})
+	if err != nil {
+		return err
+	}
+
+	_, err = stream.CloseAndRecv()
+	if err != nil {
+		return errors.Wrap(err, "CloseAndRecv")
+	}
+
+	return nil
+}
+
+// RequestChunkRanges connects to the ingester at targetAddr and requests all
+// chunks for streams whose token falls within the specified token ranges.
+//
+// If move is true, the target ingester should remove sent chunks from
+// local memory if the transfer succeeds.
+func (i *Ingester) RequestChunkRanges(ctx context.Context, ranges []ring.TokenRange, targetAddr string, move bool) error {
+	c, err := i.cfg.ingesterClientFactory(targetAddr, i.clientConfig)
+	if err != nil {
+		return err
+	}
+	defer c.Close()
+
+	ctx = user.InjectOrgID(ctx, fakeOrgID)
+	stream, err := c.GetChunksSubset(ctx, &client.GetChunksRequest{
+		Ranges:         ranges,
+		Move:           move,
+		FromIngesterId: i.lifecycler.ID,
+	})
+	if err != nil {
+		return errors.Wrap(err, "GetChunks")
+	}
+
+	i.userStatesMtx.Lock()
+	defer i.userStatesMtx.Unlock()
+
+	_, _, err = i.acceptChunksFromStream(acceptChunksOptions{
+		States: i.userStates,
+		Stream: stream,
+	})
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// RequestComplete connects to the ingester at targetAddr and calls
+// UnblockRanges, allowing the target to remove its write blocks as soon as
+// the transfer is complete.
+func (i *Ingester) RequestComplete(ctx context.Context, ranges []ring.TokenRange, targetAddr string) {
+	c, err := i.cfg.ingesterClientFactory(targetAddr, i.clientConfig)
+	if err != nil {
+		level.Error(util.Logger).Log("msg", "could not clean up target after transfer", "err", err)
+		return
+	}
+	defer c.Close()
+
+	ctx = user.InjectOrgID(ctx, fakeOrgID)
+	_, err = c.UnblockRanges(ctx, &client.UnblockRangesRequest{Ranges: ranges})
+	if err != nil {
+		level.Error(util.Logger).Log("msg", "could not clean up target after transfer", "err", err)
+	}
+}
+
+// MemoryStreamTokens returns the tokens of in-memory streams that still hold
+// unflushed data.
+func (i *Ingester) MemoryStreamTokens() []uint32 {
+	var ret []uint32
+
+	for _, state := range i.userStates.cp() {
+		for pair := range state.fpToSeries.iter() {
+			state.fpLocker.Lock(pair.fp)
+
+			// Skip series with no chunks; this avoids a panic when calling
+			// head below.
+			if len(pair.series.chunkDescs) == 0 {
+				state.fpLocker.Unlock(pair.fp)
+				continue
+			}
+
+			if head := pair.series.head(); head != nil && !head.flushed {
+				ret = append(ret, pair.series.token)
+			}
+
+			state.fpLocker.Unlock(pair.fp)
+		}
+	}
+
+	return ret
+}
+
+func inAnyRange(tok uint32, ranges []ring.TokenRange) bool {
+	for _, rg := range ranges {
+		if rg.Contains(tok) {
+			return true
+		}
+	}
+	return false
+}
diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go
index 8adb56f51e3..348ea4341d1 100644
--- a/pkg/ingester/ingester.go
+++ b/pkg/ingester/ingester.go
@@ -5,11 +5,14 @@ import (
 	"flag"
 	"fmt"
 	"net/http"
+	"strconv"
+	"strings"
 	"sync"
 	"time"
 
 	"github.com/go-kit/kit/log/level"
 	"github.com/gogo/status"
+	"github.com/pkg/errors"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/common/model"
 	"github.com/prometheus/prometheus/pkg/labels"
@@ -42,8 +45,11 @@ type Config struct {
 	WALConfig        WALConfig             `yaml:"walconfig,omitempty"`
 	LifecyclerConfig ring.LifecyclerConfig `yaml:"lifecycler,omitempty"`
 
+	TokenCheckerConfig ring.TokenCheckerConfig `yaml:"token_checker,omitempty"`
+
 	// Config for transferring chunks. Zero or negative = no retries.
-	MaxTransferRetries int `yaml:"max_transfer_retries,omitempty"`
+	MaxTransferRetries int           `yaml:"max_transfer_retries,omitempty"`
+	RangeBlockPeriod   time.Duration `yaml:"range_block_period"`
 
 	// Config for chunk flushing.
 	FlushCheckPeriod time.Duration
@@ -56,6 +62,11 @@ type Config struct {
 	ConcurrentFlushes int
 	SpreadFlushes     bool
 
+	// Config for checking tokens.
+	CheckOnCreate   bool `yaml:"check_token_on_create,omitempty"`
+	CheckOnAppend   bool `yaml:"check_token_on_append,omitempty"`
+	CheckOnTransfer bool `yaml:"check_token_on_transfer,omitempty"`
+
 	RateUpdatePeriod time.Duration
 
 	// Use tsdb block storage
@@ -74,8 +85,10 @@ type Config struct {
 func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
 	cfg.LifecyclerConfig.RegisterFlags(f)
 	cfg.WALConfig.RegisterFlags(f)
+	cfg.TokenCheckerConfig.RegisterFlags(f)
 
 	f.IntVar(&cfg.MaxTransferRetries, "ingester.max-transfer-retries", 10, "Number of times to try and transfer chunks before falling back to flushing.
Negative value or zero disables hand-over.") + f.DurationVar(&cfg.RangeBlockPeriod, "ingester.range-block-period", 1*time.Minute, "Period after which write blocks on ranges expire.") f.DurationVar(&cfg.FlushCheckPeriod, "ingester.flush-period", 1*time.Minute, "Period with which to attempt to flush chunks.") f.DurationVar(&cfg.RetainPeriod, "ingester.retain-period", 5*time.Minute, "Period chunks will remain in memory after flushing.") f.DurationVar(&cfg.FlushOpTimeout, "ingester.flush-op-timeout", 1*time.Minute, "Timeout for individual flush operations.") @@ -85,6 +98,9 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.DurationVar(&cfg.ChunkAgeJitter, "ingester.chunk-age-jitter", 20*time.Minute, "Range of time to subtract from MaxChunkAge to spread out flushes") f.BoolVar(&cfg.SpreadFlushes, "ingester.spread-flushes", false, "If true, spread series flushes across the whole period of MaxChunkAge") f.IntVar(&cfg.ConcurrentFlushes, "ingester.concurrent-flushes", 50, "Number of concurrent goroutines flushing to dynamodb.") + f.BoolVar(&cfg.CheckOnCreate, "ingester.check-token-on-create", false, "Check that newly created streams fall within expected token ranges") + f.BoolVar(&cfg.CheckOnAppend, "ingester.check-token-on-append", false, "Check that existing streams appended to fall within expected token ranges") + f.BoolVar(&cfg.CheckOnTransfer, "ingester.check-token-on-transfer", false, "Check that streams transferred in using the transfer mechanism fall within expected token ranges") f.DurationVar(&cfg.RateUpdatePeriod, "ingester.rate-update-period", 15*time.Second, "Period with which to update the per-user ingestion rates.") } @@ -96,10 +112,11 @@ type Ingester struct { metrics *ingesterMetrics - chunkStore ChunkStore - lifecycler *ring.Lifecycler - limits *validation.Overrides - limiter *SeriesLimiter + chunkStore ChunkStore + lifecycler *ring.Lifecycler + tokenChecker *ring.TokenChecker + limits *validation.Overrides + limiter *SeriesLimiter quit chan struct{} done sync.WaitGroup @@ -116,6 +133,10 @@ type Ingester struct { // This should never be nil. wal WAL + // Stops specific appends + blockedTokenMtx sync.RWMutex + blockedRanges map[ring.TokenRange]bool + // Hook for injecting behaviour from tests. preFlushUserSeries func() @@ -152,20 +173,21 @@ func New(cfg Config, clientConfig client.Config, limits *validation.Overrides, c } i := &Ingester{ - cfg: cfg, - clientConfig: clientConfig, - metrics: newIngesterMetrics(registerer, true), - limits: limits, - chunkStore: chunkStore, - quit: make(chan struct{}), - flushQueues: make([]*util.PriorityQueue, cfg.ConcurrentFlushes, cfg.ConcurrentFlushes), + cfg: cfg, + clientConfig: clientConfig, + metrics: newIngesterMetrics(registerer, true), + limits: limits, + chunkStore: chunkStore, + quit: make(chan struct{}), + blockedRanges: make(map[ring.TokenRange]bool), + flushQueues: make([]*util.PriorityQueue, cfg.ConcurrentFlushes, cfg.ConcurrentFlushes), } var err error // During WAL recovery, it will create new user states which requires the limiter. // Hence initialise the limiter before creating the WAL. // The '!cfg.WALConfig.WALEnabled' argument says don't flush on shutdown if the WAL is enabled. 
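The -/+ pair just below is where this commit threads incremental transfers into the lifecycler: the ingester now passes itself to ring.NewLifecycler a second time, as the incremental transferer. The interface it satisfies is defined in pkg/ring/incremental_transfer.go; as a reading aid, here is a sketch reconstructed from the methods the ingester implements in this patch (doc wording and exact method set are assumptions, not the authoritative definition):

	// Sketch of ring.IncrementalTransferer, reconstructed from usage in this
	// patch; defined for real in pkg/ring/incremental_transfer.go.
	type IncrementalTransferer interface {
		// RequestChunkRanges pulls chunks for streams whose token falls within
		// ranges from the ingester at targetAddr. When move is true, the remote
		// marks sent chunks as flushed, so data is moved rather than copied.
		RequestChunkRanges(ctx context.Context, ranges []TokenRange, targetAddr string, move bool) error

		// SendChunkRanges pushes local chunks within ranges to the ingester at
		// targetAddr, blocking the ranges locally first.
		SendChunkRanges(ctx context.Context, ranges []TokenRange, targetAddr string) error

		// RequestComplete tells targetAddr that the transfer finished, so it
		// can unblock the affected ranges without waiting for RangeBlockPeriod.
		RequestComplete(ctx context.Context, ranges []TokenRange, targetAddr string)

		// MemoryStreamTokens returns the tokens of streams that still hold
		// unflushed data in memory.
		MemoryStreamTokens() []uint32
	}
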
- i.lifecycler, err = ring.NewLifecycler(cfg.LifecyclerConfig, i, "ingester", ring.IngesterRingKey, !cfg.WALConfig.WALEnabled) + i.lifecycler, err = ring.NewLifecycler(cfg.LifecyclerConfig, i, i, "ingester", ring.IngesterRingKey, !cfg.WALConfig.WALEnabled) if err != nil { return nil, err } @@ -196,6 +218,9 @@ func New(cfg Config, clientConfig client.Config, limits *validation.Overrides, c // Now that user states have been created, we can start the lifecycler i.lifecycler.Start() + ringConfig := cfg.LifecyclerConfig.RingConfig + i.tokenChecker = ring.NewTokenChecker(cfg.TokenCheckerConfig, ringConfig, i.lifecycler, i.unexpectedStreamsHandler) + i.flushQueuesDone.Add(cfg.ConcurrentFlushes) for j := 0; j < cfg.ConcurrentFlushes; j++ { i.flushQueues[j] = util.NewPriorityQueue(i.metrics.flushQueueLength) @@ -246,6 +271,11 @@ func (i *Ingester) Shutdown() { // Next initiate our graceful exit from the ring. i.lifecycler.Shutdown() + + // Shut down the token checker. Nil when using TSDB. + if i.tokenChecker != nil { + i.tokenChecker.Shutdown() + } } } @@ -263,9 +293,28 @@ func (i *Ingester) ShutdownHandler(w http.ResponseWriter, r *http.Request) { // StopIncomingRequests is called during the shutdown process. func (i *Ingester) StopIncomingRequests() { - i.userStatesMtx.Lock() - defer i.userStatesMtx.Unlock() - i.stopped = true + // If we're not incrementally transferring tokens out, we want + // to stop all traffic. + if !i.cfg.LifecyclerConfig.LeaveIncrementalTransfer { + i.userStatesMtx.Lock() + defer i.userStatesMtx.Unlock() + i.stopped = true + return + } + + // When we are incrementally transferring tokens, we want to wait + // for there to be no blocked ranges on our local ingester. + for { + i.blockedTokenMtx.RLock() + numBlocked := len(i.blockedRanges) + i.blockedTokenMtx.RUnlock() + + if numBlocked == 0 { + return + } + + time.Sleep(time.Millisecond * 250) + } } // Push implements client.IngesterServer @@ -298,7 +347,7 @@ func (i *Ingester) Push(ctx context.Context, req *client.WriteRequest) (*client. for _, ts := range req.Timeseries { for _, s := range ts.Samples { - err := i.append(ctx, userID, ts.Labels, model.Time(s.TimestampMs), model.SampleValue(s.Value), req.Source, record) + err := i.append(ctx, userID, ts.Token, ts.Labels, model.Time(s.TimestampMs), model.SampleValue(s.Value), req.Source, record) if err == nil { continue } @@ -328,7 +377,24 @@ func (i *Ingester) Push(ctx context.Context, req *client.WriteRequest) (*client. return &client.WriteResponse{}, nil } -func (i *Ingester) append(ctx context.Context, userID string, labels labelPairs, timestamp model.Time, value model.SampleValue, source client.WriteRequest_SourceEnum, record *Record) error { +// isTokenBlocked checks to see if a token is in a blocked range. 
+func (i *Ingester) isTokenBlocked(token uint32) error { + i.blockedTokenMtx.RLock() + defer i.blockedTokenMtx.RUnlock() + + for rg := range i.blockedRanges { + if rg.Contains(token) { + return &validationError{ + err: errors.New("transfer in progress"), + code: http.StatusServiceUnavailable, + } + } + } + + return nil +} + +func (i *Ingester) append(ctx context.Context, userID string, token uint32, labels labelPairs, timestamp model.Time, value model.SampleValue, source client.WriteRequest_SourceEnum, record *Record) error { labels.removeBlanks() var ( @@ -346,7 +412,12 @@ func (i *Ingester) append(ctx context.Context, userID string, labels labelPairs, return fmt.Errorf("ingester stopping") } - state, fp, series, err := i.userStates.getOrCreateSeries(ctx, userID, labels, record) + if err := i.isTokenBlocked(token); err != nil { + i.metrics.rejectedSamplesTotal.Inc() + return err + } + + state, fp, series, sstate, err := i.userStates.getOrCreateSeries(ctx, userID, labels, record, token) if err != nil { if ve, ok := err.(*validationError); ok { state.discardedSamples.WithLabelValues(ve.errorType).Inc() @@ -358,6 +429,18 @@ func (i *Ingester) append(ctx context.Context, userID string, labels labelPairs, return err } + if sstate == seriesCreated && i.cfg.CheckOnCreate { + if ok := i.tokenChecker.TokenExpected(token); !ok { + level.Debug(util.Logger).Log("msg", "unexpected stream created in ingester", "token", token) + i.metrics.unexpectedSeriesTotal.WithLabelValues("create").Inc() + } + } else if i.cfg.CheckOnAppend { + if ok := i.tokenChecker.TokenExpected(token); !ok { + level.Debug(util.Logger).Log("msg", "unexpected stream appended in ingester", "token", token) + i.metrics.unexpectedSeriesTotal.WithLabelValues("append").Inc() + } + } + prevNumChunks := len(series.chunkDescs) if i.cfg.SpreadFlushes && prevNumChunks > 0 { // Map from the fingerprint hash to a point in the cycle of period MaxChunkAge @@ -702,3 +785,22 @@ func (i *Ingester) ReadinessHandler(w http.ResponseWriter, r *http.Request) { http.Error(w, "Not ready: "+err.Error(), http.StatusServiceUnavailable) } } + +func (i *Ingester) unexpectedStreamsHandler(tokens []uint32) { + i.metrics.unexpectedSeries.Set(float64(len(tokens))) + if len(tokens) == 0 { + return + } + + // Cut list of invalid tokens to first 20 + if len(tokens) > 20 { + tokens = tokens[:20] + } + + tokenStr := make([]string, len(tokens)) + for i, tok := range tokens { + tokenStr[i] = strconv.FormatUint(uint64(tok), 10) + } + + level.Debug(util.Logger).Log("msg", "unexpected tokens found", "tokens", strings.Join(tokenStr, ", ")) +} diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go index cc690dcecd4..0d1fc697ca0 100644 --- a/pkg/ingester/ingester_test.go +++ b/pkg/ingester/ingester_test.go @@ -168,7 +168,11 @@ func pushTestSamples(t *testing.T, ing *Ingester, numSeries, samplesPerSeries, o // Append samples. 
for _, userID := range userIDs { ctx := user.InjectOrgID(context.Background(), userID) - _, err := ing.Push(ctx, client.ToWriteRequest(matrixToLables(testData[userID]), matrixToSamples(testData[userID]), client.API)) + req := client.ToWriteRequest(matrixToLables(testData[userID]), matrixToSamples(testData[userID]), client.API) + for i := range req.Timeseries { + req.Timeseries[i].Token = 1234 + } + _, err := ing.Push(ctx, req) require.NoError(t, err) } @@ -308,22 +312,22 @@ func TestIngesterAppendOutOfOrderAndDuplicate(t *testing.T) { {Name: model.MetricNameLabel, Value: "testmetric"}, } ctx := context.Background() - err := ing.append(ctx, userID, m, 1, 0, client.API, nil) + err := ing.append(ctx, userID, 0, m, 1, 0, client.API, nil) require.NoError(t, err) // Two times exactly the same sample (noop). - err = ing.append(ctx, userID, m, 1, 0, client.API, nil) + err = ing.append(ctx, userID, 0, m, 1, 0, client.API, nil) require.NoError(t, err) // Earlier sample than previous one. - err = ing.append(ctx, userID, m, 0, 0, client.API, nil) + err = ing.append(ctx, userID, 0, m, 0, 0, client.API, nil) require.Contains(t, err.Error(), "sample timestamp out of order") errResp, ok := err.(*validationError) require.True(t, ok) require.Equal(t, errResp.code, 400) // Same timestamp as previous sample, but different value. - err = ing.append(ctx, userID, m, 1, 1, client.API, nil) + err = ing.append(ctx, userID, 0, m, 1, 1, client.API, nil) require.Contains(t, err.Error(), "sample with repeated timestamp but different value") errResp, ok = err.(*validationError) require.True(t, ok) @@ -341,7 +345,7 @@ func TestIngesterAppendBlankLabel(t *testing.T) { {Name: "bar", Value: ""}, } ctx := user.InjectOrgID(context.Background(), userID) - err := ing.append(ctx, userID, lp, 1, 0, client.API, nil) + err := ing.append(ctx, userID, 0, lp, 1, 0, client.API, nil) require.NoError(t, err) res, _, err := runTestQuery(ctx, t, ing, labels.MatchEqual, labels.MetricName, "testmetric") diff --git a/pkg/ingester/ingester_v2.go b/pkg/ingester/ingester_v2.go index 00ea5b242b0..a377c84d9a7 100644 --- a/pkg/ingester/ingester_v2.go +++ b/pkg/ingester/ingester_v2.go @@ -79,13 +79,14 @@ func NewV2(cfg Config, clientConfig client.Config, limits *validation.Overrides, } i := &Ingester{ - cfg: cfg, - clientConfig: clientConfig, - metrics: newIngesterMetrics(registerer, false), - limits: limits, - chunkStore: nil, - quit: make(chan struct{}), - wal: &noopWAL{}, + cfg: cfg, + clientConfig: clientConfig, + metrics: newIngesterMetrics(registerer, false), + limits: limits, + chunkStore: nil, + blockedRanges: make(map[ring.TokenRange]bool), + quit: make(chan struct{}), + wal: &noopWAL{}, TSDBState: TSDBState{ dbs: make(map[string]*userTSDB), bucket: bucketClient, @@ -103,7 +104,7 @@ func NewV2(cfg Config, clientConfig client.Config, limits *validation.Overrides, }, i.numSeriesInTSDB)) } - i.lifecycler, err = ring.NewLifecycler(cfg.LifecyclerConfig, i, "ingester", ring.IngesterRingKey, true) + i.lifecycler, err = ring.NewLifecycler(cfg.LifecyclerConfig, i, nil, "ingester", ring.IngesterRingKey, true) if err != nil { return nil, err } diff --git a/pkg/ingester/lifecycle_test.go b/pkg/ingester/lifecycle_test.go index 21f1436ef5d..0047ad154c0 100644 --- a/pkg/ingester/lifecycle_test.go +++ b/pkg/ingester/lifecycle_test.go @@ -37,6 +37,7 @@ func defaultIngesterTestConfig() Config { cfg := Config{} flagext.DefaultValues(&cfg) flagext.DefaultValues(&cfg.TSDBConfig) + cfg.RangeBlockPeriod = 5 * time.Second cfg.FlushCheckPeriod = 99999 * time.Hour 
cfg.MaxChunkIdle = 99999 * time.Hour cfg.ConcurrentFlushes = 1 diff --git a/pkg/ingester/metrics.go b/pkg/ingester/metrics.go index d4d64daa272..7d69c10d670 100644 --- a/pkg/ingester/metrics.go +++ b/pkg/ingester/metrics.go @@ -29,6 +29,9 @@ type ingesterMetrics struct { memSeriesCreatedTotal *prometheus.CounterVec memSeriesRemovedTotal *prometheus.CounterVec walReplayDuration prometheus.Gauge + unexpectedSeries prometheus.Gauge + unexpectedSeriesTotal *prometheus.CounterVec + rejectedSamplesTotal prometheus.Counter } func newIngesterMetrics(r prometheus.Registerer, registerMetricsConflictingWithTSDB bool) *ingesterMetrics { @@ -87,6 +90,21 @@ func newIngesterMetrics(r prometheus.Registerer, registerMetricsConflictingWithT Name: "cortex_ingester_wal_replay_duration_seconds", Help: "Time taken to replay the checkpoint and the WAL.", }), + unexpectedSeries: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "cortex", + Name: "ingester_unexpected_series", + Help: "Current number of unexpected series found in the ingester.", + }), + unexpectedSeriesTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "ingester_unexpected_series_total", + Help: "Total number of unexpected series found.", + }, []string{"source"}), + rejectedSamplesTotal: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "ingester_rejected_samples_total", + Help: "Total number of rejected samples due to in-progress transfers.", + }), } if r != nil { @@ -101,6 +119,9 @@ func newIngesterMetrics(r prometheus.Registerer, registerMetricsConflictingWithT m.memSeries, m.memUsers, m.walReplayDuration, + m.unexpectedSeries, + m.unexpectedSeriesTotal, + m.rejectedSamplesTotal, ) if registerMetricsConflictingWithTSDB { diff --git a/pkg/ingester/query_test.go b/pkg/ingester/query_test.go index 4d8190c6cc2..f79f79ee5c2 100644 --- a/pkg/ingester/query_test.go +++ b/pkg/ingester/query_test.go @@ -54,7 +54,7 @@ func BenchmarkQueryStream(b *testing.B) { {Name: "cpu", Value: cpus[i%numCPUs]}, } - state, fp, series, err := ing.userStates.getOrCreateSeries(ctx, "1", labels, nil) + state, fp, series, _, err := ing.userStates.getOrCreateSeries(ctx, "1", labels, nil, 0) require.NoError(b, err) for j := 0; j < numSamples; j++ { diff --git a/pkg/ingester/series.go b/pkg/ingester/series.go index 5bf2f57f0eb..a63481fc608 100644 --- a/pkg/ingester/series.go +++ b/pkg/ingester/series.go @@ -30,6 +30,9 @@ type memorySeries struct { // Sorted by start time, overlapping chunk ranges are forbidden. chunkDescs []*desc + // Token used by the distributor to shard into this series + token uint32 + // Whether the current head chunk has already been finished. If true, // the current head chunk must not be modified anymore. headChunkClosed bool @@ -43,10 +46,11 @@ type memorySeries struct { // newMemorySeries returns a pointer to a newly allocated memorySeries for the // given metric. 
-func newMemorySeries(m labels.Labels) *memorySeries { +func newMemorySeries(m labels.Labels, token uint32) *memorySeries { return &memorySeries{ metric: m, lastTime: model.Earliest, + token: token, } } diff --git a/pkg/ingester/transfer.go b/pkg/ingester/transfer.go index e1a1aeb11c9..7bbd9326208 100644 --- a/pkg/ingester/transfer.go +++ b/pkg/ingester/transfer.go @@ -26,6 +26,8 @@ import ( ) var ( + fakeOrgID = "-1" + sentChunks = prometheus.NewCounter(prometheus.CounterOpts{ Name: "cortex_ingester_sent_chunks", Help: "The total number of chunks sent by this ingester whilst leaving.", @@ -51,6 +53,13 @@ var ( Help: "The total number of bytes sent by this ingester whilst leaving", }) errTransferNoPendingIngesters = errors.New("no pending ingesters") + + ignoredSeries = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "cortex_ingester_ignored_series", + Help: "The total number of series ignored by this ingester", + }) + + once *sync.Once ) func init() { @@ -60,57 +69,175 @@ func init() { prometheus.MustRegister(receivedBytes) prometheus.MustRegister(receivedFiles) prometheus.MustRegister(sentBytes) + prometheus.MustRegister(ignoredSeries) } -// TransferChunks receives all the chunks from another ingester. -func (i *Ingester) TransferChunks(stream client.Ingester_TransferChunksServer) error { - fromIngesterID := "" - seriesReceived := 0 - xfer := func() error { - userStates := newUserStates(i.limiter, i.cfg, i.metrics) +type chunkStream interface { + Context() context.Context + Recv() (*client.TimeSeriesChunk, error) +} - for { - wireSeries, err := stream.Recv() - if err == io.EOF { - break - } - if err != nil { - return errors.Wrap(err, "TransferChunks: Recv") - } +type acceptChunksOptions struct { + States *userStates + Stream chunkStream + ValidateRemoteLeaving bool +} - // We can't send "extra" fields with a streaming call, so we repeat - // wireSeries.FromIngesterId and assume it is the same every time - // round this loop. - if fromIngesterID == "" { - fromIngesterID = wireSeries.FromIngesterId - level.Info(util.Logger).Log("msg", "processing TransferChunks request", "from_ingester", fromIngesterID) +func (i *Ingester) acceptChunksFromStream(opts acceptChunksOptions) (fromIngesterID string, seriesReceived int, err error) { + for { + wireSeries, err := opts.Stream.Recv() + if err == io.EOF { + break + } + if err != nil { + return fromIngesterID, seriesReceived, err + } - // Before transfer, make sure 'from' ingester is in correct state to call ClaimTokensFor later - err := i.checkFromIngesterIsInLeavingState(stream.Context(), fromIngesterID) - if err != nil { - return errors.Wrap(err, "TransferChunks: checkFromIngesterIsInLeavingState") - } - } - descs, err := fromWireChunks(wireSeries.Chunks) + // We can't send "extra" fields with a streaming call, so we repeat + // wireSeries.FromIngesterId and assume it is the same every time + // round this loop. + if fromIngesterID == "" { + fromIngesterID = wireSeries.FromIngesterId + level.Info(util.Logger).Log("msg", "processing TransferChunks request", "from_ingester", fromIngesterID) + } + if opts.ValidateRemoteLeaving { + // Before transfer, make sure 'from' ingester is in correct state to call ClaimTokensFor later. 
+ err := i.checkFromIngesterIsInLeavingState(opts.Stream.Context(), fromIngesterID) if err != nil { - return errors.Wrap(err, "TransferChunks: fromWireChunks") + return fromIngesterID, seriesReceived, errors.Wrap(err, "TransferChunks: checkFromIngesterIsInLeavingState") } + } - state, fp, series, err := userStates.getOrCreateSeries(stream.Context(), wireSeries.UserId, wireSeries.Labels, nil) - if err != nil { - return errors.Wrapf(err, "TransferChunks: getOrCreateSeries: user %s series %s", wireSeries.UserId, wireSeries.Labels) - } - prevNumChunks := len(series.chunkDescs) + descs, err := fromWireChunks(wireSeries.Chunks) + if err != nil { + return fromIngesterID, seriesReceived, errors.Wrap(err, "TransferChunks: fromWireChunks") + } + + if wireSeries.Token == 0 { + level.Warn(util.Logger).Log("msg", "unexpected token 0 in wireSeries") + } + state, fp, series, sstate, err := opts.States.getOrCreateSeries(opts.Stream.Context(), wireSeries.UserId, wireSeries.Labels, nil, wireSeries.Token) + if err != nil { + return fromIngesterID, seriesReceived, errors.Wrapf(err, "TransferChunks: getOrCreateSeries: user %s series %s", wireSeries.UserId, wireSeries.Labels) + } + prevNumChunks := len(series.chunkDescs) + + if i.cfg.CheckOnTransfer && !i.tokenChecker.TokenExpected(wireSeries.Token) { + level.Debug(util.Logger).Log("msg", "unexpected stream transferred to ingester", "token", wireSeries.Token) + i.metrics.unexpectedSeriesTotal.WithLabelValues("transfer").Inc() + } + + if sstate == seriesCreated { + // If the series already exists, we'll trust its data over ours. err = series.setChunks(descs) - state.fpLocker.Unlock(fp) // acquired in getOrCreateSeries + } else { + ignoredSeries.Inc() + } + + state.fpLocker.Unlock(fp) // acquired in getOrCreateSeries + if err != nil { + return fromIngesterID, seriesReceived, err + } + + seriesReceived++ + memoryChunks.Add(float64(len(series.chunkDescs) - prevNumChunks)) + receivedChunks.Add(float64(len(descs))) + } + + return fromIngesterID, seriesReceived, nil +} + +type chunkPushStream interface { + Send(*client.TimeSeriesChunk) error +} + +type pushChunksOptions struct { + States map[string]*userState + Stream chunkPushStream + IncludeFunc func(pair fingerprintSeriesPair) bool + MarkAsFlushed bool +} + +func (i *Ingester) pushChunksToStream(opts pushChunksOptions) error { + var curCh <-chan fingerprintSeriesPair + defer func() { + if curCh == nil { + return + } + + for range curCh { + // Drain latest channel, needed if we exit on error + } + }() + + for userID, state := range opts.States { + curCh = state.fpToSeries.iter() + for pair := range curCh { + err := i.pushChunksPair(opts, userID, state, pair) if err != nil { - return errors.Wrapf(err, "TransferChunks: setChunks: user %s series %s", wireSeries.UserId, wireSeries.Labels) + return err } + } + } - seriesReceived++ - memoryChunks.Add(float64(len(series.chunkDescs) - prevNumChunks)) - receivedChunks.Add(float64(len(descs))) + return nil +} + +func (i *Ingester) pushChunksPair(opts pushChunksOptions, userID string, state *userState, pair fingerprintSeriesPair) error { + state.fpLocker.Lock(pair.fp) + defer state.fpLocker.Unlock(pair.fp) + + if opts.IncludeFunc != nil && !opts.IncludeFunc(pair) { + return nil + } + + if len(pair.series.chunkDescs) == 0 { + return nil + } + + chunks, err := toWireChunks(pair.series.chunkDescs, []client.Chunk{}) + if err != nil { + return errors.Wrap(err, "toWireChunks") + } + + err = opts.Stream.Send(&client.TimeSeriesChunk{ + FromIngesterId: i.lifecycler.ID, + UserId: 
userID, + Labels: client.FromLabelsToLabelAdapters(pair.series.metric), + Chunks: chunks, + Token: pair.series.token, + }) + if err == nil && opts.MarkAsFlushed { + // Mark all the chunks as "flushed". They'll retain in memory until the idle limit kicks in. + for _, desc := range pair.series.chunkDescs { + desc.flushed = true + } + } + + if err != nil { + return errors.Wrap(err, "Send") + } + + sentChunks.Add(float64(len(chunks))) + return nil +} + +// TransferChunks receives all the chunks from another ingester. +func (i *Ingester) TransferChunks(stream client.Ingester_TransferChunksServer) error { + fromIngesterID := "" + seriesReceived := 0 + + xfer := func() error { + userStates := newUserStates(i.limiter, i.cfg, i.metrics) + + fromIngesterID, seriesReceived, err := i.acceptChunksFromStream(acceptChunksOptions{ + States: userStates, + Stream: stream, + ValidateRemoteLeaving: true, + }) + if err != nil { + return err } if seriesReceived == 0 { @@ -131,7 +258,6 @@ func (i *Ingester) TransferChunks(stream client.Ingester_TransferChunksServer) e defer i.userStatesMtx.Unlock() i.userStates = userStates - return nil } @@ -185,14 +311,14 @@ func (i *Ingester) transfer(ctx context.Context, xfer func() error) error { // method, and as such we have to ensure we unlock the mutex. defer func() { state := i.lifecycler.GetState() - if i.lifecycler.GetState() == ring.ACTIVE { + if state == ring.ACTIVE { return } level.Error(util.Logger).Log("msg", "TransferChunks failed, not in ACTIVE state.", "state", state) // Enter PENDING state (only valid from JOINING) - if i.lifecycler.GetState() == ring.JOINING { + if state == ring.JOINING { if err := i.lifecycler.ChangeState(ctx, ring.PENDING); err != nil { level.Error(util.Logger).Log("msg", "error rolling back failed TransferChunks", "err", err) os.Exit(1) @@ -451,41 +577,19 @@ func (i *Ingester) transferOut(ctx context.Context) error { } defer c.Close() - ctx = user.InjectOrgID(ctx, "-1") + ctx = user.InjectOrgID(ctx, fakeOrgID) stream, err := c.TransferChunks(ctx) if err != nil { return errors.Wrap(err, "TransferChunks") } - var chunks []client.Chunk - for userID, state := range userStatesCopy { - for pair := range state.fpToSeries.iter() { - state.fpLocker.Lock(pair.fp) - - if len(pair.series.chunkDescs) == 0 { // Nothing to send? 
- state.fpLocker.Unlock(pair.fp) - continue - } - - chunks, err = toWireChunks(pair.series.chunkDescs, chunks) - if err != nil { - state.fpLocker.Unlock(pair.fp) - return errors.Wrap(err, "toWireChunks") - } - - err = stream.Send(&client.TimeSeriesChunk{ - FromIngesterId: i.lifecycler.ID, - UserId: userID, - Labels: client.FromLabelsToLabelAdapters(pair.series.metric), - Chunks: chunks, - }) - state.fpLocker.Unlock(pair.fp) - if err != nil { - return errors.Wrap(err, "Send") - } - - sentChunks.Add(float64(len(chunks))) - } + err = i.pushChunksToStream(pushChunksOptions{ + States: userStatesCopy, + Stream: stream, + MarkAsFlushed: false, + }) + if err != nil { + return err } _, err = stream.CloseAndRecv() @@ -568,7 +672,7 @@ func (i *Ingester) v2TransferOut(ctx context.Context) error { } defer c.Close() - ctx = user.InjectOrgID(ctx, "-1") + ctx = user.InjectOrgID(ctx, fakeOrgID) stream, err := c.TransferTSDB(ctx) if err != nil { return errors.Wrap(err, "TransferTSDB() has failed") diff --git a/pkg/ingester/transfer_test.go b/pkg/ingester/transfer_test.go index 4e3eb3838af..3f7cfb9fdd6 100644 --- a/pkg/ingester/transfer_test.go +++ b/pkg/ingester/transfer_test.go @@ -4,10 +4,13 @@ import ( "context" "crypto/rand" "fmt" + "io" "io/ioutil" + "math" rnd "math/rand" "os" "path/filepath" + "sync" "testing" "time" @@ -17,6 +20,11 @@ import ( "google.golang.org/grpc" "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/ring/kv" + "github.com/cortexproject/cortex/pkg/ring/kv/codec" + "github.com/weaveworks/common/user" + "google.golang.org/grpc/health/grpc_health_v1" ) type testUserTSDB struct { @@ -198,3 +206,461 @@ func TestTransferUser(t *testing.T) { require.Equal(t, original, xferfiles) } + +type testTimeSeriesClient struct { + ctx context.Context + ch chan *client.TimeSeriesChunk + grpc.ClientStream +} + +func (s *testTimeSeriesClient) Context() context.Context { return s.ctx } + +func (s *testTimeSeriesClient) Recv() (*client.TimeSeriesChunk, error) { + ret, ok := <-s.ch + if !ok { + return nil, io.EOF + } + return ret, nil +} + +func (s *testTimeSeriesClient) Send(resp *client.TimeSeriesChunk) error { + s.ch <- resp + return nil +} + +type testTimeSeriesServer struct { + ctx context.Context + ch chan *client.TimeSeriesChunk + grpc.ServerStream +} + +func (s *testTimeSeriesServer) Context() context.Context { return s.ctx } + +func (s *testTimeSeriesServer) Recv() (*client.TimeSeriesChunk, error) { + ret, ok := <-s.ch + if !ok { + return nil, io.EOF + } + return ret, nil +} + +func (s *testTimeSeriesServer) Send(resp *client.TimeSeriesChunk) error { + s.ch <- resp + return nil +} + +type testTransferChunksClient struct { + testTimeSeriesClient + + resp chan *client.TransferChunksResponse +} + +func (s *testTransferChunksClient) CloseAndRecv() (*client.TransferChunksResponse, error) { + close(s.ch) + resp := <-s.resp + close(s.resp) + return resp, nil +} + +type testTransferChunksServer struct { + testTimeSeriesServer + + resp chan *client.TransferChunksResponse +} + +func (s *testTransferChunksServer) SendAndClose(resp *client.TransferChunksResponse) error { + s.resp <- resp + return nil +} + +func makeDummyPushData() *client.WriteRequest { + return &client.WriteRequest{ + Source: client.API, + Timeseries: []client.PreallocTimeseries{ + { + TimeSeries: &client.TimeSeries{ + Labels: []client.LabelAdapter{ + {Name: "__name__", Value: "metric"}, + {Name: "foo", Value: "bar"}, + {Name: "bar", Value: "baz1"}, + }, 
+ Samples: []client.Sample{ + {Value: 10, TimestampMs: time.Unix(0, 0).Unix() * 1000}, + {Value: 20, TimestampMs: time.Unix(1, 0).Unix() * 1000}, + }, + Token: 1234, + }, + }, + { + TimeSeries: &client.TimeSeries{ + Labels: []client.LabelAdapter{ + {Name: "__name__", Value: "metric"}, + {Name: "foo", Value: "bar"}, + {Name: "bar", Value: "baz2"}, + }, + Samples: []client.Sample{ + {Value: 30, TimestampMs: time.Unix(2, 0).Unix() * 1000}, + {Value: 40, TimestampMs: time.Unix(3, 0).Unix() * 1000}, + }, + Token: 1234, + }, + }, + }, + } +} + +func TestSendChunkRange(t *testing.T) { + ctx := user.InjectOrgID(context.Background(), "test") + f := newTestIngesterFactory(t) + + ing := f.getIngester(t) + defer ing.Shutdown() + _, err := ing.Push(ctx, makeDummyPushData()) + require.NoError(t, err) + + statesCp := ing.userStates.cp() + require.Len(t, statesCp, 1) + require.Equal(t, 2, statesCp["test"].fpToSeries.length()) + + ing2 := f.getIngester(t) + defer ing2.Shutdown() + + // First, try sending zero chunks + err = ing.SendChunkRanges(ctx, []ring.TokenRange{{From: 0, To: 0}}, ing2.lifecycler.ID) + require.NoError(t, err) + + statesCp = ing2.userStates.cp() + require.Len(t, statesCp, 0) + + // Now, send all chunks + err = ing.SendChunkRanges(ctx, []ring.TokenRange{{From: 0, To: math.MaxUint32}}, ing2.lifecycler.ID) + require.NoError(t, err) + + statesCp = ing2.userStates.cp() + require.Len(t, statesCp, 1) + require.Equal(t, 2, statesCp["test"].fpToSeries.length()) +} + +func TestRequestChunkRange(t *testing.T) { + ctx := user.InjectOrgID(context.Background(), "test") + f := newTestIngesterFactory(t) + + ing := f.getIngester(t) + defer ing.Shutdown() + + _, err := ing.Push(ctx, makeDummyPushData()) + require.NoError(t, err) + + statesCp := ing.userStates.cp() + require.Len(t, statesCp, 1) + require.Equal(t, 2, statesCp["test"].fpToSeries.length()) + + ing2 := f.getIngester(t) + defer ing2.Shutdown() + + // First, try requesting zero chunks + err = ing2.RequestChunkRanges(ctx, []ring.TokenRange{{From: 0, To: 0}}, ing.lifecycler.ID, false) + require.NoError(t, err) + + statesCp = ing2.userStates.cp() + require.Len(t, statesCp, 0) + + // Now, request all chunks + err = ing2.RequestChunkRanges(ctx, []ring.TokenRange{{From: 0, To: math.MaxUint32}}, ing.lifecycler.ID, false) + require.NoError(t, err) + + statesCp = ing2.userStates.cp() + require.Len(t, statesCp, 1) + require.Equal(t, 2, statesCp["test"].fpToSeries.length()) + + // Should still be in the first ingester from the copy + statesCp = ing.userStates.cp() + require.Len(t, statesCp, 1) + require.Equal(t, 2, statesCp["test"].fpToSeries.length()) + + // Transfer again with a move and make sure they're gone + // from the first ingester + err = ing2.RequestChunkRanges(ctx, []ring.TokenRange{{From: 0, To: math.MaxUint32}}, ing.lifecycler.ID, true) + require.NoError(t, err) + require.Equal(t, 0, len(ing.MemoryStreamTokens())) +} + +func TestBlockRange(t *testing.T) { + ctx := user.InjectOrgID(context.Background(), "test") + f := newTestIngesterFactory(t) + + ing := f.getIngester(t) + defer ing.Shutdown() + + // Block nothing + ing.BlockRanges([]ring.TokenRange{{From: 0, To: 0}}) + + _, err := ing.Push(ctx, makeDummyPushData()) + require.NoError(t, err) + + _, err = ing.UnblockRanges(ctx, &client.UnblockRangesRequest{ + Ranges: []ring.TokenRange{{From: 0, To: 0}}, + }) + require.NoError(t, err) + + ing.BlockRanges([]ring.TokenRange{{From: 0, To: math.MaxUint32}}) + require.NoError(t, err) + + _, err = ing.Push(ctx, makeDummyPushData()) + require.Error(t, 
err) +} + +type testIngesterFactory struct { + ReplicationFactor int + GenerateTokens ring.TokenGeneratorFunc + NumTokens int + + t require.TestingT + store kv.Client + n int + + ingestersMtx sync.Mutex + ingesters map[string]*Ingester +} + +func newTestIngesterFactory(t require.TestingT) *testIngesterFactory { + kvClient, err := kv.NewClient(kv.Config{Store: "inmemory"}, codec.Proto{Factory: ring.ProtoDescFactory}) + require.NoError(t, err) + + return &testIngesterFactory{ + ReplicationFactor: 1, + NumTokens: 1, + + t: t, + store: kvClient, + ingesters: make(map[string]*Ingester), + } +} + +func (f *testIngesterFactory) getClient(addr string, cfg client.Config) (client.HealthAndIngesterClient, error) { + f.ingestersMtx.Lock() + defer f.ingestersMtx.Unlock() + + ingester, ok := f.ingesters[addr] + if !ok { + return nil, fmt.Errorf("no ingester %s", addr) + } + + return struct { + client.IngesterClient + grpc_health_v1.HealthClient + io.Closer + }{ + IngesterClient: &testIngesterClient{i: ingester}, + Closer: ioutil.NopCloser(nil), + }, nil +} + +func (f *testIngesterFactory) getIngester(t require.TestingT) *Ingester { + currentIngesters := f.n + f.n++ + + cfg := defaultIngesterTestConfig() + cfg.MaxTransferRetries = 1 + cfg.LifecyclerConfig.NumTokens = f.NumTokens + cfg.LifecyclerConfig.ID = fmt.Sprintf("localhost-%d", f.n) + cfg.LifecyclerConfig.JoinIncrementalTransfer = true + cfg.LifecyclerConfig.LeaveIncrementalTransfer = true + cfg.LifecyclerConfig.Addr = cfg.LifecyclerConfig.ID + cfg.LifecyclerConfig.RingConfig.KVStore.Mock = f.store + cfg.LifecyclerConfig.RingConfig.ReplicationFactor = f.ReplicationFactor + cfg.LifecyclerConfig.TransferFinishDelay = time.Duration(0) + + // Assign incrementally valued tokens to each ingester. + cfg.LifecyclerConfig.GenerateTokens = + func(numTokens int, taken []uint32) []uint32 { + value := uint32(currentIngesters + 1) + var tokens []uint32 + for i := 0; i < numTokens; i++ { + tokens = append(tokens, value) + value++ + } + return tokens + } + + if f.GenerateTokens != nil { + cfg.LifecyclerConfig.GenerateTokens = f.GenerateTokens + } + + cfg.ingesterClientFactory = f.getClient + + _, ing := newTestStore(f.t, cfg, defaultClientTestConfig(), defaultLimitsTestConfig()) + + f.ingestersMtx.Lock() + defer f.ingestersMtx.Unlock() + + f.ingesters[fmt.Sprintf("%s", cfg.LifecyclerConfig.ID)] = ing + f.ingesters[fmt.Sprintf("%s:0", cfg.LifecyclerConfig.ID)] = ing + + // NB there's some kind of race condition with the in-memory KV client when + // we don't give the ingester a little bit of time to initialize. a 100ms + // wait time seems effective. 
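+	// (Presumably the lifecycler needs that long to publish its initial state
+	// to the in-memory KV store before peers can resolve it.)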
+ time.Sleep(time.Millisecond * 100) + return ing +} + +type testIngesterClient struct { + i *Ingester + + client.IngesterClient +} + +func (c *testIngesterClient) Push(ctx context.Context, in *client.WriteRequest, opts ...grpc.CallOption) (*client.WriteResponse, error) { + return c.i.Push(ctx, in) +} + +func (c *testIngesterClient) Query(ctx context.Context, in *client.QueryRequest, opts ...grpc.CallOption) (*client.QueryResponse, error) { + return c.i.Query(ctx, in) +} + +type testIngesterQueryStreamClient struct { + ctx context.Context + ch chan *client.QueryStreamResponse + + grpc.ClientStream +} + +func (c *testIngesterQueryStreamClient) Context() context.Context { + return c.ctx +} + +func (c *testIngesterQueryStreamClient) Recv() (*client.QueryStreamResponse, error) { + resp, ok := <-c.ch + if !ok { + return nil, io.EOF + } + return resp, nil +} + +type testIngesterQueryStreamServer struct { + ctx context.Context + ch chan *client.QueryStreamResponse + + grpc.ServerStream +} + +func (s *testIngesterQueryStreamServer) Context() context.Context { + return s.ctx +} + +func (s *testIngesterQueryStreamServer) Send(resp *client.QueryStreamResponse) error { + s.ch <- resp + return nil +} + +func (c *testIngesterClient) QueryStream(ctx context.Context, in *client.QueryRequest, opts ...grpc.CallOption) (client.Ingester_QueryStreamClient, error) { + ch := make(chan *client.QueryStreamResponse) + + go func() { + srv := testIngesterQueryStreamServer{ctx: ctx, ch: ch} + c.i.QueryStream(in, &srv) + }() + + cli := testIngesterQueryStreamClient{ch: ch} + return &cli, nil +} + +func (c *testIngesterClient) LabelValues(ctx context.Context, in *client.LabelValuesRequest, opts ...grpc.CallOption) (*client.LabelValuesResponse, error) { + return c.i.LabelValues(ctx, in) +} + +func (c *testIngesterClient) LabelNames(ctx context.Context, in *client.LabelNamesRequest, opts ...grpc.CallOption) (*client.LabelNamesResponse, error) { + return c.i.LabelNames(ctx, in) +} + +func (c *testIngesterClient) UserStats(ctx context.Context, in *client.UserStatsRequest, opts ...grpc.CallOption) (*client.UserStatsResponse, error) { + return c.i.UserStats(ctx, in) +} + +func (c *testIngesterClient) AllUserStats(ctx context.Context, in *client.UserStatsRequest, opts ...grpc.CallOption) (*client.UsersStatsResponse, error) { + return c.i.AllUserStats(ctx, in) +} + +func (c *testIngesterClient) MetricsForLabelMatchers(ctx context.Context, in *client.MetricsForLabelMatchersRequest, opts ...grpc.CallOption) (*client.MetricsForLabelMatchersResponse, error) { + return c.i.MetricsForLabelMatchers(ctx, in) +} + +func (c *testIngesterClient) TransferChunks(ctx context.Context, opts ...grpc.CallOption) (client.Ingester_TransferChunksClient, error) { + ch := make(chan *client.TimeSeriesChunk) + resp := make(chan *client.TransferChunksResponse) + + srv := testTransferChunksServer{ + testTimeSeriesServer: testTimeSeriesServer{ctx: ctx, ch: ch}, + resp: resp, + } + + cli := testTransferChunksClient{ + testTimeSeriesClient: testTimeSeriesClient{ctx: ctx, ch: ch}, + resp: resp, + } + + go func() { + c.i.TransferChunks(&srv) + }() + + return &cli, nil +} + +type testSendChunksClient struct { + testTimeSeriesClient + + resp chan *client.TransferChunksResponse +} + +func (s *testSendChunksClient) CloseAndRecv() (*client.TransferChunksResponse, error) { + close(s.ch) + resp := <-s.resp + close(s.resp) + return resp, nil +} + +type testSendChunksServer struct { + testTimeSeriesServer + + resp chan *client.TransferChunksResponse +} + +func (s 
*testSendChunksServer) SendAndClose(resp *client.TransferChunksResponse) error { + s.resp <- resp + return nil +} + +func (c *testIngesterClient) TransferChunksSubset(ctx context.Context, opts ...grpc.CallOption) (client.Ingester_TransferChunksSubsetClient, error) { + ch := make(chan *client.TimeSeriesChunk) + resp := make(chan *client.TransferChunksResponse) + + srv := testSendChunksServer{ + testTimeSeriesServer: testTimeSeriesServer{ctx: ctx, ch: ch}, + resp: resp, + } + + cli := testSendChunksClient{ + testTimeSeriesClient: testTimeSeriesClient{ctx: ctx, ch: ch}, + resp: resp, + } + + go func() { + c.i.TransferChunksSubset(&srv) + }() + + return &cli, nil +} + +func (c *testIngesterClient) GetChunksSubset(ctx context.Context, in *client.GetChunksRequest, opts ...grpc.CallOption) (client.Ingester_GetChunksSubsetClient, error) { + ch := make(chan *client.TimeSeriesChunk) + + srv := testTimeSeriesServer{ctx: ctx, ch: ch} + cli := testTimeSeriesClient{ctx: ctx, ch: ch} + + go func() { + c.i.GetChunksSubset(in, &srv) + close(ch) + }() + + return &cli, nil +} diff --git a/pkg/ingester/user_state.go b/pkg/ingester/user_state.go index bbe3b0843ab..98d9689aad8 100644 --- a/pkg/ingester/user_state.go +++ b/pkg/ingester/user_state.go @@ -155,13 +155,20 @@ func (us *userStates) getViaContext(ctx context.Context) (*userState, bool, erro return state, ok, nil } -func (us *userStates) getOrCreateSeries(ctx context.Context, userID string, labels []client.LabelAdapter, record *Record) (*userState, model.Fingerprint, *memorySeries, error) { +type seriesState int + +const ( + seriesCreated seriesState = iota + seriesExists +) + +func (us *userStates) getOrCreateSeries(ctx context.Context, userID string, labels []client.LabelAdapter, record *Record, token uint32) (*userState, model.Fingerprint, *memorySeries, seriesState, error) { state := us.getOrCreate(userID) - fp, series, err := state.getSeries(labels, record) - return state, fp, series, err + fp, series, sstate, err := state.getSeries(labels, record, token) + return state, fp, series, sstate, err } -func (u *userState) getSeries(metric labelPairs, record *Record) (model.Fingerprint, *memorySeries, error) { +func (u *userState) getSeries(metric labelPairs, record *Record, token uint32) (model.Fingerprint, *memorySeries, seriesState, error) { rawFP := client.FastFingerprint(metric) u.fpLocker.Lock(rawFP) fp := u.mapper.mapFP(rawFP, metric) @@ -172,19 +179,24 @@ func (u *userState) getSeries(metric labelPairs, record *Record) (model.Fingerpr series, ok := u.fpToSeries.get(fp) if ok { - return fp, series, nil + if token != series.token { + level.Warn(util.Logger).Log("msg", fmt.Sprintf("new token value for metric %s", metric), "previous", series.token, "new", token) + series.token = token + } + + return fp, series, seriesExists, nil } - series, err := u.createSeriesWithFingerprint(fp, metric, record, false) + series, err := u.createSeriesWithFingerprint(fp, token, metric, record, false) if err != nil { u.fpLocker.Unlock(fp) - return 0, nil, err + return 0, nil, seriesCreated, err } - return fp, series, nil + return fp, series, seriesCreated, nil } -func (u *userState) createSeriesWithFingerprint(fp model.Fingerprint, metric labelPairs, record *Record, recovery bool) (*memorySeries, error) { +func (u *userState) createSeriesWithFingerprint(fp model.Fingerprint, token uint32, metric labelPairs, record *Record, recovery bool) (*memorySeries, error) { // There's theoretically a relatively harmless race here if multiple // goroutines get the length of the 
series map at the same time, then
	// all proceed to add a new series. This is likely not worth addressing,
@@ -220,7 +232,7 @@ func (u *userState) createSeriesWithFingerprint(fp model.Fingerprint, metric lab
 	}
 
 	labels := u.index.Add(metric, fp)
-	series := newMemorySeries(labels)
+	series := newMemorySeries(labels, token)
 	u.fpToSeries.put(fp, series)
 
 	return series, nil
diff --git a/pkg/ingester/user_state_test.go b/pkg/ingester/user_state_test.go
index 7899c78c159..e76faf99476 100644
--- a/pkg/ingester/user_state_test.go
+++ b/pkg/ingester/user_state_test.go
@@ -54,6 +54,9 @@ func TestForSeriesMatchingBatching(t *testing.T) {
 			err = instance.forSeriesMatching(ctx, tc.matchers,
 				func(_ context.Context, _ model.Fingerprint, s *memorySeries) error {
 					batch++
+					if s.token != 1234 {
+						return fmt.Errorf("memorySeries missing token used for push")
+					}
 					return nil
 				},
 				func(context.Context) error {
diff --git a/pkg/ingester/wal.go b/pkg/ingester/wal.go
index 22e3a67c74f..bd629e6de5f 100644
--- a/pkg/ingester/wal.go
+++ b/pkg/ingester/wal.go
@@ -541,7 +541,7 @@ func processCheckpointRecord(userStates *userStates, seriesPool *sync.Pool, stat
 				Value: string(l.Value),
 			})
 		}
-		series, err := state.createSeriesWithFingerprint(model.Fingerprint(s.Fingerprint), la, nil, true)
+		series, err := state.createSeriesWithFingerprint(model.Fingerprint(s.Fingerprint), 0, la, nil, true)
 		if err != nil {
 			errChan <- err
 			return
@@ -630,7 +630,7 @@ Loop:
 			if ok {
 				continue
 			}
-			_, err := state.createSeriesWithFingerprint(model.Fingerprint(labels.Fingerprint), labels.Labels, nil, true)
+			_, err := state.createSeriesWithFingerprint(model.Fingerprint(labels.Fingerprint), 0, labels.Labels, nil, true)
 			if err != nil {
 				// We don't return here in order to close/drain all the channels and
 				// make sure all goroutines exit.
diff --git a/pkg/ring/incremental_transfer.go b/pkg/ring/incremental_transfer.go
new file mode 100644
index 00000000000..b32935e3d2a
--- /dev/null
+++ b/pkg/ring/incremental_transfer.go
@@ -0,0 +1,271 @@
+package ring
+
+import (
+	"context"
+	"fmt"
+	"math"
+	"sync"
+	"time"
+
+	"github.com/cortexproject/cortex/pkg/util"
+	"github.com/go-kit/kit/log/level"
+	"go.uber.org/atomic"
+)
+
+// transferWorkload maps an ingester address to the set of token ranges
+// that should be sent to or requested from that ingester.
+type transferWorkload map[string][]TokenRange
+
+// Do invokes f for each ingester in the transfer workload in parallel and
+// waits for every invocation to finish.
+func (wl transferWorkload) Do(f func(addr string, ranges []TokenRange)) {
+	var wg sync.WaitGroup
+	for addr, ranges := range wl {
+		wg.Add(1)
+		go func(addr string, ranges []TokenRange) {
+			defer wg.Done()
+			f(addr, ranges)
+		}(addr, ranges)
+	}
+	wg.Wait()
+}
+
+// findTransferWorkload finds all token ranges that should be sent to or
+// requested from other ingesters when the given token joins or leaves,
+// keyed by target ingester address.
+func (i *Lifecycler) findTransferWorkload(d *Desc, n TokenNavigator, token uint32, healthy HealthCheckFunc) (transferWorkload, bool) {
+	ret := make(transferWorkload)
+	rf := i.cfg.RingConfig.ReplicationFactor
+
+	var lastErr error
+
+	for replica := 0; replica < rf; replica++ {
+		// Find the ranges which our token is holding data for as one of the replicas.
+		ends, err := n.Predecessors(token, replica, healthy)
+		if err != nil {
+			lastErr = err
+			continue
+		}
+
+		for _, end := range ends {
+			// Find the starting value for the range.
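+			// Neighbor with offset -1 walks one healthy token counter-clockwise
+			// from end.Token (see token_navigator.go), giving the start of the
+			// range whose data ends at end.Token.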
+ start, err := n.Neighbor(end.Token, -1, false, healthy) + if err != nil { + lastErr = err + continue + } + + // The target token's ingester is the ingester that either has data we + // want or should have data we'll send. When joining, it's the old + // end of the replica set. When leaving, it's the new end of the replica set. + target, err := n.Neighbor(start.Token, rf, false, healthy) + if err != nil { + lastErr = err + continue + } + + // If our local ingester is already in the replica set, we don't need to + // transfer. Our current token will never trigger InRange to return + // true since it's only ever unhealthy at this point. + checkRange := RangeOptions{ + Range: TokenRange{From: end.Token, To: target.Token}, + + ID: i.ID, + IncludeFrom: true, + IncludeTo: true, + } + if ok, _ := n.InRange(checkRange, healthy); ok { + continue + } + + addr := d.Ingesters[target.Ingester].Addr + ret[addr] = append(ret[addr], TokenRange{From: start.Token, To: end.Token}) + } + } + + if lastErr != nil { + level.Error(util.Logger).Log("msg", fmt.Sprintf("failed to find complete transfer set for %d", token), "err", lastErr) + } + return ret, lastErr == nil +} + +// joinIncrementalTransfer will attempt to incrementally obtain chunks from +// neighboring lifecyclers that contain data for token ranges they will +// no longer receive writes for. +func (i *Lifecycler) joinIncrementalTransfer(ctx context.Context) error { + // Make sure that we set all tokens to ACTIVE, even when we fail. + defer func() { + i.setTokens(i.getTransitioningTokens()) + i.changeState(ctx, ACTIVE) + }() + + r := i.getLastRing() + n := r.GetNavigator() + + replicationFactor := i.cfg.RingConfig.ReplicationFactor + + // If the replication factor is greater than the number of ingesters, findTransferWorkload + // will fail to detect target ingesters properly. Instead, just request a full copy from + // another existing ingester. + if active := r.FindIngestersByState(ACTIVE); len(active) < replicationFactor { + if len(active) == 0 { + return fmt.Errorf("no ingesters to request data from") + } + + fullRange := []TokenRange{{0, math.MaxUint32}} + err := i.incTransferer.RequestChunkRanges(ctx, fullRange, active[0].Addr, false) + if err != nil { + level.Error(util.Logger).Log("msg", fmt.Sprintf("failed to request copy of data from %s", active[0].Addr)) + } + return nil + } + + var completesMtx sync.Mutex + pendingCompletes := make(map[string][]TokenRange) + + for _, token := range i.transitioningTokens { + healthy := func(t TokenDesc) bool { + // When we're joining the ring, any of our tokens < our current token + // are "healthy" (i.e., they have been added). + if t.Ingester == i.ID { + return t.Token < token + } + + ing := r.Ingesters[t.Ingester] + return ing.IsHealthy(Read, i.cfg.RingConfig.HeartbeatTimeout) + } + + // Add the new token into the copy of the ring. + n.SetIngesterTokens(i.ID, append(i.getTokens(), token)) + + workload, _ := i.findTransferWorkload(r, n, token, healthy) + + workload.Do(func(addr string, ranges []TokenRange) { + err := i.incTransferer.RequestChunkRanges(ctx, ranges, addr, true) + if err != nil { + level.Error(util.Logger).Log("msg", "failed to request chunks", "target", addr, "ranges", PrintableRanges(ranges), "err", err) + } + + completesMtx.Lock() + defer completesMtx.Unlock() + pendingCompletes[addr] = append(pendingCompletes[addr], ranges...) + }) + + // Add the token into the ring now. 
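+		// From here the token goes live: once it is written to consul below,
+		// distributors begin routing writes for its range to this ingester,
+		// which is why the chunk ranges were requested first.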
+ i.addToken(token) + n.SetIngesterTokens(i.ID, i.getTokens()) + + if err := i.updateConsul(ctx); err != nil { + level.Error(util.Logger).Log("msg", + fmt.Sprintf("failed to update consul when changing token %d to ACTIVE", token)) + } else { + // Update the latest copy of the ring - if the join is slow, we need to + // keep our copy of the ring updates so ingesters aren't considered + // unhealthy. + r = i.getLastRing() + } + } + + go func() { + // Delay unblocking ranges to allow the distributors enough time to notice the + // new token. If ranges are unblocked too soon, spillover happens. + if i.cfg.TransferFinishDelay > time.Duration(0) { + <-time.After(i.cfg.TransferFinishDelay) + } + + completesMtx.Lock() + defer completesMtx.Unlock() + + for addr, rgs := range pendingCompletes { + i.incTransferer.RequestComplete(context.Background(), rgs, addr) + } + }() + + return nil +} + +// leaveIncrementalTransfer will attempt to incrementally send chunks to +// neighboring lifecyclers that should contain data for token ranges the +// leaving lifecycler will no longer receive writes for. +func (i *Lifecycler) leaveIncrementalTransfer(ctx context.Context) error { + // Make sure all tokens are set to leaving, even when we + // fail. + defer func() { + i.checkRemainingTokens() + + i.setTokens(nil) + i.setState(LEAVING) + i.updateConsul(ctx) + }() + + r := i.getLastRing() + navigator := r.GetNavigator() + + replicationFactor := i.cfg.RingConfig.ReplicationFactor + if active := r.FindIngestersByState(ACTIVE); len(active) <= replicationFactor { + return fmt.Errorf("not transferring out; number of ingesters less than or equal to replication factor") + } + + success := atomic.NewBool(true) + + i.setTransitioningTokens(i.getTokens()) + tokens := i.getTransitioningTokens() + + for _, token := range tokens { + healthy := func(t TokenDesc) bool { + // When we're leaving the ring, any of our tokens > our current token + // are "healthy" (i.e., they are still in a "ACTIVE" state). + if t.Ingester == i.ID { + return t.Token > token + } + + ing := r.Ingesters[t.Ingester] + return ing.IsHealthy(Write, i.cfg.RingConfig.HeartbeatTimeout) + } + + workload, ok := i.findTransferWorkload(r, navigator, token, healthy) + if !ok { + success.Store(false) + } + + workload.Do(func(addr string, ranges []TokenRange) { + err := i.incTransferer.SendChunkRanges(ctx, ranges, addr) + if err != nil { + level.Error(util.Logger).Log("msg", "failed to send chunks", "target_addr", addr, "ranges", PrintableRanges(ranges), "err", err) + success.Store(false) + } + }) + + // Remove the token from the ring. + i.removeToken(token) + if err := i.updateConsul(ctx); err != nil { + level.Error(util.Logger).Log("msg", + fmt.Sprintf("failed to update consul when removing token %d", token)) + } else { + // Update the latest copy of the ring - if the leave is slow, we need to + // keep our copy of the ring updates so ingesters aren't considered + // unhealthy. 
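+			// getLastRing reflects the Desc that updateConsul just wrote, so
+			// the next iteration works against fresh ring state.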
+ r = i.getLastRing() + } + } + + if !success.Load() { + return fmt.Errorf("incremental transfer out incomplete") + } + return nil +} + +func (i *Lifecycler) checkRemainingTokens() { + remainingTokens := i.incTransferer.MemoryStreamTokens() + if len(remainingTokens) == 0 { + return + } + + level.Warn(util.Logger).Log("msg", "not all tokens transferred out", "streams_remaining", len(remainingTokens)) + + printTokens := remainingTokens + if len(printTokens) > 20 { + printTokens = printTokens[:20] + } + + level.Debug(util.Logger).Log("msg", "non-transferred tokens", "tokens", printTokens) +} diff --git a/pkg/ring/lifecycler.go b/pkg/ring/lifecycler.go index 45b67b7d475..5c0ed338053 100644 --- a/pkg/ring/lifecycler.go +++ b/pkg/ring/lifecycler.go @@ -2,8 +2,10 @@ package ring import ( "context" + "errors" "flag" "fmt" + "math/rand" "os" "sort" "sync" @@ -38,20 +40,29 @@ var ( }, []string{"op", "status", "name"}) ) +// TokenGeneratorFunc is any function that will generate a series +// of tokens to apply to a new lifecycler. +type TokenGeneratorFunc func(numTokens int, taken []uint32) []uint32 + // LifecyclerConfig is the config to build a Lifecycler. type LifecyclerConfig struct { RingConfig Config `yaml:"ring,omitempty"` // Config for the ingester lifecycle control - ListenPort *int `yaml:"-"` - NumTokens int `yaml:"num_tokens,omitempty"` - HeartbeatPeriod time.Duration `yaml:"heartbeat_period,omitempty"` - ObservePeriod time.Duration `yaml:"observe_period,omitempty"` - JoinAfter time.Duration `yaml:"join_after,omitempty"` - MinReadyDuration time.Duration `yaml:"min_ready_duration,omitempty"` - InfNames []string `yaml:"interface_names"` - FinalSleep time.Duration `yaml:"final_sleep"` - TokensFilePath string `yaml:"tokens_file_path,omitempty"` + ListenPort *int `yaml:"-"` + NumTokens int `yaml:"num_tokens,omitempty"` + HeartbeatPeriod time.Duration `yaml:"heartbeat_period,omitempty"` + ObservePeriod time.Duration `yaml:"observe_period,omitempty"` + JoinAfter time.Duration `yaml:"join_after,omitempty"` + MinReadyDuration time.Duration `yaml:"min_ready_duration,omitempty"` + InfNames []string `yaml:"interface_names"` + FinalSleep time.Duration `yaml:"final_sleep"` + TokensFilePath string `yaml:"tokens_file_path,omitempty"` + JoinIncrementalTransfer bool `yaml:"join_incremental_transfer,omitempty"` + LeaveIncrementalTransfer bool `yaml:"leave_incremental_transfer,omitempty"` + MinIncrementalJoinJitter time.Duration `yaml:"min_incremental_join_jitter,omitempty"` + MaxIncrementalJoinJitter time.Duration `yaml:"max_incremental_join_jitter,omitempty"` + TransferFinishDelay time.Duration `yaml:"transfer_finish_delay,omitempty"` // For testing, you can override the address and ID of this ingester Addr string `yaml:"address" doc:"hidden"` @@ -62,6 +73,10 @@ type LifecyclerConfig struct { // graveyard for unused flags. UnusedFlag bool `yaml:"claim_on_rollout,omitempty"` // DEPRECATED - left for backwards-compatibility UnusedFlag2 bool `yaml:"normalise_tokens,omitempty"` // DEPRECATED - left for backwards-compatibility + + // Function used to generate tokens, can be mocked from + // tests + GenerateTokens TokenGeneratorFunc `yaml:"-"` } // RegisterFlags adds the flags required to config this to the given FlagSet @@ -86,6 +101,11 @@ func (cfg *LifecyclerConfig) RegisterFlagsWithPrefix(prefix string, f *flag.Flag f.DurationVar(&cfg.MinReadyDuration, prefix+"min-ready-duration", 1*time.Minute, "Minimum duration to wait before becoming ready. 
This is to work around race conditions with ingesters exiting and updating the ring.")
 	flagext.DeprecatedFlag(f, prefix+"claim-on-rollout", "DEPRECATED. This feature is no longer optional.")
 	flagext.DeprecatedFlag(f, prefix+"normalise-tokens", "DEPRECATED. This feature is no longer optional.")
+	f.BoolVar(&cfg.JoinIncrementalTransfer, prefix+"join-incremental-transfer", false, "Request chunks from neighboring ingesters on join. Disables the handoff process when set and ignores the -ingester.join-after flag.")
+	f.BoolVar(&cfg.LeaveIncrementalTransfer, prefix+"leave-incremental-transfer", false, "Send chunks to neighboring ingesters on leave. Takes precedence over chunk flushing when set and disables handoff.")
+	f.DurationVar(&cfg.MinIncrementalJoinJitter, prefix+"min-incremental-join-jitter", 0*time.Second, "Minimum amount of time to wait before incrementally joining the ring. Allows time to receive ring updates so two ingesters do not join at once.")
+	f.DurationVar(&cfg.MaxIncrementalJoinJitter, prefix+"max-incremental-join-jitter", 2*time.Second, "Maximum amount of time to wait before incrementally joining the ring. Allows time to receive ring updates so two ingesters do not join at once.")
+	f.DurationVar(&cfg.TransferFinishDelay, prefix+"transfer-finish-delay", 5*time.Second, "How long after the incremental join process to notify the target ingesters to clean up any blocked token ranges.")
 	f.DurationVar(&cfg.FinalSleep, prefix+"final-sleep", 30*time.Second, "Duration to sleep for before exiting, to ensure metrics are scraped.")
 	f.StringVar(&cfg.TokensFilePath, prefix+"tokens-file-path", "", "File path where tokens are stored. If empty, tokens are not stored at shutdown and restored at startup.")
@@ -102,10 +122,33 @@ func (cfg *LifecyclerConfig) RegisterFlagsWithPrefix(prefix string, f *flag.Flag
 	f.StringVar(&cfg.ID, prefix+"lifecycler.ID", hostname, "ID to register into consul.")
 }
 
+// IncrementalTransferer controls partial transfers of chunks as the set of
+// tokens in a ring grows or shrinks.
+type IncrementalTransferer interface {
+	// SendChunkRanges should connect to the target addr and send all chunks for
+	// streams whose fingerprint falls within the provided token ranges.
+	SendChunkRanges(ctx context.Context, ranges []TokenRange, targetAddr string) error
+
+	// RequestChunkRanges should connect to the target addr and request all chunks
+	// for streams whose fingerprint falls within the provided token ranges.
+	//
+	// If move is true, transferred data should be removed from the target's memory.
+	RequestChunkRanges(ctx context.Context, ranges []TokenRange, targetAddr string, move bool) error
+
+	// RequestComplete indicates that a previous request for data has been fully
+	// processed. The targetAddr ingester should use the opportunity to do cleanup.
+	RequestComplete(ctx context.Context, ranges []TokenRange, targetAddr string)
+
+	// MemoryStreamTokens should return a list of tokens corresponding to in-memory
+	// streams for the ingester. Used for reporting purposes.
+	MemoryStreamTokens() []uint32
+}
+
 // Lifecycler is responsible for managing the lifecycle of entries in the ring.
 type Lifecycler struct {
 	cfg             LifecyclerConfig
 	flushTransferer FlushTransferer
+	incTransferer   IncrementalTransferer
 	KVStore         kv.Client
 
 	// Controls the lifecycle of the ingester
@@ -124,22 +167,26 @@ type Lifecycler struct {
 	// We need to remember the ingester state just in case consul goes away and comes
 	// back empty. And it changes during lifecycle of ingester.
- stateMtx sync.RWMutex - state IngesterState - tokens Tokens + stateMtx sync.RWMutex + state IngesterState + transitioningTokens Tokens + tokens Tokens // Controls the ready-reporting readyLock sync.Mutex startTime time.Time ready bool - // Keeps stats updated at every heartbeat period + // Keeps stats and ring updated at every heartbeat period countersLock sync.RWMutex healthyInstancesCount int + lastRing *Desc + + generateTokens TokenGeneratorFunc } // NewLifecycler makes and starts a new Lifecycler. -func NewLifecycler(cfg LifecyclerConfig, flushTransferer FlushTransferer, ringName, ringKey string, flushOnShutdown bool) (*Lifecycler, error) { +func NewLifecycler(cfg LifecyclerConfig, flushTransferer FlushTransferer, incTransferer IncrementalTransferer, ringName, ringKey string, flushOnShutdown bool) (*Lifecycler, error) { addr := cfg.Addr if addr == "" { @@ -168,6 +215,7 @@ func NewLifecycler(cfg LifecyclerConfig, flushTransferer FlushTransferer, ringNa l := &Lifecycler{ cfg: cfg, flushTransferer: flushTransferer, + incTransferer: incTransferer, KVStore: store, Addr: fmt.Sprintf("%s:%d", addr, port), @@ -179,8 +227,13 @@ func NewLifecycler(cfg LifecyclerConfig, flushTransferer FlushTransferer, ringNa quit: make(chan struct{}), actorChan: make(chan func()), - state: PENDING, - startTime: time.Now(), + state: PENDING, + startTime: time.Now(), + generateTokens: cfg.GenerateTokens, + } + + if l.generateTokens == nil { + l.generateTokens = GenerateTokens } tokensToOwn.WithLabelValues(l.RingName).Set(float64(cfg.NumTokens)) @@ -256,10 +309,55 @@ func (i *Lifecycler) ChangeState(ctx context.Context, state IngesterState) error return <-err } +func (i *Lifecycler) getTransitioningTokens() Tokens { + i.stateMtx.RLock() + defer i.stateMtx.RUnlock() + ret := make([]uint32, len(i.transitioningTokens)) + copy(ret, i.transitioningTokens) + return ret +} + func (i *Lifecycler) getTokens() Tokens { i.stateMtx.RLock() defer i.stateMtx.RUnlock() - return i.tokens + ret := make([]uint32, len(i.tokens)) + copy(ret, i.tokens) + return ret +} + +// addToken adds a new token into the ring. The newest token must be larger +// than the last token in the set to maintain internal sorted order. +func (i *Lifecycler) addToken(token uint32) { + i.stateMtx.Lock() + defer i.stateMtx.Unlock() + + if len(i.tokens) > 0 && token <= i.tokens[len(i.tokens)-1] { + panic("addToken invoked without maintaining token order") + } + + tokensOwned.WithLabelValues(i.RingName).Inc() + i.tokens = append(i.tokens, token) +} + +func (i *Lifecycler) removeToken(token uint32) { + i.stateMtx.Lock() + defer i.stateMtx.Unlock() + + for idx, tok := range i.tokens { + if tok == token { + tokensOwned.WithLabelValues(i.RingName).Dec() + i.tokens = append(i.tokens[:idx], i.tokens[idx+1:]...) 
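+			// Deleting in place keeps the slice sorted, preserving the
+			// ordering invariant that addToken asserts.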
+ return + } + } +} + +func (i *Lifecycler) setTransitioningTokens(tokens Tokens) { + i.stateMtx.Lock() + defer i.stateMtx.Unlock() + + i.transitioningTokens = make([]uint32, len(tokens)) + copy(i.transitioningTokens, tokens) } func (i *Lifecycler) setTokens(tokens Tokens) { @@ -268,7 +366,9 @@ func (i *Lifecycler) setTokens(tokens Tokens) { i.stateMtx.Lock() defer i.stateMtx.Unlock() - i.tokens = tokens + i.tokens = make([]uint32, len(tokens)) + copy(i.tokens, tokens) + if i.cfg.TokensFilePath != "" { if err := i.tokens.StoreToFile(i.cfg.TokensFilePath); err != nil { level.Error(util.Logger).Log("msg", "error storing tokens to disk", "path", i.cfg.TokensFilePath, "err", err) @@ -299,6 +399,7 @@ func (i *Lifecycler) ClaimTokensFor(ctx context.Context, ingesterID string) erro ing := ringDesc.Ingesters[i.ID] ing.Timestamp = time.Now().Unix() ringDesc.Ingesters[i.ID] = ing + return ringDesc, true, nil } @@ -328,7 +429,6 @@ func (i *Lifecycler) HealthyInstancesCount() int { // - remove config from Consul. // - block until we've successfully shutdown. func (i *Lifecycler) Shutdown() { - // This will prevent us accepting any more samples i.flushTransferer.StopIncomingRequests() // closing i.quit triggers loop() to exit, which in turn will trigger @@ -351,12 +451,35 @@ func (i *Lifecycler) loop() { } // We do various period tasks - autoJoinAfter := time.After(i.cfg.JoinAfter) + autoJoinTimer := time.NewTimer(i.cfg.JoinAfter) + autoJoinAfter := autoJoinTimer.C var observeChan <-chan time.Time = nil heartbeatTicker := time.NewTicker(i.cfg.HeartbeatPeriod) defer heartbeatTicker.Stop() + if i.cfg.JoinIncrementalTransfer { + if !autoJoinTimer.Stop() { + // Drain the value if one was available. + <-autoJoinTimer.C + } + + level.Info(util.Logger).Log("msg", "joining cluster") + if err := i.waitCleanRing(context.Background()); err != nil { + // If this fails, we'll get spill over of data, but we can safely continue here. + level.Error(util.Logger).Log("msg", "failed to wait for a clean ring to join", "err", err) + } + + if err := i.autoJoin(context.Background(), JOINING); err != nil { + level.Error(util.Logger).Log("msg", "failed to pick tokens in consul", "err", err) + os.Exit(1) + } + + if err := i.joinIncrementalTransfer(context.Background()); err != nil { + level.Error(util.Logger).Log("msg", "failed to obtain chunks on join", "err", err) + } + } + loop: for { select { @@ -406,7 +529,6 @@ loop: // keep observing observeChan = time.After(i.cfg.ObservePeriod) } - case <-heartbeatTicker.C: consulHeartbeats.WithLabelValues(i.RingName).Inc() if err := i.updateConsul(context.Background()); err != nil { @@ -455,6 +577,72 @@ heartbeatLoop: } } +// waitCleanRing incrementally reads from the KV store and waits +// until there are no JOINING or LEAVING ingesters. +func (i *Lifecycler) waitCleanRing(ctx context.Context) error { + max := i.cfg.MaxIncrementalJoinJitter.Milliseconds() + min := i.cfg.MinIncrementalJoinJitter.Milliseconds() + + // Sleep for a random period between [min, max). Used to stagger multiple nodes + // all waiting for the ring to be clean. 
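+	// NB: rand.Int63n panics when its argument is zero, so the window is
+	// only sampled when max > min.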
+	sleepMs := min
+	if max > min {
+		r := rand.New(rand.NewSource(time.Now().UnixNano()))
+		sleepMs += r.Int63n(max - min)
+	}
+	time.Sleep(time.Duration(sleepMs) * time.Millisecond)
+
+	backoff := util.NewBackoff(ctx, util.BackoffConfig{
+		MinBackoff: 100 * time.Millisecond,
+		MaxBackoff: 5 * time.Second,
+	})
+
+	for backoff.Ongoing() {
+		select {
+		case <-i.quit:
+			return errors.New("shutting down")
+		default:
+		}
+
+		ok, err := i.checkCleanRing(ctx)
+		if err != nil {
+			return err
+		} else if ok {
+			return nil
+		}
+
+		backoff.Wait()
+	}
+
+	return backoff.Err()
+}
+
+// checkCleanRing returns true when the ring has no JOINING
+// or LEAVING ingesters. "clean" implies that it is safe for a
+// new node to join.
+func (i *Lifecycler) checkCleanRing(ctx context.Context) (bool, error) {
+	d, err := i.KVStore.Get(ctx, i.RingKey)
+	if err != nil {
+		return false, err
+	} else if d == nil {
+		return false, nil
+	}
+
+	desc, ok := d.(*Desc)
+	if !ok {
+		return false, fmt.Errorf("could not convert ring to Desc")
+	}
+
+	unclean := 0
+	for k, ing := range desc.Ingesters {
+		if k == i.ID {
+			continue
+		}
+		if ing.State == JOINING || ing.State == LEAVING {
+			unclean++
+		}
+	}
+
+	return unclean == 0, nil
+}
+
 // initRing is the first thing we do when we start. It:
 // - add an ingester entry to the ring
 // - copies out our state and tokens if they exist
@@ -483,20 +671,23 @@ func (i *Lifecycler) initRing(ctx context.Context) error {
 		ingesterDesc, ok := ringDesc.Ingesters[i.ID]
 		if !ok {
+			state := i.GetState()
+			incremental := i.incrementalFromState(state)
+
 			// We use the tokens from the file only if it does not exist in the ring yet.
 			if len(tokensFromFile) > 0 {
 				level.Info(util.Logger).Log("msg", "adding tokens from file", "num_tokens", len(tokensFromFile))
 				if len(tokensFromFile) >= i.cfg.NumTokens {
 					i.setState(ACTIVE)
 				}
-				ringDesc.AddIngester(i.ID, i.Addr, tokensFromFile, i.GetState())
+				ringDesc.AddIngester(i.ID, i.Addr, tokensFromFile, i.GetState(), incremental)
 				i.setTokens(tokensFromFile)
 				return ringDesc, true, nil
 			}
 
 			// Either we are a new ingester, or consul must have restarted
 			level.Info(util.Logger).Log("msg", "instance not found in ring, adding with no tokens", "ring", i.RingName)
-			ringDesc.AddIngester(i.ID, i.Addr, []uint32{}, i.GetState())
+			ringDesc.AddIngester(i.ID, i.Addr, nil, state, incremental)
 			return ringDesc, true, nil
 		}
 
@@ -516,9 +707,10 @@ func (i *Lifecycler) initRing(ctx context.Context) error {
 		return nil, true, nil
 	})
 
-	// Update counters
+	// Update counters and ring.
 	if err == nil {
 		i.updateCounters(ringDesc)
+		i.updateLastRing(ringDesc)
 	}
 
 	return err
@@ -546,14 +738,16 @@ func (i *Lifecycler) verifyTokens(ctx context.Context) bool {
 		needTokens := i.cfg.NumTokens - len(ringTokens)
 		level.Info(util.Logger).Log("msg", "generating new tokens", "count", needTokens, "ring", i.RingName)
-		newTokens := GenerateTokens(needTokens, takenTokens)
+		newTokens := i.generateTokens(needTokens, takenTokens)
 
-		ringTokens = append(ringTokens, newTokens...)
- sort.Sort(ringTokens) + addTokens := Tokens(append(ringTokens, newTokens...)) + sort.Sort(addTokens) - ringDesc.AddIngester(i.ID, i.Addr, ringTokens, i.GetState()) + state := i.GetState() + incremental := i.incrementalFromState(state) + ringDesc.AddIngester(i.ID, i.Addr, addTokens, state, incremental) - i.setTokens(ringTokens) + i.setTokens(addTokens) return ringDesc, true, nil } @@ -606,21 +800,36 @@ func (i *Lifecycler) autoJoin(ctx context.Context, targetState IngesterState) er level.Error(util.Logger).Log("msg", "tokens already exist for this instance - wasn't expecting any!", "num_tokens", len(myTokens), "ring", i.RingName) } - newTokens := GenerateTokens(i.cfg.NumTokens-len(myTokens), takenTokens) + newTokens := i.generateTokens(i.cfg.NumTokens-len(myTokens), takenTokens) i.setState(targetState) - myTokens = append(myTokens, newTokens...) - sort.Sort(myTokens) - i.setTokens(myTokens) + // When we're incrementally joining the ring, tokens are only inserted + // incrementally during the join process. + insertTokens := newTokens + if i.cfg.JoinIncrementalTransfer { + insertTokens = nil + } + + state := i.GetState() + incremental := i.incrementalFromState(state) + ringDesc.AddIngester(i.ID, i.Addr, insertTokens, state, incremental) + + tokens := append(myTokens, newTokens...) + sort.Sort(Tokens(tokens)) - ringDesc.AddIngester(i.ID, i.Addr, i.getTokens(), i.GetState()) + if i.cfg.JoinIncrementalTransfer { + i.setTransitioningTokens(tokens) + } else { + i.setTokens(tokens) + } return ringDesc, true, nil }) - // Update counters + // Update counters and ring. if err == nil { i.updateCounters(ringDesc) + i.updateLastRing(ringDesc) } return err @@ -642,25 +851,39 @@ func (i *Lifecycler) updateConsul(ctx context.Context) error { if !ok { // consul must have restarted level.Info(util.Logger).Log("msg", "found empty ring, inserting tokens", "ring", i.RingName) - ringDesc.AddIngester(i.ID, i.Addr, i.getTokens(), i.GetState()) + state := i.GetState() + incremental := i.incrementalFromState(state) + ringDesc.AddIngester(i.ID, i.Addr, i.getTokens(), state, incremental) } else { ingesterDesc.Timestamp = time.Now().Unix() ingesterDesc.State = i.GetState() ingesterDesc.Addr = i.Addr + ingesterDesc.Incremental = i.incrementalFromState(ingesterDesc.State) ringDesc.Ingesters[i.ID] = ingesterDesc } + // Re-sync token states for the current lifecycler if they've changed. + ringDesc.SetIngesterTokens(i.ID, i.tokens) return ringDesc, true, nil }) - // Update counters + // Update counters and ring. if err == nil { i.updateCounters(ringDesc) + i.updateLastRing(ringDesc) } return err } +// incrementalFromState determines if the ingester should be tagged as being in an +// "incremental state", flagging to the distributors to treat JOINING/LEAVING as +// healthy. +func (i *Lifecycler) incrementalFromState(state IngesterState) bool { + return (state == JOINING && i.cfg.JoinIncrementalTransfer) || + (state == LEAVING && i.cfg.LeaveIncrementalTransfer) +} + // changeState updates consul with state transitions for us. NB this must be // called from loop()! Use ChangeState for calls from outside of loop(). func (i *Lifecycler) changeState(ctx context.Context, state IngesterState) error { @@ -679,6 +902,14 @@ func (i *Lifecycler) changeState(ctx context.Context, state IngesterState) error return i.updateConsul(ctx) } +// getLastRing returns a copy of the last ring saved. 
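+// The returned Desc is the shared snapshot stored by updateLastRing;
+// callers must treat it as read-only.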
+func (i *Lifecycler) getLastRing() *Desc { + i.countersLock.Lock() + defer i.countersLock.Unlock() + + return i.lastRing +} + func (i *Lifecycler) updateCounters(ringDesc *Desc) { // Count the number of healthy instances for Write operation healthyInstancesCount := 0 @@ -708,19 +939,39 @@ func (i *Lifecycler) SetFlushOnShutdown(flushOnShutdown bool) { i.flushOnShutdown = flushOnShutdown } +func (i *Lifecycler) updateLastRing(ringDesc *Desc) { + i.countersLock.Lock() + defer i.countersLock.Unlock() + + i.lastRing = ringDesc +} + func (i *Lifecycler) processShutdown(ctx context.Context) { flushRequired := i.flushOnShutdown transferStart := time.Now() - if err := i.flushTransferer.TransferOut(ctx); err != nil { - if err == ErrTransferDisabled { - level.Info(util.Logger).Log("msg", "transfers are disabled") + + if i.cfg.LeaveIncrementalTransfer { + if err := i.leaveIncrementalTransfer(ctx); err != nil { + level.Error(util.Logger).Log("msg", "Failed to incrementally transfer chunks to another ingester", "err", err) + shutdownDuration.WithLabelValues("incremental_transfer", "fail", i.RingName).Observe(time.Since(transferStart).Seconds()) } else { - level.Error(util.Logger).Log("msg", "failed to transfer chunks to another instance", "ring", i.RingName, "err", err) - shutdownDuration.WithLabelValues("transfer", "fail", i.RingName).Observe(time.Since(transferStart).Seconds()) + // The ingester may still have data that wasn't transferred if it got any + // unexpected writes (or if there's a bug!). We'll keep flushRequired as true + // to make sure this remaining data gets flushed. + shutdownDuration.WithLabelValues("incremental_transfer", "success", i.RingName).Observe(time.Since(transferStart).Seconds()) } } else { - flushRequired = false - shutdownDuration.WithLabelValues("transfer", "success", i.RingName).Observe(time.Since(transferStart).Seconds()) + if err := i.flushTransferer.TransferOut(ctx); err != nil { + if err == ErrTransferDisabled { + level.Info(util.Logger).Log("msg", "transfers are disabled") + } else { + level.Error(util.Logger).Log("msg", "failed to transfer chunks to another instance", "ring", i.RingName, "err", err) + shutdownDuration.WithLabelValues("transfer", "fail", i.RingName).Observe(time.Since(transferStart).Seconds()) + } + } else { + flushRequired = false + shutdownDuration.WithLabelValues("transfer", "success", i.RingName).Observe(time.Since(transferStart).Seconds()) + } } if flushRequired { diff --git a/pkg/ring/lifecycler_test.go b/pkg/ring/lifecycler_test.go index 3bfcae4520b..5a7f697fe98 100644 --- a/pkg/ring/lifecycler_test.go +++ b/pkg/ring/lifecycler_test.go @@ -2,15 +2,18 @@ package ring import ( "context" + "fmt" "io/ioutil" "os" "sort" + "sync" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/cortexproject/cortex/pkg/ring/kv" "github.com/cortexproject/cortex/pkg/ring/kv/consul" "github.com/cortexproject/cortex/pkg/util/flagext" "github.com/cortexproject/cortex/pkg/util/test" @@ -39,6 +42,7 @@ func testLifecyclerConfig(ringConfig Config, id string) LifecyclerConfig { lifecyclerConfig.ID = id lifecyclerConfig.FinalSleep = 0 lifecyclerConfig.HeartbeatPeriod = 100 * time.Millisecond + lifecyclerConfig.TransferFinishDelay = time.Duration(0) return lifecyclerConfig } @@ -65,7 +69,7 @@ func TestLifecycler_HealthyInstancesCount(t *testing.T) { lifecyclerConfig1.HeartbeatPeriod = 100 * time.Millisecond lifecyclerConfig1.JoinAfter = 100 * time.Millisecond - lifecycler1, err := NewLifecycler(lifecyclerConfig1, 
&flushTransferer{}, "ingester", IngesterRingKey, true) + lifecycler1, err := NewLifecycler(lifecyclerConfig1, &flushTransferer{}, nil, "ingester", IngesterRingKey, true) require.NoError(t, err) assert.Equal(t, 0, lifecycler1.HealthyInstancesCount()) @@ -81,7 +85,7 @@ func TestLifecycler_HealthyInstancesCount(t *testing.T) { lifecyclerConfig2.HeartbeatPeriod = 100 * time.Millisecond lifecyclerConfig2.JoinAfter = 100 * time.Millisecond - lifecycler2, err := NewLifecycler(lifecyclerConfig2, &flushTransferer{}, "ingester", IngesterRingKey, true) + lifecycler2, err := NewLifecycler(lifecyclerConfig2, &flushTransferer{}, nil, "ingester", IngesterRingKey, true) require.NoError(t, err) assert.Equal(t, 0, lifecycler2.HealthyInstancesCount()) @@ -105,7 +109,7 @@ func TestLifecycler_NilFlushTransferer(t *testing.T) { lifecyclerConfig := testLifecyclerConfig(ringConfig, "ing1") // Create a lifecycler with nil FlushTransferer to make sure it operates correctly - lifecycler, err := NewLifecycler(lifecyclerConfig, nil, "ingester", IngesterRingKey, true) + lifecycler, err := NewLifecycler(lifecyclerConfig, nil, nil, "ingester", IngesterRingKey, true) require.NoError(t, err) lifecycler.Start() @@ -128,12 +132,12 @@ func TestLifecycler_TwoRingsWithDifferentKeysOnTheSameKVStore(t *testing.T) { lifecyclerConfig1 := testLifecyclerConfig(ringConfig, "instance-1") lifecyclerConfig2 := testLifecyclerConfig(ringConfig, "instance-2") - lifecycler1, err := NewLifecycler(lifecyclerConfig1, nil, "service-1", "ring-1", true) + lifecycler1, err := NewLifecycler(lifecyclerConfig1, nil, nil, "service-1", "ring-1", true) require.NoError(t, err) lifecycler1.Start() defer lifecycler1.Shutdown() - lifecycler2, err := NewLifecycler(lifecyclerConfig2, nil, "service-2", "ring-2", true) + lifecycler2, err := NewLifecycler(lifecyclerConfig2, nil, nil, "service-2", "ring-2", true) require.NoError(t, err) lifecycler2.Start() defer lifecycler2.Shutdown() @@ -149,12 +153,17 @@ func TestLifecycler_TwoRingsWithDifferentKeysOnTheSameKVStore(t *testing.T) { }) } -type nopFlushTransferer struct{} +type nopFlushTransferer struct { + allowTransfer bool +} func (f *nopFlushTransferer) StopIncomingRequests() {} func (f *nopFlushTransferer) Flush() {} func (f *nopFlushTransferer) TransferOut(ctx context.Context) error { - panic("should not be called") + if !f.allowTransfer { + panic("should not be called") + } + return nil } func TestRingRestart(t *testing.T) { @@ -169,7 +178,7 @@ func TestRingRestart(t *testing.T) { // Add an 'ingester' with normalised tokens. 
lifecyclerConfig1 := testLifecyclerConfig(ringConfig, "ing1") - l1, err := NewLifecycler(lifecyclerConfig1, &nopFlushTransferer{}, "ingester", IngesterRingKey, true) + l1, err := NewLifecycler(lifecyclerConfig1, &nopFlushTransferer{}, nil, "ingester", IngesterRingKey, true) require.NoError(t, err) l1.Start() @@ -183,7 +192,7 @@ func TestRingRestart(t *testing.T) { token := l1.tokens[0] // Add a second ingester with the same settings, so it will think it has restarted - l2, err := NewLifecycler(lifecyclerConfig1, &nopFlushTransferer{}, "ingester", IngesterRingKey, true) + l2, err := NewLifecycler(lifecyclerConfig1, &nopFlushTransferer{allowTransfer: true}, nil, "ingester", IngesterRingKey, true) require.NoError(t, err) l2.Start() @@ -192,6 +201,7 @@ func TestRingRestart(t *testing.T) { d, err := r.KVClient.Get(context.Background(), IngesterRingKey) require.NoError(t, err) l2Tokens := l2.getTokens() + return checkNormalised(d, "ing1") && len(l2Tokens) == 1 && l2Tokens[0] == token @@ -205,6 +215,13 @@ type MockClient struct { WatchPrefixFunc func(ctx context.Context, prefix string, f func(string, interface{}) bool) } +func (m *MockClient) MapFunctions(client kv.Client) { + m.GetFunc = client.Get + m.CASFunc = client.CAS + m.WatchKeyFunc = client.WatchKey + m.WatchPrefixFunc = client.WatchPrefix +} + func (m *MockClient) Get(ctx context.Context, key string) (interface{}, error) { if m.GetFunc != nil { return m.GetFunc(ctx, key) @@ -244,7 +261,7 @@ func TestCheckReady(t *testing.T) { defer r.Stop() cfg := testLifecyclerConfig(ringConfig, "ring1") cfg.MinReadyDuration = 1 * time.Nanosecond - l1, err := NewLifecycler(cfg, &nopFlushTransferer{}, "ingester", IngesterRingKey, true) + l1, err := NewLifecycler(cfg, &nopFlushTransferer{}, nil, "ingester", IngesterRingKey, true) l1.Start() require.NoError(t, err) @@ -255,8 +272,7 @@ func TestCheckReady(t *testing.T) { require.Error(t, err) } -type noopFlushTransferer struct { -} +type noopFlushTransferer struct{} func (f *noopFlushTransferer) StopIncomingRequests() {} func (f *noopFlushTransferer) Flush() {} @@ -282,7 +298,7 @@ func TestTokensOnDisk(t *testing.T) { lifecyclerConfig.TokensFilePath = tokenDir + "/tokens" // Start first ingester. - l1, err := NewLifecycler(lifecyclerConfig, &noopFlushTransferer{}, "ingester", IngesterRingKey, true) + l1, err := NewLifecycler(lifecyclerConfig, &noopFlushTransferer{}, nil, "ingester", IngesterRingKey, true) require.NoError(t, err) l1.Start() // Check this ingester joined, is active, and has 512 token. @@ -304,7 +320,7 @@ func TestTokensOnDisk(t *testing.T) { // Start new ingester at same token directory. 
lifecyclerConfig.ID = "ing2" - l2, err := NewLifecycler(lifecyclerConfig, &noopFlushTransferer{}, "ingester", IngesterRingKey, true) + l2, err := NewLifecycler(lifecyclerConfig, &noopFlushTransferer{}, nil, "ingester", IngesterRingKey, true) require.NoError(t, err) l2.Start() defer l2.Shutdown() @@ -365,7 +381,7 @@ func TestJoinInLeavingState(t *testing.T) { }) require.NoError(t, err) - l1, err := NewLifecycler(cfg, &nopFlushTransferer{}, "ingester", IngesterRingKey, true) + l1, err := NewLifecycler(cfg, &nopFlushTransferer{}, nil, "ingester", IngesterRingKey, true) l1.Start() require.NoError(t, err) @@ -381,3 +397,461 @@ func TestJoinInLeavingState(t *testing.T) { len(desc.Ingesters["ing2"].Tokens) == 2 }) } + +func TestFindTransferWorkload(t *testing.T) { + tt := []struct { + name string + ring string + token string + replication int + expect transferWorkload + }{ + { + name: "joining: single new token", + ring: "A B C D I+ E F G H", + token: "I+", + replication: 3, + expect: transferWorkload{ + "E": []TokenRange{{2, 3}}, // transfer BC from E + "F": []TokenRange{{3, 4}}, // transfer CD from F + "G": []TokenRange{{4, 5}}, // transfer DI From G + }, + }, + + { + name: "joining: single new token around duplicates", + ring: "A1 A2 B1 B2 C1 C2 D1 D2 I+ E1 E2 F1 F2 G1 G2 H1 H2", + token: "I+", + replication: 3, + expect: transferWorkload{ + "E": []TokenRange{{4, 5}, {5, 6}}, // transfer B2C1, C1C2 from E + "F": []TokenRange{{6, 7}, {7, 8}}, // transfer C2D1, D1D2 from F + "G": []TokenRange{{8, 9}}, // transfer D2I from G + }, + }, + + { + name: "joining: single new token around mixed duplicates", + ring: "A1 B1 A2 B2 C1 D1 C2 D2 I+ E1 F1 E2 F2 G1 H1 G2 H2", + token: "I+", + replication: 3, + expect: transferWorkload{ + "E": []TokenRange{{4, 5}, {5, 6}, {6, 7}}, // transfer B2C1, D1C2, C1D1 from E + "F": []TokenRange{{7, 8}}, // transfer C2D2 from F + "G": []TokenRange{{8, 9}}, // transfer D2I from G + }, + }, + + { + name: "joining: token at start of list", + ring: "A+ B C D E F G H", + token: "A+", + replication: 3, + expect: transferWorkload{ + "B": []TokenRange{{6, 7}}, // transfer FG from B + "C": []TokenRange{{7, 8}}, // transfer GH from C + "D": []TokenRange{{8, 1}}, // transfer HA from D + }, + }, + + { + name: "joining: token already added on right", + ring: "A B C D I1+ I2 E F G H", + token: "I1+", + replication: 3, + expect: transferWorkload{}, // we already own everything + }, + + { + name: "joining: token already added on left", + ring: "A B C D I1 I2+ E F G H", + token: "I2+", + replication: 3, + expect: transferWorkload{ + "G": []TokenRange{{5, 6}}, // transfer I1I2 from G + }, + }, + + { + name: "joining: token already added 2 ingesters over on right", + ring: "A B C I1+ D I2 E F G H", + token: "I1+", + replication: 3, + expect: transferWorkload{ + "D": []TokenRange{{1, 2}}, // transfer AB from D + }, + }, + + { + name: "joining: token already added 2 ingesters over on left", + ring: "A B C I1 D I2+ E F G H", + token: "I2+", + replication: 3, + expect: transferWorkload{ + "F": []TokenRange{{4, 5}}, // transfer ID from F + "G": []TokenRange{{5, 6}}, // transfer DI from G + }, + }, + + { + name: "joining: skip over other joining ingesters", + ring: "A B C D I+ Z+ Y? X+ W? 
E F G H",
+			token:       "I+",
+			replication: 3,
+			expect: transferWorkload{
+				"E": []TokenRange{{4, 5}}, // transfer DI from E
+				"W": []TokenRange{{3, 4}}, // transfer CD from W
+				"Y": []TokenRange{{2, 3}}, // transfer BC from Y
+			},
+		},
+
+		{
+			name:        "leaving: single leaving token",
+			ring:        "A B C D I- E F G H",
+			token:       "I-",
+			replication: 3,
+			expect: transferWorkload{
+				"E": []TokenRange{{2, 3}}, // transfer BC to E
+				"F": []TokenRange{{3, 4}}, // transfer CD to F
+				"G": []TokenRange{{4, 5}}, // transfer DI to G
+			},
+		},
+
+		{
+			name:        "leaving: active token on right",
+			ring:        "A B C D I1- I2 E F G H",
+			token:       "I1-",
+			replication: 3,
+			expect:      transferWorkload{}, // we still own everything after I1 leaves
+		},
+
+		{
+			name:        "leaving: active token on left",
+			ring:        "A B C D I1 I2- E F G H",
+			token:       "I2-",
+			replication: 3,
+			expect: transferWorkload{
+				"G": []TokenRange{{5, 6}}, // transfer I1I2 to G
+			},
+		},
+
+		{
+			name:        "leaving: token active 2 ingesters over on right",
+			ring:        "A B C I1- D I2 E F G H",
+			token:       "I1-",
+			replication: 3,
+			expect: transferWorkload{
+				"D": []TokenRange{{1, 2}}, // transfer AB to D
+			},
+		},
+
+		{
+			name:        "leaving: token active 2 ingesters over on left",
+			ring:        "A B C I1 D I2- E F G H",
+			token:       "I2-",
+			replication: 3,
+			expect: transferWorkload{
+				"F": []TokenRange{{4, 5}}, // transfer ID to F
+				"G": []TokenRange{{5, 6}}, // transfer DI to G
+			},
+		},
+
+		{
+			name:        "leaving: skip over ingesters not joining/active",
+			ring:        "A B C D I- Z- Y? X- W? E F G H",
+			token:       "I-",
+			replication: 3,
+			expect: transferWorkload{
+				"E": []TokenRange{{2, 3}}, // transfer BC to E
+				"F": []TokenRange{{3, 4}}, // transfer CD to F
+				"G": []TokenRange{{4, 5}}, // transfer DI to G
+			},
+		},
+
+		{
+			name:        "joining: interleaved tokens",
+			ring:        "A1+ B1 A2 B2",
+			token:       "A1+",
+			replication: 1,
+			expect: transferWorkload{
+				"B": []TokenRange{{4, 1}}, // transfer B2 A1+ from B
+			},
+		},
+
+		{
+			name:        "joining: interleaved tokens",
+			ring:        "A1 B1+ A2 B2",
+			token:       "B1+",
+			replication: 1,
+			expect: transferWorkload{
+				"A": []TokenRange{{1, 2}}, // transfer A1 B1+ from A
+			},
+		},
+
+		{
+			name:        "joining: interleaved tokens",
+			ring:        "A1 B1 A2+ B2",
+			token:       "A2+",
+			replication: 1,
+			expect: transferWorkload{
+				"B": []TokenRange{{2, 3}},
+			},
+		},
+
+		{
+			name:        "joining: interleaved tokens",
+			ring:        "A1 B1 A2 B2+",
+			token:       "B2+",
+			replication: 1,
+			expect: transferWorkload{
+				"A": []TokenRange{{3, 4}},
+			},
+		},
+
+		{
+			name:        "joining: multiple tokens from same ingester",
+			ring:        "B1+ B2 A1 A2 B3 A3 B4 A4",
+			token:       "B1+",
+			replication: 1,
+			expect:      transferWorkload{},
+		},
+
+		{
+			name:        "joining: multiple tokens from same ingester",
+			ring:        "B1 B2+ A1 A2 B3 A3 B4 A4",
+			token:       "B2+",
+			replication: 1,
+			expect: transferWorkload{
+				"A": []TokenRange{{1, 2}},
+			},
+		},
+
+		{
+			name:        "joining: multiple tokens from same ingester",
+			ring:        "B1 B2 A1 A2 B3+ A3 B4 A4",
+			token:       "B3+",
+			replication: 1,
+			expect: transferWorkload{
+				"A": []TokenRange{{4, 5}},
+			},
+		},
+
+		{
+			name:        "joining: multiple tokens from same ingester",
+			ring:        "B1 B2 A1 A2 B3 A3 B4+ A4",
+			token:       "B4+",
+			replication: 1,
+			expect: transferWorkload{
+				"A": []TokenRange{{6, 7}},
+			},
+		},
+
+		{
+			name:        "joining: target cannot be same as end range",
+			ring:        "A1 B1 C1+ B2 A2 D1 E1",
+			token:       "C1+",
+			replication: 3,
+			expect: transferWorkload{
+				"D": []TokenRange{{2, 3}, {1, 2}, {7, 1}},
+			},
+		},
+	}
+
+	for _, tc := range tt {
+		t.Run(tc.name, func(t *testing.T) {
+			d := generateRing(t, tc.ring)
+
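+			// Ring strings use one letter per ingester (a digit suffix
+			// distinguishes multiple tokens from the same ingester); '+' marks
+			// a joining token and '-' a leaving one. See generateRing, defined
+			// elsewhere in this package, for the full notation (including '?').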
+			n := d.Desc.GetNavigator()
+			tok := d.TokenDesc(t, tc.token)
+
+			op := Read
+			if s, ok := d.tokenStates[tok.Token]; ok && s == LEAVING {
+				op = Write
+			}
+			healthy := d.TokenHealthChecker(op)
+
+			lc := &Lifecycler{
+				ID: tok.Ingester,
+				cfg: LifecyclerConfig{
+					RingConfig: Config{
+						ReplicationFactor: tc.replication,
+						HeartbeatTimeout:  time.Second * 3600,
+					},
+				},
+			}
+
+			wl, ok := lc.findTransferWorkload(d.Desc, n, tok.Token, healthy)
+
+			require.True(t, ok)
+			require.Equal(t, tc.expect, wl)
+		})
+	}
+}
+
+type mockIncrementalJoin struct {
+	t *testing.T
+
+	mtx       sync.Mutex
+	requested bool
+	completed bool
+
+	IncrementalTransferer
+}
+
+func (j *mockIncrementalJoin) MemoryStreamTokens() []uint32 { return nil }
+
+func (j *mockIncrementalJoin) RequestComplete(_ context.Context, ranges []TokenRange, addr string) {
+	j.mtx.Lock()
+	defer j.mtx.Unlock()
+
+	j.completed = true
+}
+
+func (j *mockIncrementalJoin) RequestChunkRanges(_ context.Context, ranges []TokenRange, addr string, move bool) error {
+	j.mtx.Lock()
+	defer j.mtx.Unlock()
+
+	j.requested = true
+	return nil
+}
+
+type casCallback = func(in interface{}) (out interface{}, retry bool, err error)
+type casFunc = func(ctx context.Context, key string, f casCallback) error
+
+func waitIngesterState(t *testing.T, r *Ring, id string, waitTime time.Duration, joined bool) {
+	t.Helper()
+
+	test.Poll(t, waitTime, true, func() interface{} {
+		d, err := r.KVClient.Get(context.Background(), "ring")
+		require.NoError(t, err)
+		if d == nil {
+			return false
+		}
+		i, exist := d.(*Desc).Ingesters[id]
+		if joined {
+			return exist && i.State == ACTIVE
+		}
+		return !exist
+	})
+}
+
+func getLifecyclers(t *testing.T, r *Ring, cfg Config, count int) ([]*Lifecycler, func()) {
+	t.Helper()
+
+	ret := []*Lifecycler{}
+
+	for i := 0; i < count; i++ {
+		id := fmt.Sprintf("ing-%d", i)
+
+		lcc := testLifecyclerConfig(cfg, id)
+		lcc.Addr = id
+		lcc.NumTokens = 64
+		lc, err := NewLifecycler(lcc, &nopFlushTransferer{allowTransfer: true}, nil, id, "ring", true)
+		require.NoError(t, err)
+		lc.Start()
+		// Track the lifecycler so the returned cleanup function can shut it down.
+		ret = append(ret, lc)
+
+		waitIngesterState(t, r, id, time.Millisecond*250, true)
+	}
+
+	return ret, func() {
+		for _, lc := range ret {
+			lc.Shutdown()
+		}
+	}
+}
+
+func testIncrementalRingConfig(t *testing.T) Config {
+	t.Helper()
+
+	var ringConfig Config
+	flagext.DefaultValues(&ringConfig)
+	codec := GetCodec()
+
+	inMemory := consul.NewInMemoryClient(codec)
+	mockClient := &MockClient{}
+	mockClient.MapFunctions(inMemory)
+	ringConfig.KVStore.Mock = mockClient
+
+	return ringConfig
+}
+
+func TestIncrementalJoin(t *testing.T) {
+	ringConfig := testIncrementalRingConfig(t)
+	r, err := New(ringConfig, "ingester", "ring")
+	require.NoError(t, err)
+	defer r.Stop()
+
+	_, stop := getLifecyclers(t, r, ringConfig, 5)
+	defer stop()
+
+	mock := mockIncrementalJoin{t: t}
+
+	lcc := testLifecyclerConfig(ringConfig, "joiner")
+	lcc.NumTokens = 64
+	lcc.JoinIncrementalTransfer = true
+	lc, err := NewLifecycler(lcc, &nopFlushTransferer{allowTransfer: true}, &mock, "joiner", "ring", true)
+	require.NoError(t, err)
+	lc.Start()
+	defer lc.Shutdown()
+
+	waitIngesterState(t, r, "joiner", 5000*time.Millisecond, true)
+
+	mock.mtx.Lock()
+	defer mock.mtx.Unlock()
+	require.Equal(t, mock.requested, true)
+	require.Equal(t, mock.completed, true)
+}
+
+type mockIncrementalLeave struct {
+	t *testing.T
+
+	mtx  sync.Mutex
+	sent bool
+
+	IncrementalTransferer
+}
+
+func (j *mockIncrementalLeave) MemoryStreamTokens() []uint32 { return nil }
+
+func (j *mockIncrementalLeave) SendChunkRanges(_ context.Context, ranges []TokenRange, addr string) error {
+	j.mtx.Lock()
+	defer j.mtx.Unlock()
+
+	j.sent = true
+	return nil
+}
+
+func TestIncrementalLeave(t *testing.T) {
+	ringConfig := testIncrementalRingConfig(t)
+
+	r, err := New(ringConfig, "ingester", "ring")
+	require.NoError(t, err)
+	defer r.Stop()
+
+	_, stop := getLifecyclers(t, r, ringConfig, 5)
+	defer stop()
+
+	mock := mockIncrementalLeave{t: t}
+
+	lcc := testLifecyclerConfig(ringConfig, "leaver")
+	lcc.NumTokens = 64
+	lcc.LeaveIncrementalTransfer = true
+	lc, err := NewLifecycler(lcc, &nopFlushTransferer{allowTransfer: true}, &mock, "leaver", "ring", true)
+	require.NoError(t, err)
+	lc.Start()
+
+	waitIngesterState(t, r, "leaver", time.Millisecond*250, true)
+
+	lc.Shutdown()
+	waitIngesterState(t, r, "leaver", 5000*time.Millisecond, false)
+
+	mock.mtx.Lock()
+	defer mock.mtx.Unlock()
+	require.Equal(t, mock.sent, true)
+}
diff --git a/pkg/ring/model.go b/pkg/ring/model.go
index 3170deb9128..64edb975c0c 100644
--- a/pkg/ring/model.go
+++ b/pkg/ring/model.go
@@ -18,6 +18,17 @@ func (ts ByToken) Len() int      { return len(ts) }
 func (ts ByToken) Swap(i, j int) { ts[i], ts[j] = ts[j], ts[i] }
 func (ts ByToken) Less(i, j int) bool { return ts[i].Token < ts[j].Token }
 
+// Contains reports whether key falls within the range.
+func (r TokenRange) Contains(key uint32) bool {
+	if r.From > r.To {
+		// Wraps around the ring. It's in the range as long as the
+		// key is in the range of [from, 2^32-1] or [0, to).
+		return key >= r.From || key < r.To
+	}
+
+	return key >= r.From && key < r.To
+}
+
 // ProtoDescFactory makes new Descs
 func ProtoDescFactory() proto.Message {
 	return NewDesc()
@@ -37,19 +48,31 @@ func NewDesc() *Desc {
 
 // AddIngester adds the given ingester to the ring. Ingester will only use supplied tokens,
 // any other tokens are removed.
-func (d *Desc) AddIngester(id, addr string, tokens []uint32, state IngesterState) {
+func (d *Desc) AddIngester(id, addr string, tokens []uint32, state IngesterState, incremental bool) {
 	if d.Ingesters == nil {
 		d.Ingesters = map[string]IngesterDesc{}
 	}
 
-	ingester := IngesterDesc{
-		Addr:      addr,
-		Timestamp: time.Now().Unix(),
-		State:     state,
-		Tokens:    tokens,
+	d.Ingesters[id] = IngesterDesc{
+		Addr:        addr,
+		Timestamp:   time.Now().Unix(),
+		State:       state,
+		Incremental: incremental,
+	}
+
+	// Add tokens
+	d.SetIngesterTokens(id, tokens)
+}
+
+// SetIngesterTokens updates the tokens in the ring with the provided tokens.
+func (d *Desc) SetIngesterTokens(id string, tokens []uint32) {
+	ing, ok := d.Ingesters[id]
+	if !ok {
+		return
 	}
 
-	d.Ingesters[id] = ingester
+	ing.Tokens = tokens
+	d.Ingesters[id] = ing
 }
 
 // RemoveIngester removes the given ingester and all its tokens.
@@ -64,6 +87,7 @@ func (d *Desc) RemoveIngester(id string) {
 func (d *Desc) ClaimTokens(from, to string) Tokens {
 	var result Tokens
 
+	// If the ingester we are claiming from is normalising, get its tokens then erase them from the ring.
 	if fromDesc, found := d.Ingesters[from]; found {
 		result = fromDesc.Tokens
 		fromDesc.Tokens = nil
@@ -107,9 +131,9 @@ func (d *Desc) Ready(now time.Time, heartbeatTimeout time.Duration) error {
 	}
 
 // TokensFor partitions the tokens into those for the given ID, and those for others.
-func (d *Desc) TokensFor(id string) (tokens, other Tokens) { - takenTokens, myTokens := Tokens{}, Tokens{} - for _, token := range d.getTokens() { +func (d *Desc) TokensFor(id string) (tokens, other []uint32) { + var takenTokens, myTokens []uint32 + for _, token := range d.GetNavigator() { takenTokens = append(takenTokens, token.Token) if token.Ingester == id { myTokens = append(myTokens, token.Token) @@ -118,8 +142,16 @@ func (d *Desc) TokensFor(id string) (tokens, other Tokens) { return myTokens, takenTokens } -// IsHealthy checks whether the ingester appears to be alive and heartbeating -func (i *IngesterDesc) IsHealthy(op Operation, heartbeatTimeout time.Duration) bool { +// IsHealthyState checks whether or not the ingester state is valid. +// +// When ingesters are using the handoff mechanism, writes should not go to +// ingesters that are not ACTIVE and reads should not go to ingesters that +// are JOINING. +// +// However, when ingesters are using incremental transfers, the state is +// just informational. Since tokens are added/removed as they transfer, +// both writes and reads can go to JOINING/LEAVING incremental ingesters. +func (i *IngesterDesc) IsHealthyState(op Operation) bool { healthy := false switch op { @@ -133,7 +165,16 @@ func (i *IngesterDesc) IsHealthy(op Operation, heartbeatTimeout time.Duration) b healthy = true } - return healthy && time.Since(time.Unix(i.Timestamp, 0)) <= heartbeatTimeout + return healthy +} + +// IsHealthy checks whether the ingester appears to be alive and heartbeating +func (i *IngesterDesc) IsHealthy(op Operation, heartbeatTimeout time.Duration) bool { + if !i.IsHealthyState(op) { + return false + } + + return time.Since(time.Unix(i.Timestamp, 0)) <= heartbeatTimeout } // Merge merges other ring into this one. Returns sub-ring that represents the change, @@ -374,18 +415,29 @@ func (d *Desc) RemoveTombstones(limit time.Time) { } } +// HealthChecker generates a health check function that wraps around +// IngesterDesc.IsHealthyState, validating both the token state and the +// last heartbeat of the ingester. +func (d *Desc) HealthChecker(op Operation, heartbeatTimeout time.Duration) HealthCheckFunc { + return func(t TokenDesc) bool { + ing := d.Ingesters[t.Ingester] + return ing.IsHealthy(op, heartbeatTimeout) + } +} + type TokenDesc struct { Token uint32 Ingester string } -// Returns sorted list of tokens with ingester names. -func (d *Desc) getTokens() []TokenDesc { +// GetNavigator returns a TokenNavigator from the Desc. 
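+// The navigator is the flattened list of (token, ingester) pairs that backs
+// helpers such as Neighbor, Predecessors, and InRange.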
+func (d *Desc) GetNavigator() TokenNavigator { numTokens := 0 for _, ing := range d.Ingesters { numTokens += len(ing.Tokens) } tokens := make([]TokenDesc, 0, numTokens) + for key, ing := range d.Ingesters { for _, token := range ing.Tokens { tokens = append(tokens, TokenDesc{Token: token, Ingester: key}) diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 4b98a481912..5c2f46f725f 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -99,7 +99,7 @@ type Ring struct { mtx sync.RWMutex ringDesc *Desc - ringTokens []TokenDesc + ringTokens TokenNavigator memberOwnershipDesc *prometheus.Desc numMembersDesc *prometheus.Desc @@ -173,12 +173,11 @@ func (r *Ring) loop(ctx context.Context) { } ringDesc := value.(*Desc) - ringTokens := ringDesc.getTokens() - + navigator := ringDesc.GetNavigator() r.mtx.Lock() defer r.mtx.Unlock() r.ringDesc = ringDesc - r.ringTokens = ringTokens + r.ringTokens = navigator return true }) } @@ -211,15 +210,12 @@ func (r *Ring) Get(key uint32, op Operation, buf []IngesterDesc) (ReplicationSet distinctHosts[token.Ingester] = struct{}{} ingester := r.ringDesc.Ingesters[token.Ingester] - // We do not want to Write to Ingesters that are not ACTIVE, but we do want - // to write the extra replica somewhere. So we increase the size of the set - // of replicas for the key. This means we have to also increase the - // size of the replica set for read, but we can read from Leaving ingesters, - // so don't skip it in this case. - // NB dead ingester will be filtered later (by replication_strategy.go). - if op == Write && ingester.State != ACTIVE { - n++ - } else if op == Read && (ingester.State != ACTIVE && ingester.State != LEAVING) { + // We don't want to operate on unhealthy ingesters, but we do want to operate + // on _some_ ingester. If the ingester is unhealthy, the replica set size + // is increased for the key. Dead ingesters will be filtered later by + // replication_strategy.go. Filtering later means that we can calculate + // a healthiness quorum. + if !ingester.IsHealthyState(op) { n++ } @@ -285,7 +281,7 @@ func (r *Ring) Describe(ch chan<- *prometheus.Desc) { ch <- r.numTokensDesc } -func countTokens(ringDesc *Desc, tokens []TokenDesc) (map[string]uint32, map[string]uint32) { +func countTokens(ringDesc *Desc, tokens TokenNavigator) (map[string]uint32, map[string]uint32) { owned := map[string]uint32{} numTokens := map[string]uint32{} for i, token := range tokens { diff --git a/pkg/ring/ring.pb.go b/pkg/ring/ring.pb.go index 72e40d40e02..3e58ee0016d 100644 --- a/pkg/ring/ring.pb.go +++ b/pkg/ring/ring.pb.go @@ -102,10 +102,15 @@ func (m *Desc) GetIngesters() map[string]IngesterDesc { } type IngesterDesc struct { - Addr string `protobuf:"bytes,1,opt,name=addr,proto3" json:"addr,omitempty"` + Addr string `protobuf:"bytes,1,opt,name=addr,proto3" json:"addr,omitempty"` + // Timestamp is the Unix timestamp for the last heartbeat an ingester + // gave. Timestamp int64 `protobuf:"varint,2,opt,name=timestamp,proto3" json:"timestamp,omitempty"` State IngesterState `protobuf:"varint,3,opt,name=state,proto3,enum=ring.IngesterState" json:"state,omitempty"` Tokens []uint32 `protobuf:"varint,6,rep,packed,name=tokens,proto3" json:"tokens,omitempty"` + // Incremental refers to if the state the ingester is in is utilizing incremental + // transfers. Only meaningful for JOINING and LEAVING states. 
+ Incremental bool `protobuf:"varint,7,opt,name=incremental,proto3" json:"incremental,omitempty"` } func (m *IngesterDesc) Reset() { *m = IngesterDesc{} } @@ -168,42 +173,107 @@ func (m *IngesterDesc) GetTokens() []uint32 { return nil } +func (m *IngesterDesc) GetIncremental() bool { + if m != nil { + return m.Incremental + } + return false +} + +// TokenRange represents a range of tokens, starting inclusively +// with From and ending exclusively at To. +type TokenRange struct { + From uint32 `protobuf:"varint,1,opt,name=from,proto3" json:"from,omitempty"` + To uint32 `protobuf:"varint,2,opt,name=to,proto3" json:"to,omitempty"` +} + +func (m *TokenRange) Reset() { *m = TokenRange{} } +func (*TokenRange) ProtoMessage() {} +func (*TokenRange) Descriptor() ([]byte, []int) { + return fileDescriptor_26381ed67e202a6e, []int{2} +} +func (m *TokenRange) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *TokenRange) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_TokenRange.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *TokenRange) XXX_Merge(src proto.Message) { + xxx_messageInfo_TokenRange.Merge(m, src) +} +func (m *TokenRange) XXX_Size() int { + return m.Size() +} +func (m *TokenRange) XXX_DiscardUnknown() { + xxx_messageInfo_TokenRange.DiscardUnknown(m) +} + +var xxx_messageInfo_TokenRange proto.InternalMessageInfo + +func (m *TokenRange) GetFrom() uint32 { + if m != nil { + return m.From + } + return 0 +} + +func (m *TokenRange) GetTo() uint32 { + if m != nil { + return m.To + } + return 0 +} + func init() { proto.RegisterEnum("ring.IngesterState", IngesterState_name, IngesterState_value) proto.RegisterType((*Desc)(nil), "ring.Desc") proto.RegisterMapType((map[string]IngesterDesc)(nil), "ring.Desc.IngestersEntry") proto.RegisterType((*IngesterDesc)(nil), "ring.IngesterDesc") + proto.RegisterType((*TokenRange)(nil), "ring.TokenRange") } func init() { proto.RegisterFile("ring.proto", fileDescriptor_26381ed67e202a6e) } var fileDescriptor_26381ed67e202a6e = []byte{ - // 387 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x54, 0x91, 0x3f, 0x6f, 0xd3, 0x40, - 0x18, 0xc6, 0xef, 0xb5, 0xcf, 0xc6, 0x79, 0x43, 0x2b, 0xeb, 0x90, 0x90, 0xa9, 0xd0, 0x61, 0x75, - 0x32, 0x48, 0xb8, 0x52, 0x60, 0x40, 0x48, 0x0c, 0x2d, 0x35, 0xc8, 0x56, 0x14, 0x2a, 0x53, 0x75, - 0x77, 0xda, 0xc3, 0x58, 0x25, 0x76, 0x65, 0x5f, 0x90, 0xba, 0xf1, 0x0d, 0xe0, 0x0b, 0xb0, 0xf3, - 0x51, 0x3a, 0x66, 0xcc, 0x84, 0x88, 0xb3, 0x30, 0xe6, 0x23, 0xa0, 0x3b, 0x27, 0x0a, 0xd9, 0x9e, - 0xdf, 0x3d, 0x7f, 0xde, 0xe1, 0x10, 0xeb, 0xa2, 0xcc, 0xc3, 0x9b, 0xba, 0x92, 0x15, 0xa3, 0x4a, - 0x1f, 0x3c, 0xcf, 0x0b, 0xf9, 0x79, 0x3a, 0x0e, 0x2f, 0xab, 0xc9, 0x51, 0x5e, 0xe5, 0xd5, 0x91, - 0x36, 0xc7, 0xd3, 0x4f, 0x9a, 0x34, 0x68, 0xd5, 0x95, 0x0e, 0x7f, 0x02, 0xd2, 0x53, 0xd1, 0x5c, - 0xb2, 0x37, 0xd8, 0x2b, 0xca, 0x5c, 0x34, 0x52, 0xd4, 0x8d, 0x07, 0xbe, 0x19, 0xf4, 0x07, 0x8f, - 0x42, 0xbd, 0xae, 0xec, 0x30, 0xde, 0x78, 0x51, 0x29, 0xeb, 0xdb, 0x13, 0x7a, 0xf7, 0xfb, 0x09, - 0x49, 0xb7, 0x8d, 0x83, 0x33, 0xdc, 0xdf, 0x8d, 0x30, 0x17, 0xcd, 0x6b, 0x71, 0xeb, 0x81, 0x0f, - 0x41, 0x2f, 0x55, 0x92, 0x05, 0x68, 0x7d, 0xcd, 0xbe, 0x4c, 0x85, 0x67, 0xf8, 0x10, 0xf4, 0x07, - 0xac, 0x9b, 0xdf, 0xd4, 0xd4, 0x99, 0xb4, 0x0b, 0xbc, 0x36, 0x5e, 0x41, 0x42, 0x1d, 0xc3, 0x35, - 0x0f, 0xbf, 0x03, 0xde, 0xff, 0x3f, 0xc1, 0x18, 
0xd2, 0xec, 0xea, 0xaa, 0x5e, 0xef, 0x6a, 0xcd, - 0x1e, 0x63, 0x4f, 0x16, 0x13, 0xd1, 0xc8, 0x6c, 0x72, 0xa3, 0xc7, 0xcd, 0x74, 0xfb, 0xc0, 0x9e, - 0xa2, 0xd5, 0xc8, 0x4c, 0x0a, 0xcf, 0xf4, 0x21, 0xd8, 0x1f, 0x3c, 0xd8, 0x3d, 0xfb, 0x51, 0x59, - 0x69, 0x97, 0x60, 0x0f, 0xd1, 0x96, 0xd5, 0xb5, 0x28, 0x1b, 0xcf, 0xf6, 0xcd, 0x60, 0x2f, 0x5d, - 0x53, 0x42, 0x1d, 0xea, 0x5a, 0x09, 0x75, 0x2c, 0xd7, 0x7e, 0x36, 0xc4, 0xbd, 0x9d, 0x2e, 0x43, - 0xb4, 0x8f, 0xdf, 0x9e, 0xc7, 0x17, 0x91, 0x4b, 0x58, 0x1f, 0xef, 0x0d, 0xa3, 0xe3, 0x8b, 0x78, - 0xf4, 0xde, 0x05, 0x05, 0x67, 0xd1, 0xe8, 0x54, 0x81, 0xa1, 0x20, 0xf9, 0x10, 0x8f, 0x14, 0x98, - 0xcc, 0x41, 0x3a, 0x8c, 0xde, 0x9d, 0xbb, 0xf4, 0xe4, 0xe5, 0x6c, 0xc1, 0xc9, 0x7c, 0xc1, 0xc9, - 0x6a, 0xc1, 0xe1, 0x5b, 0xcb, 0xe1, 0x57, 0xcb, 0xe1, 0xae, 0xe5, 0x30, 0x6b, 0x39, 0xfc, 0x69, - 0x39, 0xfc, 0x6d, 0x39, 0x59, 0xb5, 0x1c, 0x7e, 0x2c, 0x39, 0x99, 0x2d, 0x39, 0x99, 0x2f, 0x39, - 0x19, 0xdb, 0xfa, 0xf3, 0x5e, 0xfc, 0x0b, 0x00, 0x00, 0xff, 0xff, 0x33, 0x18, 0xb8, 0xad, 0xff, - 0x01, 0x00, 0x00, + // 463 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x54, 0x92, 0xb1, 0x6e, 0xd3, 0x40, + 0x1c, 0xc6, 0x7d, 0xf6, 0xc5, 0x75, 0xfe, 0x21, 0x91, 0x75, 0x48, 0xc8, 0x54, 0xe8, 0xb0, 0x32, + 0x19, 0x24, 0x12, 0x14, 0x16, 0x84, 0xc4, 0xd0, 0x52, 0x83, 0x12, 0x45, 0xa1, 0x3a, 0xa2, 0x0e, + 0x0c, 0x48, 0x8e, 0x7b, 0x35, 0x26, 0xb5, 0x2f, 0xb2, 0x2f, 0x88, 0x6e, 0x3c, 0x02, 0x2f, 0xc0, + 0xce, 0xca, 0x5b, 0x74, 0xcc, 0xd8, 0x09, 0x11, 0x67, 0x61, 0xec, 0x23, 0xa0, 0xbb, 0xa4, 0x4a, + 0xb2, 0x7d, 0xbf, 0xfb, 0xff, 0xbf, 0xef, 0x3e, 0x9d, 0x0e, 0xa0, 0x48, 0xf3, 0xa4, 0x33, 0x2b, + 0x84, 0x14, 0x04, 0x2b, 0x7d, 0xf8, 0x2c, 0x49, 0xe5, 0xe7, 0xf9, 0xa4, 0x13, 0x8b, 0xac, 0x9b, + 0x88, 0x44, 0x74, 0xf5, 0x70, 0x32, 0xbf, 0xd0, 0xa4, 0x41, 0xab, 0xb5, 0xa9, 0xfd, 0x13, 0x01, + 0x3e, 0xe1, 0x65, 0x4c, 0x5e, 0x43, 0x3d, 0xcd, 0x13, 0x5e, 0x4a, 0x5e, 0x94, 0x1e, 0xf2, 0xad, + 0xa0, 0xd1, 0x7b, 0xd8, 0xd1, 0xe9, 0x6a, 0xdc, 0xe9, 0xdf, 0xcd, 0xc2, 0x5c, 0x16, 0x57, 0xc7, + 0xf8, 0xfa, 0xcf, 0x63, 0x83, 0x6d, 0x1d, 0x87, 0xa7, 0xd0, 0xda, 0x5f, 0x21, 0x2e, 0x58, 0x53, + 0x7e, 0xe5, 0x21, 0x1f, 0x05, 0x75, 0xa6, 0x24, 0x09, 0xa0, 0xf6, 0x35, 0xba, 0x9c, 0x73, 0xcf, + 0xf4, 0x51, 0xd0, 0xe8, 0x91, 0x75, 0xfc, 0x9d, 0x4d, 0x5d, 0xc3, 0xd6, 0x0b, 0xaf, 0xcc, 0x97, + 0x68, 0x80, 0x1d, 0xd3, 0xb5, 0xda, 0xbf, 0x11, 0xdc, 0xdb, 0xdd, 0x20, 0x04, 0x70, 0x74, 0x7e, + 0x5e, 0x6c, 0x72, 0xb5, 0x26, 0x8f, 0xa0, 0x2e, 0xd3, 0x8c, 0x97, 0x32, 0xca, 0x66, 0x3a, 0xdc, + 0x62, 0xdb, 0x03, 0xf2, 0x04, 0x6a, 0xa5, 0x8c, 0x24, 0xf7, 0x2c, 0x1f, 0x05, 0xad, 0xde, 0xfd, + 0xfd, 0x6b, 0x3f, 0xa8, 0x11, 0x5b, 0x6f, 0x90, 0x07, 0x60, 0x4b, 0x31, 0xe5, 0x79, 0xe9, 0xd9, + 0xbe, 0x15, 0x34, 0xd9, 0x86, 0x88, 0x0f, 0x8d, 0x34, 0x8f, 0x0b, 0x9e, 0xf1, 0x5c, 0x46, 0x97, + 0xde, 0x81, 0x8f, 0x02, 0x87, 0xed, 0x1e, 0x0d, 0xb0, 0x83, 0xdd, 0xda, 0x00, 0x3b, 0x35, 0xd7, + 0x6e, 0x3f, 0x07, 0x18, 0x2b, 0x1f, 0x8b, 0xf2, 0x84, 0xab, 0xc2, 0x17, 0x85, 0xc8, 0x74, 0xe1, + 0x26, 0xd3, 0x9a, 0xb4, 0xc0, 0x94, 0x42, 0x37, 0x6d, 0x32, 0x53, 0x8a, 0xa7, 0x43, 0x68, 0xee, + 0xf5, 0x21, 0x00, 0xf6, 0xd1, 0x9b, 0x71, 0xff, 0x2c, 0x74, 0x0d, 0xd2, 0x80, 0x83, 0x61, 0x78, + 0x74, 0xd6, 0x1f, 0xbd, 0x73, 0x91, 0x82, 0xd3, 0x70, 0x74, 0xa2, 0xc0, 0x54, 0x30, 0x78, 0xdf, + 0x1f, 0x29, 0xb0, 0x88, 0x03, 0x78, 0x18, 0xbe, 0x1d, 0xbb, 0xf8, 0xf8, 0xd3, 0x62, 0x49, 0x8d, + 0x9b, 0x25, 0x35, 0x6e, 0x97, 0x14, 0x7d, 0xaf, 0x28, 0xfa, 0x55, 0x51, 0x74, 0x5d, 0x51, 0xb4, + 0xa8, 
0x28, 0xfa, 0x5b, 0x51, 0xf4, 0xaf, 0xa2, 0xc6, 0x6d, 0x45, 0xd1, 0x8f, 0x15, 0x35, 0x16, + 0x2b, 0x6a, 0xdc, 0xac, 0xa8, 0xf1, 0x31, 0xd8, 0xf9, 0x38, 0xb1, 0x28, 0x24, 0xff, 0x36, 0x2b, + 0xc4, 0x17, 0x1e, 0xcb, 0x0d, 0x75, 0x67, 0xd3, 0xa4, 0xab, 0x9e, 0x6e, 0x62, 0xeb, 0xaf, 0xf3, + 0xe2, 0x7f, 0x00, 0x00, 0x00, 0xff, 0xff, 0x0d, 0x4b, 0x04, 0x22, 0x7d, 0x02, 0x00, 0x00, } func (x IngesterState) String() string { @@ -280,6 +350,36 @@ func (this *IngesterDesc) Equal(that interface{}) bool { return false } } + if this.Incremental != that1.Incremental { + return false + } + return true +} +func (this *TokenRange) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + that1, ok := that.(*TokenRange) + if !ok { + that2, ok := that.(TokenRange) + if ok { + that1 = &that2 + } else { + return false + } + } + if that1 == nil { + return this == nil + } else if this == nil { + return false + } + if this.From != that1.From { + return false + } + if this.To != that1.To { + return false + } return true } func (this *Desc) GoString() string { @@ -308,12 +408,24 @@ func (this *IngesterDesc) GoString() string { if this == nil { return "nil" } - s := make([]string, 0, 8) + s := make([]string, 0, 9) s = append(s, "&ring.IngesterDesc{") s = append(s, "Addr: "+fmt.Sprintf("%#v", this.Addr)+",\n") s = append(s, "Timestamp: "+fmt.Sprintf("%#v", this.Timestamp)+",\n") s = append(s, "State: "+fmt.Sprintf("%#v", this.State)+",\n") s = append(s, "Tokens: "+fmt.Sprintf("%#v", this.Tokens)+",\n") + s = append(s, "Incremental: "+fmt.Sprintf("%#v", this.Incremental)+",\n") + s = append(s, "}") + return strings.Join(s, "") +} +func (this *TokenRange) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 6) + s = append(s, "&ring.TokenRange{") + s = append(s, "From: "+fmt.Sprintf("%#v", this.From)+",\n") + s = append(s, "To: "+fmt.Sprintf("%#v", this.To)+",\n") s = append(s, "}") return strings.Join(s, "") } @@ -417,6 +529,44 @@ func (m *IngesterDesc) MarshalTo(dAtA []byte) (int, error) { i = encodeVarintRing(dAtA, i, uint64(j2)) i += copy(dAtA[i:], dAtA3[:j2]) } + if m.Incremental { + dAtA[i] = 0x38 + i++ + if m.Incremental { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i++ + } + return i, nil +} + +func (m *TokenRange) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *TokenRange) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.From != 0 { + dAtA[i] = 0x8 + i++ + i = encodeVarintRing(dAtA, i, uint64(m.From)) + } + if m.To != 0 { + dAtA[i] = 0x10 + i++ + i = encodeVarintRing(dAtA, i, uint64(m.To)) + } return i, nil } @@ -470,6 +620,24 @@ func (m *IngesterDesc) Size() (n int) { } n += 1 + sovRing(uint64(l)) + l } + if m.Incremental { + n += 2 + } + return n +} + +func (m *TokenRange) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.From != 0 { + n += 1 + sovRing(uint64(m.From)) + } + if m.To != 0 { + n += 1 + sovRing(uint64(m.To)) + } return n } @@ -515,6 +683,18 @@ func (this *IngesterDesc) String() string { `Timestamp:` + fmt.Sprintf("%v", this.Timestamp) + `,`, `State:` + fmt.Sprintf("%v", this.State) + `,`, `Tokens:` + fmt.Sprintf("%v", this.Tokens) + `,`, + `Incremental:` + fmt.Sprintf("%v", this.Incremental) + `,`, + `}`, + }, "") + return s +} +func (this *TokenRange) String() string { + if this == nil { + return "nil" + } + s := 
strings.Join([]string{`&TokenRange{`, + `From:` + fmt.Sprintf("%v", this.From) + `,`, + `To:` + fmt.Sprintf("%v", this.To) + `,`, `}`, }, "") return s @@ -884,6 +1064,117 @@ func (m *IngesterDesc) Unmarshal(dAtA []byte) error { } else { return fmt.Errorf("proto: wrong wireType = %d for field Tokens", wireType) } + case 7: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Incremental", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRing + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.Incremental = bool(v != 0) + default: + iNdEx = preIndex + skippy, err := skipRing(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthRing + } + if (iNdEx + skippy) < 0 { + return ErrInvalidLengthRing + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *TokenRange) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRing + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: TokenRange: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: TokenRange: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field From", wireType) + } + m.From = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRing + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.From |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field To", wireType) + } + m.To = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRing + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.To |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } default: iNdEx = preIndex skippy, err := skipRing(dAtA[iNdEx:]) diff --git a/pkg/ring/ring.proto b/pkg/ring/ring.proto index 9670b01c090..d0542ab667e 100644 --- a/pkg/ring/ring.proto +++ b/pkg/ring/ring.proto @@ -7,6 +7,8 @@ import "github.com/gogo/protobuf/gogoproto/gogo.proto"; option (gogoproto.marshaler_all) = true; option (gogoproto.unmarshaler_all) = true; +option go_package = "github.com/cortexproject/cortex/pkg/ring"; + message Desc { map ingesters = 1 [(gogoproto.nullable) = false]; reserved 2; @@ -16,9 +18,16 @@ message IngesterDesc { reserved 4, 5; // old, deprecated fields string addr = 1; - int64 timestamp = 2; // unix timestamp + + // Timestamp is the Unix timestamp for the last heartbeat an ingester + // gave. + int64 timestamp = 2; IngesterState state = 3; repeated uint32 tokens = 6; + + // Incremental refers to if the state the ingester is in is utilizing incremental + // transfers. Only meaningful for JOINING and LEAVING states. 
+ bool incremental = 7; } enum IngesterState { @@ -32,3 +41,11 @@ enum IngesterState { // ingesters that have been removed from the ring. Ring users should not use it directly. LEFT = 4; } + +// TokenRange represents a range of tokens, starting inclusively +// with From and ending exclusively at To. +message TokenRange { + uint32 from = 1; + uint32 to = 2; +} + diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index e657c72d9cd..6327169a4f0 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -4,12 +4,16 @@ import ( "context" "fmt" "math/rand" + "regexp" "sort" + "strconv" + "strings" "testing" "time" "github.com/stretchr/testify/require" + "github.com/cortexproject/cortex/pkg/ring/kv/consul" "github.com/cortexproject/cortex/pkg/util/flagext" ) @@ -36,7 +40,7 @@ func benchmarkBatch(b *testing.B, numIngester, numKeys int) { for i := 0; i < numIngester; i++ { tokens := GenerateTokens(numTokens, takenTokens) takenTokens = append(takenTokens, tokens...) - desc.AddIngester(fmt.Sprintf("%d", i), fmt.Sprintf("ingester%d", i), tokens, ACTIVE) + desc.AddIngester(fmt.Sprintf("%d", i), fmt.Sprintf("ingester%d", i), tokens, ACTIVE, false) } cfg := Config{} @@ -97,7 +101,7 @@ func TestAddIngester(t *testing.T) { ing1Tokens := GenerateTokens(128, nil) - r.AddIngester(ingName, "addr", ing1Tokens, ACTIVE) + r.AddIngester(ingName, "addr", ing1Tokens, ACTIVE, false) require.Equal(t, "addr", r.Ingesters[ingName].Addr) require.Equal(t, ing1Tokens, r.Ingesters[ingName].Tokens) @@ -115,7 +119,7 @@ func TestAddIngesterReplacesExistingTokens(t *testing.T) { newTokens := GenerateTokens(128, nil) - r.AddIngester(ing1Name, "addr", newTokens, ACTIVE) + r.AddIngester(ing1Name, "addr", newTokens, ACTIVE, false) require.Equal(t, newTokens, r.Ingesters[ing1Name].Tokens) } @@ -129,7 +133,7 @@ func TestSubring(t *testing.T) { name := fmt.Sprintf("ing%v", i) ingTokens := GenerateTokens(128, prevTokens) - r.AddIngester(name, fmt.Sprintf("addr%v", i), ingTokens, ACTIVE) + r.AddIngester(name, fmt.Sprintf("addr%v", i), ingTokens, ACTIVE, false) prevTokens = append(prevTokens, ingTokens...) } @@ -141,7 +145,7 @@ func TestSubring(t *testing.T) { HeartbeatTimeout: time.Hour, }, ringDesc: r, - ringTokens: r.getTokens(), + ringTokens: r.GetNavigator(), } // Subring of 0 invalid @@ -183,7 +187,7 @@ func TestStableSubring(t *testing.T) { name := fmt.Sprintf("ing%v", i) ingTokens := GenerateTokens(128, prevTokens) - r.AddIngester(name, fmt.Sprintf("addr%v", i), ingTokens, ACTIVE) + r.AddIngester(name, fmt.Sprintf("addr%v", i), ingTokens, ACTIVE, false) prevTokens = append(prevTokens, ingTokens...) 
 }
@@ -195,7 +199,7 @@ func TestStableSubring(t *testing.T) {
 HeartbeatTimeout: time.Hour,
 },
 ringDesc: r,
- ringTokens: r.getTokens(),
+ ringTokens: r.GetNavigator(),
 }
 
 // Generate the same subring multiple times
@@ -223,3 +227,267 @@ func TestStableSubring(t *testing.T) {
 require.Equal(t, subrings[i], subrings[next])
 }
 }
+
+type namedRing struct {
+ *Desc
+ namedTokens map[string]uint32
+ tokenStates map[uint32]IngesterState
+}
+
+func (r *namedRing) FindTokensByState(s IngesterState) []uint32 {
+ var ret []uint32
+ for t, state := range r.tokenStates {
+ if state == s {
+ ret = append(ret, t)
+ }
+ }
+ return ret
+}
+
+func (r *namedRing) TokenHealthChecker(op Operation) HealthCheckFunc {
+ return func(t TokenDesc) bool {
+ state := ACTIVE
+ if s, ok := r.tokenStates[t.Token]; ok {
+ state = s
+ }
+
+ if op == Write && state != ACTIVE {
+ return false
+ } else if op == Read && state == JOINING {
+ return false
+ }
+
+ return true
+ }
+}
+
+// TokenName gets a token's name by its value.
+func (r *namedRing) TokenName(t *testing.T, value uint32) string {
+ t.Helper()
+ for n, v := range r.namedTokens {
+ if v == value {
+ return n
+ }
+ }
+ t.Fatalf("could not find %d in ring", value)
+ return ""
+}
+
+// Token gets a token by its name.
+func (r *namedRing) Token(t *testing.T, name string) uint32 {
+ t.Helper()
+ v, ok := r.namedTokens[name]
+ if !ok {
+ t.Fatalf("no token named %s in ring", name)
+ }
+ return v
+}
+
+// TokenDesc gets a TokenDesc by its name.
+func (r *namedRing) TokenDesc(t *testing.T, name string) TokenDesc {
+ t.Helper()
+ v := r.Token(t, name)
+ for name, ing := range r.Desc.Ingesters {
+ for _, tok := range ing.Tokens {
+ if tok == v {
+ return TokenDesc{Token: tok, Ingester: name}
+ }
+ }
+ }
+ t.Fatalf("could not find %s in ring", name)
+ return TokenDesc{}
+}
+
+// BindStates binds token states to the ingesters.
+func (r *namedRing) BindStates(t *testing.T) {
+ for name, ing := range r.Desc.Ingesters {
+ for _, tok := range ing.Tokens {
+ state := ACTIVE
+ for t, s := range r.tokenStates {
+ if tok == t {
+ state = s
+ break
+ }
+ }
+
+ ing := r.Desc.Ingesters[name]
+ ing.State = state
+ r.Desc.Ingesters[name] = ing
+ }
+ }
+}
+
+// generateRing generates a namedRing given a schema describing tokens in the
+// ring. The schema is a space-delimited list of letters followed by an
+// optional number and a state suffix. If a letter is specified more than once,
+// each instance must have a number to differentiate between the two:
+//
+// A B C D1 E D2 F G
+//
+// This example creates a ring of 7 ingesters, where ingester with id "D" has
+// two tokens, D1 and D2. Tokens are assigned values of the previous token's
+// value plus one.
+//
+// Each token can be suffixed with a state marker to affect the token's state:
+//
+// .: ACTIVE
+// ?: PENDING
+// +: JOINING
+// -: LEAVING
+//
+// If no suffix is specified, the default state will be ACTIVE.
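+//
+// A short usage sketch (the schema string and token name here are
+// illustrative):
+//
+//   r := generateRing(t, "A B+ C-")
+//   tok := r.Token(t, "B+") // value of B's JOINING token
+//   healthy := r.TokenHealthChecker(Read)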
+func generateRing(t *testing.T, desc string) *namedRing { + t.Helper() + + regex, err := regexp.Compile( + "(?P[A-Z])(?P\\d*)(?P\\+|\\-|\\?|\\.)?", + ) + if err != nil { + t.Fatalf("unexpected regex err %v", err) + } + + r := &namedRing{ + Desc: &Desc{}, + namedTokens: make(map[string]uint32), + tokenStates: make(map[uint32]IngesterState), + } + r.Ingesters = make(map[string]IngesterDesc) + + tokens := strings.Split(desc, " ") + var nextToken uint32 = 1 + + for _, tokDesc := range tokens { + if tokDesc == "" { + continue + } + + submatches := regex.FindStringSubmatch(tokDesc) + if submatches == nil { + t.Fatalf("invalid token desc %s", tokDesc) + continue + } + + ingester := submatches[1] + tokenIndex := 1 + state := ACTIVE + + if submatches[2] != "" { + tokenIndex, err = strconv.Atoi(submatches[2]) + if err != nil { + t.Fatalf("invalid token index %s in %s", submatches[2], tokDesc) + } + } + if submatches[3] != "" { + switch stateStr := submatches[3]; stateStr { + case ".": + state = ACTIVE + case "?": + state = PENDING + case "+": + state = JOINING + case "-": + state = LEAVING + default: + t.Fatalf("invalid token state operator %s in %s", stateStr, tokDesc) + } + } + + ing, ok := r.Ingesters[ingester] + if !ok { + ing = IngesterDesc{ + Addr: ingester, + State: ACTIVE, + Timestamp: time.Now().Unix(), + } + } + + if tokenIndex != len(ing.Tokens)+1 { + t.Fatalf("invalid token index %d in %s, should be %d", tokenIndex, tokDesc, len(ing.Tokens)+1) + } + + ing.Tokens = append(ing.Tokens, nextToken) + r.tokenStates[nextToken] = state + r.namedTokens[tokDesc] = nextToken + r.Ingesters[ingester] = ing + nextToken++ + } + + for id, ing := range r.Ingesters { + r.Ingesters[id] = ing + } + + return r +} + +func TestGenerateRing(t *testing.T) { + tt := []struct { + desc string + ingesters int + active int + pending int + leaving int + joining int + }{ + {"A B C", 3, 3, 0, 0, 0}, + {"A1 B A2 C", 3, 4, 0, 0, 0}, + {"A1. B? 
A2+ C-", 3, 1, 1, 1, 1}, + } + + for _, tc := range tt { + t.Run(tc.desc, func(t *testing.T) { + r := generateRing(t, tc.desc) + + require.Equal(t, tc.active, len(r.FindTokensByState(ACTIVE))) + require.Equal(t, tc.pending, len(r.FindTokensByState(PENDING))) + require.Equal(t, tc.leaving, len(r.FindTokensByState(LEAVING))) + require.Equal(t, tc.joining, len(r.FindTokensByState(JOINING))) + require.Equal(t, tc.ingesters, len(r.Desc.Ingesters)) + }) + } +} + +func TestRingGet(t *testing.T) { + tt := []struct { + name string + desc string + key uint32 + op Operation + expect []string + }{ + {"all active", "A B C", 0, Read, []string{"A", "B", "C"}}, + {"wrap around", "A B C", 2, Read, []string{"C", "A", "B"}}, + {"skip joining on read", "A B+ C+ D E", 0, Read, []string{"A", "D", "E"}}, + {"skip joining on write", "A B+ C+ D E", 0, Write, []string{"A", "D", "E"}}, + {"skip leaving on write", "A B- C- D E", 0, Write, []string{"A", "D", "E"}}, + {"don't skip leaving on read", "A B- C- D E", 0, Read, []string{"A", "B", "C"}}, + } + + for _, tc := range tt { + t.Run(tc.name, func(t *testing.T) { + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.ReplicationFactor = 3 + ringConfig.KVStore.Mock = consul.NewInMemoryClient(GetCodec()) + + r, err := New(ringConfig, "ingester", "ring") + require.NoError(t, err) + // Stop is slow, run it in a goroutine and don't wait for it + defer func() { go r.Stop() }() + + nr := generateRing(t, tc.desc) + nr.BindStates(t) + + r.ringDesc = nr.Desc + r.ringTokens = r.ringDesc.GetNavigator() + rs, err := r.Get(tc.key, tc.op, nil) + require.NoError(t, err) + + names := []string{} + for _, ing := range rs.Ingesters { + names = append(names, ing.Addr) + } + + require.Equal(t, tc.expect, names) + }) + } +} diff --git a/pkg/ring/token_checker.go b/pkg/ring/token_checker.go new file mode 100644 index 00000000000..98671e4f35c --- /dev/null +++ b/pkg/ring/token_checker.go @@ -0,0 +1,190 @@ +package ring + +import ( + "context" + "flag" + "sync" + "time" + + "github.com/cortexproject/cortex/pkg/util" + "github.com/go-kit/kit/log/level" +) + +// TokenCheckerConfig is the config to configure a TokenChecker. +type TokenCheckerConfig struct { + CheckOnInterval time.Duration `yaml:"check_on_interval"` +} + +// RegisterFlags adds flags required to configure a TokenChecker to +// the provided FlagSet. +func (c *TokenCheckerConfig) RegisterFlags(f *flag.FlagSet) { + c.RegisterFlagsWithPrefix("", f) +} + +// RegisterFlagsWithPrefix adds the flags required to config a TokenChecker to +// the given FlagSet, prefixing each flag with the value provided by prefix. +func (c *TokenCheckerConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { + f.DurationVar(&c.CheckOnInterval, prefix+"token-checker.check-on-interval", time.Duration(0), "Period with which to check that all in-memory streams fall within expected token ranges. 0 to disable.") +} + +// A TokenChecker is responsible for validating that streams written to an +// ingester have tokens that fall within an expected range of values. +// Appropriate values depend on where an ingester's tokens are placed in the +// ring and what its neighbors are. +// +// Checking that newly created streams fall within the expected token +// ranges ensures that writes to the ingester are distributed properly, +// while checking for validity of streams on an interval ensures that +// an ingester's memory only contains appropriate tokens as the ingesters +// in the ring change over time. 
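+//
+// A minimal wiring sketch (the handler body and interval here are
+// illustrative, not defaults):
+//
+//   tc := NewTokenChecker(TokenCheckerConfig{
+//       CheckOnInterval: time.Minute,
+//   }, ringCfg, lifecycler, func(unexpected []uint32) {
+//       // e.g. record len(unexpected) in a metric
+//   })
+//   defer tc.Shutdown()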
+type TokenChecker struct {
+ cfg TokenCheckerConfig
+ ringCfg Config
+ lifecycler *Lifecycler
+
+ // Lifecycle control
+ ctx context.Context
+ cancel context.CancelFunc
+
+ // Updated throughout the lifetime of a TokenChecker
+ mut sync.Mutex
+ expectedRanges []TokenRange
+
+ unexpectedStreamsHandler func(streamTokens []uint32)
+}
+
+// NewTokenChecker makes and starts a new TokenChecker. unexpectedStreamsHandler will be
+// invoked by TokenChecker whenever CheckAllStreams is called, even when no unexpected
+// tokens are found. If nil, unexpectedStreamsHandler is a no-op.
+func NewTokenChecker(cfg TokenCheckerConfig, ringConfig Config, lc *Lifecycler, unexpectedStreamsHandler func(streamTokens []uint32)) *TokenChecker {
+ tc := &TokenChecker{
+ cfg: cfg,
+ ringCfg: ringConfig,
+ lifecycler: lc,
+ unexpectedStreamsHandler: unexpectedStreamsHandler,
+ }
+ tc.ctx, tc.cancel = context.WithCancel(context.Background())
+
+ go tc.loop()
+ return tc
+}
+
+// Shutdown stops the TokenChecker. It will stop watching the ring
+// for changes and stop checking that tokens are valid on an interval.
+func (tc *TokenChecker) Shutdown() {
+ tc.cancel()
+}
+
+// TokenExpected iterates over all expected ranges and returns true
+// when the token falls within one of those ranges.
+func (tc *TokenChecker) TokenExpected(token uint32) bool {
+ tc.mut.Lock()
+ defer tc.mut.Unlock()
+
+ for _, rg := range tc.expectedRanges {
+ if rg.Contains(token) {
+ return true
+ }
+ }
+
+ return false
+}
+
+// CheckAllStreams invokes TokenExpected for all current untransferred
+// tokens found in the stored IncrementalTransferer. It then invokes the
+// configured unexpectedStreamsHandler with the slice of unexpected tokens
+// found, which may be empty. Returns true when all tokens are valid.
+func (tc *TokenChecker) CheckAllStreams() bool {
+ var invalid []uint32
+
+ toks := tc.lifecycler.incTransferer.MemoryStreamTokens()
+ for _, tok := range toks {
+ valid := tc.TokenExpected(tok)
+ if !valid {
+ invalid = append(invalid, tok)
+ }
+ }
+
+ numInvalid := len(invalid)
+
+ if tc.unexpectedStreamsHandler != nil {
+ tc.unexpectedStreamsHandler(invalid)
+ }
+
+ return numInvalid == 0
+}
+
+// syncRing syncs the latest ring from the lifecycler with expected ranges.
+func (tc *TokenChecker) syncRing() {
+ r := tc.lifecycler.getLastRing()
+ if r == nil {
+ // Wait for there to be a ring available to use
+ return
+ }
+ tc.updateExpectedRanges(r)
+}
+
+// updateExpectedRanges goes through the ring and finds all expected ranges
+// given the current set of tokens in a Lifecycler.
+func (tc *TokenChecker) updateExpectedRanges(ring *Desc) {
+ var expected []TokenRange
+
+ healthy := ring.HealthChecker(Read, tc.ringCfg.HeartbeatTimeout)
+ n := ring.GetNavigator()
+
+ tokens := tc.lifecycler.getTokens()
+ for _, tok := range tokens {
+ for replica := 0; replica < tc.ringCfg.ReplicationFactor; replica++ {
+ endRanges, err := n.Predecessors(tok, replica, healthy)
+ if err != nil {
+ level.Error(util.Logger).Log("msg", "unable to update expected token ranges", "err", err)
+ return
+ }
+
+ for _, endRange := range endRanges {
+ startRange, err := n.Neighbor(endRange.Token, -1, false, healthy)
+ if err != nil {
+ level.Error(util.Logger).Log("msg", "unable to update expected token ranges", "err", err)
+ return
+ }
+
+ expected = append(expected, TokenRange{
+ From: startRange.Token,
+ To: endRange.Token,
+ })
+ }
+ }
+ }
+
+ tc.mut.Lock()
+ defer tc.mut.Unlock()
+ tc.expectedRanges = expected
+}
+
+// wrapTicker returns a channel that ticks on the duration d. If d
+// is zero or negative, it returns a channel that never produces a value.
+func wrapTicker(d time.Duration) (<-chan time.Time, func()) {
+ if d <= 0 {
+ return nil, func() {}
+ }
+
+ ticker := time.NewTicker(d)
+ return ticker.C, ticker.Stop
+}
+
+// loop syncs the ring and invokes CheckAllStreams on the check interval.
+func (tc *TokenChecker) loop() {
+ check, closeCheck := wrapTicker(tc.cfg.CheckOnInterval)
+ defer closeCheck()
+
+loop:
+ for {
+ select {
+ case <-check:
+ tc.syncRing()
+ tc.CheckAllStreams()
+ case <-tc.ctx.Done():
+ break loop
+ }
+ }
+}
diff --git a/pkg/ring/token_checker_test.go b/pkg/ring/token_checker_test.go
new file mode 100644
index 00000000000..8dada56b5c6
--- /dev/null
+++ b/pkg/ring/token_checker_test.go
@@ -0,0 +1,131 @@
+package ring
+
+import (
+ "context"
+ "fmt"
+ "testing"
+ "time"
+
+ "github.com/cortexproject/cortex/pkg/ring/kv/consul"
+ "github.com/cortexproject/cortex/pkg/util/flagext"
+ "github.com/cortexproject/cortex/pkg/util/test"
+ "github.com/stretchr/testify/require"
+ "go.uber.org/atomic"
+)
+
+func makeSequentialTokenGenerator() TokenGeneratorFunc {
+ n := 0
+
+ return func(num int, taken []uint32) []uint32 {
+ start := n
+ end := start + num
+
+ ret := make([]uint32, num)
+ for i := n; i < end; i++ {
+ ret[i-start] = uint32(i + 1)
+ }
+
+ n = end
+ return ret
+ }
+}
+
+type mockTokenCheckerTransfer struct {
+ IncrementalTransferer
+ tokens []uint32
+}
+
+func (t *mockTokenCheckerTransfer) MemoryStreamTokens() []uint32 {
+ return t.tokens
+}
+
+func TestTokenChecker(t *testing.T) {
+ var ringConfig Config
+ flagext.DefaultValues(&ringConfig)
+ ringConfig.ReplicationFactor = 1
+ codec := GetCodec()
+
+ inMemory := consul.NewInMemoryClient(codec)
+ mockClient := &MockClient{}
+ mockClient.MapFunctions(inMemory)
+ ringConfig.KVStore.Mock = mockClient
+
+ r, err := New(ringConfig, "ring", "ring")
+ require.NoError(t, err)
+ defer r.Stop()
+
+ transfer := &mockTokenCheckerTransfer{}
+ generator := makeSequentialTokenGenerator()
+
+ var lifecyclers []*Lifecycler
+ for i := 0; i < 2; i++ {
+ id := fmt.Sprintf("lc-%d", i)
+
+ lcc := testLifecyclerConfig(ringConfig, id)
+ lcc.Addr = id
+ lcc.NumTokens = 32
+ lcc.GenerateTokens = generator
+
+ lc, err := NewLifecycler(lcc, &nopFlushTransferer{true}, transfer, id, "ring", true)
+ require.NoError(t, err)
+ lc.Start()
+ defer lc.Shutdown()
+
+ test.Poll(t, 500*time.Millisecond, true, func() interface{} {
+ d, err := r.KVClient.Get(context.Background(), "ring")
+ require.NoError(t, err)
+ desc, ok := d.(*Desc)
+ if !ok {
+ return false
+ }
+ i, exist := desc.Ingesters[id]
+ return exist && i.State == ACTIVE
+ })
+
+ lifecyclers = append(lifecyclers, lc)
+ }
+
+ // Update consul for each lifecycler twice: doing it twice makes sure
+ // that each lifecycler sees the other ones.
+ for i := 0; i < 2; i++ { + for _, lc := range lifecyclers { + err := lc.updateConsul(context.Background()) + require.NoError(t, err) + } + } + + // Populate transfer with all tokens in lifecycler + for _, tok := range lifecyclers[0].getTokens() { + transfer.tokens = append(transfer.tokens, tok-1) + } + + calledHandler := atomic.NewBool(false) + streamsHandler := func(l []uint32) { + calledHandler.Store(true) + } + + tc := NewTokenChecker(TokenCheckerConfig{ + CheckOnInterval: time.Duration(50 * time.Millisecond), + }, lifecyclers[0].cfg.RingConfig, lifecyclers[0], streamsHandler) + defer tc.Shutdown() + + // Make sure ring is updated + tc.syncRing() + + // Make sure CheckToken with token == 1 returns true + test.Poll(t, time.Millisecond*500, true, func() interface{} { + return tc.TokenExpected(1) + }) + + // Make sure CheckToken with a token out of range returns false. + require.False(t, tc.TokenExpected( + transfer.tokens[len(transfer.tokens)-1]+1, + )) + + // Make sure all tokens are valid. + test.Poll(t, time.Millisecond*500, true, func() interface{} { + return tc.CheckAllStreams() + }) + + require.True(t, calledHandler.Load()) +} diff --git a/pkg/ring/token_navigator.go b/pkg/ring/token_navigator.go new file mode 100644 index 00000000000..7b5ad5e9688 --- /dev/null +++ b/pkg/ring/token_navigator.go @@ -0,0 +1,254 @@ +package ring + +import ( + "errors" + "fmt" + "sort" +) + +// TokenNavigator provides utility methods for traversing the ring. +type TokenNavigator []TokenDesc + +// findTokenIndex searches the slice of tokens for the index of +// the token matching the value provided by the token parameter. +func (n TokenNavigator) findTokenIndex(token uint32) (int, bool) { + i := sort.Search(len(n), func(x int) bool { + return n[x].Token >= token + }) + if i >= len(n) { + i = 0 + } + return i, len(n) > i && n[i].Token == token +} + +// SetIngesterTokens updates the tokens for a specific ingester with the provided +// tokens. SetIngesterTokens will remove tokens for id that are no longer in the +// list provided by tokens. +func (n *TokenNavigator) SetIngesterTokens(id string, tokens []uint32) { + oldTokens := *n + newTokens := make([]TokenDesc, 0, len(oldTokens)) + + // Copy all tokens from other ingesters + for _, tok := range oldTokens { + if tok.Ingester != id { + newTokens = append(newTokens, tok) + continue + } + } + + // Add back in our tokens + for _, tok := range tokens { + newTokens = append(newTokens, TokenDesc{Token: tok, Ingester: id}) + } + + // Re-sort the list + sort.Sort(ByToken(newTokens)) + *n = newTokens +} + +// HealthCheckFunc is a function that validates whether a given token in the +// ring is healthy and can be used. +type HealthCheckFunc func(TokenDesc) bool + +// Neighbor moves around the token list to find the ingester neighboring start determined +// by the provided offset. The healthy function determines which tokens are considered as +// potential neighbors. +// +// Only one token per ingester is considered as a neighbor. If markStartIngesterSeen is +// true, then the ingester for the starting token is included as one of the seen +// ingesters and that ingester won't be considered again. +// +// If offset has a positive value, Neighbor searches the ring clockwise. +// Otherwise, if offset has a negative value, Neighbor searches the ring counter-clockwise. +// An offset of 0 will return the starting token. 
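+//
+// For example, with healthy tokens A=1, B=2, C=3 owned by three distinct
+// ingesters (values illustrative):
+//
+//   n.Neighbor(1, 1, true, healthy)  // B's token
+//   n.Neighbor(1, -1, true, healthy) // C's token (wraps around the ring)
+//   n.Neighbor(1, 0, true, healthy)  // the starting token itself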
+func (n TokenNavigator) Neighbor(start uint32, offset int, markStartIngesterSeen bool, healthy HealthCheckFunc) (TokenDesc, error) {
+ idx, ok := n.findTokenIndex(start)
+ if !ok {
+ return TokenDesc{}, fmt.Errorf("could not find token %d in ring", start)
+ }
+ if offset == 0 {
+ return n[idx], nil
+ }
+
+ distinct := map[string]bool{}
+ if markStartIngesterSeen {
+ distinct[n[idx].Ingester] = true
+ }
+
+ numNeighbors := offset
+ neighborCount := 0
+
+ direction := 1
+
+ if offset < 0 {
+ direction = -1
+ numNeighbors = -numNeighbors
+ }
+
+ it := newTokenIterator(n, direction, idx, healthy)
+ for it.HasNext() {
+ successor := it.Next()
+ if distinct[successor.Ingester] {
+ continue
+ }
+
+ neighborCount++
+ if neighborCount == numNeighbors {
+ return successor, nil
+ }
+
+ distinct[successor.Ingester] = true
+ }
+
+ return TokenDesc{}, fmt.Errorf("could not find neighbor %d for token %d", offset, start)
+}
+
+// Predecessors finds all tokens for which start is the offset-th neighbor.
+func (n TokenNavigator) Predecessors(start uint32, offset int, healthy HealthCheckFunc) ([]TokenDesc, error) {
+ idx, ok := n.findTokenIndex(start)
+ if !ok {
+ return nil, fmt.Errorf("could not find token %d in ring", start)
+ } else if offset == 0 {
+ return []TokenDesc{n[idx]}, nil
+ } else if offset < 0 {
+ return nil, errors.New("Predecessors must be called with a non-negative offset")
+ }
+
+ var (
+ predecessors []TokenDesc
+
+ distinct = map[string]bool{}
+ startTok = n[idx]
+ )
+
+ // We'll be checking to see if a successor is our starting token,
+ // so we force our starting token to be considered healthy here.
+ healthyOrStart := func(t TokenDesc) bool {
+ if t.Token == startTok.Token {
+ return true
+ }
+ return healthy(t)
+ }
+
+ it := newTokenIterator(n, -1, idx, healthyOrStart)
+ for it.HasNext() {
+ predecessor := it.Next()
+
+ // Stop if we have already seen this ingester and have already seen
+ // enough unique ingesters overall.
+ if distinct[predecessor.Ingester] && len(distinct) == offset+1 {
+ break
+ }
+
+ // Collect the token if its successor is our starting token.
+ succ, err := n.Neighbor(predecessor.Token, offset, true, healthyOrStart)
+ if err != nil {
+ return predecessors, err
+ } else if succ.Token == start {
+ predecessors = append([]TokenDesc{predecessor}, predecessors...)
+ }
+
+ distinct[predecessor.Ingester] = true
+ }
+
+ return predecessors, nil
+}
+
+// RangeOptions configures the search parameters of TokenNavigator.InRange.
+type RangeOptions struct {
+ // Range of tokens to search.
+ Range TokenRange
+
+ // ID is the ingester ID to search for.
+ ID string
+
+ // IncludeFrom will include the From token in the range as part of the search.
+ IncludeFrom bool
+
+ // IncludeTo will include the To token in the range as part of the search.
+ IncludeTo bool
+}
+
+// InRange reports whether a given ingester ID (specified by opts.ID) has any
+// tokens within the range specified by opts.Range. The inclusivity of each side
+// of the range is determined by opts.IncludeFrom and opts.IncludeTo.
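+//
+// For example, with tokens A=1, B=2, C=3 (all healthy, distinct
+// ingesters), this reports whether B owns a token strictly inside (1, 3):
+//
+//   ok, err := n.InRange(RangeOptions{
+//       Range: TokenRange{From: 1, To: 3},
+//       ID:    "B",
+//   }, healthy)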
+func (n TokenNavigator) InRange(opts RangeOptions, healthy HealthCheckFunc) (bool, error) { + start, ok := n.findTokenIndex(opts.Range.From) + if !ok { + return false, fmt.Errorf("could not find token %d in ring", opts.Range.From) + } + + end, ok := n.findTokenIndex(opts.Range.To) + if !ok { + return false, fmt.Errorf("could not find token %d in ring", opts.Range.To) + } + + var ( + startTok = n[start] + endTok = n[end] + ) + + if opts.IncludeFrom && startTok.Ingester == opts.ID && healthy(startTok) { + return true, nil + } else if startTok.Token == endTok.Token { + return opts.IncludeTo && endTok.Ingester == opts.ID && healthy(endTok), nil + } + + // We don't pass a health check function to the iterator to make sure we don't + // accidentally iterate past an unhealthy end token. + it := newTokenIterator(n, 1, start, nil) + for it.HasNext() { + tok := it.Next() + if tok.Token == endTok.Token { + return opts.IncludeTo && tok.Ingester == opts.ID && healthy(tok), nil + } else if tok.Ingester == opts.ID && healthy(tok) { + return true, nil + } + } + + return false, nil +} + +// tokenIterator allows for iterating through tokens in the ring. tokenIterator +// will only return healthy tokens. If healthy is not defined, all tokens +// are considered healthy. +type tokenIterator struct { + tokens []TokenDesc + start int + healthy HealthCheckFunc + direction int + + idx int + next *TokenDesc +} + +func newTokenIterator(tokens []TokenDesc, direction int, start int, healthy HealthCheckFunc) tokenIterator { + return tokenIterator{tokens: tokens, direction: direction, start: start, healthy: healthy, idx: start} +} + +func (it *tokenIterator) HasNext() bool { + for { + it.idx += it.direction + if it.idx < 0 { + it.idx = len(it.tokens) - 1 + } else { + it.idx %= len(it.tokens) + } + + if it.idx == it.start { + it.next = nil + return false + } + tok := it.tokens[it.idx] + if it.healthy == nil || it.healthy(tok) { + it.next = &tok + return true + } + } +} + +func (it *tokenIterator) Next() TokenDesc { + if it.next == nil { + return TokenDesc{} + } + return *it.next +} diff --git a/pkg/ring/token_navigator_test.go b/pkg/ring/token_navigator_test.go new file mode 100644 index 00000000000..0b62c115833 --- /dev/null +++ b/pkg/ring/token_navigator_test.go @@ -0,0 +1,226 @@ +package ring + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestTokenNavigator_SetIngesterTokens(t *testing.T) { + tt := []struct { + name string + before []TokenDesc + changeID string + setTokens []uint32 + expect []TokenDesc + }{ + { + name: "empty navigator", + before: []TokenDesc{}, + changeID: "foo", + setTokens: []uint32{1}, + expect: []TokenDesc{ + {Ingester: "foo", Token: 1}, + }, + }, + + { + name: "should sort tokens", + before: []TokenDesc{}, + changeID: "foo", + setTokens: []uint32{5, 1, 3}, + expect: []TokenDesc{ + {Ingester: "foo", Token: 1}, + {Ingester: "foo", Token: 3}, + {Ingester: "foo", Token: 5}, + }, + }, + + { + name: "should override tokens from same ingester", + before: []TokenDesc{ + {Ingester: "foo", Token: 1}, + {Ingester: "foo", Token: 3}, + {Ingester: "foo", Token: 5}, + }, + changeID: "foo", + setTokens: []uint32{1, 9}, + expect: []TokenDesc{ + {Ingester: "foo", Token: 1}, + {Ingester: "foo", Token: 9}, + }, + }, + + { + name: "should keep tokens from other ingesters", + before: []TokenDesc{ + {Ingester: "bar", Token: 3}, + {Ingester: "bar", Token: 5}, + }, + changeID: "foo", + setTokens: []uint32{1, 9}, + expect: []TokenDesc{ + {Ingester: "foo", Token: 1}, + {Ingester: "bar", 
Token: 3}, + {Ingester: "bar", Token: 5}, + {Ingester: "foo", Token: 9}, + }, + }, + } + + for _, tc := range tt { + t.Run(tc.name, func(t *testing.T) { + navigator := TokenNavigator(tc.before) + navigator.SetIngesterTokens(tc.changeID, tc.setTokens) + require.Equal(t, tc.expect, []TokenDesc(navigator)) + }) + } +} + +func TestTokenNavigator_InRange(t *testing.T) { + tt := []struct { + desc string + opts RangeOptions + expect bool + }{ + {"A B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "C"}, true}, + {"A B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "F"}, false}, + {"A B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "A"}, false}, + {"A B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "E"}, false}, + + // Inclusivity + {"A B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "A", IncludeFrom: true}, true}, + {"A B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "E", IncludeTo: true}, true}, + {"A B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "C", IncludeFrom: true}, true}, + {"A B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "C", IncludeTo: true}, true}, + {"A B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "A", IncludeFrom: true, IncludeTo: true}, true}, + {"A B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "E", IncludeFrom: true, IncludeTo: true}, true}, + {"A B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "C", IncludeFrom: true, IncludeTo: true}, true}, + {"A B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "F", IncludeFrom: true, IncludeTo: true}, false}, + + // only consider healthy tokens + {"A B C- D E", RangeOptions{Range: TokenRange{1, 5}, ID: "C"}, true}, + {"A B C+ D E", RangeOptions{Range: TokenRange{1, 5}, ID: "C"}, false}, + {"A+ B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "A", IncludeFrom: true}, false}, + {"A+ B C D E", RangeOptions{Range: TokenRange{1, 5}, ID: "A", IncludeFrom: false}, false}, + {"A B C D E+", RangeOptions{Range: TokenRange{1, 5}, ID: "E", IncludeTo: true}, false}, + {"A B C D E+", RangeOptions{Range: TokenRange{1, 5}, ID: "E", IncludeTo: false}, false}, + } + + for _, tc := range tt { + t.Run(tc.desc, func(t *testing.T) { + r := generateRing(t, tc.desc) + healthy := r.TokenHealthChecker(Read) + n := r.GetNavigator() + + ok, err := n.InRange(tc.opts, healthy) + require.NoError(t, err) + require.Equal(t, tc.expect, ok) + }) + } +} + +func TestTokenNavigator_Predecessors(t *testing.T) { + tt := []struct { + desc string + token string + n int + expect []string + }{ + // Simple cases + {"A B C", "C", 0, []string{"C"}}, + {"A B C", "C", 1, []string{"B"}}, + {"A B C", "C", 2, []string{"A"}}, + + // Handling duplicates + {"A1 A2 B1 B2 B3 C1 C2 D", "D", 0, []string{"D"}}, + {"A1 A2 B1 B2 B3 C1 C2 D", "D", 1, []string{"C1", "C2"}}, + {"A1 A2 B1 B2 B3 C1 C2 D", "D", 2, []string{"B1", "B2", "B3"}}, + {"A1 A2 B1 B2 B3 C1 C2 D", "D", 3, []string{"A1", "A2"}}, + + {"A1 B1 C1 A2 B2 A3 Y", "Y", 0, []string{"Y"}}, + {"A1 B1 C1 A2 B2 A3 Y", "Y", 1, []string{"A3"}}, + {"A1 B1 C1 A2 B2 A3 Y", "Y", 2, []string{"A2", "B2"}}, + {"A1 B1 C1 A2 B2 A3 Y", "Y", 3, []string{"A1", "B1", "C1"}}, + + // Wrap around ring + {"A B C", "A", 0, []string{"A"}}, + {"A B C", "A", 1, []string{"C"}}, + {"A B C", "A", 2, []string{"B"}}, + + // Only consider healthy tokens + {"A B? C- D+ E", "E", 0, []string{"E"}}, + {"A B? C- D+ E", "E", 1, []string{"C-"}}, + {"A B? 
C- D+ E", "E", 2, []string{"B?"}}, + } + + for _, tc := range tt { + t.Run(tc.desc, func(t *testing.T) { + r := generateRing(t, tc.desc) + healthy := r.TokenHealthChecker(Read) + n := r.GetNavigator() + + start := r.TokenDesc(t, tc.token) + res, err := n.Predecessors(start.Token, tc.n, healthy) + require.NoError(t, err) + + var foundTokens []string + for _, res := range res { + foundTokens = append(foundTokens, r.TokenName(t, res.Token)) + } + + require.Equal(t, tc.expect, foundTokens) + }) + } +} + +func TestTokenNavigator_Successor(t *testing.T) { + tt := []struct { + desc string + token string + n int + expect string + includeStart bool + }{ + // Simple cases + {"A B C", "A", 0, "A", true}, + {"A B C", "A", 1, "B", true}, + {"A B C", "A", 2, "C", true}, + + // Handling duplicates + {"A1 A2 B1 B2 B3 C1 C2 D", "A1", 0, "A", true}, + {"A1 A2 B1 B2 B3 C1 C2 D", "A1", 1, "B", true}, + {"A1 A2 B1 B2 B3 C1 C2 D", "A1", 2, "C", true}, + {"A1 A2 B1 B2 B3 C1 C2 D", "A1", 3, "D", true}, + + // Duplicates ignoring own token + {"A1 A2 B1 B2 B3 C1 C2 D", "A1", 0, "A", false}, + {"A1 A2 B1 B2 B3 C1 C2 D", "A1", 1, "A", false}, + {"A1 A2 B1 B2 B3 C1 C2 D", "A1", 2, "B", false}, + {"A1 A2 B1 B2 B3 C1 C2 D", "A1", 3, "C", false}, + + // Wrap around ring + {"A B C", "C", 0, "C", true}, + {"A B C", "C", 1, "A", true}, + {"A B C", "C", 2, "B", true}, + + // Only consider healthy tokens + {"A B? C- D+ E", "A", 0, "A", true}, + {"A B? C- D+ E", "A", 1, "B", true}, + {"A B? C- D+ E", "A", 2, "C", true}, + } + + for _, tc := range tt { + t.Run(tc.desc, func(t *testing.T) { + r := generateRing(t, tc.desc) + healthy := r.TokenHealthChecker(Read) + n := r.GetNavigator() + + start := r.TokenDesc(t, tc.token) + res, err := n.Neighbor(start.Token, tc.n, tc.includeStart, healthy) + + require.NoError(t, err) + require.Equal(t, tc.expect, res.Ingester) + }) + } +} diff --git a/pkg/ring/util.go b/pkg/ring/util.go index e30166fa7bc..221260934cb 100644 --- a/pkg/ring/util.go +++ b/pkg/ring/util.go @@ -1,12 +1,28 @@ package ring import ( + "fmt" "math/rand" + "strings" "time" ) +// PrintableRanges wraps a slice of TokenRanges and provides a String +// method so it can be printed and displayed to the user. +type PrintableRanges []TokenRange + +func (r PrintableRanges) String() string { + strs := make([]string, len(r)) + for i, rg := range r { + strs[i] = fmt.Sprintf("(%d, %d)", rg.From, rg.To) + } + return strings.Join(strs, ", ") +} + // GenerateTokens make numTokens unique random tokens, none of which clash // with takenTokens. +// +// GenerateTokens is the default implementation of TokenGeneratorFunc. 
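+//
+// For example (the taken set here is illustrative):
+//
+//   taken := []uint32{10, 20, 30}
+//   tokens := GenerateTokens(4, taken) // four new tokens, none of them taken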
func GenerateTokens(numTokens int, takenTokens []uint32) []uint32 { r := rand.New(rand.NewSource(time.Now().UnixNano())) @@ -16,6 +32,7 @@ func GenerateTokens(numTokens int, takenTokens []uint32) []uint32 { } tokens := []uint32{} + for i := 0; i < numTokens; { candidate := r.Uint32() if used[candidate] { diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index 79c64c41450..cf306cac290 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -163,7 +163,7 @@ func NewRuler(cfg Config, engine *promql.Engine, queryable promStorage.Queryable // the ruler if cfg.EnableSharding { lifecyclerCfg := cfg.Ring.ToLifecyclerConfig() - ruler.lifecycler, err = ring.NewLifecycler(lifecyclerCfg, ruler, "ruler", ring.RulerRingKey, true) + ruler.lifecycler, err = ring.NewLifecycler(lifecyclerCfg, ruler, nil, "ruler", ring.RulerRingKey, true) if err != nil { return nil, err } diff --git a/pkg/util/test/poll.go b/pkg/util/test/poll.go index 115b97f2636..05ce8edf55f 100644 --- a/pkg/util/test/poll.go +++ b/pkg/util/test/poll.go @@ -7,7 +7,7 @@ import ( ) // Poll repeatedly evaluates condition until we either timeout, or it succeeds. -func Poll(t *testing.T, d time.Duration, want interface{}, have func() interface{}) { +func Poll(t testing.TB, d time.Duration, want interface{}, have func() interface{}) { t.Helper() deadline := time.Now().Add(d) for { From 24fac662409d24731eb593a13c7808d41a70752a Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Fri, 31 Jan 2020 13:39:09 -0500 Subject: [PATCH 02/11] address lint errors Signed-off-by: Robert Fratto --- pkg/ingester/incremental_transfer.go | 5 ++++- pkg/ingester/ingester.go | 2 +- pkg/ingester/transfer_test.go | 12 ++++++------ pkg/ring/incremental_transfer.go | 10 ++++++++-- pkg/ring/lifecycler_test.go | 3 --- pkg/ring/ring_test.go | 2 +- 6 files changed, 20 insertions(+), 14 deletions(-) diff --git a/pkg/ingester/incremental_transfer.go b/pkg/ingester/incremental_transfer.go index 99dd2ec3092..e8277e7995e 100644 --- a/pkg/ingester/incremental_transfer.go +++ b/pkg/ingester/incremental_transfer.go @@ -103,7 +103,10 @@ func (i *Ingester) BlockRanges(ranges []ring.TokenRange) { go func() { <-time.After(i.cfg.RangeBlockPeriod) - i.UnblockRanges(context.Background(), &client.UnblockRangesRequest{Ranges: ranges}) + _, err := i.UnblockRanges(context.Background(), &client.UnblockRangesRequest{Ranges: ranges}) + if err != nil { + level.Error(util.Logger).Log("msg", "error unblocking ranges", "err", err) + } }() } diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index 348ea4341d1..f05a067a2b3 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -180,7 +180,7 @@ func New(cfg Config, clientConfig client.Config, limits *validation.Overrides, c chunkStore: chunkStore, quit: make(chan struct{}), blockedRanges: make(map[ring.TokenRange]bool), - flushQueues: make([]*util.PriorityQueue, cfg.ConcurrentFlushes, cfg.ConcurrentFlushes), + flushQueues: make([]*util.PriorityQueue, cfg.ConcurrentFlushes), } var err error diff --git a/pkg/ingester/transfer_test.go b/pkg/ingester/transfer_test.go index 3f7cfb9fdd6..bffa9721a41 100644 --- a/pkg/ingester/transfer_test.go +++ b/pkg/ingester/transfer_test.go @@ -494,7 +494,7 @@ func (f *testIngesterFactory) getIngester(t require.TestingT) *Ingester { f.ingestersMtx.Lock() defer f.ingestersMtx.Unlock() - f.ingesters[fmt.Sprintf("%s", cfg.LifecyclerConfig.ID)] = ing + f.ingesters[cfg.LifecyclerConfig.ID] = ing f.ingesters[fmt.Sprintf("%s:0", cfg.LifecyclerConfig.ID)] = ing // NB there's some kind of race 
condition with the in-memory KV client when @@ -558,9 +558,9 @@ func (c *testIngesterClient) QueryStream(ctx context.Context, in *client.QueryRe go func() { srv := testIngesterQueryStreamServer{ctx: ctx, ch: ch} - c.i.QueryStream(in, &srv) - }() + _ = c.i.QueryStream(in, &srv) + }() cli := testIngesterQueryStreamClient{ch: ch} return &cli, nil } @@ -600,7 +600,7 @@ func (c *testIngesterClient) TransferChunks(ctx context.Context, opts ...grpc.Ca } go func() { - c.i.TransferChunks(&srv) + _ = c.i.TransferChunks(&srv) }() return &cli, nil @@ -645,7 +645,7 @@ func (c *testIngesterClient) TransferChunksSubset(ctx context.Context, opts ...g } go func() { - c.i.TransferChunksSubset(&srv) + _ = c.i.TransferChunksSubset(&srv) }() return &cli, nil @@ -658,7 +658,7 @@ func (c *testIngesterClient) GetChunksSubset(ctx context.Context, in *client.Get cli := testTimeSeriesClient{ctx: ctx, ch: ch} go func() { - c.i.GetChunksSubset(in, &srv) + _ = c.i.GetChunksSubset(in, &srv) close(ch) }() diff --git a/pkg/ring/incremental_transfer.go b/pkg/ring/incremental_transfer.go index b32935e3d2a..3dbd91d6834 100644 --- a/pkg/ring/incremental_transfer.go +++ b/pkg/ring/incremental_transfer.go @@ -95,7 +95,10 @@ func (i *Lifecycler) joinIncrementalTransfer(ctx context.Context) error { // Make sure that we set all tokens to ACTIVE, even when we fail. defer func() { i.setTokens(i.getTransitioningTokens()) - i.changeState(ctx, ACTIVE) + err := i.changeState(ctx, ACTIVE) + if err != nil { + level.Error(util.Logger).Log("msg", "failed to force state to active", "err", err) + } }() r := i.getLastRing() @@ -194,7 +197,10 @@ func (i *Lifecycler) leaveIncrementalTransfer(ctx context.Context) error { i.setTokens(nil) i.setState(LEAVING) - i.updateConsul(ctx) + err := i.updateConsul(ctx) + if err != nil { + level.Error(util.Logger).Log("msg", "failed to update consul after leave incremental transfer", "err", err) + } }() r := i.getLastRing() diff --git a/pkg/ring/lifecycler_test.go b/pkg/ring/lifecycler_test.go index 5a7f697fe98..c8fbb52f3e3 100644 --- a/pkg/ring/lifecycler_test.go +++ b/pkg/ring/lifecycler_test.go @@ -714,9 +714,6 @@ func (j *mockIncrementalJoin) RequestChunkRanges(_ context.Context, ranges []Tok return nil } -type casCallback = func(in interface{}) (out interface{}, retry bool, err error) -type casFunc = func(ctx context.Context, key string, f casCallback) error - func waitIngesterState(t *testing.T, r *Ring, id string, waitTime time.Duration, joined bool) { t.Helper() diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index 6327169a4f0..1028b72ed0f 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -340,7 +340,7 @@ func generateRing(t *testing.T, desc string) *namedRing { t.Helper() regex, err := regexp.Compile( - "(?P[A-Z])(?P\\d*)(?P\\+|\\-|\\?|\\.)?", + `(?P[A-Z])(?P\d*)(?P\+|\-|\?|\.)?`, ) if err != nil { t.Fatalf("unexpected regex err %v", err) From a38e6f256a2fec7f93a26200663cbcb19f7e2e0c Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Fri, 31 Jan 2020 13:43:48 -0500 Subject: [PATCH 03/11] update config file reference Signed-off-by: Robert Fratto --- docs/configuration/config-file-reference.md | 48 +++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index f5476173763..bfea07be3fa 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -454,11 +454,46 @@ lifecycler: # CLI flag: -ingester.tokens-file-path 
[tokens_file_path: <string> | default = ""]
 
+ # Request chunks from neighboring ingesters on join. Disables the handoff
+ # process when set and ignores the -ingester.join-after flag.
+ # CLI flag: -ingester.join-incremental-transfer
+ [join_incremental_transfer: <boolean> | default = false]
+
+ # Send chunks to neighboring ingesters on leave. Takes precedence over chunk
+ # flushing when set and disables handoff.
+ # CLI flag: -ingester.leave-incremental-transfer
+ [leave_incremental_transfer: <boolean> | default = false]
+
+ # Minimum amount of time to wait before incrementally joining the ring. Allows
+ # time to receive ring updates so two ingesters do not join at once.
+ # CLI flag: -ingester.min-incremental-join-jitter
+ [min_incremental_join_jitter: <duration> | default = 0s]
+
+ # Maximum amount of time to wait before incrementally joining the ring. Allows
+ # time to receive ring updates so two ingesters do not join at once.
+ # CLI flag: -ingester.max-incremental-join-jitter
+ [max_incremental_join_jitter: <duration> | default = 2s]
+
+ # How long after the incremental join process to notify the target ingesters
+ # to clean up any blocked token ranges.
+ # CLI flag: -ingester.transfer-finish-delay
+ [transfer_finish_delay: <duration> | default = 5s]
+
+token_checker:
+ # Period with which to check that all in-memory streams fall within expected
+ # token ranges. 0 to disable.
+ # CLI flag: -token-checker.check-on-interval
+ [check_on_interval: <duration> | default = 0s]
+
 # Number of times to try and transfer chunks before falling back to flushing.
 # Negative value or zero disables hand-over.
 # CLI flag: -ingester.max-transfer-retries
 [max_transfer_retries: <int> | default = 10]
 
+# Period after which write blocks on ranges expire.
+# CLI flag: -ingester.range-block-period
+[range_block_period: <duration> | default = 1m0s]
+
 # Period with which to attempt to flush chunks.
 # CLI flag: -ingester.flush-period
 [flushcheckperiod: <duration> | default = 1m0s]
@@ -497,6 +532,19 @@ lifecycler:
 # CLI flag: -ingester.spread-flushes
 [spreadflushes: <boolean> | default = false]
 
+# Check that newly created streams fall within expected token ranges
+# CLI flag: -ingester.check-token-on-create
+[check_token_on_create: <boolean> | default = false]
+
+# Check that existing streams appended to fall within expected token ranges
+# CLI flag: -ingester.check-token-on-append
+[check_token_on_append: <boolean> | default = false]
+
+# Check that streams transferred in using the transfer mechanism fall within
+# expected token ranges
+# CLI flag: -ingester.check-token-on-transfer
+[check_token_on_transfer: <boolean> | default = false]
+
 # Period with which to update the per-user ingestion rates.
 # CLI flag: -ingester.rate-update-period
 [rateupdateperiod: <duration> | default = 15s]
From 0763ba0add12a3410d885a9692ed670d9eaba528 Mon Sep 17 00:00:00 2001
From: Robert Fratto
Date: Fri, 31 Jan 2020 13:55:31 -0500
Subject: [PATCH 04/11] pkg/ring: fix data race

Signed-off-by: Robert Fratto
---
 pkg/ring/lifecycler.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/ring/lifecycler.go b/pkg/ring/lifecycler.go
index 5c0ed338053..ffccee9a671 100644
--- a/pkg/ring/lifecycler.go
+++ b/pkg/ring/lifecycler.go
@@ -863,7 +863,7 @@ func (i *Lifecycler) updateConsul(ctx context.Context) error {
 }
 
 // Re-sync token states for the current lifecycler if they've changed.
From 0763ba0add12a3410d885a9692ed670d9eaba528 Mon Sep 17 00:00:00 2001
From: Robert Fratto
Date: Fri, 31 Jan 2020 13:55:31 -0500
Subject: [PATCH 04/11] pkg/ring: fix data race

Signed-off-by: Robert Fratto
---
 pkg/ring/lifecycler.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/ring/lifecycler.go b/pkg/ring/lifecycler.go
index 5c0ed338053..ffccee9a671 100644
--- a/pkg/ring/lifecycler.go
+++ b/pkg/ring/lifecycler.go
@@ -863,7 +863,7 @@ func (i *Lifecycler) updateConsul(ctx context.Context) error {
 		}
 
 		// Re-sync token states for the current lifecycler if they've changed.
-		ringDesc.SetIngesterTokens(i.ID, i.tokens)
+		ringDesc.SetIngesterTokens(i.ID, i.getTokens())
 		return ringDesc, true, nil
 	})

From 432c1bcf7dd96fa76e75fabe461e90eb4c3539bd Mon Sep 17 00:00:00 2001
From: Robert Fratto
Date: Mon, 3 Feb 2020 13:14:14 -0500
Subject: [PATCH 05/11] address some review feedback

Signed-off-by: Robert Fratto
---
 docs/configuration/config-file-reference.md |  15 +-
 pkg/ingester/client/cortex.pb.go            | 214 ++++++++++----------
 pkg/ingester/client/cortex.proto            |   4 +-
 pkg/ingester/incremental_transfer.go        |  27 ++-
 pkg/ingester/ingester.go                    |  43 ++--
 pkg/ingester/transfer.go                    |   8 +-
 pkg/ingester/transfer_test.go               |   4 +-
 pkg/ring/lifecycler.go                      |   2 +-
 pkg/ring/model.go                           |  25 ++-
 pkg/ring/ring.go                            |  16 ++
 pkg/ring/ring.pb.go                         |   4 +-
 pkg/ring/ring.proto                         |  42 ++--
 12 files changed, 208 insertions(+), 196 deletions(-)

diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md
index bfea07be3fa..5d895901634 100644
--- a/docs/configuration/config-file-reference.md
+++ b/docs/configuration/config-file-reference.md
@@ -532,18 +532,9 @@ token_checker:
 # CLI flag: -ingester.spread-flushes
 [spreadflushes: <boolean> | default = false]
 
-# Check that newly created streams fall within expected token ranges
-# CLI flag: -ingester.check-token-on-create
-[check_token_on_create: <boolean> | default = false]
-
-# Check that existing streams appended to fall within expected token ranges
-# CLI flag: -ingester.check-token-on-append
-[check_token_on_append: <boolean> | default = false]
-
-# Check that streams transferred in using the transfer mechanism fall within
-# expected token ranges
-# CLI flag: -ingester.check-token-on-transfer
-[check_token_on_transfer: <boolean> | default = false]
+# Check tokens for streams that are created or appended to.
+# CLI flag: -ingester.check-tokens
+[check_tokens: <boolean> | default = false]
 
 # Period with which to update the per-user ingestion rates.
# CLI flag: -ingester.rate-update-period diff --git a/pkg/ingester/client/cortex.pb.go b/pkg/ingester/client/cortex.pb.go index adcc8daacaf..bda992dcba8 100644 --- a/pkg/ingester/client/cortex.pb.go +++ b/pkg/ingester/client/cortex.pb.go @@ -1596,94 +1596,94 @@ func init() { proto.RegisterFile("cortex.proto", fileDescriptor_893a47d0a749d749 var fileDescriptor_893a47d0a749d749 = []byte{ // 1411 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0x57, 0xcd, 0x6e, 0xdb, 0xc6, - 0x16, 0x26, 0x2d, 0x59, 0xb6, 0x8e, 0x7e, 0x2c, 0x8f, 0xed, 0xd8, 0x61, 0x6e, 0xe8, 0xdc, 0x01, - 0x92, 0x6b, 0xdc, 0x7b, 0x23, 0xa7, 0x0e, 0xd2, 0x66, 0xd1, 0x20, 0x90, 0x13, 0x3b, 0x51, 0x61, - 0x3b, 0xce, 0x48, 0x6e, 0x8b, 0x02, 0x85, 0x40, 0x4b, 0x63, 0x9b, 0x30, 0x7f, 0x14, 0xce, 0x30, - 0x68, 0x76, 0x01, 0xfa, 0x00, 0x2d, 0xd0, 0x4d, 0x1f, 0xa1, 0xeb, 0x6e, 0xda, 0x47, 0xc8, 0x32, - 0xe8, 0x2a, 0xe8, 0x22, 0x68, 0x94, 0x4d, 0x97, 0x79, 0x84, 0x82, 0x33, 0x43, 0x8a, 0x54, 0x24, - 0xd4, 0x6d, 0x9a, 0x8d, 0xc0, 0x39, 0x3f, 0xdf, 0x9c, 0xdf, 0x39, 0x47, 0x50, 0xee, 0xfa, 0x01, - 0xa7, 0x5f, 0xd5, 0xfb, 0x81, 0xcf, 0x7d, 0x54, 0x90, 0x27, 0xe3, 0xea, 0xb1, 0xcd, 0x4f, 0xc2, - 0xc3, 0x7a, 0xd7, 0x77, 0xd7, 0x8f, 0xfd, 0x63, 0x7f, 0x5d, 0xb0, 0x0f, 0xc3, 0x23, 0x71, 0x12, - 0x07, 0xf1, 0x25, 0xd5, 0x8c, 0x85, 0xfe, 0xe9, 0xf1, 0x7a, 0x60, 0x7b, 0xf2, 0x47, 0x12, 0xf1, - 0xcf, 0x3a, 0x94, 0x3f, 0x0b, 0x6c, 0x4e, 0x09, 0x7d, 0x14, 0x52, 0xc6, 0xd1, 0x1e, 0x00, 0xb7, - 0x5d, 0xca, 0x68, 0x60, 0x53, 0xb6, 0xa2, 0x5f, 0xca, 0xad, 0x95, 0x36, 0x50, 0x5d, 0xdd, 0xdf, - 0xb6, 0x5d, 0xda, 0x12, 0x9c, 0x4d, 0xe3, 0xd9, 0xcb, 0x55, 0xed, 0xd7, 0x97, 0xab, 0x68, 0x3f, - 0xa0, 0x96, 0xe3, 0xf8, 0xdd, 0x76, 0xa2, 0x45, 0x52, 0x08, 0xe8, 0x23, 0x28, 0xb4, 0xfc, 0x30, - 0xe8, 0xd2, 0x95, 0xa9, 0x4b, 0xfa, 0x5a, 0x75, 0x63, 0x35, 0xc6, 0x4a, 0xdf, 0x5a, 0x97, 0x22, - 0x5b, 0x5e, 0xe8, 0x92, 0x02, 0x13, 0xdf, 0x78, 0x15, 0x60, 0x48, 0x45, 0x33, 0x90, 0x6b, 0xec, - 0x37, 0x6b, 0x1a, 0x9a, 0x85, 0x3c, 0x39, 0xd8, 0xd9, 0xaa, 0xe9, 0x78, 0x0e, 0x2a, 0x0a, 0x83, - 0xf5, 0x7d, 0x8f, 0x51, 0x7c, 0x0b, 0x4a, 0x84, 0x5a, 0xbd, 0xd8, 0x93, 0x3a, 0xcc, 0x3c, 0x0a, - 0xd3, 0x6e, 0x2c, 0xc6, 0x57, 0x3f, 0x0c, 0x69, 0xf0, 0x44, 0x89, 0x91, 0x58, 0x08, 0xdf, 0x86, - 0xb2, 0x54, 0x97, 0x70, 0x68, 0x1d, 0x66, 0x02, 0xca, 0x42, 0x87, 0xc7, 0xfa, 0x4b, 0x23, 0xfa, - 0x52, 0x8e, 0xc4, 0x52, 0xf8, 0x7b, 0x1d, 0xca, 0x69, 0x68, 0xf4, 0x7f, 0x40, 0x8c, 0x5b, 0x01, - 0xef, 0x88, 0x78, 0x70, 0xcb, 0xed, 0x77, 0xdc, 0x08, 0x4c, 0x5f, 0xcb, 0x91, 0x9a, 0xe0, 0xb4, - 0x63, 0xc6, 0x2e, 0x43, 0x6b, 0x50, 0xa3, 0x5e, 0x2f, 0x2b, 0x3b, 0x25, 0x64, 0xab, 0xd4, 0xeb, - 0xa5, 0x25, 0xaf, 0xc1, 0xac, 0x6b, 0xf1, 0xee, 0x09, 0x0d, 0xd8, 0x4a, 0x2e, 0xeb, 0xda, 0x8e, - 0x75, 0x48, 0x9d, 0x5d, 0xc9, 0x24, 0x89, 0x14, 0x6e, 0x42, 0x25, 0x63, 0x34, 0xba, 0x79, 0xc6, - 0x34, 0xe7, 0xa3, 0x34, 0xa7, 0x13, 0x8a, 0xdb, 0xb0, 0x20, 0xa0, 0x5a, 0x3c, 0xa0, 0x96, 0x9b, - 0x00, 0xde, 0x1a, 0x03, 0xb8, 0xfc, 0x36, 0xe0, 0x9d, 0x93, 0xd0, 0x3b, 0x1d, 0x83, 0x7a, 0x1d, - 0x90, 0x30, 0xfd, 0x53, 0xcb, 0x09, 0x29, 0x8b, 0x03, 0x78, 0x11, 0xc0, 0x89, 0xa8, 0x1d, 0xcf, - 0x72, 0xa9, 0x08, 0x5c, 0x91, 0x14, 0x05, 0x65, 0xcf, 0x72, 0x29, 0xbe, 0x09, 0x0b, 0x19, 0x25, - 0x65, 0xca, 0xbf, 0xa1, 0x2c, 0xb5, 0x1e, 0x0b, 0xba, 0x30, 0xa6, 0x48, 0x4a, 0xce, 0x50, 0x14, - 0x2f, 0xc0, 0xfc, 0x4e, 0x0c, 0x13, 0xdf, 0x86, 0x6f, 0x28, 0x1b, 0x14, 0x51, 0xa1, 0xad, 0x42, - 0x69, 0x68, 0x43, 0x0c, 0x06, 0x89, 0x11, 0x0c, 0x23, 0xa8, 0x1d, 0x30, 0x1a, 
0xb4, 0xb8, 0xc5, - 0x13, 0xa8, 0x9f, 0x74, 0x98, 0x4f, 0x11, 0x15, 0xd4, 0x65, 0xa8, 0xda, 0xde, 0x31, 0x65, 0xdc, - 0xf6, 0xbd, 0x4e, 0x60, 0x71, 0xe9, 0x92, 0x4e, 0x2a, 0x09, 0x95, 0x58, 0x9c, 0x46, 0x5e, 0x7b, - 0xa1, 0xdb, 0x51, 0xa1, 0x8c, 0x4a, 0x20, 0x4f, 0x8a, 0x5e, 0xe8, 0xca, 0x08, 0x46, 0x55, 0x65, - 0xf5, 0xed, 0xce, 0x08, 0x52, 0x4e, 0x20, 0xd5, 0xac, 0xbe, 0xdd, 0xcc, 0x80, 0xd5, 0x61, 0x21, - 0x08, 0x1d, 0x3a, 0x2a, 0x9e, 0x17, 0xe2, 0xf3, 0x11, 0x2b, 0x23, 0x8f, 0xbf, 0x84, 0x85, 0xc8, - 0xf0, 0xe6, 0xdd, 0xac, 0xe9, 0xcb, 0x30, 0x13, 0x32, 0x1a, 0x74, 0xec, 0x9e, 0x4a, 0x43, 0x21, - 0x3a, 0x36, 0x7b, 0xe8, 0x2a, 0xe4, 0x7b, 0x16, 0xb7, 0x84, 0x99, 0xa5, 0x8d, 0xf3, 0x71, 0xc6, - 0xdf, 0x72, 0x9e, 0x08, 0x31, 0x7c, 0x0f, 0x50, 0xc4, 0x62, 0x59, 0xf4, 0x0f, 0x60, 0x9a, 0x45, - 0x04, 0x55, 0x37, 0x17, 0xd2, 0x28, 0x23, 0x96, 0x10, 0x29, 0x89, 0x7f, 0xd4, 0xc1, 0xdc, 0xa5, - 0x3c, 0xb0, 0xbb, 0x6c, 0xdb, 0x0f, 0xd2, 0x65, 0xcf, 0xde, 0x77, 0xfb, 0xdd, 0x84, 0x72, 0xdc, - 0x58, 0x1d, 0x46, 0xb9, 0x6a, 0xc1, 0xa5, 0x71, 0x2d, 0xc8, 0x48, 0x29, 0x16, 0x6d, 0x51, 0x8e, - 0x9b, 0xb0, 0x3a, 0xd1, 0x66, 0x15, 0x8a, 0x2b, 0x50, 0x70, 0x85, 0x88, 0x8a, 0x45, 0x35, 0x86, - 0x95, 0x8a, 0x44, 0x71, 0xf1, 0x0b, 0x1d, 0xe6, 0x46, 0xda, 0x2a, 0x72, 0xe1, 0x28, 0xf0, 0x5d, - 0x95, 0xeb, 0x74, 0xb6, 0xaa, 0x11, 0xbd, 0xa9, 0xc8, 0xcd, 0x5e, 0x3a, 0x9d, 0x53, 0x99, 0x74, - 0xde, 0x86, 0x82, 0x28, 0xed, 0xf8, 0x61, 0x99, 0xcf, 0x78, 0xb5, 0x6f, 0xd9, 0xc1, 0xe6, 0xa2, - 0x7a, 0xf9, 0xcb, 0x82, 0xd4, 0xe8, 0x59, 0x7d, 0x4e, 0x03, 0xa2, 0xd4, 0xd0, 0xff, 0xa0, 0xd0, - 0x8d, 0x8c, 0x61, 0x2b, 0x79, 0x01, 0x50, 0x89, 0x01, 0xd2, 0x9d, 0xaf, 0x44, 0xd0, 0x22, 0x4c, - 0x73, 0xff, 0x94, 0x7a, 0x2b, 0xd3, 0x97, 0xf4, 0xb5, 0x0a, 0x91, 0x07, 0xfc, 0x8d, 0x0e, 0xd3, - 0xd2, 0xa1, 0xf7, 0x95, 0x41, 0x03, 0x66, 0xa9, 0xd7, 0xf5, 0x7b, 0xb6, 0x77, 0x2c, 0x1a, 0x67, - 0x9a, 0x24, 0x67, 0x84, 0x54, 0x41, 0x47, 0x1d, 0x52, 0x56, 0x55, 0xbb, 0x02, 0xe7, 0xda, 0x81, - 0xe5, 0xb1, 0x23, 0x1a, 0x08, 0xc3, 0x92, 0x74, 0xe1, 0xef, 0x74, 0x80, 0x61, 0x1a, 0x52, 0xe1, - 0xd3, 0xff, 0x5e, 0xf8, 0xea, 0x30, 0xc3, 0x2c, 0xb7, 0xef, 0x88, 0xc6, 0xcf, 0xe4, 0xbf, 0x25, - 0xc8, 0x2a, 0x80, 0xb1, 0xd0, 0x30, 0x82, 0xb9, 0x74, 0x04, 0x9f, 0xea, 0x50, 0xbb, 0x47, 0x79, - 0x6c, 0xab, 0x6c, 0x87, 0xb3, 0x57, 0x47, 0x1d, 0x0a, 0x81, 0x15, 0x1d, 0x95, 0x0d, 0xb5, 0xba, - 0xd8, 0x18, 0xda, 0x11, 0x36, 0x89, 0x18, 0x71, 0x1a, 0xa5, 0x54, 0x14, 0x32, 0xd7, 0x7f, 0x2c, - 0xdf, 0xa0, 0x59, 0x22, 0xbe, 0xf1, 0x36, 0x2c, 0x1e, 0x78, 0x87, 0x8e, 0xdf, 0x3d, 0x15, 0x1a, - 0x6c, 0x38, 0x95, 0xff, 0x12, 0x36, 0x5e, 0x86, 0xa5, 0x11, 0x1c, 0x15, 0xf9, 0x1b, 0x50, 0x4c, - 0x82, 0x1a, 0x59, 0x90, 0x8c, 0x88, 0x32, 0x11, 0xdf, 0x51, 0x68, 0xc4, 0x00, 0x10, 0x35, 0x50, - 0x26, 0xf2, 0x80, 0x1b, 0x50, 0x90, 0x91, 0x1c, 0xf2, 0xe5, 0x23, 0x2c, 0x0f, 0xd1, 0xf0, 0x18, - 0x53, 0x40, 0x25, 0x3e, 0xac, 0x1e, 0xdc, 0x80, 0x4a, 0xa6, 0x77, 0x33, 0xf3, 0x58, 0x3f, 0xe3, - 0x3c, 0x2e, 0xc8, 0x7e, 0x7e, 0xe7, 0x8a, 0xc1, 0x1d, 0x28, 0xa7, 0x2f, 0x41, 0x97, 0x21, 0xcf, - 0x9f, 0xf4, 0xa5, 0x57, 0xd5, 0x21, 0x9c, 0x60, 0xb7, 0x9f, 0xf4, 0x29, 0x11, 0xec, 0x24, 0x62, - 0xb2, 0xfd, 0x47, 0x22, 0x96, 0x13, 0x44, 0x15, 0xb1, 0xaf, 0x75, 0xa8, 0x0e, 0x4b, 0x7c, 0xdb, - 0x76, 0xe8, 0x3f, 0xf1, 0xd0, 0x18, 0x30, 0x7b, 0x64, 0x3b, 0x54, 0xd8, 0x20, 0xaf, 0x4b, 0xce, - 0x63, 0x5b, 0xf0, 0x1c, 0x2c, 0xc6, 0x2d, 0xd8, 0x6e, 0xdd, 0xdd, 0x8c, 0xcb, 0xe0, 0xbf, 0x9f, - 0x40, 0x31, 0x71, 0x0d, 0x15, 0x61, 0x7a, 0xeb, 0xe1, 0x41, 0x63, 0xa7, 0xa6, 0xa1, 0x0a, 0x14, - 0xf7, 
0x1e, 0xb4, 0x3b, 0xf2, 0xa8, 0xa3, 0x39, 0x28, 0x91, 0xad, 0x7b, 0x5b, 0x9f, 0x77, 0x76, - 0x1b, 0xed, 0x3b, 0xf7, 0x6b, 0x53, 0x08, 0x41, 0x55, 0x12, 0xf6, 0x1e, 0x28, 0x5a, 0x6e, 0xe3, - 0x97, 0x19, 0x98, 0x8d, 0x6d, 0x47, 0x37, 0x20, 0xbf, 0x1f, 0xb2, 0x13, 0xb4, 0x38, 0x6e, 0x61, - 0x35, 0x96, 0x46, 0xa8, 0xaa, 0x28, 0x35, 0xf4, 0x21, 0x4c, 0x8b, 0xf5, 0x08, 0x8d, 0xdd, 0x36, - 0x8d, 0xf1, 0x3b, 0x24, 0xd6, 0xd0, 0x5d, 0x28, 0xa5, 0xd6, 0xaa, 0x09, 0xda, 0x17, 0x32, 0xd4, - 0xec, 0x06, 0x86, 0xb5, 0x6b, 0x3a, 0xba, 0x0f, 0xa5, 0xd4, 0x46, 0x84, 0x8c, 0x4c, 0x31, 0x65, - 0x76, 0xab, 0x21, 0xd6, 0x98, 0x15, 0x0a, 0x6b, 0x68, 0x0b, 0x60, 0xb8, 0x0c, 0xa1, 0xf3, 0x19, - 0xe1, 0xf4, 0xd6, 0x64, 0x18, 0xe3, 0x58, 0x09, 0xcc, 0x26, 0x14, 0x93, 0x55, 0x00, 0xad, 0x8c, - 0xd9, 0x0e, 0x24, 0xc8, 0xe4, 0xbd, 0x01, 0x6b, 0x68, 0x1b, 0xca, 0x0d, 0xc7, 0x39, 0x0b, 0x8c, - 0x91, 0xe6, 0xb0, 0x51, 0x1c, 0x07, 0x96, 0x27, 0x4c, 0x5f, 0x74, 0x25, 0x3b, 0x65, 0x27, 0xad, - 0x14, 0xc6, 0x7f, 0xfe, 0x54, 0x2e, 0xb9, 0x6d, 0x17, 0xaa, 0xd9, 0x99, 0x81, 0x26, 0xad, 0xc3, - 0x86, 0x99, 0x30, 0xc6, 0x0f, 0x19, 0x6d, 0x2d, 0xca, 0x6c, 0x39, 0x5d, 0xff, 0xe8, 0xdc, 0xdb, - 0x60, 0x51, 0x6b, 0x1a, 0xff, 0x1a, 0xc5, 0x4a, 0x77, 0x8b, 0x40, 0x6a, 0x0d, 0x3b, 0x49, 0xde, - 0xd3, 0x0a, 0x0f, 0x19, 0xe5, 0xef, 0x6a, 0xde, 0x5c, 0x32, 0x70, 0x14, 0x5e, 0x92, 0xa6, 0xd1, - 0x49, 0x64, 0x4c, 0xba, 0x49, 0x94, 0xf0, 0x1e, 0x54, 0x32, 0x0f, 0x3e, 0x4a, 0x3c, 0x1a, 0x37, - 0x4f, 0x8c, 0x8b, 0x13, 0xb8, 0xb1, 0x6d, 0x9b, 0x1f, 0x3f, 0x7f, 0x65, 0x6a, 0x2f, 0x5e, 0x99, - 0xda, 0x9b, 0x57, 0xa6, 0xfe, 0x74, 0x60, 0xea, 0x3f, 0x0c, 0x4c, 0xfd, 0xd9, 0xc0, 0xd4, 0x9f, - 0x0f, 0x4c, 0xfd, 0xb7, 0x81, 0xa9, 0xff, 0x3e, 0x30, 0xb5, 0x37, 0x03, 0x53, 0xff, 0xf6, 0xb5, - 0xa9, 0x3d, 0x7f, 0x6d, 0x6a, 0x2f, 0x5e, 0x9b, 0xda, 0x17, 0x85, 0xae, 0x63, 0x53, 0x8f, 0x1f, - 0x16, 0xc4, 0xdf, 0xe4, 0xeb, 0x7f, 0x04, 0x00, 0x00, 0xff, 0xff, 0x7f, 0x9b, 0x95, 0xff, 0x82, + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0x57, 0x4f, 0x6f, 0x1b, 0x45, + 0x14, 0xdf, 0x8d, 0x1d, 0x27, 0x7e, 0xfe, 0x13, 0x67, 0x92, 0x34, 0xe9, 0x96, 0x6e, 0xca, 0x48, + 0x2d, 0x11, 0x50, 0xa7, 0xa4, 0x2a, 0xf4, 0x40, 0x55, 0x39, 0x6d, 0xd2, 0x1a, 0x25, 0x69, 0x3a, + 0x76, 0x00, 0x21, 0x21, 0x6b, 0x63, 0x4f, 0x92, 0x55, 0xf6, 0x8f, 0xbb, 0x33, 0x5b, 0xd1, 0x5b, + 0x25, 0x3e, 0x00, 0x48, 0x5c, 0xf8, 0x08, 0x9c, 0xb9, 0xc0, 0x47, 0xe8, 0x31, 0xc7, 0x8a, 0x43, + 0x45, 0xd3, 0x0b, 0xc7, 0x7e, 0x04, 0xb4, 0x33, 0xb3, 0xeb, 0x5d, 0xd7, 0x16, 0x81, 0xd2, 0x8b, + 0xb5, 0xf3, 0xfe, 0xfc, 0xe6, 0xfd, 0x9d, 0xf7, 0x0c, 0xe5, 0xae, 0x1f, 0x70, 0xfa, 0x5d, 0xbd, + 0x1f, 0xf8, 0xdc, 0x47, 0x05, 0x79, 0x32, 0xae, 0x1e, 0xda, 0xfc, 0x28, 0xdc, 0xaf, 0x77, 0x7d, + 0x77, 0xf5, 0xd0, 0x3f, 0xf4, 0x57, 0x05, 0x7b, 0x3f, 0x3c, 0x10, 0x27, 0x71, 0x10, 0x5f, 0x52, + 0xcd, 0x98, 0xeb, 0x1f, 0x1f, 0xae, 0x06, 0xb6, 0x27, 0x7f, 0x24, 0x11, 0xff, 0xae, 0x43, 0xf9, + 0xab, 0xc0, 0xe6, 0x94, 0xd0, 0x47, 0x21, 0x65, 0x1c, 0xed, 0x00, 0x70, 0xdb, 0xa5, 0x8c, 0x06, + 0x36, 0x65, 0x4b, 0xfa, 0xa5, 0xdc, 0x4a, 0x69, 0x0d, 0xd5, 0xd5, 0xfd, 0x6d, 0xdb, 0xa5, 0x2d, + 0xc1, 0x59, 0x37, 0x9e, 0xbd, 0x58, 0xd6, 0xfe, 0x78, 0xb1, 0x8c, 0x76, 0x03, 0x6a, 0x39, 0x8e, + 0xdf, 0x6d, 0x27, 0x5a, 0x24, 0x85, 0x80, 0x3e, 0x83, 0x42, 0xcb, 0x0f, 0x83, 0x2e, 0x5d, 0x9a, + 0xb8, 0xa4, 0xaf, 0x54, 0xd7, 0x96, 0x63, 0xac, 0xf4, 0xad, 0x75, 0x29, 0xb2, 0xe1, 0x85, 0x2e, + 0x29, 0x30, 0xf1, 0x8d, 0x97, 0x01, 0x06, 0x54, 0x34, 0x05, 0xb9, 0xc6, 0x6e, 0xb3, 0xa6, 0xa1, + 0x69, 0xc8, 0x93, 0xbd, 0xad, 
0x8d, 0x9a, 0x8e, 0x67, 0xa0, 0xa2, 0x30, 0x58, 0xdf, 0xf7, 0x18, + 0xc5, 0xb7, 0xa0, 0x44, 0xa8, 0xd5, 0x8b, 0x3d, 0xa9, 0xc3, 0xd4, 0xa3, 0x30, 0xed, 0xc6, 0x7c, + 0x7c, 0xf5, 0xc3, 0x90, 0x06, 0x4f, 0x94, 0x18, 0x89, 0x85, 0xf0, 0x6d, 0x28, 0x4b, 0x75, 0x09, + 0x87, 0x56, 0x61, 0x2a, 0xa0, 0x2c, 0x74, 0x78, 0xac, 0xbf, 0x30, 0xa4, 0x2f, 0xe5, 0x48, 0x2c, + 0x85, 0x7f, 0xd6, 0xa1, 0x9c, 0x86, 0x46, 0x1f, 0x03, 0x62, 0xdc, 0x0a, 0x78, 0x47, 0xc4, 0x83, + 0x5b, 0x6e, 0xbf, 0xe3, 0x46, 0x60, 0xfa, 0x4a, 0x8e, 0xd4, 0x04, 0xa7, 0x1d, 0x33, 0xb6, 0x19, + 0x5a, 0x81, 0x1a, 0xf5, 0x7a, 0x59, 0xd9, 0x09, 0x21, 0x5b, 0xa5, 0x5e, 0x2f, 0x2d, 0x79, 0x0d, + 0xa6, 0x5d, 0x8b, 0x77, 0x8f, 0x68, 0xc0, 0x96, 0x72, 0x59, 0xd7, 0xb6, 0xac, 0x7d, 0xea, 0x6c, + 0x4b, 0x26, 0x49, 0xa4, 0x70, 0x13, 0x2a, 0x19, 0xa3, 0xd1, 0xcd, 0x33, 0xa6, 0x39, 0x1f, 0xa5, + 0x39, 0x9d, 0x50, 0xdc, 0x86, 0x39, 0x01, 0xd5, 0xe2, 0x01, 0xb5, 0xdc, 0x04, 0xf0, 0xd6, 0x08, + 0xc0, 0xc5, 0x37, 0x01, 0xef, 0x1c, 0x85, 0xde, 0xf1, 0x08, 0xd4, 0xeb, 0x80, 0x84, 0xe9, 0x5f, + 0x5a, 0x4e, 0x48, 0x59, 0x1c, 0xc0, 0x8b, 0x00, 0x4e, 0x44, 0xed, 0x78, 0x96, 0x4b, 0x45, 0xe0, + 0x8a, 0xa4, 0x28, 0x28, 0x3b, 0x96, 0x4b, 0xf1, 0x4d, 0x98, 0xcb, 0x28, 0x29, 0x53, 0xde, 0x87, + 0xb2, 0xd4, 0x7a, 0x2c, 0xe8, 0xc2, 0x98, 0x22, 0x29, 0x39, 0x03, 0x51, 0x3c, 0x07, 0xb3, 0x5b, + 0x31, 0x4c, 0x7c, 0x1b, 0xbe, 0xa1, 0x6c, 0x50, 0x44, 0x85, 0xb6, 0x0c, 0xa5, 0x81, 0x0d, 0x31, + 0x18, 0x24, 0x46, 0x30, 0x8c, 0xa0, 0xb6, 0xc7, 0x68, 0xd0, 0xe2, 0x16, 0x4f, 0xa0, 0x7e, 0xd3, + 0x61, 0x36, 0x45, 0x54, 0x50, 0x97, 0xa1, 0x6a, 0x7b, 0x87, 0x94, 0x71, 0xdb, 0xf7, 0x3a, 0x81, + 0xc5, 0xa5, 0x4b, 0x3a, 0xa9, 0x24, 0x54, 0x62, 0x71, 0x1a, 0x79, 0xed, 0x85, 0x6e, 0x47, 0x85, + 0x32, 0x2a, 0x81, 0x3c, 0x29, 0x7a, 0xa1, 0x2b, 0x23, 0x18, 0x55, 0x95, 0xd5, 0xb7, 0x3b, 0x43, + 0x48, 0x39, 0x81, 0x54, 0xb3, 0xfa, 0x76, 0x33, 0x03, 0x56, 0x87, 0xb9, 0x20, 0x74, 0xe8, 0xb0, + 0x78, 0x5e, 0x88, 0xcf, 0x46, 0xac, 0x8c, 0x3c, 0xfe, 0x16, 0xe6, 0x22, 0xc3, 0x9b, 0x77, 0xb3, + 0xa6, 0x2f, 0xc2, 0x54, 0xc8, 0x68, 0xd0, 0xb1, 0x7b, 0x2a, 0x0d, 0x85, 0xe8, 0xd8, 0xec, 0xa1, + 0xab, 0x90, 0xef, 0x59, 0xdc, 0x12, 0x66, 0x96, 0xd6, 0xce, 0xc7, 0x19, 0x7f, 0xc3, 0x79, 0x22, + 0xc4, 0xf0, 0x3d, 0x40, 0x11, 0x8b, 0x65, 0xd1, 0x3f, 0x81, 0x49, 0x16, 0x11, 0x54, 0xdd, 0x5c, + 0x48, 0xa3, 0x0c, 0x59, 0x42, 0xa4, 0x24, 0xfe, 0x55, 0x07, 0x73, 0x9b, 0xf2, 0xc0, 0xee, 0xb2, + 0x4d, 0x3f, 0x48, 0x97, 0x3d, 0x7b, 0xd7, 0xed, 0x77, 0x13, 0xca, 0x71, 0x63, 0x75, 0x18, 0xe5, + 0xaa, 0x05, 0x17, 0x46, 0xb5, 0x20, 0x23, 0xa5, 0x58, 0xb4, 0x45, 0x39, 0x6e, 0xc2, 0xf2, 0x58, + 0x9b, 0x55, 0x28, 0xae, 0x40, 0xc1, 0x15, 0x22, 0x2a, 0x16, 0xd5, 0x18, 0x56, 0x2a, 0x12, 0xc5, + 0xc5, 0xcf, 0x75, 0x98, 0x19, 0x6a, 0xab, 0xc8, 0x85, 0x83, 0xc0, 0x77, 0x55, 0xae, 0xd3, 0xd9, + 0xaa, 0x46, 0xf4, 0xa6, 0x22, 0x37, 0x7b, 0xe9, 0x74, 0x4e, 0x64, 0xd2, 0x79, 0x1b, 0x0a, 0xa2, + 0xb4, 0xe3, 0x87, 0x65, 0x36, 0xe3, 0xd5, 0xae, 0x65, 0x07, 0xeb, 0xf3, 0xea, 0xe5, 0x2f, 0x0b, + 0x52, 0xa3, 0x67, 0xf5, 0x39, 0x0d, 0x88, 0x52, 0x43, 0x1f, 0x41, 0xa1, 0x1b, 0x19, 0xc3, 0x96, + 0xf2, 0x02, 0xa0, 0x12, 0x03, 0xa4, 0x3b, 0x5f, 0x89, 0xa0, 0x79, 0x98, 0xe4, 0xfe, 0x31, 0xf5, + 0x96, 0x26, 0x2f, 0xe9, 0x2b, 0x15, 0x22, 0x0f, 0xf8, 0x07, 0x1d, 0x26, 0xa5, 0x43, 0xef, 0x2a, + 0x83, 0x06, 0x4c, 0x53, 0xaf, 0xeb, 0xf7, 0x6c, 0xef, 0x50, 0x34, 0xce, 0x24, 0x49, 0xce, 0x08, + 0xa9, 0x82, 0x8e, 0x3a, 0xa4, 0xac, 0xaa, 0x76, 0x09, 0xce, 0xb5, 0x03, 0xcb, 0x63, 0x07, 0x34, + 0x10, 0x86, 0x25, 0xe9, 0xc2, 0x3f, 0xe9, 0x00, 0x83, 
0x34, 0xa4, 0xc2, 0xa7, 0xff, 0xb7, 0xf0, + 0xd5, 0x61, 0x8a, 0x59, 0x6e, 0xdf, 0x11, 0x8d, 0x9f, 0xc9, 0x7f, 0x4b, 0x90, 0x55, 0x00, 0x63, + 0xa1, 0x41, 0x04, 0x73, 0xe9, 0x08, 0x3e, 0xd5, 0xa1, 0x76, 0x8f, 0xf2, 0xd8, 0x56, 0xd9, 0x0e, + 0x67, 0xaf, 0x8e, 0x3a, 0x14, 0x02, 0x2b, 0x3a, 0x2a, 0x1b, 0x6a, 0x75, 0xb1, 0x31, 0xb4, 0x23, + 0x6c, 0x12, 0x31, 0xe2, 0x34, 0x4a, 0xa9, 0x28, 0x64, 0xae, 0xff, 0x58, 0xbe, 0x41, 0xd3, 0x44, + 0x7c, 0xe3, 0x4d, 0x98, 0xdf, 0xf3, 0xf6, 0x1d, 0xbf, 0x7b, 0x2c, 0x34, 0xd8, 0x60, 0x2a, 0xff, + 0x2b, 0x6c, 0xbc, 0x08, 0x0b, 0x43, 0x38, 0x2a, 0xf2, 0x37, 0xa0, 0x98, 0x04, 0x35, 0xb2, 0x20, + 0x19, 0x11, 0x65, 0x22, 0xbe, 0xa3, 0xd0, 0x88, 0x01, 0x20, 0x6a, 0xa0, 0x4c, 0xe4, 0x01, 0x37, + 0xa0, 0x20, 0x23, 0x39, 0xe0, 0xcb, 0x47, 0x58, 0x1e, 0xa2, 0xe1, 0x31, 0xa2, 0x80, 0x4a, 0x7c, + 0x50, 0x3d, 0xb8, 0x01, 0x95, 0x4c, 0xef, 0x66, 0xe6, 0xb1, 0x7e, 0xc6, 0x79, 0x5c, 0x90, 0xfd, + 0xfc, 0xd6, 0x15, 0x83, 0x3b, 0x50, 0x4e, 0x5f, 0x82, 0x2e, 0x43, 0x9e, 0x3f, 0xe9, 0x4b, 0xaf, + 0xaa, 0x03, 0x38, 0xc1, 0x6e, 0x3f, 0xe9, 0x53, 0x22, 0xd8, 0x49, 0xc4, 0x64, 0xfb, 0x0f, 0x45, + 0x2c, 0x27, 0x88, 0x2a, 0x62, 0xdf, 0xeb, 0x50, 0x1d, 0x94, 0xf8, 0xa6, 0xed, 0xd0, 0xff, 0xe3, + 0xa1, 0x31, 0x60, 0xfa, 0xc0, 0x76, 0xa8, 0xb0, 0x41, 0x5e, 0x97, 0x9c, 0x47, 0xb6, 0xe0, 0x39, + 0x98, 0x8f, 0x5b, 0xb0, 0xdd, 0xba, 0xbb, 0x1e, 0x97, 0xc1, 0x87, 0x5f, 0x40, 0x31, 0x71, 0x0d, + 0x15, 0x61, 0x72, 0xe3, 0xe1, 0x5e, 0x63, 0xab, 0xa6, 0xa1, 0x0a, 0x14, 0x77, 0x1e, 0xb4, 0x3b, + 0xf2, 0xa8, 0xa3, 0x19, 0x28, 0x91, 0x8d, 0x7b, 0x1b, 0x5f, 0x77, 0xb6, 0x1b, 0xed, 0x3b, 0xf7, + 0x6b, 0x13, 0x08, 0x41, 0x55, 0x12, 0x76, 0x1e, 0x28, 0x5a, 0x6e, 0xed, 0x64, 0x0a, 0xa6, 0x63, + 0xdb, 0xd1, 0x0d, 0xc8, 0xef, 0x86, 0xec, 0x08, 0xcd, 0x8f, 0x5a, 0x58, 0x8d, 0x85, 0x21, 0xaa, + 0x2a, 0x4a, 0x0d, 0x7d, 0x0a, 0x93, 0x62, 0x3d, 0x42, 0x23, 0xb7, 0x4d, 0x63, 0xf4, 0x0e, 0x89, + 0x35, 0x74, 0x17, 0x4a, 0xa9, 0xb5, 0x6a, 0x8c, 0xf6, 0x85, 0x0c, 0x35, 0xbb, 0x81, 0x61, 0xed, + 0x9a, 0x8e, 0xee, 0x43, 0x29, 0xb5, 0x11, 0x21, 0x23, 0x53, 0x4c, 0x99, 0xdd, 0x6a, 0x80, 0x35, + 0x62, 0x85, 0xc2, 0x1a, 0xda, 0x00, 0x18, 0x2c, 0x43, 0xe8, 0x7c, 0x46, 0x38, 0xbd, 0x35, 0x19, + 0xc6, 0x28, 0x56, 0x02, 0xb3, 0x0e, 0xc5, 0x64, 0x15, 0x40, 0x4b, 0x23, 0xb6, 0x03, 0x09, 0x32, + 0x7e, 0x6f, 0xc0, 0x1a, 0xda, 0x84, 0x72, 0xc3, 0x71, 0xce, 0x02, 0x63, 0xa4, 0x39, 0x6c, 0x18, + 0xc7, 0x81, 0xc5, 0x31, 0xd3, 0x17, 0x5d, 0xc9, 0x4e, 0xd9, 0x71, 0x2b, 0x85, 0xf1, 0xc1, 0x3f, + 0xca, 0x25, 0xb7, 0x6d, 0x43, 0x35, 0x3b, 0x33, 0xd0, 0xb8, 0x75, 0xd8, 0x30, 0x13, 0xc6, 0xe8, + 0x21, 0xa3, 0xad, 0x44, 0x99, 0x2d, 0xa7, 0xeb, 0x1f, 0x9d, 0x7b, 0x13, 0x2c, 0x6a, 0x4d, 0xe3, + 0xbd, 0x61, 0xac, 0x74, 0xb7, 0x08, 0xa4, 0x87, 0x80, 0x1a, 0xdd, 0x2e, 0xed, 0xab, 0xf1, 0xd0, + 0x0a, 0xf7, 0x19, 0xe5, 0x6f, 0x6b, 0xdc, 0x4c, 0x32, 0x6e, 0x14, 0x5e, 0x92, 0xa4, 0xe1, 0x39, + 0x64, 0x8c, 0xbb, 0x49, 0x14, 0xf0, 0x0e, 0x54, 0x32, 0xcf, 0x3d, 0x4a, 0xfc, 0x19, 0x35, 0x4d, + 0x8c, 0x8b, 0x63, 0xb8, 0xb1, 0x6d, 0xeb, 0x9f, 0x9f, 0xbc, 0x34, 0xb5, 0xe7, 0x2f, 0x4d, 0xed, + 0xf5, 0x4b, 0x53, 0x7f, 0x7a, 0x6a, 0xea, 0xbf, 0x9c, 0x9a, 0xfa, 0xb3, 0x53, 0x53, 0x3f, 0x39, + 0x35, 0xf5, 0x3f, 0x4f, 0x4d, 0xfd, 0xaf, 0x53, 0x53, 0x7b, 0x7d, 0x6a, 0xea, 0x3f, 0xbe, 0x32, + 0xb5, 0x93, 0x57, 0xa6, 0xf6, 0xfc, 0x95, 0xa9, 0x7d, 0x53, 0xe8, 0x3a, 0x36, 0xf5, 0xf8, 0x7e, + 0x41, 0xfc, 0x49, 0xbe, 0xfe, 0x77, 0x00, 0x00, 0x00, 0xff, 0xff, 0x52, 0xb5, 0x77, 0x6d, 0x80, 0x0f, 0x00, 0x00, } @@ -3021,9 +3021,9 @@ type IngesterClient interface { 
TransferChunks(ctx context.Context, opts ...grpc.CallOption) (Ingester_TransferChunksClient, error) // TransferTSDB transfers all files of a tsdb to a joining ingester TransferTSDB(ctx context.Context, opts ...grpc.CallOption) (Ingester_TransferTSDBClient, error) - // TransferChunksSubset is invoked by a leaving ingester (client), streaming + // AcceptChunksSubset is invoked by a leaving ingester (client), streaming // a subset of its chunks directly to an existing ingester. - TransferChunksSubset(ctx context.Context, opts ...grpc.CallOption) (Ingester_TransferChunksSubsetClient, error) + AcceptChunksSubset(ctx context.Context, opts ...grpc.CallOption) (Ingester_AcceptChunksSubsetClient, error) // GetChunksSubset is invoked by a joining ingester (client). A subset of the // serving ingester's chunks will be sent as a stream. GetChunksSubset(ctx context.Context, in *GetChunksRequest, opts ...grpc.CallOption) (Ingester_GetChunksSubsetClient, error) @@ -3204,30 +3204,30 @@ func (x *ingesterTransferTSDBClient) CloseAndRecv() (*TransferTSDBResponse, erro return m, nil } -func (c *ingesterClient) TransferChunksSubset(ctx context.Context, opts ...grpc.CallOption) (Ingester_TransferChunksSubsetClient, error) { - stream, err := c.cc.NewStream(ctx, &_Ingester_serviceDesc.Streams[3], "/cortex.Ingester/TransferChunksSubset", opts...) +func (c *ingesterClient) AcceptChunksSubset(ctx context.Context, opts ...grpc.CallOption) (Ingester_AcceptChunksSubsetClient, error) { + stream, err := c.cc.NewStream(ctx, &_Ingester_serviceDesc.Streams[3], "/cortex.Ingester/AcceptChunksSubset", opts...) if err != nil { return nil, err } - x := &ingesterTransferChunksSubsetClient{stream} + x := &ingesterAcceptChunksSubsetClient{stream} return x, nil } -type Ingester_TransferChunksSubsetClient interface { +type Ingester_AcceptChunksSubsetClient interface { Send(*TimeSeriesChunk) error CloseAndRecv() (*TransferChunksResponse, error) grpc.ClientStream } -type ingesterTransferChunksSubsetClient struct { +type ingesterAcceptChunksSubsetClient struct { grpc.ClientStream } -func (x *ingesterTransferChunksSubsetClient) Send(m *TimeSeriesChunk) error { +func (x *ingesterAcceptChunksSubsetClient) Send(m *TimeSeriesChunk) error { return x.ClientStream.SendMsg(m) } -func (x *ingesterTransferChunksSubsetClient) CloseAndRecv() (*TransferChunksResponse, error) { +func (x *ingesterAcceptChunksSubsetClient) CloseAndRecv() (*TransferChunksResponse, error) { if err := x.ClientStream.CloseSend(); err != nil { return nil, err } @@ -3295,9 +3295,9 @@ type IngesterServer interface { TransferChunks(Ingester_TransferChunksServer) error // TransferTSDB transfers all files of a tsdb to a joining ingester TransferTSDB(Ingester_TransferTSDBServer) error - // TransferChunksSubset is invoked by a leaving ingester (client), streaming + // AcceptChunksSubset is invoked by a leaving ingester (client), streaming // a subset of its chunks directly to an existing ingester. - TransferChunksSubset(Ingester_TransferChunksSubsetServer) error + AcceptChunksSubset(Ingester_AcceptChunksSubsetServer) error // GetChunksSubset is invoked by a joining ingester (client). A subset of the // serving ingester's chunks will be sent as a stream. 
GetChunksSubset(*GetChunksRequest, Ingester_GetChunksSubsetServer) error @@ -3510,25 +3510,25 @@ func (x *ingesterTransferTSDBServer) Recv() (*TimeSeriesFile, error) { return m, nil } -func _Ingester_TransferChunksSubset_Handler(srv interface{}, stream grpc.ServerStream) error { - return srv.(IngesterServer).TransferChunksSubset(&ingesterTransferChunksSubsetServer{stream}) +func _Ingester_AcceptChunksSubset_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(IngesterServer).AcceptChunksSubset(&ingesterAcceptChunksSubsetServer{stream}) } -type Ingester_TransferChunksSubsetServer interface { +type Ingester_AcceptChunksSubsetServer interface { SendAndClose(*TransferChunksResponse) error Recv() (*TimeSeriesChunk, error) grpc.ServerStream } -type ingesterTransferChunksSubsetServer struct { +type ingesterAcceptChunksSubsetServer struct { grpc.ServerStream } -func (x *ingesterTransferChunksSubsetServer) SendAndClose(m *TransferChunksResponse) error { +func (x *ingesterAcceptChunksSubsetServer) SendAndClose(m *TransferChunksResponse) error { return x.ServerStream.SendMsg(m) } -func (x *ingesterTransferChunksSubsetServer) Recv() (*TimeSeriesChunk, error) { +func (x *ingesterAcceptChunksSubsetServer) Recv() (*TimeSeriesChunk, error) { m := new(TimeSeriesChunk) if err := x.ServerStream.RecvMsg(m); err != nil { return nil, err @@ -3629,8 +3629,8 @@ var _Ingester_serviceDesc = grpc.ServiceDesc{ ClientStreams: true, }, { - StreamName: "TransferChunksSubset", - Handler: _Ingester_TransferChunksSubset_Handler, + StreamName: "AcceptChunksSubset", + Handler: _Ingester_AcceptChunksSubset_Handler, ClientStreams: true, }, { diff --git a/pkg/ingester/client/cortex.proto b/pkg/ingester/client/cortex.proto index b4714dca18c..6971ba34bd6 100644 --- a/pkg/ingester/client/cortex.proto +++ b/pkg/ingester/client/cortex.proto @@ -29,9 +29,9 @@ service Ingester { // TransferTSDB transfers all files of a tsdb to a joining ingester rpc TransferTSDB(stream TimeSeriesFile) returns (TransferTSDBResponse) {}; - // TransferChunksSubset is invoked by a leaving ingester (client), streaming + // AcceptChunksSubset is invoked by a leaving ingester (client), streaming // a subset of its chunks directly to an existing ingester. - rpc TransferChunksSubset(stream TimeSeriesChunk) returns (TransferChunksResponse) {}; + rpc AcceptChunksSubset(stream TimeSeriesChunk) returns (TransferChunksResponse) {}; // GetChunksSubset is invoked by a joining ingester (client). A subset of the // serving ingester's chunks will be sent as a stream. diff --git a/pkg/ingester/incremental_transfer.go b/pkg/ingester/incremental_transfer.go index e8277e7995e..66ea3c7fc1f 100644 --- a/pkg/ingester/incremental_transfer.go +++ b/pkg/ingester/incremental_transfer.go @@ -10,23 +10,20 @@ import ( "github.com/go-kit/kit/log/level" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "github.com/weaveworks/common/user" "golang.org/x/net/context" ) var ( - blockedRanges = prometheus.NewGauge(prometheus.GaugeOpts{ + blockedRanges = promauto.NewGauge(prometheus.GaugeOpts{ Name: "cortex_ingester_blocked_ranges", Help: "The current number of ranges that will not accept writes by this ingester.", }) ) -func init() { - prometheus.MustRegister(blockedRanges) -} - -// TransferChunksSubset accepts chunks from a client and moves them into the local Ingester. 
-func (i *Ingester) TransferChunksSubset(stream client.Ingester_TransferChunksSubsetServer) error { +// AcceptChunksSubset accepts chunks from a client and moves them into the local Ingester. +func (i *Ingester) AcceptChunksSubset(stream client.Ingester_AcceptChunksSubsetServer) error { i.userStatesMtx.Lock() defer i.userStatesMtx.Unlock() @@ -90,8 +87,8 @@ func (i *Ingester) GetChunksSubset(req *client.GetChunksRequest, stream client.I // sitting around forever if a joining ingester crashes, as writes will continue // to go to us and get rejected for as long as the blocked range exists. func (i *Ingester) BlockRanges(ranges []ring.TokenRange) { - i.blockedTokenMtx.Lock() - defer i.blockedTokenMtx.Unlock() + i.blockedRangesMtx.Lock() + defer i.blockedRangesMtx.Unlock() for _, rg := range ranges { if exist := i.blockedRanges[rg]; exist { @@ -112,8 +109,8 @@ func (i *Ingester) BlockRanges(ranges []ring.TokenRange) { // UnblockRanges manually removes blocks for the provided ranges. func (i *Ingester) UnblockRanges(ctx context.Context, in *client.UnblockRangesRequest) (*client.UnblockRangesResponse, error) { - i.blockedTokenMtx.Lock() - defer i.blockedTokenMtx.Unlock() + i.blockedRangesMtx.Lock() + defer i.blockedRangesMtx.Unlock() for _, rg := range in.Ranges { if exist := i.blockedRanges[rg]; !exist { @@ -146,8 +143,8 @@ func (i *Ingester) SendChunkRanges(ctx context.Context, ranges []ring.TokenRange } defer c.Close() - ctx = user.InjectOrgID(ctx, fakeOrgID) - stream, err := c.TransferChunksSubset(ctx) + ctx = user.InjectOrgID(ctx, noOrgID) + stream, err := c.AcceptChunksSubset(ctx) if err != nil { return errors.Wrap(err, "SendChunks") } @@ -186,7 +183,7 @@ func (i *Ingester) RequestChunkRanges(ctx context.Context, ranges []ring.TokenRa } defer c.Close() - ctx = user.InjectOrgID(ctx, fakeOrgID) + ctx = user.InjectOrgID(ctx, noOrgID) stream, err := c.GetChunksSubset(ctx, &client.GetChunksRequest{ Ranges: ranges, Move: move, @@ -220,7 +217,7 @@ func (i *Ingester) RequestComplete(ctx context.Context, ranges []ring.TokenRange } defer c.Close() - ctx = user.InjectOrgID(ctx, fakeOrgID) + ctx = user.InjectOrgID(ctx, noOrgID) _, err = c.UnblockRanges(ctx, &client.UnblockRangesRequest{Ranges: ranges}) if err != nil { level.Error(util.Logger).Log("msg", "could not clean up target after transfer", "err", err) diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index f05a067a2b3..f5928456d38 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -63,9 +63,7 @@ type Config struct { SpreadFlushes bool // Config for checking tokens. 
- CheckOnCreate bool `yaml:"check_token_on_create,omitempty"` - CheckOnAppend bool `yaml:"check_token_on_append,omitempty"` - CheckOnTransfer bool `yaml:"check_token_on_transfer,omitempty"` + CheckTokens bool `yaml:"check_tokens,omitempty"` RateUpdatePeriod time.Duration @@ -98,9 +96,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.DurationVar(&cfg.ChunkAgeJitter, "ingester.chunk-age-jitter", 20*time.Minute, "Range of time to subtract from MaxChunkAge to spread out flushes") f.BoolVar(&cfg.SpreadFlushes, "ingester.spread-flushes", false, "If true, spread series flushes across the whole period of MaxChunkAge") f.IntVar(&cfg.ConcurrentFlushes, "ingester.concurrent-flushes", 50, "Number of concurrent goroutines flushing to dynamodb.") - f.BoolVar(&cfg.CheckOnCreate, "ingester.check-token-on-create", false, "Check that newly created streams fall within expected token ranges") - f.BoolVar(&cfg.CheckOnAppend, "ingester.check-token-on-append", false, "Check that existing streams appended to fall within expected token ranges") - f.BoolVar(&cfg.CheckOnTransfer, "ingester.check-token-on-transfer", false, "Check that streams transferred in using the transfer mechanism fall within expected token ranges") + f.BoolVar(&cfg.CheckTokens, "ingester.check-tokens", false, "Check tokens for streams that are created or appended to.") f.DurationVar(&cfg.RateUpdatePeriod, "ingester.rate-update-period", 15*time.Second, "Period with which to update the per-user ingestion rates.") } @@ -134,8 +130,8 @@ type Ingester struct { wal WAL // Stops specific appends - blockedTokenMtx sync.RWMutex - blockedRanges map[ring.TokenRange]bool + blockedRangesMtx sync.RWMutex + blockedRanges map[ring.TokenRange]bool // Hook for injecting behaviour from tests. preFlushUserSeries func() @@ -299,21 +295,22 @@ func (i *Ingester) StopIncomingRequests() { i.userStatesMtx.Lock() defer i.userStatesMtx.Unlock() i.stopped = true - return } - // When we are incrementally transferring tokens, we want to wait - // for there to be no blocked ranges on our local ingester. - for { - i.blockedTokenMtx.RLock() - numBlocked := len(i.blockedRanges) - i.blockedTokenMtx.RUnlock() + if i.cfg.LifecyclerConfig.LeaveIncrementalTransfer { + // When we are incrementally transferring tokens, we want to wait + // for there to be no blocked ranges on our local ingester. + for { + i.blockedRangesMtx.RLock() + numBlocked := len(i.blockedRanges) + i.blockedRangesMtx.RUnlock() - if numBlocked == 0 { - return - } + if numBlocked == 0 { + break + } - time.Sleep(time.Millisecond * 250) + time.Sleep(time.Millisecond * 250) + } } } @@ -379,8 +376,8 @@ func (i *Ingester) Push(ctx context.Context, req *client.WriteRequest) (*client. // isTokenBlocked checks to see if a token is in a blocked range. 
func (i *Ingester) isTokenBlocked(token uint32) error { - i.blockedTokenMtx.RLock() - defer i.blockedTokenMtx.RUnlock() + i.blockedRangesMtx.RLock() + defer i.blockedRangesMtx.RUnlock() for rg := range i.blockedRanges { if rg.Contains(token) { @@ -429,12 +426,12 @@ func (i *Ingester) append(ctx context.Context, userID string, token uint32, labe return err } - if sstate == seriesCreated && i.cfg.CheckOnCreate { + if sstate == seriesCreated && i.cfg.CheckTokens { if ok := i.tokenChecker.TokenExpected(token); !ok { level.Debug(util.Logger).Log("msg", "unexpected stream created in ingester", "token", token) i.metrics.unexpectedSeriesTotal.WithLabelValues("create").Inc() } - } else if i.cfg.CheckOnAppend { + } else if i.cfg.CheckTokens { if ok := i.tokenChecker.TokenExpected(token); !ok { level.Debug(util.Logger).Log("msg", "unexpected stream appended in ingester", "token", token) i.metrics.unexpectedSeriesTotal.WithLabelValues("append").Inc() diff --git a/pkg/ingester/transfer.go b/pkg/ingester/transfer.go index 7bbd9326208..3c55ba19401 100644 --- a/pkg/ingester/transfer.go +++ b/pkg/ingester/transfer.go @@ -26,7 +26,7 @@ import ( ) var ( - fakeOrgID = "-1" + noOrgID = "-1" sentChunks = prometheus.NewCounter(prometheus.CounterOpts{ Name: "cortex_ingester_sent_chunks", @@ -123,7 +123,7 @@ func (i *Ingester) acceptChunksFromStream(opts acceptChunksOptions) (fromIngeste } prevNumChunks := len(series.chunkDescs) - if i.cfg.CheckOnTransfer && !i.tokenChecker.TokenExpected(wireSeries.Token) { + if i.cfg.CheckTokens && !i.tokenChecker.TokenExpected(wireSeries.Token) { level.Debug(util.Logger).Log("msg", "unexpected stream transferred to ingester", "token", wireSeries.Token) i.metrics.unexpectedSeriesTotal.WithLabelValues("transfer").Inc() } @@ -577,7 +577,7 @@ func (i *Ingester) transferOut(ctx context.Context) error { } defer c.Close() - ctx = user.InjectOrgID(ctx, fakeOrgID) + ctx = user.InjectOrgID(ctx, noOrgID) stream, err := c.TransferChunks(ctx) if err != nil { return errors.Wrap(err, "TransferChunks") @@ -672,7 +672,7 @@ func (i *Ingester) v2TransferOut(ctx context.Context) error { } defer c.Close() - ctx = user.InjectOrgID(ctx, fakeOrgID) + ctx = user.InjectOrgID(ctx, noOrgID) stream, err := c.TransferTSDB(ctx) if err != nil { return errors.Wrap(err, "TransferTSDB() has failed") diff --git a/pkg/ingester/transfer_test.go b/pkg/ingester/transfer_test.go index bffa9721a41..edd4b746219 100644 --- a/pkg/ingester/transfer_test.go +++ b/pkg/ingester/transfer_test.go @@ -630,7 +630,7 @@ func (s *testSendChunksServer) SendAndClose(resp *client.TransferChunksResponse) return nil } -func (c *testIngesterClient) TransferChunksSubset(ctx context.Context, opts ...grpc.CallOption) (client.Ingester_TransferChunksSubsetClient, error) { +func (c *testIngesterClient) AcceptChunksSubset(ctx context.Context, opts ...grpc.CallOption) (client.Ingester_AcceptChunksSubsetClient, error) { ch := make(chan *client.TimeSeriesChunk) resp := make(chan *client.TransferChunksResponse) @@ -645,7 +645,7 @@ func (c *testIngesterClient) TransferChunksSubset(ctx context.Context, opts ...g } go func() { - _ = c.i.TransferChunksSubset(&srv) + _ = c.i.AcceptChunksSubset(&srv) }() return &cli, nil diff --git a/pkg/ring/lifecycler.go b/pkg/ring/lifecycler.go index ffccee9a671..1d3ce340ac4 100644 --- a/pkg/ring/lifecycler.go +++ b/pkg/ring/lifecycler.go @@ -471,7 +471,7 @@ func (i *Lifecycler) loop() { } if err := i.autoJoin(context.Background(), JOINING); err != nil { - level.Error(util.Logger).Log("msg", "failed to pick tokens in 
consul", "err", err) + level.Error(util.Logger).Log("msg", "failed to pick tokens in KV store", "err", err) os.Exit(1) } diff --git a/pkg/ring/model.go b/pkg/ring/model.go index 64edb975c0c..0adca7d0449 100644 --- a/pkg/ring/model.go +++ b/pkg/ring/model.go @@ -154,15 +154,26 @@ func (d *Desc) TokensFor(id string) (tokens, other []uint32) { func (i *IngesterDesc) IsHealthyState(op Operation) bool { healthy := false - switch op { - case Write: - healthy = (i.State == ACTIVE) + if i.Incremental { + // Incremental transferring ingester: everything is healthy except + // writing or reading to a PENDING ingester. + switch op { + case Write: + healthy = (i.State != PENDING) + default: + healthy = true + } + } else { + switch op { + case Write: + healthy = (i.State == ACTIVE) - case Read: - healthy = (i.State == ACTIVE) || (i.State == LEAVING) || (i.State == PENDING) + case Read: + healthy = (i.State == ACTIVE) || (i.State == LEAVING) || (i.State == PENDING) - case Reporting: - healthy = true + case Reporting: + healthy = true + } } return healthy diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 5c2f46f725f..b182204b6eb 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -215,6 +215,22 @@ func (r *Ring) Get(key uint32, op Operation, buf []IngesterDesc) (ReplicationSet // is increased for the key. Dead ingesters will be filtered later by // replication_strategy.go. Filtering later means that we can calculate // a healthiness quorum. + if !ingester.Incremental { + if op == Write && ingester.State != ACTIVE { + n++ + } else if op == Read && (ingester.State != ACTIVE && ingester.State != LEAVING) { + n++ + } + } else { + // If the ingester is incrementally transferring tokens, its current + // state is just informational and is used to inform users what phase + // the transfer is in. Incremental transfers only disallow writing in + // the PENDING state. + if op == Write && ingester.State == PENDING { + n++ + } + } + if !ingester.IsHealthyState(op) { n++ } diff --git a/pkg/ring/ring.pb.go b/pkg/ring/ring.pb.go index 3e58ee0016d..c20a67028d5 100644 --- a/pkg/ring/ring.pb.go +++ b/pkg/ring/ring.pb.go @@ -108,8 +108,8 @@ type IngesterDesc struct { Timestamp int64 `protobuf:"varint,2,opt,name=timestamp,proto3" json:"timestamp,omitempty"` State IngesterState `protobuf:"varint,3,opt,name=state,proto3,enum=ring.IngesterState" json:"state,omitempty"` Tokens []uint32 `protobuf:"varint,6,rep,packed,name=tokens,proto3" json:"tokens,omitempty"` - // Incremental refers to if the state the ingester is in is utilizing incremental - // transfers. Only meaningful for JOINING and LEAVING states. + // When true, the ingester is currently incrementally transferring tokens. This + // will only be true when the ingester's state is JOINING or LEAVING. Incremental bool `protobuf:"varint,7,opt,name=incremental,proto3" json:"incremental,omitempty"` } diff --git a/pkg/ring/ring.proto b/pkg/ring/ring.proto index d0542ab667e..e89c7cd65e8 100644 --- a/pkg/ring/ring.proto +++ b/pkg/ring/ring.proto @@ -10,42 +10,42 @@ option (gogoproto.unmarshaler_all) = true; option go_package = "github.com/cortexproject/cortex/pkg/ring"; message Desc { - map ingesters = 1 [(gogoproto.nullable) = false]; - reserved 2; + map ingesters = 1 [(gogoproto.nullable) = false]; + reserved 2; } message IngesterDesc { - reserved 4, 5; // old, deprecated fields + reserved 4, 5; // old, deprecated fields - string addr = 1; + string addr = 1; - // Timestamp is the Unix timestamp for the last heartbeat an ingester - // gave. 
From ea7b4299b925d040fd53b396adf6190a74f35c46 Mon Sep 17 00:00:00 2001
From: Robert Fratto
Date: Tue, 11 Feb 2020 09:06:29 -0500
Subject: [PATCH 06/11] goimports -local github.com/cortexproject/cortex

Signed-off-by: Robert Fratto
---
 pkg/ingester/incremental_transfer.go | 7 ++++---
 pkg/ingester/transfer_test.go        | 4 ++--
 pkg/ring/incremental_transfer.go     | 3 ++-
 pkg/ring/token_checker.go            | 3 ++-
 pkg/ring/token_checker_test.go       | 5 +++--
 5 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/pkg/ingester/incremental_transfer.go b/pkg/ingester/incremental_transfer.go
index 66ea3c7fc1f..228ae500a0a 100644
--- a/pkg/ingester/incremental_transfer.go
+++ b/pkg/ingester/incremental_transfer.go
@@ -4,15 +4,16 @@ import (
 	"fmt"
 	"time"
 
-	"github.com/cortexproject/cortex/pkg/ingester/client"
-	"github.com/cortexproject/cortex/pkg/ring"
-	"github.com/cortexproject/cortex/pkg/util"
 	"github.com/go-kit/kit/log/level"
 	"github.com/pkg/errors"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promauto"
 	"github.com/weaveworks/common/user"
 	"golang.org/x/net/context"
+
+	"github.com/cortexproject/cortex/pkg/ingester/client"
+	"github.com/cortexproject/cortex/pkg/ring"
+	"github.com/cortexproject/cortex/pkg/util"
 )
 
 var (
diff --git a/pkg/ingester/transfer_test.go b/pkg/ingester/transfer_test.go
index edd4b746219..5ce3588fbd3 100644
--- a/pkg/ingester/transfer_test.go
+++ b/pkg/ingester/transfer_test.go
@@ -17,14 +17,14 @@ import (
 	"github.com/oklog/ulid"
 	"github.com/stretchr/testify/require"
 	"github.com/thanos-io/thanos/pkg/shipper"
+	"github.com/weaveworks/common/user"
 	"google.golang.org/grpc"
+	"google.golang.org/grpc/health/grpc_health_v1"
 
 	"github.com/cortexproject/cortex/pkg/ingester/client"
 	"github.com/cortexproject/cortex/pkg/ring"
 	"github.com/cortexproject/cortex/pkg/ring/kv"
 	"github.com/cortexproject/cortex/pkg/ring/kv/codec"
-	"github.com/weaveworks/common/user"
-	"google.golang.org/grpc/health/grpc_health_v1"
 )
 
 type testUserTSDB struct {
diff --git a/pkg/ring/incremental_transfer.go b/pkg/ring/incremental_transfer.go
index 3dbd91d6834..17cdbe8615e 100644
--- a/pkg/ring/incremental_transfer.go
+++ b/pkg/ring/incremental_transfer.go
@@ -7,9 +7,10 @@ import (
 	"sync"
 	"time"
- "github.com/cortexproject/cortex/pkg/util" "github.com/go-kit/kit/log/level" "go.uber.org/atomic" + + "github.com/cortexproject/cortex/pkg/util" ) // transferWorkload holds a set of ingester addresses to the set of diff --git a/pkg/ring/token_checker.go b/pkg/ring/token_checker.go index 98671e4f35c..e307f5dbaa7 100644 --- a/pkg/ring/token_checker.go +++ b/pkg/ring/token_checker.go @@ -6,8 +6,9 @@ import ( "sync" "time" - "github.com/cortexproject/cortex/pkg/util" "github.com/go-kit/kit/log/level" + + "github.com/cortexproject/cortex/pkg/util" ) // TokenCheckerConfig is the config to configure a TokenChecker. diff --git a/pkg/ring/token_checker_test.go b/pkg/ring/token_checker_test.go index 8dada56b5c6..2498e1bc406 100644 --- a/pkg/ring/token_checker_test.go +++ b/pkg/ring/token_checker_test.go @@ -6,11 +6,12 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "go.uber.org/atomic" + "github.com/cortexproject/cortex/pkg/ring/kv/consul" "github.com/cortexproject/cortex/pkg/util/flagext" "github.com/cortexproject/cortex/pkg/util/test" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" ) func makeSequentialTokenGenerator() TokenGeneratorFunc { From 5a2382af312849dab1c8e44fbdf0f6a7e244b4f1 Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Tue, 18 Feb 2020 09:39:19 -0500 Subject: [PATCH 07/11] make warnings less frequent when rolling out to code using incremental transfers, the warnings messages can be annoying. This commit reduces it to debug-level log lines or logs the warning only if the token checking flag is enabled. Signed-off-by: Robert Fratto --- pkg/ingester/transfer.go | 2 +- pkg/ingester/user_state.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/ingester/transfer.go b/pkg/ingester/transfer.go index 3c55ba19401..a6d345d923a 100644 --- a/pkg/ingester/transfer.go +++ b/pkg/ingester/transfer.go @@ -113,7 +113,7 @@ func (i *Ingester) acceptChunksFromStream(opts acceptChunksOptions) (fromIngeste return fromIngesterID, seriesReceived, errors.Wrap(err, "TransferChunks: fromWireChunks") } - if wireSeries.Token == 0 { + if i.cfg.CheckTokens && wireSeries.Token == 0 { level.Warn(util.Logger).Log("msg", "unexpected token 0 in wireSeries") } diff --git a/pkg/ingester/user_state.go b/pkg/ingester/user_state.go index 98d9689aad8..824c630ee2f 100644 --- a/pkg/ingester/user_state.go +++ b/pkg/ingester/user_state.go @@ -180,7 +180,7 @@ func (u *userState) getSeries(metric labelPairs, record *Record, token uint32) ( series, ok := u.fpToSeries.get(fp) if ok { if token != series.token { - level.Warn(util.Logger).Log("msg", fmt.Sprintf("new token value for metric %s", metric), "previous", series.token, "new", token) + level.Debug(util.Logger).Log("msg", fmt.Sprintf("new token value for metric %s", metric), "previous", series.token, "new", token) series.token = token } From 439cd2119950c23bcef91e876c68e2edabfb135e Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Wed, 19 Feb 2020 14:28:03 -0500 Subject: [PATCH 08/11] only check that from ingester is in leaving state once per transfer This was a problem caused by the refactoring, moving the check inside the previous if statement (which only happens once per transfer) is equivalent to what was happening before. 
From 4886229f8339d58760552f71d991ce61496105e5 Mon Sep 17 00:00:00 2001
From: Robert Fratto
Date: Wed, 19 Feb 2020 15:12:32 -0500
Subject: [PATCH 09/11] increase timeout in incremental transfer tests

Signed-off-by: Robert Fratto
---
 pkg/ring/lifecycler_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/ring/lifecycler_test.go b/pkg/ring/lifecycler_test.go
index c8fbb52f3e3..b2cec7c3655 100644
--- a/pkg/ring/lifecycler_test.go
+++ b/pkg/ring/lifecycler_test.go
@@ -746,7 +746,7 @@ func getLifecyclers(t *testing.T, r *Ring, cfg Config, count int) ([]*Lifecycler
 		require.NoError(t, err)
 		lc.Start()
 
-		waitIngesterState(t, r, id, time.Millisecond*250, true)
+		waitIngesterState(t, r, id, time.Millisecond*500, true)
 	}
 
 	return ret, func() {

From 80249b4d8a163abf29a912729963bf76d0c512a0 Mon Sep 17 00:00:00 2001
From: Robert Fratto
Date: Wed, 19 Feb 2020 15:36:26 -0500
Subject: [PATCH 10/11] don't shadow seriesReceived and fromIngesterID

Signed-off-by: Robert Fratto
---
 pkg/ingester/transfer.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pkg/ingester/transfer.go b/pkg/ingester/transfer.go
index 7deba12f799..7b30eb9808d 100644
--- a/pkg/ingester/transfer.go
+++ b/pkg/ingester/transfer.go
@@ -231,7 +231,8 @@ func (i *Ingester) TransferChunks(stream client.Ingester_TransferChunksServer) e
 	xfer := func() error {
 		userStates := newUserStates(i.limiter, i.cfg, i.metrics)
 
-		fromIngesterID, seriesReceived, err := i.acceptChunksFromStream(acceptChunksOptions{
+		var err error
+		fromIngesterID, seriesReceived, err = i.acceptChunksFromStream(acceptChunksOptions{
 			States:                userStates,
 			Stream:                stream,
 			ValidateRemoteLeaving: true,

From d343cb1c82247d5c566d6b54f481ac6c9a1dd605 Mon Sep 17 00:00:00 2001
From: Robert Fratto
Date: Wed, 19 Feb 2020 15:50:06 -0500
Subject: [PATCH 11/11] fix test failures after rebase against master

Signed-off-by: Robert Fratto
---
 pkg/compactor/compactor.go    | 2 +-
 pkg/ingester/transfer_test.go | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go
index 70119c40e02..7d1761fff85
100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -161,7 +161,7 @@ func newCompactor( // Initialize the compactors ring if sharding is enabled. if compactorCfg.ShardingEnabled { lifecyclerCfg := compactorCfg.ShardingRing.ToLifecyclerConfig() - lifecycler, err := ring.NewLifecycler(lifecyclerCfg, ring.NewNoopFlushTransferer(), "compactor", ring.CompactorRingKey, false) + lifecycler, err := ring.NewLifecycler(lifecyclerCfg, ring.NewNoopFlushTransferer(), nil, "compactor", ring.CompactorRingKey, false) if err != nil { return nil, errors.Wrap(err, "unable to initialize compactor ring lifecycler") } diff --git a/pkg/ingester/transfer_test.go b/pkg/ingester/transfer_test.go index 5ce3588fbd3..718d3bc0c44 100644 --- a/pkg/ingester/transfer_test.go +++ b/pkg/ingester/transfer_test.go @@ -424,7 +424,7 @@ type testIngesterFactory struct { } func newTestIngesterFactory(t require.TestingT) *testIngesterFactory { - kvClient, err := kv.NewClient(kv.Config{Store: "inmemory"}, codec.Proto{Factory: ring.ProtoDescFactory}) + kvClient, err := kv.NewClient(kv.Config{Store: "inmemory"}, codec.NewProtoCodec("codec", ring.ProtoDescFactory)) require.NoError(t, err) return &testIngesterFactory{