-
Notifications
You must be signed in to change notification settings - Fork 1.7k
data: add DataProvider
gRPC service spec
#4314
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fa6040b
b27ecfb
2fc0cea
1aa393d
da76a8a
e82bc06
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
load("//tensorboard/defs:protos.bzl", "tb_proto_library")

package(default_visibility = ["//tensorboard:internal"])

licenses(["notice"])

# Raw .proto sources in this package, for consumers that need the files
# themselves rather than generated bindings.
filegroup(
    name = "proto_srcs",
    srcs = glob(["*.proto"]),
)

# Generated protobuf + gRPC bindings for the data provider service
# (has_services = True enables service/stub generation).
tb_proto_library(
    name = "protos_all",
    srcs = [
        "data_provider.proto",
    ],
    has_services = True,
    deps = ["//tensorboard/compat/proto:protos_all"],
)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,312 @@ | ||
syntax = "proto3";

package tensorboard.data;

import "tensorboard/compat/proto/summary.proto";
import "tensorboard/compat/proto/tensor.proto";

option go_package = "github.com/tensorflow/tensorboard/tensorboard/data/proto/data_provider_proto";
|
||
// Service for reading TensorBoard data: runs, and scalar, tensor, and blob
// sequence time series within an experiment.
service TensorBoardDataProvider {
  // List runs within an experiment.
  rpc ListRuns(ListRunsRequest) returns (ListRunsResponse) {}
  // List metadata about scalar time series.
  rpc ListScalars(ListScalarsRequest) returns (ListScalarsResponse) {}
  // Read data from scalar time series.
  rpc ReadScalars(ReadScalarsRequest) returns (ReadScalarsResponse) {}
  // List metadata about tensor time series.
  rpc ListTensors(ListTensorsRequest) returns (ListTensorsResponse) {}
  // Read data from tensor time series.
  rpc ReadTensors(ReadTensorsRequest) returns (ReadTensorsResponse) {}
  // List metadata about blob sequence time series.
  rpc ListBlobSequences(ListBlobSequencesRequest)
      returns (ListBlobSequencesResponse) {}
  // Read blob references from blob sequence time series. See `ReadBlob` to
  // read the actual blob data.
  rpc ReadBlobSequences(ReadBlobSequencesRequest)
      returns (ReadBlobSequencesResponse) {}
  // Read data for a specific blob, streamed as a sequence of chunks.
  rpc ReadBlob(ReadBlobRequest) returns (stream ReadBlobResponse) {}
}
|
||
// Filter restricting results to data owned by a single plugin.
message PluginFilter {
  // Only match data with exactly this plugin name.
  string plugin_name = 1;
}
|
||
// Filter restricting results to specific runs and/or tags.
message RunTagFilter {
  // Optional filter for runs. If omitted, *all* runs match; this is not
  // equivalent to an empty submessage (which matches no runs).
  RunFilter runs = 1;
  // Optional filter for tags. If omitted, *all* tags match; this is not
  // equivalent to an empty submessage (which matches no tags).
  TagFilter tags = 2;
}
|
||
// Filter matching runs by name.
message RunFilter {
  // Only match runs with exactly one of these names. In particular, if this
  // list is empty, no runs match.
  repeated string names = 1;
}
|
||
// Filter matching tags by name.
message TagFilter {
  // Only match tags with exactly one of these names. In particular, if this
  // list is empty, no tags match.
  repeated string names = 1;
}
|
||
// Downsampling specification for the number of points per time series.
message Downsample {
  // Maximum number of points to return. Must be positive.
  int64 num_points = 1;
}
|
||
message ListRunsRequest {
  // ID of experiment in which to query data.
  string experiment_id = 1;
}
|
||
message ListRunsResponse {
  // Runs within the requested experiment.
  repeated Run runs = 1;
}
|
||
message Run {
  // Reserved for a unique internal identifier: names are unique, but can also
  // be long, so it would be nice to work with a more compact ID form.
  reserved "id";
  reserved 1;
  // User-facing name.
  string name = 2;
  // Wall time of earliest recorded event, as floating-point seconds since
  // epoch (same as event file format).
  double start_time = 3;
}
|
||
message ListScalarsRequest {
  // ID of experiment in which to query data.
  string experiment_id = 1;
  // Filter for the plugin that owns the scalars. It is an error if
  // `plugin_filter.plugin_name` is the empty string.
  PluginFilter plugin_filter = 2;
  // Optional filter for time series. If omitted, all time series match.
  RunTagFilter run_tag_filter = 3;
}
|
||
message ListScalarsResponse {
  // Matching time series, grouped by run.
  repeated RunEntry runs = 1;
  message RunEntry {
    // Name of the run (as in `Run.name`).
    string run_name = 1;
    // One entry per matching time series within this run.
    repeated TagEntry tags = 2;
  }
  message TagEntry {
    // Tag name of this time series.
    string tag_name = 1;
    // Metadata about this scalar time series.
    ScalarMetadata metadata = 2;
  }
}
|
||
message ScalarMetadata {
  // Largest step value of any datum in this time series.
  int64 max_step = 1;
  // Largest wall time of any datum in this time series.
  double max_wall_time = 2;
  // Atemporal summary metadata for this time series.
  tensorboard.SummaryMetadata summary_metadata = 3;
}
|
||
message ReadScalarsRequest {
  // ID of experiment in which to query data.
  string experiment_id = 1;
  // Filter for the plugin that owns the scalars. It is an error if
  // `plugin_filter.plugin_name` is the empty string.
  PluginFilter plugin_filter = 2;
  // Optional filter for time series. If omitted, all time series match.
  RunTagFilter run_tag_filter = 3;
  // Downsampling specification describing how many points to return per time
  // series. It is an error if `downsample.num_points <= 0`.
  Downsample downsample = 4;
}
|
||
message ReadScalarsResponse {
  // Matching time series, grouped by run.
  repeated RunEntry runs = 1;
  message RunEntry {
    // Name of the run (as in `Run.name`).
    string run_name = 1;
    // One entry per matching time series within this run.
    repeated TagEntry tags = 2;
  }
  message TagEntry {
    // Tag name of this time series.
    string tag_name = 1;
    // Downsampled points from this scalar time series.
    ScalarData data = 2;
  }
}
|
||
// A column-major sequence of scalar points. Arrays `step`, `wall_time`, and
// `value` have the same lengths.
//
// These are repeated primitive values, so they will be packed on the wire:
// <https://developers.google.com/protocol-buffers/docs/encoding#packed>
message ScalarData {
  repeated int64 step = 1;
  repeated double wall_time = 2;
  repeated double value = 3;
}
|
||
message ListTensorsRequest {
  // ID of experiment in which to query data.
  string experiment_id = 1;
  // Filter for the plugin that owns the tensors. It is an error if
  // `plugin_filter.plugin_name` is the empty string.
  PluginFilter plugin_filter = 2;
  // Optional filter for time series. If omitted, all time series match.
  RunTagFilter run_tag_filter = 3;
}
|
||
message ListTensorsResponse {
  // Matching time series, grouped by run.
  repeated RunEntry runs = 1;
  message RunEntry {
    // Name of the run (as in `Run.name`).
    string run_name = 1;
    // One entry per matching time series within this run.
    repeated TagEntry tags = 2;
  }
  message TagEntry {
    // Tag name of this time series.
    string tag_name = 1;
    // Metadata about this tensor time series.
    TensorMetadata metadata = 2;
  }
}
|
||
message TensorMetadata {
  // Largest step value of any datum in this time series.
  int64 max_step = 1;
  // Largest wall time of any datum in this time series.
  double max_wall_time = 2;
  // Atemporal summary metadata for this time series.
  tensorboard.SummaryMetadata summary_metadata = 3;
}
|
||
message ReadTensorsRequest {
  // ID of experiment in which to query data.
  string experiment_id = 1;
  // Filter for the plugin that owns the tensors. It is an error if
  // `plugin_filter.plugin_name` is the empty string.
  PluginFilter plugin_filter = 2;
  // Optional filter for time series. If omitted, all time series match.
  RunTagFilter run_tag_filter = 3;
  // Downsampling specification describing how many points to return per time
  // series. It is an error if `downsample.num_points <= 0`.
  Downsample downsample = 4;
}
|
||
message ReadTensorsResponse {
  // Matching time series, grouped by run.
  repeated RunEntry runs = 1;
  message RunEntry {
    // Name of the run (as in `Run.name`).
    string run_name = 1;
    // One entry per matching time series within this run.
    repeated TagEntry tags = 2;
  }
  message TagEntry {
    // Tag name of this time series.
    string tag_name = 1;
    // Downsampled points from this tensor time series.
    TensorData data = 2;
  }
}
|
||
// A column-major sequence of tensor points. Arrays `step`, `wall_time`, and
// `value` have the same lengths.
//
// The `step` and `wall_time` fields are repeated primitive values, so they
// will be packed on the wire:
// <https://developers.google.com/protocol-buffers/docs/encoding#packed>
message TensorData {
  repeated int64 step = 1;
  repeated double wall_time = 2;
  repeated tensorboard.TensorProto value = 3;
}
|
||
message ListBlobSequencesRequest {
  // ID of experiment in which to query data.
  string experiment_id = 1;
  // Filter for the plugin that owns the blob sequences. It is an error if
  // `plugin_filter.plugin_name` is the empty string.
  PluginFilter plugin_filter = 2;
  // Optional filter for time series. If omitted, all time series match.
  RunTagFilter run_tag_filter = 3;
}
|
||
message ListBlobSequencesResponse {
  // Matching time series, grouped by run.
  repeated RunEntry runs = 1;
  message RunEntry {
    // Name of the run (as in `Run.name`).
    string run_name = 1;
    // One entry per matching time series within this run.
    repeated TagEntry tags = 2;
  }
  message TagEntry {
    // Tag name of this time series.
    string tag_name = 1;
    // Metadata about this blob sequence time series.
    BlobSequenceMetadata metadata = 2;
  }
}
|
||
message BlobSequenceMetadata {
  // Largest step value of any datum in this time series.
  int64 max_step = 1;
  // Largest wall time of any datum in this time series.
  double max_wall_time = 2;
  // Largest number of blobs in any datum in this time series.
  int64 max_length = 3;
  // Atemporal summary metadata for this time series.
  tensorboard.SummaryMetadata summary_metadata = 4;
}
|
||
message ReadBlobSequencesRequest {
  // ID of experiment in which to query data.
  string experiment_id = 1;
  // Filter for the plugin that owns the blob sequences. It is an error if
  // `plugin_filter.plugin_name` is the empty string.
  PluginFilter plugin_filter = 2;
  // Optional filter for time series. If omitted, all time series match.
  RunTagFilter run_tag_filter = 3;
  // Downsampling specification describing how many points to return per time
  // series. It is an error if `downsample.num_points <= 0`.
  Downsample downsample = 4;
}
|
||
message ReadBlobSequencesResponse {
  // Matching time series, grouped by run.
  repeated RunEntry runs = 1;
  message RunEntry {
    // Name of the run (as in `Run.name`).
    string run_name = 1;
    // One entry per matching time series within this run.
    repeated TagEntry tags = 2;
  }
  message TagEntry {
    // Tag name of this time series.
    string tag_name = 1;
    // Downsampled points from this blob sequence time series.
    BlobSequenceData data = 2;
  }
}
|
||
// A column-major sequence of blob sequence points. Arrays `step`, `wall_time`,
// and `values` have the same lengths.
//
// The `step` and `wall_time` fields are repeated primitive values, so they
// will be packed on the wire:
// <https://developers.google.com/protocol-buffers/docs/encoding#packed>
message BlobSequenceData {
  repeated int64 step = 1;
  repeated double wall_time = 2;
  repeated BlobReferenceSequence values = 3;
}
|
||
// An ordered sequence of blob references, one datum in a blob sequence time
// series.
message BlobReferenceSequence {
  repeated BlobReference blob_refs = 1;
}
|
||
// A reference to a blob.
message BlobReference {
  // Unique identifier for a blob, which may be dereferenced via the `ReadBlob`
  // RPC. Must be suitable for inclusion directly in a URL with no further
  // encoding. Case-sensitive. Required; the empty string is not a valid key.
  string blob_key = 1;
  // Optional URL from which the blob may be fetched directly, bypassing the
  // data provider interface.
  string url = 2;
}
|
||
message ReadBlobRequest {
  // Key identifying the blob to read, as obtained from a `BlobReference`
  // (see `ReadBlobSequences`).
  string blob_key = 1;
}
|
||
message ReadBlobResponse {
  // The bytes in this chunk. Should be concatenated with any other responses
  // in the stream to recover the full blob contents.
  bytes data = 1;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We used
google.protobuf.Timestamp
for this for TB.dev - maybe a bit nicer? I guess we might not want that for the actual wall_time
values where it adds conversion overhead, and I could see an argument for consistency in timestamp treatment throughout and/or with DataProvider
, but where it's not particularly performance or overhead sensitive it does seem nicer to use a proper type.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It’s indeed much slower for individual wall times:
wchargin/tensorboard-data-server-go@ff0a517
We use
float64
everywhere: the wire format source of truth, the data provider interface, in RustBoard memory, on the frontend. A
Timestamp
also has greater precision than
float64
, so embedding a float walltime suggests precision that is not there. Technically, it’s also a
lossy conversion, though for times after 1970-02-21 this won’t matter.
So while I certainly appreciate
Timestamp
on its merits, and later in RustBoard we'll use
std::time::Instant
for some internal bookkeeping, I'm pretty disinclined to introduce
Timestamp
s here just because. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I can believe it makes a substantial performance difference for the datum-level wall times; I had already suggested as much in my original comment. The
ListRunsResponse
results have far fewer timestamps, but it's fine to leave it as-is here too, since that's your preference. I mentioned it just since in the grand scheme of things, when they're not a significant performance cost, I think having timestamp types is preferable to not having them. Every
double
timestamp we put in a protobuf makes it take more effort for calling code to use those types, so I'd rather that we at least weigh the tradeoffs when it comes up versus just making floating point timestamps our go-to everywhere. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep :-) (reading my own comment, I guess it wasn’t explicit, but I was
replying to the suggestion to use them here specifically, and understand
that you weren’t suggesting that we use them everywhere)
I buy this general argument, but when I switched from
Timestamp
s to double
s, I actually found that it simplified both the client and the server code, dropping representation-converting helpers on both sides.
So I think we can probably agree on the following tradeoffs, from the
perspective of using
Timestamp
s:and self-documentation.
ListRuns
, no significant performance impact.client complexity.
using packed primitives is a significant performance boost.
complexity and potentially either suggesting higher-than-actual
precision or (theoretically, not realistically) repesenting lossily.
appropriate from first principles but not actually a faithful
representation.
…and while as resident powerful-type-systems evangelist I’m certainly
sympathetic to (1), I’m finding myself most swayed by (5) and by the
sub-point of (6).
Leaving it as is.