diff --git a/sdk/schema/manifest-lax.schema.json b/sdk/schema/manifest-lax.schema.json new file mode 100644 index 0000000000..532c95099b --- /dev/null +++ b/sdk/schema/manifest-lax.schema.json @@ -0,0 +1,238 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/manifest.schema.json", + "title": "manifest", + "description": "TDF manifest in JSON", + "type": "object", + "properties": { + "payload": { + "type": "object", + "description": "An object which contains information describing the payload.", + "properties": { + "type": { + "description": "Describes the type of payload is associated with the TDF.", + "type": "string" + }, + "url": { + "description": "URL which points to payload. For reference types, with the default ZIP protocol, the URL would point to a local file within the zip.", + "type": "string" + }, + "protocol": { + "description": "The payload protocol. Default is zip." + }, + "isEncrypted": { + "description": "Designates whether or not the payload is encrypted, or cleartext.", + "type": "boolean" + }, + "mimeType": { + "description": "Specifies the type of file that is encrypted. Default is `application/octet-stream`.", + "type": "string" + }, + "tdf_spec_version": { + "description": "Semver version number of the TDF spec.", + "type": ["string", "null"] + } + }, + "required": ["type", "url", "protocol", "isEncrypted"] + }, + "encryptionInformation": { + "type": "object", + "properties": { + "type": { + "description": "Designates the type of key access protocol was used. 
Default, is split.", + "type": "string" + }, + "keyAccess": { + "description": "An array of keyAccess objects which are used to retrieve keys from one, or more Key Access Services", + "type": "array", + "items": { + "description": "A key access object", + "type": "object", + "properties": { + "type": { + "description": "The type of key access object.", + "type": "string", + "enum": ["wrapped", "remote"] + }, + "url": { + "description": "A fully qualified URL pointing to a key access service responsible for managing access to the encryption keys.", + "type": "string" + }, + "protocol": { + "description": "The protocol to be used for managing key access.", + "type": "string", + "enum": ["kas"] + }, + "wrappedKey": { + "description": "The symmetric key used to encrypt the payload. It has been encrypted using the public key of the KAS, then base64 encoded.", + "type": "string" + }, + "sid": { + "description": "A unique identifier for a single key split. In some complex policies, multiple key access objects may exist that share a specific key split. Using a splitId allows software to more efficiently operate by not reusing key material unnecessarily. ", + "type": ["string", "null"] + }, + "kid": { + "description": "A UUID for the specific keypair used for wrapping the symmetric key.", + "type": "string" + }, + "policyBinding": { + "description": "Object describing the policyBinding. Contains a hash, and an algorithm used. May also be a string, with just the hash. In that case default to HS256.", + "oneOf": [ + { + "type": "string" + },{ + "type": "object", + "properties": { + "alg": { + "description": "The policy binding algorithm used to generate the hash.", + "type": "string" + }, + "hash": { + "description": "This contains a keyed hash that will provide cryptographic integrity on the policy object, such that it cannot be modified or copied to another TDF, without invalidating the binding. 
Specifically, you would have to have access to the key in order to overwrite the policy.", + "type": "string" + } + } + ,"required": ["alg", "hash"] + } + ] + }, + "encryptedMetadata": { + "description": "Metadata associated with the TDF, and the request. The contents of the metadata are freeform, and are used to pass information from the client, and any plugins that may be in use by the KAS. The metadata stored here should not be used for primary access decisions. Base64.", + "type": ["string", "null"] + } + } + }, + "required": ["type", "url", "protocol", "wrappedKey","sid", "kid", "policyBinding"] + }, + "method": { + "type": "object", + "properties": { + "algorithm": { + "description": "Algorithm used to encrypt the payload", + "type": "string" + }, + "isStreamable": { + "description": "Designates whether or not the payload is streamable.", + "type": "boolean" + } + }, + "required": ["algorithm", "isStreamable"] + }, + "integrityInformation": { + "type": "object", + "properties": { + "rootSignature": { + "type": "object", + "properties": { + "alg": { + "description": "Algorithm used to generate the root signature of the payload", + "type": "string" + }, + "sig": { + "description": "The payload signature", + "type": "string" + } + } + }, + "segmentSizeDefault": { + "description": "Default size of an encryption segment", + "type": "number" + }, + "segmentHashAlg": { + "description": "Algorithm used to generate segment hashes", + "type": "string" + }, + "segments": { + "description": "An array of segment objects. Allows for the possibility of assuring integrity over file segments, in addition to the entire payload. Useful for streaming.", + "type": "array", + "items": { + "description": "Segment object. 
Contains information necessary to validate integrity over a specific byte range of a payload.", + "type": "object", + "properties": { + "hash": { + "description": "Generated hash using the segment hashing algorithm specified in the parent object.", + "type": "string" + }, + "segmentSize": { + "description": "The size of the segment prior to its encryption. Optional field only specified if it differs from the 'segmentSizeDefault', specified above.", + "type": "number" + }, + "encryptedSegmentSize": { + "description": "The size of the segment once it has been encrypted.", + "type": "number" + } + } + } + }, + "encryptedSegmentSizeDefault": { + "description": "Default size of an encrypted segment. TODO: Is this necessary??", + "type": "number" + } + }, + "required": ["rootSignature", "segmentSizeDefault", "segments", "encryptedSegmentSizeDefault"] + }, + "policy": { + "description": "Base64 encoded policy object", + "type": "string" + } + } + }, + "assertions": { + "type": "array", + "description": "An array of objects used to express metadata about the objects in the scope attribute of the assertion. An assertion also supports metadata about the assertion statement for the purposes of indicating any handling instructions pertinent to the statement itself. Also supports encrypted statements and binding the statement with objects in its scope.", + "items": { + "type": "object", + "description": "A single assertion", + "properties": { + "id": { + "description": "A unique local identifier used for binding and signing purposes. 
Not guaranteed to be unique across multiple TDOs but must be unique within a single instance.", + "type": "string" + }, + "type": { + "description": "Describes the type of assertion ('handling' or 'other').", + "type": "string" + }, + "scope": { + "description": "An enumeration of the object to which the assertion applies ('tdo' or 'payload').", + "type": "string" + }, + "appliesToState": { + "description": "Used to indicate if the statement metadata applies to 'encrypted' or 'unencrypted' data.", + "type": ["string", "null"] + }, + "statement": { + "description": "Intended for access, rights, and/or handling instructions that apply to the scope of the assertion.", + "type": "object", + "properties": { + "format": { + "description": "Describes the payload content encoding format ('xml-structured', 'base64binary', 'string').", + "type": "string" + }, + "value": { + "description": "Payload content encoded in the format specified.", + "type": ["string", "object"] + } + } + }, + "binding": { + "description": "Object describing the assertionBinding. Contains a hash, and an algorithm used.", + "type": "object", + "properties": { + "method": { + "description": "The assertion binding method used to encode the signature. Default is 'jws'", + "type": "string" + }, + "signature": { + "description": "This contains a keyed hash that will provide cryptographic integrity on the assertion object, such that it cannot be modified or copied to another TDF, without invalidating the binding. 
Specifically, you would have to have access to the key in order to overwrite the policy.", + "type": "string" + } + }, + "required": ["method", "signature"] + } + }, + "required": ["id", "type", "scope", "appliesToState", "statement"] + } + } + }, + "required": ["payload", "encryptionInformation"] +} diff --git a/sdk/schema/manifest.schema.json b/sdk/schema/manifest.schema.json index 55bfe2cdf5..dfb920d386 100644 --- a/sdk/schema/manifest.schema.json +++ b/sdk/schema/manifest.schema.json @@ -30,7 +30,7 @@ }, "tdf_spec_version": { "description": "Semver version number of the TDF spec.", - "type": ["string", "null"] + "type": "string" } }, "required": ["type", "url", "protocol", "isEncrypted"] @@ -69,7 +69,7 @@ }, "sid": { "description": "A unique identifier for a single key split. In some complex policies, multiple key access objects may exist that share a specific key split. Using a splitId allows software to more efficiently operate by not reusing key material unnecessarily. ", - "type": ["string", "null"] + "type": "string" }, "kid": { "description": "A UUID for the specific keypair used for wrapping the symmetric key.", @@ -98,7 +98,7 @@ }, "encryptedMetadata": { "description": "Metadata associated with the TDF, and the request. The contents of the metadata are freeform, and are used to pass information from the client, and any plugins that may be in use by the KAS. The metadata stored here should not be used for primary access decisions. 
Base64.", - "type": ["string", "null"] + "type": "string" } } }, @@ -198,7 +198,7 @@ }, "appliesToState": { "description": "Used to indicate if the statement metadata applies to 'encrypted' or 'unencrypted' data.", - "type": ["string", "null"] + "type": "string" }, "statement": { "description": "Intended for access, rights, and/or handling instructions that apply to the scope of the assertion.", diff --git a/sdk/sdk.go b/sdk/sdk.go index 5d91b945b8..0a92800592 100644 --- a/sdk/sdk.go +++ b/sdk/sdk.go @@ -349,8 +349,11 @@ func GetTdfType(reader io.ReadSeeker) TdfType { // Some invalid manifests are still usable, so this file may still be usable. var ErrInvalidPerSchema = errors.New("manifest was not valid") +//go:embed schema/manifest-lax.schema.json +var manifestLaxSchema []byte + //go:embed schema/manifest.schema.json -var manifestSchema []byte +var manifestStrictSchema []byte // Detects whether, or not the reader is a valid TDF. It first checks if it can "open" it // Then attempts to extract a manifest, then finally it validates the manifest using the json schema @@ -372,8 +375,22 @@ func IsValidTdf(reader io.ReadSeeker) (bool, error) { return false, fmt.Errorf("tdfReader.Manifest failed: %w", err) } + return isValidManifest(manifest, Lax) +} + +func isValidManifest(manifest string, intensity SchemaValidationIntensity) (bool, error) { // Convert the embedded data to a string - manifestSchemaString := string(manifestSchema) + var manifestSchemaString string + switch intensity { + case Strict: + manifestSchemaString = string(manifestStrictSchema) + case Lax: + manifestSchemaString = string(manifestLaxSchema) + case Skip: + return true, nil + default: + manifestSchemaString = string(manifestLaxSchema) + } loader := gojsonschema.NewStringLoader(manifestSchemaString) manifestStringLoader := gojsonschema.NewStringLoader(manifest) result, err := gojsonschema.Validate(loader, manifestStringLoader) diff --git a/sdk/tdf.go b/sdk/tdf.go index 5a8fa4bb78..b3eb269327 100644 
--- a/sdk/tdf.go
+++ b/sdk/tdf.go
@@ -564,6 +564,16 @@ func (s SDK) LoadTDF(reader io.ReadSeeker, opts ...TDFReaderOption) (*Reader, er
 		return nil, fmt.Errorf("tdfReader.Manifest failed: %w", err)
 	}
 
+	if config.schemaValidationIntensity == Lax || config.schemaValidationIntensity == Strict {
+		valid, err := isValidManifest(manifest, config.schemaValidationIntensity)
+		if err != nil {
+			return nil, err
+		}
+		if !valid {
+			return nil, fmt.Errorf("manifest schema validation failed: %w", ErrInvalidPerSchema)
+		}
+	}
+
 	manifestObj := &Manifest{}
 	err = json.Unmarshal([]byte(manifest), manifestObj)
 	if err != nil {
@@ -1013,9 +1023,9 @@ func (r *Reader) buildKey(_ context.Context, results []kaoResult) error {
 			assertionKey.Alg = AssertionKeyAlgHS256
 			assertionKey.Key = payloadKey[:]
 
-			if !r.config.AssertionVerificationKeys.IsEmpty() {
+			if !r.config.verifiers.IsEmpty() {
 				// Look up the key for the assertion
-				foundKey, err := r.config.AssertionVerificationKeys.Get(assertion.ID)
+				foundKey, err := r.config.verifiers.Get(assertion.ID)
 				if err != nil {
 					return fmt.Errorf("%w: %w", ErrAssertionFailure{ID: assertion.ID}, err)
 				}
diff --git a/sdk/tdf_config.go b/sdk/tdf_config.go
index 25f761a85b..e0c182bb65 100644
--- a/sdk/tdf_config.go
+++ b/sdk/tdf_config.go
@@ -213,12 +213,23 @@ func WithAutoconfigure(enable bool) TDFOption {
 	}
 }
 
+// SchemaValidationIntensity selects manifest schema validation: 0 = none (skip), 1 = lax (allowing novel entries, 'falsy' values for unknowns), 2 = strict (rejecting novel entries, strict match to manifest schema)
+type SchemaValidationIntensity int
+
+const (
+	Skip SchemaValidationIntensity = iota
+	Lax
+	Strict
+	unreasonable = 100
+)
+
 type TDFReaderOption func(*TDFReaderConfig) error
 
 type TDFReaderConfig struct {
-	// Optional Map of Assertion Verification Keys
-	AssertionVerificationKeys AssertionVerificationKeys
+	verifiers                    AssertionVerificationKeys
 	disableAssertionVerification bool
+
+	schemaValidationIntensity SchemaValidationIntensity
 }
 
 func newTDFReaderConfig(opt ...TDFReaderOption) (*TDFReaderConfig,
error) { @@ -237,7 +248,14 @@ func newTDFReaderConfig(opt ...TDFReaderOption) (*TDFReaderConfig, error) { func WithAssertionVerificationKeys(keys AssertionVerificationKeys) TDFReaderOption { return func(c *TDFReaderConfig) error { - c.AssertionVerificationKeys = keys + c.verifiers = keys + return nil + } +} + +func WithSchemaValidation(intensity SchemaValidationIntensity) TDFReaderOption { + return func(c *TDFReaderConfig) error { + c.schemaValidationIntensity = intensity return nil } } diff --git a/sdk/tdf_test.go b/sdk/tdf_test.go index c45ef798bd..9d19795728 100644 --- a/sdk/tdf_test.go +++ b/sdk/tdf_test.go @@ -1,17 +1,20 @@ package sdk import ( + "archive/zip" "bytes" "context" "crypto/rand" "crypto/rsa" "crypto/sha256" + "encoding/json" "fmt" "io" "log/slog" "net" "net/url" "os" + "path/filepath" "strconv" "strings" "testing" @@ -31,6 +34,7 @@ import ( "google.golang.org/grpc/test/bufconn" "google.golang.org/protobuf/types/known/structpb" + "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" ) @@ -218,7 +222,7 @@ type partialReadTdfTest struct { type assertionTests struct { assertions []AssertionConfig - assertionVerificationKeys *AssertionVerificationKeys + verifiers *AssertionVerificationKeys disableAssertionVerification bool expectedSize int } @@ -393,7 +397,7 @@ func (s *TDFSuite) Test_TDFWithAssertion() { }, }, }, - assertionVerificationKeys: nil, + verifiers: nil, disableAssertionVerification: false, expectedSize: 2689, }, @@ -424,7 +428,7 @@ func (s *TDFSuite) Test_TDFWithAssertion() { SigningKey: defaultKey, }, }, - assertionVerificationKeys: &AssertionVerificationKeys{ + verifiers: &AssertionVerificationKeys{ DefaultKey: defaultKey, }, disableAssertionVerification: false, @@ -463,7 +467,7 @@ func (s *TDFSuite) Test_TDFWithAssertion() { }, }, }, - assertionVerificationKeys: &AssertionVerificationKeys{ + verifiers: &AssertionVerificationKeys{ // defaultVerificationKey: nil, Keys: map[string]AssertionKey{ "assertion1": { @@ -508,7 
+512,7 @@ func (s *TDFSuite) Test_TDFWithAssertion() { }, }, }, - assertionVerificationKeys: &AssertionVerificationKeys{ + verifiers: &AssertionVerificationKeys{ Keys: map[string]AssertionKey{ "assertion1": { Alg: AssertionKeyAlgHS256, @@ -580,11 +584,11 @@ func (s *TDFSuite) Test_TDFWithAssertion() { buf := make([]byte, 8) var r *Reader - if test.assertionVerificationKeys == nil { + if test.verifiers == nil { r, err = s.sdk.LoadTDF(readSeeker, WithDisableAssertionVerification(test.disableAssertionVerification)) } else { r, err = s.sdk.LoadTDF(readSeeker, - WithAssertionVerificationKeys(*test.assertionVerificationKeys), + WithAssertionVerificationKeys(*test.verifiers), WithDisableAssertionVerification(test.disableAssertionVerification)) } s.Require().NoError(err) @@ -602,6 +606,127 @@ func (s *TDFSuite) Test_TDFWithAssertion() { } } +func updateManifest(t *testing.T, tdfFile, outFile string, changer func(t *testing.T, dst io.Writer, f *zip.File) error) error { + z, err := zip.OpenReader(tdfFile) + if err != nil { + return err + } + defer func() { + err := z.Close() + require.NoError(t, err) + }() + + unzippedDir := tdfFile + "-unzipped" + if err := os.MkdirAll(unzippedDir, os.ModePerm); err != nil { + return err + } + defer func() { + err := os.RemoveAll(unzippedDir) + require.NoError(t, err) + }() + + for _, file := range z.File { + fpath := filepath.Join(unzippedDir, file.Name) + if file.FileInfo().IsDir() { + err := os.MkdirAll(fpath, os.ModePerm) + if err != nil { + return err + } + continue + } + + if err := os.MkdirAll(filepath.Dir(fpath), os.ModePerm); err != nil { + return err + } + + outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, file.Mode()) + if err != nil { + return err + } + + err = changer(t, outFile, file) + outFile.Close() + if err != nil { + return err + } + } + + outZip, err := os.Create(outFile) + if err != nil { + return err + } + defer outZip.Close() + + zipWriter := zip.NewWriter(outZip) + defer zipWriter.Close() + + 
err = filepath.Walk(unzippedDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + header, err := zip.FileInfoHeader(info) + if err != nil { + return err + } + + header.Name, err = filepath.Rel(unzippedDir, path) + if err != nil { + return err + } + + if info.IsDir() { + header.Name += "/" + } else { + header.Method = zip.Store + } + + writer, err := zipWriter.CreateHeader(header) + if err != nil { + return err + } + + if !info.IsDir() { + file, err := os.Open(path) + if err != nil { + return err + } + defer file.Close() + _, err = io.Copy(writer, file) + if err != nil { + return err + } + } + + return nil + }) + + return err +} + +/* + manifestPath := filepath.Join(unzippedDir, "0.manifest.json") + manifestFile, err := os.ReadFile(manifestPath) + if err != nil { + return "", err + } + + var manifestData Manifest + if err := json.Unmarshal(manifestFile, &manifestData); err != nil { + return "", err + } + + newManifestData := manifestChange(manifestData) + newManifestFile, err := json.Marshal(newManifestData) + if err != nil { + return "", err + } + + if err := os.WriteFile(manifestPath, newManifestFile, os.ModePerm); err != nil { + return "", err + } +*/ + func (s *TDFSuite) Test_TDFWithAssertionNegativeTests() { hs256Key := make([]byte, 32) _, err := rand.Read(hs256Key) @@ -678,7 +803,7 @@ func (s *TDFSuite) Test_TDFWithAssertionNegativeTests() { }, }, }, - assertionVerificationKeys: &AssertionVerificationKeys{ + verifiers: &AssertionVerificationKeys{ // defaultVerificationKey: nil, Keys: map[string]AssertionKey{ "assertion1": { @@ -722,7 +847,7 @@ func (s *TDFSuite) Test_TDFWithAssertionNegativeTests() { }, }, }, - assertionVerificationKeys: &AssertionVerificationKeys{ + verifiers: &AssertionVerificationKeys{ DefaultKey: defaultKey, }, expectedSize: 2689, @@ -771,10 +896,10 @@ func (s *TDFSuite) Test_TDFWithAssertionNegativeTests() { buf := make([]byte, 8) var r *Reader - if test.assertionVerificationKeys == nil { + if 
test.verifiers == nil { r, err = s.sdk.LoadTDF(readSeeker) } else { - r, err = s.sdk.LoadTDF(readSeeker, WithAssertionVerificationKeys(*test.assertionVerificationKeys)) + r, err = s.sdk.LoadTDF(readSeeker, WithAssertionVerificationKeys(*test.verifiers)) } s.Require().NoError(err) @@ -935,6 +1060,184 @@ func (s *TDFSuite) Test_TDFReaderFail() { } } +func (s *TDFSuite) Test_ValidateSchema() { + for index, test := range []struct { + n string + changer func(*testing.T, io.Writer, *zip.File) error + err error + failOn SchemaValidationIntensity + }{ + { + n: "valid", + changer: func(_ *testing.T, dst io.Writer, f *zip.File) error { + rc, err := f.Open() + if err != nil { + return err + } + + _, err = io.Copy(dst, rc) + return err + }, + err: nil, + failOn: unreasonable, + }, + { + n: "emptymanifest", + changer: func(_ *testing.T, dst io.Writer, f *zip.File) error { + rc, err := f.Open() + if err != nil { + return err + } + + if f.Name == "0.manifest.json" { + _, err = dst.Write([]byte("{}")) + } else { + _, err = io.Copy(dst, rc) + } + return err + }, + err: ErrInvalidPerSchema, + failOn: Skip, + }, + { + n: "nojsonchange", + changer: func(_ *testing.T, dst io.Writer, f *zip.File) error { + rc, err := f.Open() + if err != nil { + return err + } + + // Validate json changer code + if f.Name != "0.manifest.json" { + _, err = io.Copy(dst, rc) + return err + } + // Read file from json as a map + var data map[string]interface{} + err = json.NewDecoder(rc).Decode(&data) + if err != nil { + return err + } + // encode data to dst + + err = json.NewEncoder(dst).Encode(data) + return err + }, + err: nil, + failOn: unreasonable, + }, + { + n: "lax", + changer: func(_ *testing.T, dst io.Writer, f *zip.File) error { + rc, err := f.Open() + if err != nil { + return err + } + + if f.Name != "0.manifest.json" { + _, err = io.Copy(dst, rc) + return err + } + // Read file from json as a map + var data map[string]interface{} + err = json.NewDecoder(rc).Decode(&data) + if err != nil { + 
return err + } + + (data["payload"].(map[string]interface{}))["tdf_spec_version"] = nil //nolint:forcetypeassert // testonly code + + err = json.NewEncoder(dst).Encode(data) + return err + }, + err: ErrInvalidPerSchema, + failOn: Strict, + }, + } { + s.Run(test.n, func() { + // create .txt file + plainTextFileName := test.n + "-" + strconv.Itoa(index) + ".txt" + s.createFileName(buffer, plainTextFileName, 16) + defer func() { + // Remove the test files + _ = os.Remove(plainTextFileName) + }() + tdfFileName := plainTextFileName + ".tdf" + + plainReader, err := os.Open(plainTextFileName) + s.Require().NoError(err) + + defer func() { + err := plainReader.Close() + s.Require().NoError(err) + }() + + ciphertextWriter, err := os.Create(tdfFileName) + s.Require().NoError(err) + + defer func() { + err := ciphertextWriter.Close() + s.Require().NoError(err) + err = os.Remove(tdfFileName) + s.Require().NoError(err) + }() + + encryptOpts := []TDFOption{ + WithKasInformation(s.kases[0].KASInfo), + WithAutoconfigure(false), + } + + // test encrypt + _, err = s.sdk.CreateTDF(ciphertextWriter, plainReader, encryptOpts...) + s.Require().NoError(err) + + alteredFileName := "altered-" + tdfFileName + s.Require().NoError(updateManifest(s.T(), tdfFileName, alteredFileName, test.changer)) + + cipherText, err := os.Open(alteredFileName) + s.Require().NoError(err) + + defer func() { + err := cipherText.Close() + s.Require().NoError(err) + _ = os.Remove(alteredFileName) + }() + + for _, svi := range []SchemaValidationIntensity{Skip, Lax, Strict} { + r, err := s.sdk.LoadTDF(cipherText, WithSchemaValidation(svi)) + switch { + case test.failOn > svi: + s.Require().NoError(err, "error should be nil at %s", svi) + case test.err != nil && svi > Skip: + // can either fail here or on first read (ie in Copy below) + // Errors on 'skip' won't match the expected error type, though. 
+ if test.err != nil { + s.Require().ErrorIs(err, test.err, "[%v] at %s", err, svi) + } else { + s.Require().Error(err, "at %s", svi) + } + continue + default: + s.Require().NoError(err, "[%v] at %s", err, svi) + } + + if test.failOn > svi { + n, err := io.Copy(io.Discard, r) + s.Require().NoError(err, "at %s", svi) + s.Equal(int64(16), n) + } else { + _, err := io.Copy(io.Discard, r) + if test.err != nil && svi != Skip { + s.Require().ErrorIs(err, test.err, "[%v] at %s", err, svi) + } else { + s.Require().Error(err, "[%v] at %s", err, svi) + } + } + } + }) + } +} + func (s *TDFSuite) Test_TDF() { for index, test := range []tdfTest{ {