@@ -1277,6 +1277,139 @@ def deidentify_table_condition_replace_with_info_types(
1277
1277
# [END dlp_deidentify_table_condition_infotypes]
1278
1278
1279
1279
1280
+ # [START dlp_deidentify_table_condition_masking]
1281
def deidentify_table_condition_masking(
    project,
    table_data,
    deid_content_list,
    condition_field=None,
    condition_operator=None,
    condition_value=None,
    masking_character=None
):
    """Uses the Data Loss Prevention API to de-identify sensitive data in a
    table by masking them based on a condition.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        table_data: Table data, either as a dict with "header" and "rows"
            keys or as a JSON string encoding such a dict.
        deid_content_list: A list of fields in table to de-identify. A JSON
            encoded list (or a single field name) given as a string is
            accepted as well.
        condition_field: A table Field within the record this condition is
            evaluated against.
        condition_operator: Operator used to compare the field or infoType to
            the value. One of: RELATIONAL_OPERATOR_UNSPECIFIED, EQUAL_TO,
            NOT_EQUAL_TO, GREATER_THAN, LESS_THAN, GREATER_THAN_OR_EQUALS,
            LESS_THAN_OR_EQUALS, EXISTS.
        condition_value: Value to compare against. [Mandatory, except for
            ``EXISTS`` tests.]. It is sent as an integer value; a numeric
            string is converted with ``int()``.
        masking_character: The character to mask matching sensitive data with.

    Returns:
        De-identified table is returned;
        the response from the API is also printed to the terminal.

    Raises:
        ValueError: If ``table_data`` is a string that is not valid JSON, or
            ``condition_value`` is a string that is not an integer.
        KeyError: If the table data has no "header" or "rows" entry.

    Example:
        table_data = {
            "header":[
                "email",
                "phone number",
                "age",
                "happiness_score"
            ],
            "rows":[
                [
                    "user1@example.org",
                    "4232342345",
                    "35",
                    "21"
                ],
                [
                    "user2@example.org",
                    "4253458383",
                    "64",
                    "34"
                ]
            ]
        }

        >> $ python deid.py deid_table_condition_mask my-project \
        '{"header": ["email", "phone number", "age", "happiness_score"],
        "rows": [["user1@example.org", "4232342345", "35", "21"],
        ["user2@example.org", "4253458383", "64", "34"]]}' \
        '["happiness_score"]' --condition_field "age" \
        --condition_operator "GREATER_THAN" --condition_value 50
        >> '{"header": ["email", "phone number", "age", "happiness_score"],
            "rows": [["user1@example.org", "4232342345", "35", "21"],
            ["user2@example.org", "4253458383", "64", "**"]]}'
    """

    # Import the client library
    import google.cloud.dlp

    # Imported locally so this sample does not depend on a file-level import.
    import json

    # The command line hands over raw strings, so decode JSON input before
    # it is indexed like a mapping.
    if isinstance(table_data, str):
        table_data = json.loads(table_data)

    # A bare string would otherwise be iterated character by character.
    if isinstance(deid_content_list, str):
        try:
            decoded = json.loads(deid_content_list)
        except ValueError:
            decoded = None
        if isinstance(decoded, list):
            deid_content_list = decoded
        elif isinstance(decoded, str):
            deid_content_list = [decoded]
        else:
            deid_content_list = [deid_content_list]

    # The condition is sent as `integer_value`, so make sure a numeric
    # string (e.g. from argparse) is an actual int.
    if isinstance(condition_value, str):
        condition_value = int(condition_value)

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Construct the `table`. For more details on the table schema, please see
    # https://cloud.google.com/dlp/docs/reference/rest/v2/ContentItem#Table
    headers = [{"name": val} for val in table_data["header"]]
    rows = [
        {"values": [{"string_value": cell_val} for cell_val in row]}
        for row in table_data["rows"]
    ]

    table = {"headers": headers, "rows": rows}

    # Construct the `item`
    item = {"table": table}

    # Specify fields to be de-identified
    deid_fields = [{"name": field_name} for field_name in deid_content_list]

    # Construct condition list
    condition = [
        {
            "field": {"name": condition_field},
            "operator": condition_operator,
            "value": {"integer_value": condition_value},
        }
    ]

    # Construct deidentify configuration dictionary
    deidentify_config = {
        "record_transformations": {
            "field_transformations": [
                {
                    "primitive_transformation": {
                        "character_mask_config": {
                            "masking_character": masking_character
                        }
                    },
                    "fields": deid_fields,
                    "condition": {
                        "expressions": {
                            "conditions": {"conditions": condition}
                        }
                    },
                }
            ]
        }
    }

    # Convert the project id into a full resource id. No surrounding
    # whitespace: the resource name must be exactly `projects/<id>`.
    parent = f"projects/{project}"

    # Call the API.
    response = dlp.deidentify_content(
        request={
            "parent": parent,
            "deidentify_config": deidentify_config,
            "item": item,
        }
    )

    # Print the result
    print(f"Table after de-identification: {response.item.table}")

    # Return the response
    return response.item.table
1409
+
1410
+ # [END dlp_deidentify_table_condition_masking]
1411
+
1412
+
1280
1413
if __name__ == "__main__" :
1281
1414
parser = argparse .ArgumentParser (description = __doc__ )
1282
1415
subparsers = parser .add_subparsers (
@@ -1607,6 +1740,45 @@ def deidentify_table_condition_replace_with_info_types(
1607
1740
help = "Value to compare against. [Mandatory, except for ``EXISTS`` tests.]." ,
1608
1741
)
1609
1742
1743
# Sub-command: mask table fields in the rows that satisfy a condition.
table_condition_mask_parser = subparsers.add_parser(
    "deid_table_condition_mask",
    # Trailing space keeps the two adjacent literals from fusing into
    # "maskingthem" in the rendered help text.
    help="De-identify sensitive data in a table by masking "
    "them based on a condition.",
)
table_condition_mask_parser.add_argument(
    "project",
    help="The Google Cloud project id to use as a parent resource.",
)
table_condition_mask_parser.add_argument(
    "table_data",
    help="Json string representing table data",
)
table_condition_mask_parser.add_argument(
    "deid_content_list",
    help="A list of fields in table to de-identify.",
)
table_condition_mask_parser.add_argument(
    "--condition_field",
    help="A table Field within the record this condition is evaluated "
    "against.",
)
table_condition_mask_parser.add_argument(
    "--condition_operator",
    help="Operator used to compare the field or infoType to the value. "
    "One of: RELATIONAL_OPERATOR_UNSPECIFIED, EQUAL_TO, NOT_EQUAL_TO, "
    "GREATER_THAN, LESS_THAN, GREATER_THAN_OR_EQUALS, LESS_THAN_OR_EQUALS, "
    "EXISTS.",
)
table_condition_mask_parser.add_argument(
    "--condition_value",
    help="Value to compare against. [Mandatory, except for ``EXISTS`` tests.].",
)
table_condition_mask_parser.add_argument(
    "-m",
    "--masking_character",
    help="The character to mask matching sensitive data with.",
)
1781
+
1610
1782
args = parser .parse_args ()
1611
1783
1612
1784
if args .content == "deid_mask" :
@@ -1687,3 +1859,13 @@ def deidentify_table_condition_replace_with_info_types(
1687
1859
condition_operator = args .condition_operator ,
1688
1860
condition_value = args .condition_value
1689
1861
)
1862
+ elif args .content == "deid_table_condition_mask" :
1863
+ deidentify_table_condition_masking (
1864
+ args .project ,
1865
+ args .table_data ,
1866
+ args .deid_content_list ,
1867
+ condition_field = args .condition_field ,
1868
+ condition_operator = args .condition_operator ,
1869
+ condition_value = args .condition_value ,
1870
+ masking_character = args .masking_character
1871
+ )
0 commit comments