From aa75f082b68643ef0b78ef69d665631e4391d88f Mon Sep 17 00:00:00 2001 From: vishal Date: Thu, 18 Apr 2019 17:01:59 -0400 Subject: [PATCH 1/4] Basic recommendation engine --- examples/recommendations/app.yaml | 2 ++ .../implementations/models/basic_embedding.py | 27 ++++++++++++++++ examples/recommendations/resources/apis.yaml | 5 +++ .../resources/environments.yaml | 24 ++++++++++++++ .../recommendations/resources/models.yaml | 19 ++++++++++++ .../resources/transformed_columns.yaml | 31 +++++++++++++++++++ examples/recommendations/samples.json | 8 +++++ 7 files changed, 116 insertions(+) create mode 100644 examples/recommendations/app.yaml create mode 100644 examples/recommendations/implementations/models/basic_embedding.py create mode 100644 examples/recommendations/resources/apis.yaml create mode 100644 examples/recommendations/resources/environments.yaml create mode 100644 examples/recommendations/resources/models.yaml create mode 100644 examples/recommendations/resources/transformed_columns.yaml create mode 100644 examples/recommendations/samples.json diff --git a/examples/recommendations/app.yaml b/examples/recommendations/app.yaml new file mode 100644 index 0000000000..b427e58093 --- /dev/null +++ b/examples/recommendations/app.yaml @@ -0,0 +1,2 @@ +- kind: app + name: recommendations diff --git a/examples/recommendations/implementations/models/basic_embedding.py b/examples/recommendations/implementations/models/basic_embedding.py new file mode 100644 index 0000000000..d2e64c43d5 --- /dev/null +++ b/examples/recommendations/implementations/models/basic_embedding.py @@ -0,0 +1,27 @@ +import tensorflow as tf + + +def create_estimator(run_config, model_config): + user_id_index = model_config["aggregates"]["user_id_index"] + movie_id_index = model_config["aggregates"]["movie_id_index"] + + feature_columns = [ + tf.feature_column.embedding_column( + tf.feature_column.categorical_column_with_identity( + "user_id_indexed", len(user_id_index) + ), + model_config["hparams"]["embedding_size"], + ), + tf.feature_column.embedding_column( + tf.feature_column.categorical_column_with_identity( + "movie_id_indexed", len(movie_id_index) + ), + model_config["hparams"]["embedding_size"], + ), + ] + + return tf.estimator.DNNRegressor( + feature_columns=feature_columns, + hidden_units=model_config["hparams"]["hidden_units"], + config=run_config, + ) diff --git a/examples/recommendations/resources/apis.yaml b/examples/recommendations/resources/apis.yaml new file mode 100644 index 0000000000..aac148b956 --- /dev/null +++ b/examples/recommendations/resources/apis.yaml @@ -0,0 +1,5 @@ +- kind: api + name: rating-predictor + model_name: basic_embedding + compute: + replicas: 1 diff --git a/examples/recommendations/resources/environments.yaml b/examples/recommendations/resources/environments.yaml new file mode 100644 index 0000000000..119b2f19a2 --- /dev/null +++ b/examples/recommendations/resources/environments.yaml @@ -0,0 +1,24 @@ +- kind: environment + name: dev + data: + type: csv + path: s3a://data-vishal/ratings.csv + csv_config: + header: true + schema: ['user_id','movie_id','rating','timestamp'] + +- kind: raw_column + name: user_id + type: STRING_COLUMN + +- kind: raw_column + name: movie_id + type: STRING_COLUMN + +- kind: raw_column + name: rating + type: FLOAT_COLUMN + +- kind: raw_column + name: timestamp + type: INT_COLUMN diff --git a/examples/recommendations/resources/models.yaml b/examples/recommendations/resources/models.yaml new file mode 100644 index 0000000000..38b34ead66 --- /dev/null +++ b/examples/recommendations/resources/models.yaml @@ -0,0 +1,19 @@ +- kind: model + name: basic_embedding + type: regression + target_column: rating + feature_columns: + - user_id_indexed + - movie_id_indexed + aggregates: + - user_id_index + - movie_id_index + hparams: + embedding_size: 10 + hidden_units: [128] + data_partition_ratio: + training: 0.8 + evaluation: 0.2 + training: + batch_size: 50 + num_steps: 8000 diff --git a/examples/recommendations/resources/transformed_columns.yaml b/examples/recommendations/resources/transformed_columns.yaml new file mode 100644 index 0000000000..2a93022ffd --- /dev/null +++ b/examples/recommendations/resources/transformed_columns.yaml @@ -0,0 +1,31 @@ +- kind: aggregate + name: user_id_index + aggregator: cortex.index_string + inputs: + columns: + col: user_id + +- kind: transformed_column + name: user_id_indexed + transformer: cortex.index_string + inputs: + columns: + text: user_id + args: + index: user_id_index + +- kind: aggregate + name: movie_id_index + aggregator: cortex.index_string + inputs: + columns: + col: movie_id + +- kind: transformed_column + name: movie_id_indexed + transformer: cortex.index_string + inputs: + columns: + text: movie_id + args: + index: movie_id_index diff --git a/examples/recommendations/samples.json b/examples/recommendations/samples.json new file mode 100644 index 0000000000..4e6704c79b --- /dev/null +++ b/examples/recommendations/samples.json @@ -0,0 +1,8 @@ +{ + "samples": [ + { + "user_id": "71", + "movie_id": "91529" + } + ] +} From 361f4326c63fc5befc15e49449f48af0668788cc Mon Sep 17 00:00:00 2001 From: vishal Date: Mon, 22 Apr 2019 16:02:47 +0000 Subject: [PATCH 2/4] Move input dataset to cortex-examples --- examples/recommendations/resources/environments.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/recommendations/resources/environments.yaml b/examples/recommendations/resources/environments.yaml index 119b2f19a2..a30f3baef9 100644 --- a/examples/recommendations/resources/environments.yaml +++ b/examples/recommendations/resources/environments.yaml @@ -2,7 +2,7 @@ name: dev data: type: csv - path: s3a://data-vishal/ratings.csv + path: s3a://cortex-examples/movie-ratings.csv csv_config: header: true schema: ['user_id','movie_id','rating','timestamp'] From bafd8899e08087f7f095598417ed2330e12c2c56 Mon Sep 17 00:00:00 2001 From: vishal Date: Thu, 25 Apr 2019 12:03:24 -0400 Subject: [PATCH 3/4] Rename recommendations to movie-ratings --- examples/movie-ratings/app.yaml | 2 ++ .../implementations/models/basic_embedding.py | 0 .../{recommendations => movie-ratings}/resources/apis.yaml | 2 +- .../resources/environments.yaml | 0 .../{recommendations => movie-ratings}/resources/models.yaml | 0 .../resources/transformed_columns.yaml | 0 .../samples.json => movie-ratings/watch_list.json} | 4 ++++ examples/recommendations/app.yaml | 2 -- 8 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 examples/movie-ratings/app.yaml rename examples/{recommendations => movie-ratings}/implementations/models/basic_embedding.py (100%) rename examples/{recommendations => movie-ratings}/resources/apis.yaml (73%) rename examples/{recommendations => movie-ratings}/resources/environments.yaml (100%) rename examples/{recommendations => movie-ratings}/resources/models.yaml (100%) rename examples/{recommendations => movie-ratings}/resources/transformed_columns.yaml (100%) rename examples/{recommendations/samples.json => movie-ratings/watch_list.json} (57%) delete mode 100644 examples/recommendations/app.yaml diff --git a/examples/movie-ratings/app.yaml b/examples/movie-ratings/app.yaml new file mode 100644 index 0000000000..e9dda0b34b --- /dev/null +++ b/examples/movie-ratings/app.yaml @@ -0,0 +1,2 @@ +- kind: app + name: movie-ratings diff --git a/examples/recommendations/implementations/models/basic_embedding.py b/examples/movie-ratings/implementations/models/basic_embedding.py similarity index 100% rename from examples/recommendations/implementations/models/basic_embedding.py rename to examples/movie-ratings/implementations/models/basic_embedding.py diff --git a/examples/recommendations/resources/apis.yaml b/examples/movie-ratings/resources/apis.yaml similarity index 73% rename from examples/recommendations/resources/apis.yaml rename to examples/movie-ratings/resources/apis.yaml index aac148b956..187089009a 100644 --- a/examples/recommendations/resources/apis.yaml +++ b/examples/movie-ratings/resources/apis.yaml @@ -1,5 +1,5 @@ - kind: api - name: rating-predictor + name: ratings model_name: basic_embedding compute: replicas: 1 diff --git a/examples/recommendations/resources/environments.yaml b/examples/movie-ratings/resources/environments.yaml similarity index 100% rename from examples/recommendations/resources/environments.yaml rename to examples/movie-ratings/resources/environments.yaml diff --git a/examples/recommendations/resources/models.yaml b/examples/movie-ratings/resources/models.yaml similarity index 100% rename from examples/recommendations/resources/models.yaml rename to examples/movie-ratings/resources/models.yaml diff --git a/examples/recommendations/resources/transformed_columns.yaml b/examples/movie-ratings/resources/transformed_columns.yaml similarity index 100% rename from examples/recommendations/resources/transformed_columns.yaml rename to examples/movie-ratings/resources/transformed_columns.yaml diff --git a/examples/recommendations/samples.json b/examples/movie-ratings/watch_list.json similarity index 57% rename from examples/recommendations/samples.json rename to examples/movie-ratings/watch_list.json index 4e6704c79b..a4cfd58bab 100644 --- a/examples/recommendations/samples.json +++ b/examples/movie-ratings/watch_list.json @@ -3,6 +3,10 @@ { "user_id": "71", "movie_id": "91529" + }, + { + "user_id": "71", + "movie_id": "174055" } ] } diff --git a/examples/recommendations/app.yaml b/examples/recommendations/app.yaml deleted file mode 100644 index b427e58093..0000000000 --- a/examples/recommendations/app.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- kind: app - name: recommendations From 279f52389114c5f4fc34645ebc01576ded476148 Mon Sep 17 00:00:00 2001 From: vishal Date: Thu, 25 Apr 2019 23:37:23 +0000 Subject: [PATCH 4/4] Rename watch_list.json to movies.json --- examples/movie-ratings/{watch_list.json => movies.json} | 0 examples/movie-ratings/resources/models.yaml | 8 ++------ 2 files changed, 2 insertions(+), 6 deletions(-) rename examples/movie-ratings/{watch_list.json => movies.json} (100%) diff --git a/examples/movie-ratings/watch_list.json b/examples/movie-ratings/movies.json similarity index 100% rename from examples/movie-ratings/watch_list.json rename to examples/movie-ratings/movies.json diff --git a/examples/movie-ratings/resources/models.yaml b/examples/movie-ratings/resources/models.yaml index 38b34ead66..8f5e0ef5c8 100644 --- a/examples/movie-ratings/resources/models.yaml +++ b/examples/movie-ratings/resources/models.yaml @@ -2,12 +2,8 @@ name: basic_embedding type: regression target_column: rating - feature_columns: - - user_id_indexed - - movie_id_indexed - aggregates: - - user_id_index - - movie_id_index + feature_columns: [user_id_indexed, movie_id_indexed] + aggregates: [user_id_index, movie_id_index] hparams: embedding_size: 10 hidden_units: [128]