Skip to content
This repository was archived by the owner on Aug 25, 2024. It is now read-only.

Change repo/Repo to record/Record #400

Merged
merged 26 commits into from
Feb 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit. Hold shift + click to select a range.
fb5be8a
Merge pull request #1 from intel/master
0dust Jan 3, 2020
41128ce
Merge pull request #2 from intel/master
0dust Jan 3, 2020
14d969f
Merge pull request #3 from intel/master
0dust Jan 9, 2020
0bfca29
Merge branch 'master' of https://github.com/intel/dffml
0dust Jan 9, 2020
8fce308
Merge branch 'master' of https://github.com/0dust/dffml
0dust Jan 9, 2020
942a09c
Merge branch 'master' of https://github.com/intel/dffml
0dust Jan 10, 2020
36e9625
Merge branch 'master' of https://github.com/intel/dffml
0dust Jan 11, 2020
72850a7
Merge branch 'master' of https://github.com/intel/dffml
0dust Jan 12, 2020
98eae76
Merge branch 'master' of https://github.com/intel/dffml
0dust Jan 13, 2020
6e2ebd4
Merge branch 'master' of https://github.com/intel/dffml
0dust Jan 15, 2020
e10cb1b
Merge branch 'master' of https://github.com/intel/dffml
0dust Jan 22, 2020
d553dd0
Merge branch 'master' of https://github.com/intel/dffml
0dust Jan 26, 2020
35146f1
Merge branch 'master' of https://github.com/intel/dffml
0dust Jan 30, 2020
4e628d3
Merge branch 'master' of https://github.com/intel/dffml
0dust Feb 8, 2020
9ffade2
Merge branch 'master' of https://github.com/intel/dffml
0dust Feb 12, 2020
8ae5a9b
Merge branch 'master' of https://github.com/intel/dffml
0dust Feb 18, 2020
90aac1c
Merge branch 'master' of https://github.com/intel/dffml
0dust Feb 19, 2020
3bfbab4
Merge branch 'master' of https://github.com/intel/dffml
0dust Feb 21, 2020
0be16d0
repo to record
0dust Feb 21, 2020
9ecd6d0
repo to record
0dust Feb 21, 2020
eac03f8
Merge branch 'rename_to_record' of https://github.com/0dust/dffml int…
0dust Feb 21, 2020
6808365
changed repo/Repo to record/Record
0dust Feb 21, 2020
7c9ed83
show git_commits operation in docs
0dust Feb 21, 2020
ff314e9
Merge branch 'master' of https://github.com/intel/dffml
0dust Feb 21, 2020
7e31f80
fix conflicts
0dust Feb 21, 2020
717c100
suggested changes
0dust Feb 21, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Notes on development dependencies in `setup.py` files to codebase notes.
- Test for `cached_download`
### Changed
- `repo`/`Repo` to `record`/`Record`
- Definitions with a `spec` can use the `subspec` parameter to declare that they
are a list or a dict where the values are of the `spec` type. Rather than the
list or dict itself being of the `spec` type.
Expand Down
30 changes: 16 additions & 14 deletions dffml/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pkg_resources

from ..version import VERSION
from ..repo import Repo
from ..record import Record
from ..source.source import BaseSource
from ..util.packaging import is_develop
from ..util.cli.arg import Arg
Expand Down Expand Up @@ -37,40 +37,42 @@ async def run(self):

class Edit(SourcesCMD, KeysCMD):
"""
Edit each specified repo
Edit each specified record
"""

async def run(self):
async with self.sources as sources:
async with sources() as sctx:
for key in self.keys:
repo = await sctx.repo(key)
record = await sctx.record(key)
pdb.set_trace()
await sctx.update(repo)
await sctx.update(record)


class Merge(CMD):
"""
Merge repo data between sources
Merge record data between sources
"""

arg_dest = Arg(
"dest", help="Sources merge repos into", type=BaseSource.load_labeled
"dest", help="Sources merge records into", type=BaseSource.load_labeled
)
arg_src = Arg(
"src", help="Sources to pull repos from", type=BaseSource.load_labeled
"src",
help="Sources to pull records from",
type=BaseSource.load_labeled,
)

async def run(self):
async with self.src.withconfig(
self.extra_config
) as src, self.dest.withconfig(self.extra_config) as dest:
async with src() as sctx, dest() as dctx:
async for src in sctx.repos():
repo = Repo(src.key)
repo.merge(src)
repo.merge(await dctx.repo(repo.key))
await dctx.update(repo)
async for src in sctx.records():
record = Record(src.key)
record.merge(src)
record.merge(await dctx.record(record.key))
await dctx.update(record)


class ImportExportCMD(PortCMD, SourcesCMD):
Expand All @@ -80,7 +82,7 @@ class ImportExportCMD(PortCMD, SourcesCMD):


class Import(ImportExportCMD):
"""Imports repos"""
"""Imports records"""

async def run(self):
async with self.sources as sources:
Expand All @@ -89,7 +91,7 @@ async def run(self):


class Export(ImportExportCMD):
"""Exports repos"""
"""Exports records"""

async def run(self):
async with self.sources as sources:
Expand Down
86 changes: 43 additions & 43 deletions dffml/cli/dataflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,14 @@ class RunCMD(SourcesCMD):
arg_sources = SourcesCMD.arg_sources.modify(required=False)
arg_caching = Arg(
"-caching",
help="Skip running DataFlow if a repo already contains these features",
help="Skip running DataFlow if a record already contains these features",
nargs="+",
required=False,
default=[],
)
arg_no_update = Arg(
"-no-update",
help="Update repo with sources",
help="Update record with sources",
required=False,
default=False,
action="store_true",
Expand Down Expand Up @@ -134,42 +134,42 @@ class RunCMD(SourcesCMD):
nargs="+",
action=ParseInputsAction,
default=[],
help="Other inputs to add under each ctx (repo's key will "
help="Other inputs to add under each ctx (record's key will "
+ "be used as the context)",
)
arg_repo_def = Arg(
"-repo-def",
arg_record_def = Arg(
"-record-def",
default=False,
type=str,
help="Definition to be used for repo.key."
+ "If set, repo.key will be added to the set of inputs "
+ "under each context (which is also the repo's key)",
help="Definition to be used for record.key."
+ "If set, record.key will be added to the set of inputs "
+ "under each context (which is also the record's key)",
)

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.orchestrator = self.orchestrator.withconfig(self.extra_config)


class RunAllRepos(RunCMD):
"""Run dataflow for all repos in sources"""
class RunAllRecords(RunCMD):
"""Run dataflow for all records in sources"""

async def repos(self, sctx):
async def records(self, sctx):
"""
This method exists so that it can be overriden by RunRepoSet
This method exists so that it can be overriden by RunRecordSet
"""
async for repo in sctx.repos():
yield repo
async for record in sctx.records():
yield record

async def run_dataflow(self, orchestrator, sources, dataflow):
# Orchestrate the running of these operations
async with orchestrator(dataflow) as octx, sources() as sctx:
# Add our inputs to the input network with the context being the
# repo key
# record key
inputs = []
async for repo in self.repos(sctx):
# Skip running DataFlow if repo already has features
existing_features = repo.features()
async for record in self.records(sctx):
# Skip running DataFlow if record already has features
existing_features = record.features()
if self.caching and all(
map(
lambda cached: cached in existing_features,
Expand All @@ -178,19 +178,19 @@ async def run_dataflow(self, orchestrator, sources, dataflow):
):
continue

repo_inputs = []
record_inputs = []
for value, def_name in self.inputs:
repo_inputs.append(
record_inputs.append(
Input(
value=value,
definition=dataflow.definitions[def_name],
)
)
if self.repo_def:
repo_inputs.append(
if self.record_def:
record_inputs.append(
Input(
value=repo.key,
definition=dataflow.definitions[self.repo_def],
value=record.key,
definition=dataflow.definitions[self.record_def],
)
)

Expand All @@ -199,8 +199,8 @@ async def run_dataflow(self, orchestrator, sources, dataflow):
inputs.append(
MemoryInputSet(
MemoryInputSetConfig(
ctx=StringInputSetContext(repo.key),
inputs=repo_inputs,
ctx=StringInputSetContext(record.key),
inputs=record_inputs,
)
)
)
Expand All @@ -212,14 +212,14 @@ async def run_dataflow(self, orchestrator, sources, dataflow):
*inputs, strict=not self.no_strict
):
ctx_str = (await ctx.handle()).as_string()
# TODO(p4) Make a RepoInputSetContext which would let us
# store the repo instead of recalling it by the URL
repo = await sctx.repo(ctx_str)
# TODO(p4) Make a RecordInputSetContext which would let us
# store the record instead of recalling it by the URL
record = await sctx.record(ctx_str)
# Store the results
repo.evaluated(results)
yield repo
record.evaluated(results)
yield record
if not self.no_update:
await sctx.update(repo)
await sctx.update(record)

async def run(self):
dataflow_path = pathlib.Path(self.dataflow)
Expand All @@ -232,35 +232,35 @@ async def run(self):
exported = await loader.loadb(dataflow_path.read_bytes())
dataflow = DataFlow._fromdict(**exported)
async with self.orchestrator as orchestrator, self.sources as sources:
async for repo in self.run_dataflow(
async for record in self.run_dataflow(
orchestrator, sources, dataflow
):
yield repo
yield record


class RunRepoSet(RunAllRepos, KeysCMD):
"""Run dataflow for single repo or set of repos"""
class RunRecordSet(RunAllRecords, KeysCMD):
"""Run dataflow for single record or set of records"""

async def repos(self, sctx):
async def records(self, sctx):
for key in self.keys:
yield await sctx.repo(key)
yield await sctx.record(key)

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.sources = SubsetSources(*self.sources, keys=self.keys)


class RunRepos(CMD):
"""Run DataFlow and assign output to a repo"""
class RunRecords(CMD):
"""Run DataFlow and assign output to a record"""

_set = RunRepoSet
_all = RunAllRepos
_set = RunRecordSet
_all = RunAllRecords


class Run(CMD):
"""Run dataflow"""

repos = RunRepos
records = RunRecords


class Diagram(CMD):
Expand Down
12 changes: 6 additions & 6 deletions dffml/cli/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@
from ..util.cli.cmds import SourcesCMD, ListEntrypoint


class ListRepos(SourcesCMD):
class ListRecords(SourcesCMD):
"""
List repos stored in sources
List records stored in sources
"""

async def run(self):
async with self.sources as sources:
async with sources() as sctx:
async for repo in sctx.repos():
print(repo)
async for record in sctx.records():
print(record)


class ListServices(ListEntrypoint):
Expand Down Expand Up @@ -48,10 +48,10 @@ class ListModels(ListEntrypoint):

class List(CMD):
"""
List repos and installed interfaces
List records and installed interfaces
"""

repos = ListRepos
records = ListRecords
sources = ListSources
models = ListModels
services = ListServices
16 changes: 8 additions & 8 deletions dffml/cli/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,29 +38,29 @@ class PredictAll(MLCMD):

arg_update = Arg(
"-update",
help="Update repo with sources",
help="Update record with sources",
required=False,
default=False,
action="store_true",
)

async def run(self):
async for repo in predict(
self.model, self.sources, update=self.update, keep_repo=True
async for record in predict(
self.model, self.sources, update=self.update, keep_record=True
):
yield repo
yield record


class PredictRepo(PredictAll, KeysCMD):
"""Predictions for individual repos"""
class PredictRecord(PredictAll, KeysCMD):
"""Predictions for individual records"""

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.sources = SubsetSources(*self.sources, keys=self.keys)


class Predict(CMD):
"""Evaluate features against repos and produce a prediction"""
"""Evaluate features against records and produce a prediction"""

repo = PredictRepo
record = PredictRecord
_all = PredictAll
2 changes: 1 addition & 1 deletion dffml/feature/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ async def inc(self, key, default=None, by=1):

class Data(Task):
"""
Passed to each feature during evaluation. Shared between all features a repo
Passed to each feature during evaluation. Shared between all features a record
is being evaluated with
"""

Expand Down
Loading