Skip to content
This repository was archived by the owner on Aug 25, 2024. It is now read-only.

shouldi: deptree: Create dependency tree of project #596

Open
johnandersen777 opened this issue Apr 29, 2020 · 6 comments
Open

shouldi: deptree: Create dependency tree of project #596

johnandersen777 opened this issue Apr 29, 2020 · 6 comments
Labels
Alice Work related to our tutorial series "Rolling Alice" enhancement New feature or request p3 Average Priority security Security related issues or improvements tL Esitmated Time To Complete: Long
Milestone

Comments

@johnandersen777
Copy link

johnandersen777 commented Apr 29, 2020

https://github.com/intel/dffml/commits/shouldi_dep_tree

The idea behind the work that was done so far in the above branch was to produce the full dependency tree for a given python package. We'll want this to be the shouldi deptree command (as opposed to shouldi install)

It's currently in a state where it can grab package names out of setup.py files. It also needs to be able to grab them out of setup.cfg files and requirements.txt files.

Another thing that's missing is the version number of the package. Right now the latest version of each package is being downloaded. We need to check whether the package was pinned to a version and, if so, download that version (for example, if the requirement is example_package==0.3.1, we'd download version 0.3.1 of example_package). Come up with reasonable ways to handle all the following cases: https://stackoverflow.com/a/50842265/12310488

There is also a new output operation that needs to be made for this work. It's the Tree output operation. It's not working at the moment. What it should do is output a dict similar to the way config structures work with their use of plugin and config (you'll see what I'm talking about when you read the existing code).

import sys
import pathlib
import argparse
import unittest.mock
import importlib.util
from dffml.base import config
from dffml.util.os import chdir
from dffml.df.base import opimp_in, op
from dffml.df.types import Input, DataFlow
from dffml.service.dev import SetupPyKWArg
from dffml.operation.output import GetMulti
from dffml.df.memory import MemoryOrchestrator
from dffml.util.asynctestcase import AsyncTestCase
from shouldi.pypi import *
def remove_package_versions(packages):
    """
    Reduce requirement strings to bare package names.

    Everything from the first character that cannot be part of a package name
    onwards is dropped, so version specifiers (``==``, ``>=``, ``<``, ``~=``,
    ``!=``), extras (``[extra]``), environment markers (``; ...``), direct
    references (``@ url``) and parenthesised specifiers are all removed.
    Surrounding whitespace is stripped.

    ``packages`` is an iterable of requirement strings, for example
    ``["example_package==0.3.1", "other"]``. The return value is a list with
    one name per input requirement, in the same order.
    """
    # Characters that mark the end of the package name
    terminators = "<>=!~;[@("
    no_versions = []
    for package in packages:
        # Cut at the EARLIEST terminator in the string. Checking the
        # characters in a fixed order and splitting on the first one that is
        # present anywhere mangles requirements such as "pkg<2,>=1" (which
        # would become "pkg<2,") and leaves "~" / "!" attached to the name.
        cut = min(
            (index for index in map(package.find, terminators) if index != -1),
            default=len(package),
        )
        no_versions.append(package[:cut].strip())
    return no_versions
# Keyword arguments handed to @op by every package_deps_* operation: each one
# takes the "directory" output of pypi_package_contents as "src" and emits
# values for the "package" input of pypi_package_json, with "package" expanded.
PACKAGE_DEPS_KWARGS = {
    "inputs": {"src": pypi_package_contents.op.outputs["directory"]},
    "outputs": {"package": pypi_package_json.op.inputs["package"]},
    "expand": ["package"],
}
@op(**PACKAGE_DEPS_KWARGS)
async def package_deps_setup_py(src: str):
    """
    Collect the package names listed under ``install_requires`` in the
    ``setup.py`` found beneath the ``src`` directory.

    Returns ``None`` when there is no ``setup.py`` anywhere under ``src``.
    Otherwise returns ``{"package": [...]}`` holding the requirement names
    with their version specifiers removed.
    """
    # rglob() already searches recursively, no "**/" prefix is needed
    candidates = list(pathlib.Path(src).rglob("setup.py"))
    if not candidates:
        return
    # Use the setup.py nearest to the top of the tree (ties broken by path) so
    # that the choice does not depend on directory listing order and a nested
    # setup.py is not preferred over the project's own.
    setup_py_path = min(
        candidates, key=lambda path: (len(path.parts), str(path))
    )
    # NOTE(review): presumably get_kwargs() returns the keyword arguments given
    # to setup() in that file - confirm against dffml.service.dev
    deps = SetupPyKWArg.get_kwargs(str(setup_py_path)).get(
        "install_requires", []
    )
    return {"package": remove_package_versions(deps)}
@op(**PACKAGE_DEPS_KWARGS)
async def package_deps_setup_cfg(src: str):
    """
    Placeholder for reading dependency names out of ``setup.cfg``.

    Not implemented yet: ``src`` is ignored and an empty package list is
    returned.
    """
    # TODO
    return dict(package=[])
@op(**PACKAGE_DEPS_KWARGS)
async def package_deps_requirements_txt(src: str):
    """
    Placeholder for reading dependency names out of ``requirements.txt``.

    Not implemented yet: ``src`` is ignored and an empty package list is
    returned.
    """
    # TODO
    return dict(package=[])
# Dataflow made of every operation implementation found in this module
SUBFLOW = DataFlow.auto(*opimp_in(sys.modules[__name__]))
# Seed the "spec" input of GetMulti with the name of the package definition
# (presumably this is what makes package names appear in the results - confirm
# against dffml.operation.output)
SUBFLOW.seed.append(
    Input(
        definition=GetMulti.op.inputs["spec"],
        value=[pypi_package_json.op.inputs["package"].name],
    )
)
# Do not allow package names in the subflow to re-trigger the whole subflow
# again, since this will cause version numbers and directories to get crossed
SUBFLOW.flow["pypi_package_json"].inputs["package"] = ["seed"]
# Must come after the flow has been modified above
SUBFLOW.update_by_origin()
def create_parent_flow():
    """
    Build the parent dataflow, which wraps SUBFLOW in a single operation.

    This function exists so that shouldi_dataflow_as_operation doesn't end up
    in the subflow when we grab from sys.modules[__name__]

    Returns the DataFlow consisting of shouldi_dataflow_as_operation and
    GetMulti, configured to run SUBFLOW.
    """

    @config
    class ShouldIDataFlowAsOperationConfig:
        # Dataflow executed for each package given to the operation
        dataflow: DataFlow

    @op(
        inputs={"package": pypi_package_json.op.inputs["package"]},
        outputs={"package": pypi_package_json.op.inputs["package"]},
        expand=["package"],
        config_cls=ShouldIDataFlowAsOperationConfig,
    )
    async def shouldi_dataflow_as_operation(self, package: str):
        """
        Run the configured dataflow for ``package`` and return the package
        names it produced, excluding ``package`` itself.
        """
        package_definition = self.parent.op.inputs["package"]
        # Stay defined even if the sub-run yields no results at all
        packages = []
        async with self.octx.parent(self.config.dataflow) as octx:
            # The package name doubles as the name of the context
            async for ctx, result in octx.run(
                {
                    package: [
                        Input(value=package, definition=package_definition)
                    ]
                }
            ):
                packages = result[package_definition.name]
        # Remove input package from list and deduplicate, keeping the order of
        # first appearance (assumes package names are hashable strings)
        packages = list(
            dict.fromkeys(pkg for pkg in packages if pkg != package)
        )
        return {"package": packages}

    dataflow = DataFlow.auto(shouldi_dataflow_as_operation, GetMulti)
    dataflow.seed.append(
        Input(
            value=[pypi_package_json.op.inputs["package"].name],
            definition=GetMulti.op.inputs["spec"],
        )
    )
    dataflow.configs[
        "shouldi_dataflow_as_operation"
    ] = ShouldIDataFlowAsOperationConfig(dataflow=SUBFLOW)
    # Also accept package names which come from the seed
    dataflow.flow["shouldi_dataflow_as_operation"].inputs["package"].append(
        "seed"
    )
    dataflow.update_by_origin()
    return dataflow
# Parent dataflow, built once when the module is imported
DATAFLOW = create_parent_flow()
class TestOperations(AsyncTestCase):
    """
    Runs DATAFLOW for a couple of package names and prints what comes back.
    """

    async def test_run(self):
        # Package name -> list the results are meant to be compared against
        check = {"shouldi": [], "dffml-config-yaml": []}
        package_definition = pypi_package_json.op.inputs["package"]
        async with MemoryOrchestrator.withconfig({}) as orchestrator:
            async with orchestrator(DATAFLOW) as octx:
                # One context per package, named after the package
                inputs = {
                    package_name: [
                        Input(
                            value=package_name,
                            definition=package_definition,
                        ),
                    ]
                    for package_name in check
                }
                async for ctx, results in octx.run(inputs):
                    ctx_str = (await ctx.handle()).as_string()
                    with self.subTest(package=ctx_str):
                        print(ctx_str, results)
                        print(DATAFLOW.flow)
                        # NOTE(review): this continue makes the assertion
                        # below unreachable, so the test currently only prints
                        continue
                        self.assertEqual(
                            check[ctx_str], results[package_definition.name],
                        )

@johnandersen777 johnandersen777 added enhancement New feature or request p3 Average Priority tL Esitmated Time To Complete: Long security Security related issues or improvements labels Apr 29, 2020
@johnandersen777 johnandersen777 added this to the 0.3.8 Alpha Release milestone Apr 29, 2020
@johnandersen777 johnandersen777 changed the title shouldi: Complete work to create dependency tree of project shouldi: deptree: Create dependency tree of project Apr 29, 2020
@johnandersen777
Copy link
Author

This would probably help us figure out what's wrong with #737

@johnandersen777 johnandersen777 added the Alice Work related to our tutorial series "Rolling Alice" label Oct 11, 2022
@johnandersen777
Copy link
Author

@johnandersen777
Copy link
Author

johnandersen777 commented Oct 25, 2022


2022-10-25 Alice Initiative welcome aboard!

# Keyword arguments handed to @op by every package_deps_* operation: each one
# takes the "directory" output of pypi_package_contents as "src" and emits
# values for the "package" input of pypi_package_json, with "package" expanded.
PACKAGE_DEPS_KWARGS = {
    "inputs": {"src": pypi_package_contents.op.outputs["directory"]},
    "outputs": {"package": pypi_package_json.op.inputs["package"]},
    "expand": ["package"],
}
@op(**PACKAGE_DEPS_KWARGS)
async def package_deps_setup_py(src: str):
    """
    Collect the package names listed under ``install_requires`` in the
    ``setup.py`` found beneath the ``src`` directory.

    Returns ``None`` when there is no ``setup.py`` anywhere under ``src``.
    Otherwise returns ``{"package": [...]}`` holding the requirement names
    with their version specifiers removed.
    """
    # rglob() already searches recursively, no "**/" prefix is needed
    candidates = list(pathlib.Path(src).rglob("setup.py"))
    if not candidates:
        return
    # Use the setup.py nearest to the top of the tree (ties broken by path) so
    # that the choice does not depend on directory listing order and a nested
    # setup.py is not preferred over the project's own.
    setup_py_path = min(
        candidates, key=lambda path: (len(path.parts), str(path))
    )
    # NOTE(review): presumably get_kwargs() returns the keyword arguments given
    # to setup() in that file - confirm against dffml.service.dev
    deps = SetupPyKWArg.get_kwargs(str(setup_py_path)).get(
        "install_requires", []
    )
    return {"package": remove_package_versions(deps)}
@op(**PACKAGE_DEPS_KWARGS)
async def package_deps_setup_cfg(src: str):
    """
    Placeholder for reading dependency names out of ``setup.cfg``.

    Not implemented yet: ``src`` is ignored and an empty package list is
    returned.
    """
    # TODO
    return dict(package=[])
@op(**PACKAGE_DEPS_KWARGS)
async def package_deps_requirements_txt(src: str):
    """
    Placeholder for reading dependency names out of ``requirements.txt``.

    Not implemented yet: ``src`` is ignored and an empty package list is
    returned.
    """
    # TODO
    return dict(package=[])

@johnandersen777

This comment was marked as off-topic.

@johnandersen777
Copy link
Author

johnandersen777 commented Nov 10, 2022

A discussion today revealed we should go with the file parsing approach (rather than pipdeptree)

@johnandersen777
Copy link
Author

johnandersen777 commented Mar 2, 2023

$ sbom4python  --format json --sbom spdx -m black                     
{                                                                                                                                                                                         
  "SPDXID": "SPDXRef-DOCUMENT",                                                                                                                                                           
  "spdxVersion": "SPDX-2.2",         
  "creationInfo": {                 
    "comment": "This document has been automatically generated.",                            
    "creators": [                                                                            
      "Tool: sbom4python-0.7.0"                                                              
    ],                                                                                                                                                                                    
    "created": "2023-03-02T13:53:48Z",                                                       
    "licenseListVersion": "3.18"                                                                                                                                                          
  },                                      
  "name": "black",                                                                                                                                                                        
  "dataLicense": "CC0-1.0",                                                                  
  "documentNamespace": "http://spdx.org/spdxdocs/black-7ebf1fb9-a781-41f5-9e57-a6bba6969ecb", 
  "packages": [                                                                                                                                                                           
    {                                                                                        
      "SPDXID": "SPDXRef-Package-1-black",                                                   
      "name": "black",                                                                       
      "versionInfo": "23.1.1.dev8+g25d886f",                                                 
      "supplier": "Organization: ukasz Langa ([email protected])",                             
      "downloadLocation": "NONE",                                                            
      "filesAnalyzed": false,                                                                
      "licenseConcluded": "MIT",                                                             
      "licenseDeclared": "MIT",                                                              
      "copyrightText": "NOASSERTION",  
      "externalRefs": [              
        {                                                                                    
          "referenceCategory": "PACKAGE-MANAGER",                                            
          "referenceLocator": "pkg:pypi/black@23.1.1.dev8+g25d886f",
          "referenceType": "purl"                                                            
        },                       
        {                                                                                                                                                                                 
          "referenceCategory": "SECURITY",
          "referenceLocator": "cpe:2.3:a:ukasz_langa:black:23.1.1.dev8+g25d886f:*:*:*:*:*:*:*",
          "referenceType": "cpe23Type"
        }
      ]
    },
    {
      "SPDXID": "SPDXRef-Package-2-click",
      "name": "click",
      "versionInfo": "8.1.3",
      "supplier": "Organization: Armin Ronacher ([email protected])",
      "downloadLocation": "NONE",
      "filesAnalyzed": false,
      "licenseConcluded": "BSD-3-Clause",
      "licenseDeclared": "BSD-3-Clause",
      "copyrightText": "NOASSERTION",
      "externalRefs": [
        {
          "referenceCategory": "PACKAGE-MANAGER",
          "referenceLocator": "pkg:pypi/click@8.1.3",
          "referenceType": "purl"
        },
        {
          "referenceCategory": "SECURITY",
          "referenceLocator": "cpe:2.3:a:armin_ronacher:click:8.1.3:*:*:*:*:*:*:*",
          "referenceType": "cpe23Type"
        }
      ]
    },
    {
      "SPDXID": "SPDXRef-Package-3-mypy-extensions",
      "name": "mypy-extensions",
      "versionInfo": "1.0.0",
      "supplier": "Organization: The mypy developers ([email protected])",
      "downloadLocation": "NONE",
      "filesAnalyzed": false,
      "licenseConcluded": "MIT",
      "licenseDeclared": "MIT",
      "copyrightText": "NOASSERTION",
      "externalRefs": [
        {                                                                                    
          "referenceCategory": "PACKAGE-MANAGER",                                            
          "referenceLocator": "pkg:pypi/black@23.1.1.dev8+g25d886f",
          "referenceType": "purl"                                                            
        },                       
        {                                                                                                                                                                                 
          "referenceCategory": "SECURITY",
          "referenceLocator": "cpe:2.3:a:ukasz_langa:black:23.1.1.dev8+g25d886f:*:*:*:*:*:*:*",
          "referenceType": "cpe23Type"
        }
      ]
    },
    {
      "SPDXID": "SPDXRef-Package-2-click",
      "name": "click",
      "versionInfo": "8.1.3",
      "supplier": "Organization: Armin Ronacher ([email protected])",
      "downloadLocation": "NONE",
      "filesAnalyzed": false,
      "licenseConcluded": "BSD-3-Clause",
      "licenseDeclared": "BSD-3-Clause",
      "copyrightText": "NOASSERTION",
      "externalRefs": [
        {
          "referenceCategory": "PACKAGE-MANAGER",
          "referenceLocator": "pkg:pypi/click@8.1.3",
          "referenceType": "purl"
        },
        {
          "referenceCategory": "SECURITY",
          "referenceLocator": "cpe:2.3:a:armin_ronacher:click:8.1.3:*:*:*:*:*:*:*",
          "referenceType": "cpe23Type"
        }
      ]
    },
    {
      "SPDXID": "SPDXRef-Package-3-mypy-extensions",
      "name": "mypy-extensions",
      "versionInfo": "1.0.0",
      "supplier": "Organization: The mypy developers ([email protected])",
      "downloadLocation": "NONE",
      "filesAnalyzed": false,
      "licenseConcluded": "MIT",
      "licenseDeclared": "MIT",
      "copyrightText": "NOASSERTION",
      "externalRefs": [
        {
          "referenceCategory": "PACKAGE-MANAGER",
          "referenceLocator": "pkg:pypi/mypy-extensions@1.0.0",
          "referenceType": "purl"
        },
        {
          "referenceCategory": "SECURITY",
          "referenceLocator": "cpe:2.3:a:the_mypy_developers:mypy-extensions:1.0.0:*:*:*:*:*:*:*",
          "referenceType": "cpe23Type"
        }
      ]
    },
    {
      "SPDXID": "SPDXRef-Package-4-packaging", 
      "name": "packaging",
      "versionInfo": "23.0",
      "supplier": "Organization: Donald Stufft ([email protected])",
      "downloadLocation": "NONE",
      "filesAnalyzed": false,
      "licenseConcluded": "NOASSERTION",
      "licenseDeclared": "NOASSERTION",
      "copyrightText": "NOASSERTION",
      "externalRefs": [
        {
          "referenceCategory": "PACKAGE-MANAGER",
          "referenceLocator": "pkg:pypi/packaging@23.0",
          "referenceType": "purl"
        },
        {
          "referenceCategory": "SECURITY",
          "referenceLocator": "cpe:2.3:a:donald_stufft:packaging:23.0:*:*:*:*:*:*:*",
          "referenceType": "cpe23Type"
        }
      ]
    },
    {
      "SPDXID": "SPDXRef-Package-5-pathspec",
      "name": "pathspec",
      "versionInfo": "0.11.0",
      "supplier": "Organization: Caleb P. ([email protected])",
      "downloadLocation": "NONE",
      "filesAnalyzed": false,
      "licenseConcluded": "NOASSERTION",
      "licenseDeclared": "NOASSERTION",
      "copyrightText": "NOASSERTION",
      "externalRefs": [

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
Alice Work related to our tutorial series "Rolling Alice" enhancement New feature or request p3 Average Priority security Security related issues or improvements tL Esitmated Time To Complete: Long
Projects
None yet
Development

No branches or pull requests

1 participant