Skip to content
This repository was archived by the owner on Mar 27, 2024. It is now read-only.

Add rpmlayer differ #252

Merged
merged 2 commits into from
Aug 10, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions cmd/root.go
Original file line number Diff line number Diff line change
@@ -59,8 +59,6 @@ const (
RemotePrefix = "remote://"
)

var layerAnalyzers = [...]string{"layer", "aptlayer"}

var RootCmd = &cobra.Command{
Use: "container-diff",
Short: "container-diff is a tool for analyzing and comparing container images",
@@ -270,7 +268,7 @@ func getExtractPathForName(name string) (string, error) {

func includeLayers() bool {
for _, t := range types {
for _, a := range layerAnalyzers {
for _, a := range differs.LayerAnalyzers {
if t == a {
return true
}
32 changes: 23 additions & 9 deletions differs/differs.go
Original file line number Diff line number Diff line change
@@ -24,6 +24,17 @@ import (
"github.com/sirupsen/logrus"
)

const historyAnalyzer = "history"
const metadataAnalyzer = "metadata"
const fileAnalyzer = "file"
const layerAnalyzer = "layer"
const aptAnalyzer = "apt"
const aptLayerAnalyzer = "aptlayer"
const rpmAnalyzer = "rpm"
const rpmLayerAnalyzer = "rpmlayer"
const pipAnalyzer = "pip"
const nodeAnalyzer = "node"

type DiffRequest struct {
Image1 pkgutil.Image
Image2 pkgutil.Image
@@ -42,17 +53,20 @@ type Analyzer interface {
}

var Analyzers = map[string]Analyzer{
"history": HistoryAnalyzer{},
"metadata": MetadataAnalyzer{},
"file": FileAnalyzer{},
"layer": FileLayerAnalyzer{},
"apt": AptAnalyzer{},
"aptlayer": AptLayerAnalyzer{},
"rpm": RPMAnalyzer{},
"pip": PipAnalyzer{},
"node": NodeAnalyzer{},
historyAnalyzer: HistoryAnalyzer{},
metadataAnalyzer: MetadataAnalyzer{},
fileAnalyzer: FileAnalyzer{},
layerAnalyzer: FileLayerAnalyzer{},
aptAnalyzer: AptAnalyzer{},
aptLayerAnalyzer: AptLayerAnalyzer{},
rpmAnalyzer: RPMAnalyzer{},
rpmLayerAnalyzer: RPMLayerAnalyzer{},
pipAnalyzer: PipAnalyzer{},
nodeAnalyzer: NodeAnalyzer{},
}

var LayerAnalyzers = [...]string{layerAnalyzer, aptLayerAnalyzer, rpmLayerAnalyzer}

func (req DiffRequest) GetDiff() (map[string]util.Result, error) {
img1 := req.Image1
img2 := req.Image2
157 changes: 134 additions & 23 deletions differs/rpm_diff.go
Original file line number Diff line number Diff line change
@@ -34,6 +34,8 @@ import (
"github.com/google/go-containerregistry/pkg/name"
"github.com/google/go-containerregistry/pkg/v1"
"github.com/google/go-containerregistry/pkg/v1/daemon"
"github.com/google/go-containerregistry/pkg/v1/mutate"
"github.com/google/go-containerregistry/pkg/v1/random"

pkgutil "github.com/GoogleContainerTools/container-diff/pkg/util"
"github.com/GoogleContainerTools/container-diff/util"
@@ -100,39 +102,31 @@ func (a RPMAnalyzer) getPackages(image pkgutil.Image) (map[string]util.PackageIn

packages, err := rpmDataFromImageFS(image)
if err != nil {
logrus.Info("Running RPM binary from image in a container")
return rpmDataFromContainer(image)
logrus.Info("Couldn't retrieve RPM data from extracted filesystem; running query in container")
return rpmDataFromContainer(image.Image)
}
return packages, err
}

// rpmDataFromImageFS runs a local rpm binary, if any, to query the image
// rpmdb and returns a map of installed packages.
func rpmDataFromImageFS(image pkgutil.Image) (map[string]util.PackageInfo, error) {
packages := make(map[string]util.PackageInfo)
// Check there is an executable rpm tool in host
if err := exec.Command("rpm", "--version").Run(); err != nil {
logrus.Warn("No RPM binary in host")
return packages, err
}
dbPath, err := rpmDBPath(image.FSPath)
dbPath, err := rpmEnvCheck(image.FSPath)
if err != nil {
logrus.Warnf("Couldn't find RPM database: %s", err.Error())
return packages, err
return nil, err
}
cmdArgs := append([]string{"--root", image.FSPath, "--dbpath", dbPath}, rpmCmd[1:]...)
out, err := exec.Command(rpmCmd[0], cmdArgs...).Output()
if err != nil {
logrus.Warnf("RPM call failed: %s", err.Error())
return packages, err
}
output := strings.Split(string(out), "\n")
return parsePackageData(output)
return rpmDataFromFS(image.FSPath, dbPath)
}

// rpmDBPath tries to get the RPM database path from the /usr/lib/rpm/macros
// file in the image rootfs.
func rpmDBPath(rootFSPath string) (string, error) {
// rpmEnvCheck checks there is an rpm binary in the host and tries to
// get the RPM database path from the /usr/lib/rpm/macros file in the
// image rootfs
func rpmEnvCheck(rootFSPath string) (string, error) {
if err := exec.Command("rpm", "--version").Run(); err != nil {
logrus.Warn("No RPM binary in host")
return "", err
}
imgMacrosFile, err := os.Open(filepath.Join(rootFSPath, rpmMacros))
if err != nil {
return "", err
@@ -164,7 +158,7 @@ func rpmDBPath(rootFSPath string) (string, error) {

// rpmDataFromContainer runs image in a container, queries the data of
// installed rpm packages and returns a map of packages.
func rpmDataFromContainer(image pkgutil.Image) (map[string]util.PackageInfo, error) {
func rpmDataFromContainer(image v1.Image) (map[string]util.PackageInfo, error) {
packages := make(map[string]util.PackageInfo)

client, err := godocker.NewClientFromEnv()
@@ -175,7 +169,7 @@ func rpmDataFromContainer(image pkgutil.Image) (map[string]util.PackageInfo, err
return packages, err
}

imageName, err := loadImageToDaemon(image.Image)
imageName, err := loadImageToDaemon(image)

if err != nil {
return packages, fmt.Errorf("Error loading image: %s", err)
@@ -364,3 +358,120 @@ func unlock() error {
daemonMutex.Unlock()
return nil
}

type RPMLayerAnalyzer struct {
}

// Name returns the name of the analyzer.
func (a RPMLayerAnalyzer) Name() string {
return "RPMLayerAnalyzer"
}

// Diff compares the installed rpm packages of image1 and image2 for each layer
func (a RPMLayerAnalyzer) Diff(image1, image2 pkgutil.Image) (util.Result, error) {
diff, err := singleVersionLayerDiff(image1, image2, a)
return diff, err
}

// Analyze collects information of the installed rpm packages on each layer
func (a RPMLayerAnalyzer) Analyze(image pkgutil.Image) (util.Result, error) {
analysis, err := singleVersionLayerAnalysis(image, a)
return analysis, err
}

// getPackages returns an array of maps of installed rpm packages on each layer
func (a RPMLayerAnalyzer) getPackages(image pkgutil.Image) ([]map[string]util.PackageInfo, error) {
path := image.FSPath
var packages []map[string]util.PackageInfo
if _, err := os.Stat(path); err != nil {
// invalid image directory path
return packages, err
}

// try to find the rpm binary in bin/ or usr/bin/
rpmBinary := filepath.Join(path, "bin/rpm")
if _, err := os.Stat(rpmBinary); err != nil {
rpmBinary = filepath.Join(path, "usr/bin/rpm")
if _, err = os.Stat(rpmBinary); err != nil {
logrus.Errorf("Could not detect RPM binary in unpacked image %s", image.Source)
return packages, nil
}
}

packages, err := rpmDataFromLayerFS(image)
if err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we care about the error here? This fallback pattern makes sense to me here, but I feel like we might want to be logging this error (I know we're not doing it in RPMAnalyzer.getPackages() either). At the very least we should add something to the log message saying something like Couldn't retrieve RPM data from extracted filesystem; running query in container.

Copy link
Contributor Author

@davidcassany davidcassany Jul 31, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree, lets include some better logs

logrus.Info("Couldn't retrieve RPM data from extracted filesystem; running query in container")
return rpmDataFromLayeredContainers(image.Image)
}
return packages, err
}

// rpmDataFromLayerFS runs a local rpm binary, if any, to query the layer
// rpmdb and returns an array of maps of installed packages.
func rpmDataFromLayerFS(image pkgutil.Image) ([]map[string]util.PackageInfo, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic in this function is really similar to rpmDataFromImageFS. Could you pull out some of the shared code between them and reuse it in both functions? Apart from iterating over layers here I think they're almost identical.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was also thinking about it, I can try to move some part to another function an see how it looks like. I think only lines 435->444 are clear candidates to be moved to another function.

var packages []map[string]util.PackageInfo
dbPath, err := rpmEnvCheck(image.FSPath)
if err != nil {
logrus.Warnf("Couldn't find RPM database: %s", err.Error())
return packages, err
}
for _, layer := range image.Layers {
layerPackages, err := rpmDataFromFS(layer.FSPath, dbPath)
if err != nil {
return packages, err
}
packages = append(packages, layerPackages)
}

return packages, nil
}

// rpmDataFromFS runs a local rpm binary to query the image
// rpmdb and returns a map of installed packages.
func rpmDataFromFS(fsPath string, dbPath string) (map[string]util.PackageInfo, error) {
packages := make(map[string]util.PackageInfo)
if _, err := os.Stat(filepath.Join(fsPath, dbPath)); err == nil {
cmdArgs := append([]string{"--root", fsPath, "--dbpath", dbPath}, rpmCmd[1:]...)
out, err := exec.Command(rpmCmd[0], cmdArgs...).Output()
if err != nil {
logrus.Warnf("RPM call failed: %s", err.Error())
return packages, err
}
output := strings.Split(string(out), "\n")
packages, err := parsePackageData(output)
if err != nil {
return packages, err
}
}
return packages, nil
}

// rpmDataFromLayeredContainers runs a tmp image in a container for each layer,
// queries the data of installed rpm packages and returns an array of maps of
// packages.
func rpmDataFromLayeredContainers(image v1.Image) ([]map[string]util.PackageInfo, error) {
var packages []map[string]util.PackageInfo
tmpImage, err := random.Image(0, 0)
if err != nil {
return packages, err
}
layers, err := image.Layers()
if err != nil {
return packages, err
}
// Append layers one by one to an empty image and query rpm
// database on each iteration
for _, layer := range layers {
tmpImage, err = mutate.AppendLayers(tmpImage, layer)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're trying to retrieve a map of packages installed in each layer here, right? I'm wondering if we actually want to be appending each layer onto the previous layers before running rpmDataFromContainer(), and instead create a single-layer image for each layer. Won't we be getting package information for the combination of all layers before the one we're actually examining here?

e.g. if I have layer A with package foo==1.0.0, and layer B with package bar==1.0.0, my maps here will look like
A: {
foo: 1.0.0
},

B: {
foo: 1.0.0, <-- don't want this here right?
bar: 1.0.0,
}

To get around this you could just create a new random.Image for each layer, append it, and then make the rpmDataFromContainer() call. Does this make sense?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To get around this you could just create a new random.Image for each layer, append it, and then make the rpmDataFromContainer() call. Does this make sense?

This is the same situation we have in APT packages, the package database reports all the installed packages, thus in each layer we will get always the list of all packages included in current and previous layers. To work around this there is the method singleVersionLayerAnalysis

func singleVersionLayerAnalysis(image pkgutil.Image, analyzer SingleVersionPackageLayerAnalyzer) (*util.SingleVersionPackageLayerAnalyzeResult, error) {
pack, err := analyzer.getPackages(image)
if err != nil {
return &util.SingleVersionPackageLayerAnalyzeResult{}, err
}
var pkgDiffs []util.PackageDiff
// Each layer with modified packages includes a complete list of packages
// in its package database. Thus we diff the current layer with the
// previous one that contains a package database. Layers that do not
// include a package database are omitted.
preInd := -1
for i := range pack {
var pkgDiff util.PackageDiff
if preInd < 0 && len(pack[i]) > 0 {
pkgDiff = util.GetMapDiff(make(map[string]util.PackageInfo), pack[i])
preInd = i
} else if preInd >= 0 && len(pack[i]) > 0 {
pkgDiff = util.GetMapDiff(pack[preInd], pack[i])
preInd = i
}
pkgDiffs = append(pkgDiffs, pkgDiff)
}

which basically performs a diff between layers.

Also I am appending the layers one by one because in this case the rpm binary used to parse the database is the one included inside the image, thus at least the layer including this binary (most probably the base layer) should be also part of the container to run. In order to make things simpler without having to include some kind of logic to guess which layers should be appended and which aren't needed I opted to simply stack them one by one. Note that this is the fallback in case the host does not have the RPM tool.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh right, ok I think I follow that. Thanks for the explanation!

if err != nil {
return packages, err
}
layerPackages, err := rpmDataFromContainer(tmpImage)
if err != nil {
return packages, err
}
packages = append(packages, layerPackages)
}

return packages, nil
}