Skip to content

Commit 18e2356

Browse files
authored
Merge pull request #1062 from dcantah/coredump
Add process dump functionality for WCOW/LCOW
2 parents 50c48de + f964e28 commit 18e2356

File tree

23 files changed

+576
-28
lines changed

23 files changed

+576
-28
lines changed

cmd/gcs/main.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ func readMemoryEvents(startTime time.Time, efdFile *os.File, cgName string, thre
8484
func main() {
8585
startTime := time.Now()
8686
logLevel := flag.String("loglevel", "debug", "Logging Level: debug, info, warning, error, fatal, panic.")
87+
coreDumpLoc := flag.String("core-dump-location", "", "The location/format where process core dumps will be written to.")
8788
kmsgLogLevel := flag.Uint("kmsgLogLevel", uint(kmsg.Warning), "Log all kmsg entries with a priority less than or equal to the supplied level.")
8889
logFile := flag.String("logfile", "", "Logging Target: An optional file name/path. Omit for console output.")
8990
logFormat := flag.String("log-format", "text", "Logging Format: text or json")
@@ -144,6 +145,19 @@ func main() {
144145

145146
logrus.Info("GCS started")
146147

148+
// Set the process core dump location. This will be global to all containers as it's a kernel configuration.
149+
// If no path is specified core dumps will just be placed in the working directory of wherever the process
150+
// was invoked, in a file named "core".
151+
if *coreDumpLoc != "" {
152+
if err := ioutil.WriteFile(
153+
"/proc/sys/kernel/core_pattern",
154+
[]byte(*coreDumpLoc),
155+
0644,
156+
); err != nil {
157+
logrus.WithError(err).Fatal("failed to set core dump location")
158+
}
159+
}
160+
147161
// Continuously log /dev/kmsg
148162
go kmsg.ReadForever(kmsg.LogLevel(*kmsgLogLevel))
149163

internal/guest/runtime/hcsv2/container.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ func (c *Container) ExecProcess(ctx context.Context, process *oci.Process, conSe
6666
return -1, err
6767
}
6868

69+
// Add in the core rlimit specified on the container in case there was one set. This makes it so that execed processes can also generate
70+
// core dumps.
71+
process.Rlimits = c.spec.Process.Rlimits
6972
p, err := c.container.ExecProcess(process, stdioSet)
7073
if err != nil {
7174
stdioSet.Close()

internal/guest/runtime/hcsv2/sandbox_container.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@ func setupSandboxContainerSpec(ctx context.Context, id string, spec *oci.Spec) (
108108
}
109109
}
110110

111+
if rlimCore := spec.Annotations["io.microsoft.lcow.rlimitcore"]; rlimCore != "" {
112+
if err := setCoreRLimit(spec, rlimCore); err != nil {
113+
return err
114+
}
115+
}
116+
111117
// TODO: JTERRY75 /dev/shm is not properly setup for LCOW I believe. CRI
112118
// also has a concept of a sandbox/shm file when the IPC NamespaceMode !=
113119
// NODE.

internal/guest/runtime/hcsv2/spec.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,32 @@ func setProcess(spec *oci.Spec) {
6363
}
6464
}
6565

66+
func setCoreRLimit(spec *oci.Spec, value string) error {
67+
setProcess(spec)
68+
69+
vals := strings.Split(value, ";")
70+
if len(vals) != 2 {
71+
return errors.New("wrong number of values supplied for rlimit core")
72+
}
73+
74+
soft, err := strconv.ParseUint(vals[0], 10, 64)
75+
if err != nil {
76+
return errors.Wrap(err, "failed to parse soft core rlimit")
77+
}
78+
hard, err := strconv.ParseUint(vals[1], 10, 64)
79+
if err != nil {
80+
return errors.Wrap(err, "failed to parse hard core rlimit")
81+
}
82+
83+
spec.Process.Rlimits = append(spec.Process.Rlimits, oci.POSIXRlimit{
84+
Type: "RLIMIT_CORE",
85+
Soft: soft,
86+
Hard: hard,
87+
})
88+
89+
return nil
90+
}
91+
6692
// setUserStr sets `spec.Process` to the valid `userstr` based on the OCI Image Spec
6793
// v1.0.0 `userstr`.
6894
//

internal/guest/runtime/hcsv2/workload_container.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,12 @@ func setupWorkloadContainerSpec(ctx context.Context, sbid, id string, spec *oci.
161161
return err
162162
}
163163

164+
if rlimCore := spec.Annotations["io.microsoft.lcow.rlimitcore"]; rlimCore != "" {
165+
if err := setCoreRLimit(spec, rlimCore); err != nil {
166+
return err
167+
}
168+
}
169+
164170
// Force the parent cgroup into our /containers root
165171
spec.Linux.CgroupsPath = "/containers/" + id
166172

internal/hcsoci/hcsdoc_wcow.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package hcsoci
44

55
import (
66
"context"
7+
"errors"
78
"fmt"
89
"path/filepath"
910
"regexp"
@@ -385,6 +386,51 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter
385386
}
386387
v2Container.AdditionalDeviceNamespace = extensions
387388

389+
// Process dump setup (if requested)
390+
dumpPath := ""
391+
if coi.HostingSystem != nil {
392+
dumpPath = coi.HostingSystem.ProcessDumpLocation()
393+
}
394+
395+
if specDumpPath, ok := coi.Spec.Annotations[oci.AnnotationContainerProcessDumpLocation]; ok {
396+
// If a process dump path was specified at pod creation time for a hypervisor isolated pod, then
397+
// use this value. If one was specified on the container creation document then override with this
398+
// instead. Unlike Linux, Windows containers can set the dump path on a per container basis.
399+
dumpPath = specDumpPath
400+
}
401+
402+
if dumpPath != "" {
403+
dumpType, err := parseDumpType(coi.Spec.Annotations)
404+
if err != nil {
405+
return nil, nil, err
406+
}
407+
408+
// Setup WER registry keys for local process dump creation if specified.
409+
// https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps
410+
v2Container.RegistryChanges = &hcsschema.RegistryChanges{
411+
AddValues: []hcsschema.RegistryValue{
412+
{
413+
Key: &hcsschema.RegistryKey{
414+
Hive: "Software",
415+
Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
416+
},
417+
Name: "DumpFolder",
418+
StringValue: dumpPath,
419+
Type_: "String",
420+
},
421+
{
422+
Key: &hcsschema.RegistryKey{
423+
Hive: "Software",
424+
Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
425+
},
426+
Name: "DumpType",
427+
DWordValue: dumpType,
428+
Type_: "DWord",
429+
},
430+
},
431+
}
432+
}
433+
388434
return v1, v2Container, nil
389435
}
390436

@@ -415,3 +461,22 @@ func parseAssignedDevices(ctx context.Context, coi *createOptionsInternal, v2 *h
415461
v2.AssignedDevices = v2AssignedDevices
416462
return nil
417463
}
464+
465+
// parseDumpType parses the passed in string representation of the local user mode process dump type to the
466+
// corresponding value the registry expects to be set.
467+
//
468+
// See DumpType at https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps for the mappings
469+
func parseDumpType(annotations map[string]string) (int32, error) {
470+
dmpTypeStr := annotations[oci.AnnotationWCOWProcessDumpType]
471+
switch dmpTypeStr {
472+
case "":
473+
// If no type specified, default to full dumps.
474+
return 2, nil
475+
case "mini":
476+
return 1, nil
477+
case "full":
478+
return 2, nil
479+
default:
480+
return -1, errors.New(`unknown dump type specified, valid values are "mini" or "full"`)
481+
}
482+
}

internal/oci/annotations.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,4 +221,17 @@ const (
221221

222222
// AnnotationSecurityPolicy is used to specify a security policy for opengcs to enforce
223223
AnnotationSecurityPolicy = "io.microsoft.virtualmachine.lcow.securitypolicy"
224+
// AnnotationContainerProcessDumpLocation specifies a path inside of containers to save process dumps to. As
225+
// the scratch space for a container is generally cleaned up after exit, this is best set to a volume mount of
226+
// some kind (vhd, bind mount, fileshare mount etc.)
227+
AnnotationContainerProcessDumpLocation = "io.microsoft.container.processdumplocation"
228+
229+
// AnnotationWCOWProcessDumpType specifies the type of dump to create when generating a local user mode
230+
// process dump for Windows containers. The supported options are "mini" and "full".
231+
// See DumpType: https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps
232+
AnnotationWCOWProcessDumpType = "io.microsoft.wcow.processdumptype"
233+
234+
// AnnotationRLimitCore specifies the core rlimit value for a container. This will need to be set
235+
// in order to have core dumps generated for a given container.
236+
AnnotationRLimitCore = "io.microsoft.lcow.rlimitcore"
224237
)

internal/oci/uvm.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) (
332332
lopts.EnableScratchEncryption = parseAnnotationsBool(ctx, s.Annotations, AnnotationEncryptedScratchDisk, lopts.EnableScratchEncryption)
333333
lopts.SecurityPolicy = parseAnnotationsString(s.Annotations, AnnotationSecurityPolicy, lopts.SecurityPolicy)
334334
lopts.KernelBootOptions = parseAnnotationsString(s.Annotations, AnnotationKernelBootOptions, lopts.KernelBootOptions)
335-
335+
lopts.ProcessDumpLocation = parseAnnotationsString(s.Annotations, AnnotationContainerProcessDumpLocation, lopts.ProcessDumpLocation)
336336
handleAnnotationPreferredRootFSType(ctx, s.Annotations, lopts)
337337
handleAnnotationKernelDirectBoot(ctx, s.Annotations, lopts)
338338

@@ -357,6 +357,7 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) (
357357
wopts.CPUGroupID = parseAnnotationsString(s.Annotations, AnnotationCPUGroupID, wopts.CPUGroupID)
358358
wopts.NetworkConfigProxy = parseAnnotationsString(s.Annotations, AnnotationNetworkConfigProxy, wopts.NetworkConfigProxy)
359359
wopts.NoDirectMap = parseAnnotationsBool(ctx, s.Annotations, AnnotationVSMBNoDirectMap, wopts.NoDirectMap)
360+
wopts.ProcessDumpLocation = parseAnnotationsString(s.Annotations, AnnotationContainerProcessDumpLocation, wopts.ProcessDumpLocation)
360361
handleAnnotationFullyPhysicallyBacked(ctx, s.Annotations, wopts)
361362
if err := handleCloneAnnotations(ctx, s.Annotations, wopts); err != nil {
362363
return nil, err

internal/uvm/create.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ type Options struct {
8585
// that receives the UVMs set of NICs from this proxy instead of enumerating
8686
// the endpoints locally.
8787
NetworkConfigProxy string
88+
89+
// Sets the location for process dumps to be placed in. On Linux this is a kernel setting so it will be
90+
// applied to all containers. On Windows it's configurable per container, but we can mimic this for
91+
// Windows by just applying the location specified here per container.
92+
ProcessDumpLocation string
8893
}
8994

9095
// compares the create opts used during template creation with the create opts
@@ -347,6 +352,12 @@ func (uvm *UtilityVM) PhysicallyBacked() bool {
347352
return uvm.physicallyBacked
348353
}
349354

355+
// ProcessDumpLocation returns the location that process dumps will get written to for containers running
356+
// in the UVM.
357+
func (uvm *UtilityVM) ProcessDumpLocation() string {
358+
return uvm.processDumpLocation
359+
}
360+
350361
func (uvm *UtilityVM) normalizeMemorySize(ctx context.Context, requested uint64) uint64 {
351362
actual := (requested + 1) &^ 1 // align up to an even number
352363
if requested != actual {

internal/uvm/create_lcow.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,10 @@ func CreateLCOW(ctx context.Context, opts *OptionsLCOW) (_ *UtilityVM, err error
384384

385385
initArgs += " " + opts.ExecCommandLine
386386

387+
if opts.ProcessDumpLocation != "" {
388+
initArgs += " -core-dump-location " + opts.ProcessDumpLocation
389+
}
390+
387391
if vmDebugging {
388392
// Launch a shell on the console.
389393
initArgs = `sh -c "` + initArgs + ` & exec sh"`

internal/uvm/create_wcow.go

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -105,25 +105,54 @@ func prepareConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWCOW, uv
105105
},
106106
}
107107

108+
var registryChanges hcsschema.RegistryChanges
109+
// We're getting asked to set up local dump collection for WCOW. We need to:
110+
//
111+
// 1. Turn off WER reporting, so we don't both upload the dump and save a local copy.
112+
// 2. Set WerSvc to start when the UVM starts to work around a bug when generating dumps for certain exceptions.
113+
// https://github.com/microsoft/Windows-Containers/issues/60#issuecomment-834633192
114+
// This supposedly should be fixed soon but for now keep this until we know which container images
115+
// (1809, 1903/9, 2004 etc.) this went out to.
116+
if opts.ProcessDumpLocation != "" {
117+
uvm.processDumpLocation = opts.ProcessDumpLocation
118+
registryChanges.AddValues = append(registryChanges.AddValues,
119+
hcsschema.RegistryValue{
120+
Key: &hcsschema.RegistryKey{
121+
Hive: "System",
122+
Name: "ControlSet001\\Services\\WerSvc",
123+
},
124+
Name: "Start",
125+
DWordValue: 2,
126+
Type_: "DWord",
127+
},
128+
hcsschema.RegistryValue{
129+
Key: &hcsschema.RegistryKey{
130+
Hive: "Software",
131+
Name: "Microsoft\\Windows\\Windows Error Reporting",
132+
},
133+
Name: "Disabled",
134+
DWordValue: 1,
135+
Type_: "DWord",
136+
},
137+
)
138+
}
139+
108140
// Here for a temporary workaround until the need for setting this regkey is no more. To protect
109141
// against any undesired behavior (such as some general networking scenarios ceasing to function)
110142
// with a recent change to fix SMB share access in the UVM, this registry key will be checked to
111143
// enable the change in question inside GNS.dll.
112-
var registryChanges hcsschema.RegistryChanges
113144
if !opts.DisableCompartmentNamespace {
114-
registryChanges = hcsschema.RegistryChanges{
115-
AddValues: []hcsschema.RegistryValue{
116-
{
117-
Key: &hcsschema.RegistryKey{
118-
Hive: "System",
119-
Name: "CurrentControlSet\\Services\\gns",
120-
},
121-
Name: "EnableCompartmentNamespace",
122-
DWordValue: 1,
123-
Type_: "DWord",
145+
registryChanges.AddValues = append(registryChanges.AddValues,
146+
hcsschema.RegistryValue{
147+
Key: &hcsschema.RegistryKey{
148+
Hive: "System",
149+
Name: "CurrentControlSet\\Services\\gns",
124150
},
151+
Name: "EnableCompartmentNamespace",
152+
DWordValue: 1,
153+
Type_: "DWord",
125154
},
126-
}
155+
)
127156
}
128157

129158
processor := &hcsschema.Processor2{

internal/uvm/types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,13 @@ type UtilityVM struct {
122122
// is true
123123
TemplateID string
124124

125+
// Location that container process dumps will get written to.
126+
processDumpLocation string
127+
125128
// The CreateOpts used to create this uvm. These can be either of type
126129
// uvm.OptionsLCOW or uvm.OptionsWCOW
127130
createOpts interface{}
131+
128132
// Network config proxy client. If nil then this wasn't requested and the
129133
// uvms network will be configured locally.
130134
ncProxyClient ncproxyttrpc.NetworkConfigProxyService

test/cri-containerd/main.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ const (
4848
lcowRuntimeHandler = "runhcs-lcow"
4949
imageLcowK8sPause = "k8s.gcr.io/pause:3.1"
5050
imageLcowAlpine = "docker.io/library/alpine:latest"
51+
imageLcowAlpineCoreDump = "cplatpublic.azurecr.io/stackoverflow-alpine:latest"
52+
imageWindowsProcessDump = "cplatpublic.azurecr.io/crashdump:latest"
5153
imageLcowCosmos = "cosmosarno/spark-master:2.4.1_2019-04-18_8e864ce"
5254
imageJobContainerHNS = "cplatpublic.azurecr.io/jobcontainer_hns:latest"
5355
imageJobContainerETW = "cplatpublic.azurecr.io/jobcontainer_etw:latest"
@@ -162,7 +164,7 @@ func getWindowsNanoserverImage(build uint16) string {
162164
case osversion.V20H2:
163165
return "mcr.microsoft.com/windows/nanoserver:2009"
164166
default:
165-
panic("unsupported build")
167+
return "mcr.microsoft.com/windows/nanoserver:2009"
166168
}
167169
}
168170

@@ -179,7 +181,7 @@ func getWindowsServerCoreImage(build uint16) string {
179181
case osversion.V20H2:
180182
return "mcr.microsoft.com/windows/servercore:2009"
181183
default:
182-
panic("unsupported build")
184+
return "mcr.microsoft.com/windows/nanoserver:2009"
183185
}
184186
}
185187

0 commit comments

Comments
 (0)