diff --git a/internal/collector/instance.go b/internal/collector/instance.go index 9cb170804..d6e6d153c 100644 --- a/internal/collector/instance.go +++ b/internal/collector/instance.go @@ -7,7 +7,6 @@ package collector import ( "context" "fmt" - "path" corev1 "k8s.io/api/core/v1" @@ -185,7 +184,7 @@ func startCommand(logDirectories []string, includeLogrotate bool) []string { if len(logDirectories) != 0 { for _, logDir := range logDirectories { mkdirScript = mkdirScript + ` -` + shell.MakeDirectories(logDir, path.Join(logDir, "receiver")) +` + shell.MakeDirectories(logDir, "receiver") } } diff --git a/internal/collector/postgres.go b/internal/collector/postgres.go index 892748c0a..6dc2eddcd 100644 --- a/internal/collector/postgres.go +++ b/internal/collector/postgres.go @@ -19,9 +19,60 @@ import ( "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) +func PostgreSQLParameters(ctx context.Context, + inCluster *v1beta1.PostgresCluster, + outParameters *postgres.Parameters, +) { + version := inCluster.Spec.PostgresVersion + + if OpenTelemetryLogsEnabled(ctx, inCluster) { + var spec *v1beta1.InstrumentationLogsSpec + if inCluster != nil && inCluster.Spec.Instrumentation != nil { + spec = inCluster.Spec.Instrumentation.Logs + } + + // Retain logs for a short time unless specified. + retention := metav1.Duration{Duration: 24 * time.Hour} + if spec != nil && spec.RetentionPeriod != nil { + retention = spec.RetentionPeriod.AsDuration() + } + + // Rotate log files according to retention and name them for the OpenTelemetry Collector. + // + // The ".log" suffix is replaced by ".csv" for CSV log files, and + // the ".log" suffix is replaced by ".json" for JSON log files. + // + // https://www.postgresql.org/docs/current/runtime-config-logging.html + for k, v := range postgres.LogRotation(retention, "postgresql-", ".log") { + outParameters.Mandatory.Add(k, v) + } + + // Enable logging to file. Postgres uses a "logging collector" to safely write concurrent messages. + // NOTE: That collector is designed to not lose messages. When it is overloaded, other Postgres processes block. + // + // https://www.postgresql.org/docs/current/runtime-config-logging.html + outParameters.Mandatory.Add("logging_collector", "on") + + // PostgreSQL v8.3 adds support for CSV logging, and + // PostgreSQL v15 adds support for JSON logging. + // The latter is preferred because newlines are escaped as "\n", U+005C + U+006E. + if version >= 15 { + outParameters.Mandatory.Add("log_destination", "jsonlog") + } else { + outParameters.Mandatory.Add("log_destination", "csvlog") + } + + // Log in a timezone the OpenTelemetry Collector understands. + outParameters.Mandatory.Add("log_timezone", "UTC") + + // TODO(logs): Remove this call and do it in [postgres.NewParameters] regardless of the gate. + outParameters.Mandatory.Add("log_directory", fmt.Sprintf("%s/logs/postgres", postgres.DataStorage(inCluster))) + } +} + func NewConfigForPostgresPod(ctx context.Context, inCluster *v1beta1.PostgresCluster, - outParameters *postgres.ParameterSet, + inParameters *postgres.ParameterSet, ) *Config { config := NewConfig(inCluster.Spec.Instrumentation) @@ -30,7 +81,7 @@ func NewConfigForPostgresPod(ctx context.Context, EnablePatroniMetrics(ctx, inCluster, config) // Logging - EnablePostgresLogging(ctx, inCluster, config, outParameters) + EnablePostgresLogging(ctx, inCluster, inParameters, config) EnablePatroniLogging(ctx, inCluster, config) return config @@ -76,8 +127,8 @@ func postgresCSVNames(version int) string { func EnablePostgresLogging( ctx context.Context, inCluster *v1beta1.PostgresCluster, + inParameters *postgres.ParameterSet, outConfig *Config, - outParameters *postgres.ParameterSet, ) { var spec *v1beta1.InstrumentationLogsSpec if inCluster != nil && inCluster.Spec.Instrumentation != nil { @@ -85,42 +136,9 @@ func EnablePostgresLogging( } if OpenTelemetryLogsEnabled(ctx, inCluster) { - directory := postgres.LogDirectory() + directory := inParameters.Value("log_directory") version := inCluster.Spec.PostgresVersion - // https://www.postgresql.org/docs/current/runtime-config-logging.html - outParameters.Add("logging_collector", "on") - outParameters.Add("log_directory", directory) - - // PostgreSQL v8.3 adds support for CSV logging, and - // PostgreSQL v15 adds support for JSON logging. The latter is preferred - // because newlines are escaped as "\n", U+005C + U+006E. - if version < 15 { - outParameters.Add("log_destination", "csvlog") - } else { - outParameters.Add("log_destination", "jsonlog") - } - - // If retentionPeriod is set in the spec, use that value; otherwise, we want - // to use a reasonably short duration. Defaulting to 1 day. - retentionPeriod := metav1.Duration{Duration: 24 * time.Hour} - if spec != nil && spec.RetentionPeriod != nil { - retentionPeriod = spec.RetentionPeriod.AsDuration() - } - - // Rotate log files according to retention. - // - // The ".log" suffix is replaced by ".csv" for CSV log files, and - // the ".log" suffix is replaced by ".json" for JSON log files. - // - // https://www.postgresql.org/docs/current/runtime-config-logging.html - for k, v := range postgres.LogRotation(retentionPeriod, "postgresql-", ".log") { - outParameters.Add(k, v) - } - - // Log in a timezone that the OpenTelemetry Collector will understand. - outParameters.Add("log_timezone", "UTC") - // Keep track of what log records and files have been processed. // Use a subdirectory of the logs directory to stay within the same failure domain. // TODO(log-rotation): Create this directory during Collector startup. @@ -145,8 +163,8 @@ func EnablePostgresLogging( // The 2nd through 5th fields are optional, so match through to the 7th field. // This should do a decent job of not matching the middle of some SQL statement. // - // The number of fields has changed over the years, but the first few - // are always formatted the same way. + // The number of fields has changed over the years, but the first few are always formatted the same way. + // [PostgreSQLParameters] ensures the timezone is UTC. // // NOTE: This regexp is invoked in multi-line mode. https://go.dev/s/re2syntax "multiline": map[string]string{ diff --git a/internal/collector/postgres_test.go b/internal/collector/postgres_test.go index 89f5f5225..a1a221bd6 100644 --- a/internal/collector/postgres_test.go +++ b/internal/collector/postgres_test.go @@ -31,9 +31,11 @@ func TestEnablePostgresLogging(t *testing.T) { }`) config := NewConfig(nil) - params := postgres.NewParameterSet() + params := postgres.NewParameters() - EnablePostgresLogging(ctx, cluster, config, params) + // NOTE: This call is necessary only because the default "log_directory" is not absolute. + PostgreSQLParameters(ctx, cluster, ¶ms) + EnablePostgresLogging(ctx, cluster, params.Mandatory, config) result, err := config.ToYAML() assert.NilError(t, err) @@ -293,9 +295,11 @@ service: cluster.Spec.Instrumentation = testInstrumentationSpec() config := NewConfig(cluster.Spec.Instrumentation) - params := postgres.NewParameterSet() + params := postgres.NewParameters() - EnablePostgresLogging(ctx, cluster, config, params) + // NOTE: This call is necessary only because the default "log_directory" is not absolute. + PostgreSQLParameters(ctx, cluster, ¶ms) + EnablePostgresLogging(ctx, cluster, params.Mandatory, config) result, err := config.ToYAML() assert.NilError(t, err) diff --git a/internal/controller/postgrescluster/controller.go b/internal/controller/postgrescluster/controller.go index bbe141c0b..c3264d0a3 100644 --- a/internal/controller/postgrescluster/controller.go +++ b/internal/controller/postgrescluster/controller.go @@ -339,7 +339,7 @@ func (r *Reconciler) Reconcile( ctx, cluster, clusterConfigMap, clusterReplicationSecret, rootCA, clusterPodService, instanceServiceAccount, instances, patroniLeaderService, primaryCertificate, clusterVolumes, exporterQueriesConfig, exporterWebConfig, - backupsSpecFound, otelConfig, + backupsSpecFound, otelConfig, pgParameters, ) } diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index 97b035c04..1bdac799d 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -534,6 +534,7 @@ func (r *Reconciler) reconcileInstanceSets( exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap, backupsSpecFound bool, otelConfig *collector.Config, + pgParameters *postgres.ParameterSet, ) error { // Go through the observed instances and check if a primary has been determined. @@ -571,7 +572,7 @@ func (r *Reconciler) reconcileInstanceSets( patroniLeaderService, primaryCertificate, findAvailableInstanceNames(*set, instances, clusterVolumes), numInstancePods, clusterVolumes, exporterQueriesConfig, exporterWebConfig, - backupsSpecFound, otelConfig, + backupsSpecFound, otelConfig, pgParameters, ) if err == nil { @@ -1007,6 +1008,7 @@ func (r *Reconciler) scaleUpInstances( exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap, backupsSpecFound bool, otelConfig *collector.Config, + pgParameters *postgres.ParameterSet, ) ([]*appsv1.StatefulSet, error) { log := logging.FromContext(ctx) @@ -1053,7 +1055,7 @@ func (r *Reconciler) scaleUpInstances( rootCA, clusterPodService, instanceServiceAccount, patroniLeaderService, primaryCertificate, instances[i], numInstancePods, clusterVolumes, exporterQueriesConfig, exporterWebConfig, - backupsSpecFound, otelConfig, + backupsSpecFound, otelConfig, pgParameters, ) } if err == nil { @@ -1085,6 +1087,7 @@ func (r *Reconciler) reconcileInstance( exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap, backupsSpecFound bool, otelConfig *collector.Config, + pgParameters *postgres.ParameterSet, ) error { log := logging.FromContext(ctx).WithValues("instance", instance.Name) ctx = logging.NewContext(ctx, log) @@ -1128,7 +1131,7 @@ func (r *Reconciler) reconcileInstance( postgres.InstancePod( ctx, cluster, spec, primaryCertificate, replicationCertSecretProjection(clusterReplicationSecret), - postgresDataVolume, postgresWALVolume, tablespaceVolumes, + postgresDataVolume, postgresWALVolume, tablespaceVolumes, pgParameters, &instance.Spec.Template) if backupsSpecFound { diff --git a/internal/controller/postgrescluster/postgres.go b/internal/controller/postgrescluster/postgres.go index 4dd4a9d78..3d254a96a 100644 --- a/internal/controller/postgrescluster/postgres.go +++ b/internal/controller/postgrescluster/postgres.go @@ -25,6 +25,7 @@ import ( "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/logging" @@ -129,6 +130,7 @@ func (*Reconciler) generatePostgresParameters( ctx context.Context, cluster *v1beta1.PostgresCluster, backupsSpecFound bool, ) *postgres.ParameterSet { builtin := postgres.NewParameters() + collector.PostgreSQLParameters(ctx, cluster, &builtin) pgaudit.PostgreSQLParameters(&builtin) pgbackrest.PostgreSQLParameters(cluster, &builtin, backupsSpecFound) pgmonitor.PostgreSQLParameters(ctx, cluster, &builtin) diff --git a/internal/postgres/config.go b/internal/postgres/config.go index 65c26dec6..6c6ddccdb 100644 --- a/internal/postgres/config.go +++ b/internal/postgres/config.go @@ -8,6 +8,7 @@ import ( "context" "fmt" "math" + "path" "strings" corev1 "k8s.io/api/core/v1" @@ -21,6 +22,38 @@ import ( ) const ( + // bashDataDirectory is a Bash function that ensures a directory has the correct permissions for PostgreSQL data. + // + // Postgres requires its data directories be writable by only itself. + // Pod "securityContext.fsGroup" sets g+w on directories for *some* storage providers. + // Ensure the current user owns the directory, and remove group-write permission. + // + // - https://www.postgresql.org/docs/current/creating-cluster.html + // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/postmaster/postmaster.c;hb=REL_10_0#l1522 + // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/utils/init/miscinit.c;hb=REL_11_0#l142 + // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/utils/init/miscinit.c;hb=REL_17_0#l386 + // - https://issue.k8s.io/93802#issuecomment-717646167 + // + // During CREATE TABLESPACE, Postgres sets the permissions of a tablespace directory to match the data directory. + // + // - https://www.postgresql.org/docs/current/manage-ag-tablespaces.html + // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/commands/tablespace.c;hb=REL_14_0#l600 + // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/common/file_perm.c;hb=REL_14_0#l27 + // + bashDataDirectory = `dataDirectory() {` + + // When the directory does not exist, create it with the correct permissions. + // When the directory has the correct owner, set the correct permissions. + ` if [[ ! -e "$1" || -O "$1" ]]; then install --directory --mode=0750 "$1";` + + // + // The directory exists but its owner is wrong. + // When it is writable, the set-group-ID bit indicates that "fsGroup" probably ran on its contents making them safe to use. + // In this case, we can make a new directory (owned by this user) and refill it. + ` elif [[ -w "$1" && -g "$1" ]]; then recreate "$1" '0750';` + + // + // The directory exists, its owner is wrong, and it is not writable. + // This is probably fatal, but indicate failure to let the caller decide. + ` else false; fi; }` + // bashHalt is a Bash function that prints its arguments to stderr then // exits with a non-zero status. It uses the exit status of the prior // command if that was not zero. @@ -89,13 +122,13 @@ func ConfigDirectory(cluster *v1beta1.PostgresCluster) string { // DataDirectory returns the absolute path to the "data_directory" of cluster. // - https://www.postgresql.org/docs/current/runtime-config-file-locations.html func DataDirectory(cluster *v1beta1.PostgresCluster) string { - return fmt.Sprintf("%s/pg%d", dataMountPath, cluster.Spec.PostgresVersion) + return fmt.Sprintf("%s/pg%d", DataStorage(cluster), cluster.Spec.PostgresVersion) } -// LogDirectory returns the absolute path to the "log_directory" of cluster. -// - https://www.postgresql.org/docs/current/runtime-config-logging.html -func LogDirectory() string { - return fmt.Sprintf("%s/logs/postgres", dataMountPath) +// DataStorage returns the absolute path to the disk where cluster stores its data. +// Use [DataDirectory] for the exact directory that Postgres uses. +func DataStorage(cluster *v1beta1.PostgresCluster) string { + return dataMountPath } // LogRotation returns parameters that rotate log files while keeping a minimum amount. @@ -322,52 +355,53 @@ done // PostgreSQL. func startupCommand( ctx context.Context, - cluster *v1beta1.PostgresCluster, instance *v1beta1.PostgresInstanceSetSpec, + cluster *v1beta1.PostgresCluster, + instance *v1beta1.PostgresInstanceSetSpec, + parameters *ParameterSet, ) []string { version := fmt.Sprint(cluster.Spec.PostgresVersion) + dataDir := DataDirectory(cluster) + logDir := parameters.Value("log_directory") walDir := WALDirectory(cluster, instance) - // If the user requests tablespaces, we want to make sure the directories exist with the - // correct owner and permissions. - tablespaceCmd := "" - if feature.Enabled(ctx, feature.TablespaceVolumes) { - // This command checks if a dir exists and if not, creates it; - // if the dir does exist, then we `recreate` it to make sure the owner is correct; - // if the dir exists with the wrong owner and is not writeable, we error. - // This is the same behavior we use for the main PGDATA directory. - // Note: Postgres requires the tablespace directory to be "an existing, empty directory - // that is owned by the PostgreSQL operating system user." - // - https://www.postgresql.org/docs/current/manage-ag-tablespaces.html - // However, unlike the PGDATA directory, Postgres will _not_ error out - // if the permissions are wrong on the tablespace directory. - // Instead, when a tablespace is created in Postgres, Postgres will `chmod` the - // tablespace directory to match permissions on the PGDATA directory (either 700 or 750). - // Postgres setting the tablespace directory permissions: - // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/commands/tablespace.c;hb=REL_14_0#l600 - // Postgres choosing directory permissions: - // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/common/file_perm.c;hb=REL_14_0#l27 - // Note: This permission change seems to happen only when the tablespace is created in Postgres. - // If the user manually `chmod`'ed the directory after the creation of the tablespace, Postgres - // would not attempt to change the directory permissions. - // Note: as noted below, we mount the tablespace directory to the mountpoint `/tablespaces/NAME`, - // and so we add the subdirectory `data` in order to set the permissions. - checkInstallRecreateCmd := strings.Join([]string{ - `if [[ ! -e "${tablespace_dir}" || -O "${tablespace_dir}" ]]; then`, - `install --directory --mode=0750 "${tablespace_dir}"`, - `elif [[ -w "${tablespace_dir}" && -g "${tablespace_dir}" ]]; then`, - `recreate "${tablespace_dir}" '0750'`, - `else (halt Permissions!); fi ||`, - `halt "$(permissions "${tablespace_dir}" ||:)"`, - }, "\n") + mkdirs := make([]string, 0, 9+len(instance.TablespaceVolumes)) + mkdirs = append(mkdirs, `dataDirectory "${postgres_data_directory}" || halt "$(permissions "${postgres_data_directory}" ||:)"`) + // If the user requests tablespaces, we want to make sure the directories exist with the correct owner and permissions. + // + // The path for tablespaces volumes is /tablespaces/NAME/data -- the `data` directory is so we can arrange the permissions. + if feature.Enabled(ctx, feature.TablespaceVolumes) { for _, tablespace := range instance.TablespaceVolumes { - // The path for tablespaces volumes is /tablespaces/NAME/data - // -- the `data` path is added so that we can arrange the permissions. - tablespaceCmd = tablespaceCmd + "\ntablespace_dir=/tablespaces/" + tablespace.Name + "/data" + "\n" + - checkInstallRecreateCmd + dir := shell.QuoteWord("/tablespaces/" + tablespace.Name + "/data") + mkdirs = append(mkdirs, `dataDirectory `+dir+` || halt "$(permissions `+dir+` ||:)"`) } } + // Postgres creates "log_directory" but does *not* create any of its parent directories. + // Postgres omits the group-write S_IWGRP permission on the directory. Do both here while being + // careful to *not* touch "data_directory" contents until after `initdb` or Patroni bootstrap. + if path.IsAbs(logDir) && !strings.HasPrefix(logDir, dataDir) { + mkdirs = append(mkdirs, + `(`+shell.MakeDirectories(dataMountPath, logDir)+`) ||`, + `halt "$(permissions `+shell.QuoteWord(logDir)+` ||:)"`, + ) + } else { + mkdirs = append(mkdirs, + `[[ ! -f `+shell.QuoteWord(path.Join(dataDir, "PG_VERSION"))+` ]] ||`, + `(`+shell.MakeDirectories(dataDir, logDir)+`) ||`, + `halt "$(permissions `+shell.QuoteWord(path.Join(dataDir, logDir))+` ||:)"`, + ) + } + + // These directories are outside "data_directory" and can be created. + mkdirs = append(mkdirs, + `(`+shell.MakeDirectories(dataMountPath, naming.PatroniPGDataLogPath)+`) ||`, + `halt "$(permissions `+shell.QuoteWord(naming.PatroniPGDataLogPath)+` ||:)"`, + + `(`+shell.MakeDirectories(dataMountPath, naming.PGBackRestPGDataLogPath)+`) ||`, + `halt "$(permissions `+shell.QuoteWord(naming.PGBackRestPGDataLogPath)+` ||:)"`, + ) + pg_rewind_override := "" if config.FetchKeyCommand(&cluster.Spec) != "" { // Quoting "EOF" disables parameter substitution during write. @@ -384,6 +418,9 @@ chmod +x /tmp/pg_rewind_tde.sh script := strings.Join([]string{ `declare -r expected_major_version="$1" pgwal_directory="$2"`, + // Function to create a Postgres data directory. + bashDataDirectory, + // Function to print the permissions of a file or directory and its parents. bashPermissions, @@ -425,42 +462,10 @@ chmod +x /tmp/pg_rewind_tde.sh // Determine if the data directory has been prepared for bootstrapping the cluster `bootstrap_dir="${postgres_data_directory}_bootstrap"`, - `[[ -d "${bootstrap_dir}" ]] && results 'bootstrap directory' "${bootstrap_dir}"`, - `[[ -d "${bootstrap_dir}" ]] && postgres_data_directory="${bootstrap_dir}"`, - - // PostgreSQL requires its directory to be writable by only itself. - // Pod "securityContext.fsGroup" sets g+w on directories for *some* - // storage providers. Ensure the current user owns the directory, and - // remove group-write permission. - // - https://www.postgresql.org/docs/current/creating-cluster.html - // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/postmaster/postmaster.c;hb=REL_10_0#l1522 - // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/utils/init/miscinit.c;hb=REL_11_0#l142 - // - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/utils/init/miscinit.c;hb=REL_17_0#l386 - // - https://issue.k8s.io/93802#issuecomment-717646167 - // - // When the directory does not exist, create it with the correct permissions. - // When the directory has the correct owner, set the correct permissions. - `if [[ ! -e "${postgres_data_directory}" || -O "${postgres_data_directory}" ]]; then`, - `install --directory --mode=0750 "${postgres_data_directory}"`, - // - // The directory exists but its owner is wrong. When it is writable, - // the set-group-ID bit indicates that "fsGroup" probably ran on its - // contents making them safe to use. In this case, we can make a new - // directory (owned by this user) and refill it. - `elif [[ -w "${postgres_data_directory}" && -g "${postgres_data_directory}" ]]; then`, - `recreate "${postgres_data_directory}" '0750'`, - // - // The directory exists, its owner is wrong, and it is not writable. - `else (halt Permissions!); fi ||`, - `halt "$(permissions "${postgres_data_directory}" ||:)"`, + `[[ -d "${bootstrap_dir}" ]] && postgres_data_directory="${bootstrap_dir}" && results 'bootstrap directory' "${bootstrap_dir}"`, - // Create log directories. - `(` + shell.MakeDirectories(dataMountPath, naming.PGBackRestPGDataLogPath) + `) ||`, - `halt "$(permissions ` + naming.PGBackRestPGDataLogPath + ` ||:)"`, - `(` + shell.MakeDirectories(dataMountPath, naming.PatroniPGDataLogPath) + `) ||`, - `halt "$(permissions ` + naming.PatroniPGDataLogPath + ` ||:)"`, - `(` + shell.MakeDirectories(dataMountPath, LogDirectory()) + `) ||`, - `halt "$(permissions ` + LogDirectory() + ` ||:)"`, + // Create directories for and related to the data directory. + strings.Join(mkdirs, "\n"), // Copy replication client certificate files // from the /pgconf/tls/replication directory to the /tmp/replication directory in order @@ -478,7 +483,6 @@ chmod +x /tmp/pg_rewind_tde.sh // Add the pg_rewind wrapper script, if TDE is enabled. pg_rewind_override, - tablespaceCmd, // When the data directory is empty, there's nothing more to do. `[[ -f "${postgres_data_directory}/PG_VERSION" ]] || exit 0`, diff --git a/internal/postgres/config_test.go b/internal/postgres/config_test.go index 762bd8a0b..32490df26 100644 --- a/internal/postgres/config_test.go +++ b/internal/postgres/config_test.go @@ -40,6 +40,13 @@ func TestDataDirectory(t *testing.T) { assert.Equal(t, DataDirectory(cluster), "/pgdata/pg12") } +func TestDataStorage(t *testing.T) { + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.PostgresVersion = rand.IntN(20) + + assert.Equal(t, DataStorage(cluster), "/pgdata") +} + func TestLogRotation(t *testing.T) { t.Parallel() @@ -543,8 +550,10 @@ func TestStartupCommand(t *testing.T) { cluster.Spec.PostgresVersion = 13 instance := new(v1beta1.PostgresInstanceSetSpec) + parameters := NewParameters().Default + ctx := context.Background() - command := startupCommand(ctx, cluster, instance) + command := startupCommand(ctx, cluster, instance, parameters) // Expect a bash command with an inline script. assert.DeepEqual(t, command[:3], []string{"bash", "-ceu", "--"}) @@ -579,7 +588,7 @@ func TestStartupCommand(t *testing.T) { }, }, } - command := startupCommand(ctx, cluster, instance) + command := startupCommand(ctx, cluster, instance, parameters) assert.Assert(t, len(command) > 3) assert.Assert(t, strings.Contains(command[3], `cat << "EOF" > /tmp/pg_rewind_tde.sh #!/bin/sh diff --git a/internal/postgres/parameters.go b/internal/postgres/parameters.go index 6fb7b0d2f..3590816b4 100644 --- a/internal/postgres/parameters.go +++ b/internal/postgres/parameters.go @@ -48,6 +48,17 @@ func NewParameters() Parameters { // - https://www.postgresql.org/docs/current/auth-password.html parameters.Default.Add("password_encryption", "scram-sha-256") + // Explicitly use Postgres' default log directory. This value is relative to the "data_directory" parameter. + // + // Controllers look at this parameter to grant group-write S_IWGRP on the directory. + // Postgres does not grant this permission on the directories it creates. + // + // TODO(logs): A better default would be *outside* the data directory. + // This parameter needs to be configurable and documented before the default can change. + // + // PostgreSQL must be reloaded when changing this parameter. + parameters.Default.Add("log_directory", "log") + // Pod "securityContext.fsGroup" ensures processes and filesystems agree on a GID; // use the same permissions for group and owner. // This allows every process in the pod to read Postgres log files. diff --git a/internal/postgres/parameters_test.go b/internal/postgres/parameters_test.go index ad8c6e90c..54095750f 100644 --- a/internal/postgres/parameters_test.go +++ b/internal/postgres/parameters_test.go @@ -28,6 +28,8 @@ func TestNewParameters(t *testing.T) { assert.DeepEqual(t, parameters.Default.AsMap(), map[string]string{ "jit": "off", + "log_directory": "log", + "password_encryption": "scram-sha-256", }) } diff --git a/internal/postgres/reconcile.go b/internal/postgres/reconcile.go index 81c6cc31f..bd191549e 100644 --- a/internal/postgres/reconcile.go +++ b/internal/postgres/reconcile.go @@ -62,6 +62,7 @@ func InstancePod(ctx context.Context, inClusterCertificates, inClientCertificates *corev1.SecretProjection, inDataVolume, inWALVolume *corev1.PersistentVolumeClaim, inTablespaceVolumes []*corev1.PersistentVolumeClaim, + inParameters *ParameterSet, outInstancePod *corev1.PodTemplateSpec, ) { certVolumeMount := corev1.VolumeMount{ @@ -191,7 +192,7 @@ func InstancePod(ctx context.Context, startup := corev1.Container{ Name: naming.ContainerPostgresStartup, - Command: startupCommand(ctx, inCluster, inInstanceSpec), + Command: startupCommand(ctx, inCluster, inInstanceSpec, inParameters), Env: Environment(inCluster), Image: container.Image, diff --git a/internal/postgres/reconcile_test.go b/internal/postgres/reconcile_test.go index c001ce890..9ec45a96b 100644 --- a/internal/postgres/reconcile_test.go +++ b/internal/postgres/reconcile_test.go @@ -69,6 +69,8 @@ func TestInstancePod(t *testing.T) { cluster.Spec.ImagePullPolicy = corev1.PullAlways cluster.Spec.PostgresVersion = 11 + parameters := NewParameters().Default + dataVolume := new(corev1.PersistentVolumeClaim) dataVolume.Name = "datavol" @@ -117,7 +119,7 @@ func TestInstancePod(t *testing.T) { // without WAL volume nor WAL volume spec pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, parameters, pod) assert.Assert(t, cmp.MarshalMatches(pod.Spec, ` containers: @@ -239,6 +241,7 @@ initContainers: - -- - |- declare -r expected_major_version="$1" pgwal_directory="$2" + dataDirectory() { if [[ ! -e "$1" || -O "$1" ]]; then install --directory --mode=0750 "$1"; elif [[ -w "$1" && -g "$1" ]]; then recreate "$1" '0750'; else false; fi; } permissions() { while [[ -n "$1" ]]; do set "${1%/*}" "$@"; done; shift; stat -Lc '%A %4u %4g %n' "$@"; } halt() { local rc=$?; >&2 echo "$@"; exit "${rc/#0/1}"; } results() { printf '::postgres-operator: %s::%s\n' "$@"; } @@ -267,23 +270,17 @@ initContainers: [[ "${postgres_data_directory}" == "${PGDATA}" ]] || halt Expected matching config and data directories bootstrap_dir="${postgres_data_directory}_bootstrap" - [[ -d "${bootstrap_dir}" ]] && results 'bootstrap directory' "${bootstrap_dir}" - [[ -d "${bootstrap_dir}" ]] && postgres_data_directory="${bootstrap_dir}" - if [[ ! -e "${postgres_data_directory}" || -O "${postgres_data_directory}" ]]; then - install --directory --mode=0750 "${postgres_data_directory}" - elif [[ -w "${postgres_data_directory}" && -g "${postgres_data_directory}" ]]; then - recreate "${postgres_data_directory}" '0750' - else (halt Permissions!); fi || - halt "$(permissions "${postgres_data_directory}" ||:)" - (mkdir -p '/pgdata/pgbackrest/log' && { chmod 0775 '/pgdata/pgbackrest/log' '/pgdata/pgbackrest' || :; }) || - halt "$(permissions /pgdata/pgbackrest/log ||:)" + [[ -d "${bootstrap_dir}" ]] && postgres_data_directory="${bootstrap_dir}" && results 'bootstrap directory' "${bootstrap_dir}" + dataDirectory "${postgres_data_directory}" || halt "$(permissions "${postgres_data_directory}" ||:)" + [[ ! -f '/pgdata/pg11/PG_VERSION' ]] || + (mkdir -p '/pgdata/pg11/log' && { chmod 0775 '/pgdata/pg11/log' || :; }) || + halt "$(permissions '/pgdata/pg11/log' ||:)" (mkdir -p '/pgdata/patroni/log' && { chmod 0775 '/pgdata/patroni/log' '/pgdata/patroni' || :; }) || - halt "$(permissions /pgdata/patroni/log ||:)" - (mkdir -p '/pgdata/logs/postgres' && { chmod 0775 '/pgdata/logs/postgres' '/pgdata/logs' || :; }) || - halt "$(permissions /pgdata/logs/postgres ||:)" + halt "$(permissions '/pgdata/patroni/log' ||:)" + (mkdir -p '/pgdata/pgbackrest/log' && { chmod 0775 '/pgdata/pgbackrest/log' '/pgdata/pgbackrest' || :; }) || + halt "$(permissions '/pgdata/pgbackrest/log' ||:)" install -D --mode=0600 -t "/tmp/replication" "/pgconf/tls/replication"/{tls.crt,tls.key,ca.crt} - [[ -f "${postgres_data_directory}/PG_VERSION" ]] || exit 0 results 'data version' "${postgres_data_version:=$(< "${postgres_data_directory}/PG_VERSION")}" [[ "${postgres_data_version}" == "${expected_major_version}" ]] || @@ -392,7 +389,7 @@ volumes: pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, - serverSecretProjection, clientSecretProjection, dataVolume, walVolume, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, walVolume, nil, parameters, pod) assert.Assert(t, len(pod.Spec.Containers) > 0) assert.Assert(t, len(pod.Spec.InitContainers) > 0) @@ -493,7 +490,7 @@ volumes: pod := new(corev1.PodTemplateSpec) InstancePod(ctx, clusterWithConfig, instance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, parameters, pod) assert.Assert(t, len(pod.Spec.Containers) > 0) assert.Assert(t, len(pod.Spec.InitContainers) > 0) @@ -530,7 +527,7 @@ volumes: t.Run("SidecarNotEnabled", func(t *testing.T) { InstancePod(ctx, cluster, sidecarInstance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, parameters, pod) assert.Equal(t, len(pod.Spec.Containers), 2, "expected 2 containers in Pod") }) @@ -543,7 +540,7 @@ volumes: ctx := feature.NewContext(ctx, gate) InstancePod(ctx, cluster, sidecarInstance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, parameters, pod) assert.Equal(t, len(pod.Spec.Containers), 3, "expected 3 containers in Pod") @@ -580,7 +577,7 @@ volumes: tablespaceVolumes := []*corev1.PersistentVolumeClaim{tablespaceVolume1, tablespaceVolume2} InstancePod(ctx, cluster, instance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, tablespaceVolumes, pod) + serverSecretProjection, clientSecretProjection, dataVolume, nil, tablespaceVolumes, parameters, pod) assert.Assert(t, cmp.MarshalMatches(pod.Spec.Containers[0].VolumeMounts, ` - mountPath: /pgconf/tls @@ -618,7 +615,7 @@ volumes: pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, - serverSecretProjection, clientSecretProjection, dataVolume, walVolume, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, walVolume, nil, parameters, pod) assert.Assert(t, len(pod.Spec.Containers) > 0) assert.Assert(t, len(pod.Spec.InitContainers) > 0) @@ -720,7 +717,7 @@ volumes: pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, parameters, pod) assert.Assert(t, len(pod.Spec.Containers) > 0) assert.Assert(t, cmp.MarshalContains(pod.Spec.Containers[0].VolumeMounts, ` @@ -750,7 +747,7 @@ volumes: annotated.Labels = map[string]string{"gg": "asdf"} InstancePod(ctx, cluster, instance, - serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, annotated) + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, parameters, annotated) assert.Assert(t, cmp.MarshalContains(annotated.Spec.Volumes, ` - ephemeral: diff --git a/internal/shell/paths.go b/internal/shell/paths.go index 701144694..8fcf6b357 100644 --- a/internal/shell/paths.go +++ b/internal/shell/paths.go @@ -36,6 +36,8 @@ func CleanFileName(path string) string { // exists. It creates every directory leading to path from (but not including) // base and sets their permissions for Kubernetes, regardless of umask. // +// Relative paths are expanded relative to base. +// // See: // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/chmod.html // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/mkdir.html @@ -47,8 +49,19 @@ func MakeDirectories(base string, paths ...string) string { return `test -d ` + QuoteWord(base) } - allPaths := slices.Clone(paths) - for _, p := range paths { + // Expand each path relative to the base path. + expandedPaths := make([]string, len(paths)) + for i, p := range paths { + if filepath.IsAbs(p) { + expandedPaths[i] = p + } else { + expandedPaths[i] = filepath.Join(base, p) + } + } + + // Gather parent directories of each path. + allPaths := slices.Clone(expandedPaths) + for _, p := range expandedPaths { if r, err := filepath.Rel(base, p); err == nil && filepath.IsLocal(r) { // The result of [filepath.Rel] is a shorter representation of the full path; skip it. r = filepath.Dir(r) @@ -72,7 +85,7 @@ func MakeDirectories(base string, paths ...string) string { return `` + // Create all the paths and any missing parents. - `mkdir -p ` + strings.Join(QuoteWords(paths...), " ") + + `mkdir -p ` + strings.Join(QuoteWords(expandedPaths...), " ") + // Try to set the permissions of every path and each parent. // This swallows the exit status of `chmod` because not all filesystems diff --git a/internal/shell/paths_test.go b/internal/shell/paths_test.go index b5adb69b1..f3b9aea82 100644 --- a/internal/shell/paths_test.go +++ b/internal/shell/paths_test.go @@ -94,6 +94,38 @@ func TestMakeDirectories(t *testing.T) { }) }) + t.Run("Relative", func(t *testing.T) { + assert.Equal(t, + MakeDirectories("/x", "one", "two/three"), + `mkdir -p '/x/one' '/x/two/three' && { chmod 0775 '/x/one' '/x/two/three' '/x/two' || :; }`, + "expected paths relative to base") + + assert.Equal(t, + MakeDirectories("/x/y/z", "../one", "./two", "../../../../three"), + `mkdir -p '/x/y/one' '/x/y/z/two' '/three' && { chmod 0775 '/x/y/one' '/x/y/z/two' '/three' || :; }`, + "expected paths relative to base") + + script := MakeDirectories("x/y", "../one", "./two", "../../../../three") + assert.Equal(t, script, + `mkdir -p 'x/one' 'x/y/two' '../../three' && { chmod 0775 'x/one' 'x/y/two' '../../three' || :; }`, + "expected paths relative to base") + + // Calling `mkdir -p '../..'` works, but run it by ShellCheck as a precaution. + t.Run("ShellCheckPOSIX", func(t *testing.T) { + shellcheck := require.ShellCheck(t) + + dir := t.TempDir() + file := filepath.Join(dir, "script.sh") + assert.NilError(t, os.WriteFile(file, []byte(script), 0o600)) + + // Expect ShellCheck for "sh" to be happy. + // - https://www.shellcheck.net/wiki/SC2148 + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", "--shell=sh", file) + output, err := cmd.CombinedOutput() + assert.NilError(t, err, "%q\n%s", cmd.Args, output) + }) + }) + t.Run("Unrelated", func(t *testing.T) { assert.Equal(t, MakeDirectories("/one", "/two/three/four"), diff --git a/testing/chainsaw/e2e/pgbackrest-restore/templates/clone-cluster.yaml b/testing/chainsaw/e2e/pgbackrest-restore/templates/clone-cluster.yaml index 5360ef23f..a768fa2ab 100644 --- a/testing/chainsaw/e2e/pgbackrest-restore/templates/clone-cluster.yaml +++ b/testing/chainsaw/e2e/pgbackrest-restore/templates/clone-cluster.yaml @@ -49,3 +49,18 @@ spec: replicas: 1 readyReplicas: 1 updatedReplicas: 1 + + catch: + - description: Read all log lines from job pods + podLogs: + selector: > + batch.kubernetes.io/job-name, + postgres-operator.crunchydata.com/cluster in (clone-one) + tail: -1 + + - description: Read all log lines from postgres pods + podLogs: + selector: > + postgres-operator.crunchydata.com/instance, + postgres-operator.crunchydata.com/cluster in (clone-one) + tail: -1 diff --git a/testing/chainsaw/e2e/pgbackrest-restore/templates/point-in-time-restore.yaml b/testing/chainsaw/e2e/pgbackrest-restore/templates/point-in-time-restore.yaml index 714227ab4..3b842f5eb 100644 --- a/testing/chainsaw/e2e/pgbackrest-restore/templates/point-in-time-restore.yaml +++ b/testing/chainsaw/e2e/pgbackrest-restore/templates/point-in-time-restore.yaml @@ -62,9 +62,16 @@ spec: finished: true catch: - - - description: > - Read all log lines from the restore job pods + - description: Read all log lines from job pods + podLogs: + selector: > + batch.kubernetes.io/job-name, + postgres-operator.crunchydata.com/cluster in (original) + tail: -1 + + - description: Read all log lines from postgres pods podLogs: - selector: postgres-operator.crunchydata.com/pgbackrest-restore + selector: > + postgres-operator.crunchydata.com/instance, + postgres-operator.crunchydata.com/cluster in (original) tail: -1