// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package config provides basic infrastructure to set configuration settings
// for runsc. The configuration is set by flags to the command line. They can
// also propagate to a different process using the same flags.
package config

import (
	"fmt"
	"strings"

	"gvisor.dev/gvisor/pkg/refs"
	controlpb "gvisor.dev/gvisor/pkg/sentry/control/control_go_proto"
	"gvisor.dev/gvisor/pkg/sentry/watchdog"
)

// Config holds configuration that is not part of the runtime spec.
//
// Follow these steps to add a new flag:
//   1. Create a new field in Config.
//   2. Add a field tag with the flag name
//   3. Register a new flag in flags.go, with name and description
//   4. Add any necessary validation into validate()
//   5. If adding an enum, follow the same pattern as FileAccessType
//
type Config struct {
	// RootDir is the runtime root directory.
	RootDir string `flag:"root"`

	// Traceback changes the Go runtime's traceback level.
	Traceback string `flag:"traceback"`

	// Debug indicates that debug logging should be enabled.
	Debug bool `flag:"debug"`

	// LogFilename is the filename to log to, if not empty.
	LogFilename string `flag:"log"`

	// LogFormat is the log format.
	LogFormat string `flag:"log-format"`

	// DebugLog is the path to log debug information to, if not empty.
	DebugLog string `flag:"debug-log"`

	// PanicLog is the path to log GO's runtime messages, if not empty.
	PanicLog string `flag:"panic-log"`

	// CoverageReport is the path to write Go coverage information, if not empty.
	CoverageReport string `flag:"coverage-report"`

	// DebugLogFormat is the log format for debug.
	DebugLogFormat string `flag:"debug-log-format"`

	// FileAccess indicates how the root filesystem is accessed.
	FileAccess FileAccessType `flag:"file-access"`

	// FileAccessMounts indicates how non-root volumes are accessed.
	FileAccessMounts FileAccessType `flag:"file-access-mounts"`

	// Overlay is whether to wrap the root filesystem in an overlay.
	Overlay bool `flag:"overlay"`

	// Verity is whether there's one or more verity file system to mount.
	Verity bool `flag:"verity"`

	// FSGoferHostUDS enables the gofer to mount a host UDS.
	FSGoferHostUDS bool `flag:"fsgofer-host-uds"`

	// Network indicates what type of network to use.
	Network NetworkType `flag:"network"`

	// EnableRaw indicates whether raw sockets should be enabled. Raw
	// sockets are disabled by stripping CAP_NET_RAW from the list of
	// capabilities.
	EnableRaw bool `flag:"net-raw"`

	// AllowPacketEndpointWrite enables write operations on packet endpoints.
	AllowPacketEndpointWrite bool `flag:"TESTONLY-allow-packet-endpoint-write"`

	// HardwareGSO indicates that hardware segmentation offload is enabled.
	HardwareGSO bool `flag:"gso"`

	// SoftwareGSO indicates that software segmentation offload is enabled.
	SoftwareGSO bool `flag:"software-gso"`

	// TXChecksumOffload indicates that TX Checksum Offload is enabled.
	TXChecksumOffload bool `flag:"tx-checksum-offload"`

	// RXChecksumOffload indicates that RX Checksum Offload is enabled.
	RXChecksumOffload bool `flag:"rx-checksum-offload"`

	// QDisc indicates the type of queuening discipline to use by default
	// for non-loopback interfaces.
	QDisc QueueingDiscipline `flag:"qdisc"`

	// LogPackets indicates that all network packets should be logged.
	LogPackets bool `flag:"log-packets"`

	// Platform is the platform to run on.
	Platform string `flag:"platform"`

	// Strace indicates that strace should be enabled.
	Strace bool `flag:"strace"`

	// StraceSyscalls is the set of syscalls to trace (comma-separated values).
	// If StraceEnable is true and this string is empty, then all syscalls will
	// be traced.
	StraceSyscalls string `flag:"strace-syscalls"`

	// StraceLogSize is the max size of data blobs to display.
	StraceLogSize uint `flag:"strace-log-size"`

	// StraceEvent indicates sending strace to events if true. Strace is
	// sent to log if false.
	StraceEvent bool `flag:"strace-event"`

	// DisableSeccomp indicates whether seccomp syscall filters should be
	// disabled. Pardon the double negation, but default to enabled is important.
	DisableSeccomp bool

	// WatchdogAction sets what action the watchdog takes when triggered.
	WatchdogAction watchdog.Action `flag:"watchdog-action"`

	// PanicSignal registers signal handling that panics. Usually set to
	// SIGUSR2(12) to troubleshoot hangs. -1 disables it.
	PanicSignal int `flag:"panic-signal"`

	// ProfileEnable is set to prepare the sandbox to be profiled.
	ProfileEnable bool `flag:"profile"`

	// ProfileBlock collects a block profile to the passed file for the
	// duration of the container execution. Requires ProfileEnabled.
	ProfileBlock string `flag:"profile-block"`

	// ProfileCPU collects a CPU profile to the passed file for the
	// duration of the container execution. Requires ProfileEnabled.
	ProfileCPU string `flag:"profile-cpu"`

	// ProfileHeap collects a heap profile to the passed file for the
	// duration of the container execution. Requires ProfileEnabled.
	ProfileHeap string `flag:"profile-heap"`

	// ProfileMutex collects a mutex profile to the passed file for the
	// duration of the container execution. Requires ProfileEnabled.
	ProfileMutex string `flag:"profile-mutex"`

	// TraceFile collects a Go runtime execution trace to the passed file
	// for the duration of the container execution.
	TraceFile string `flag:"trace"`

	// Controls defines the controls that may be enabled.
	Controls controlConfig `flag:"controls"`

	// RestoreFile is the path to the saved container image
	RestoreFile string

	// NumNetworkChannels controls the number of AF_PACKET sockets that map
	// to the same underlying network device. This allows netstack to better
	// scale for high throughput use cases.
	NumNetworkChannels int `flag:"num-network-channels"`

	// Rootless allows the sandbox to be started with a user that is not root.
	// Defense in depth measures are weaker in rootless mode. Specifically, the
	// sandbox and Gofer process run as root inside a user namespace with root
	// mapped to the caller's user. When using rootless, the container root path
	// should not have a symlink.
	Rootless bool `flag:"rootless"`

	// AlsoLogToStderr allows to send log messages to stderr.
	AlsoLogToStderr bool `flag:"alsologtostderr"`

	// ReferenceLeakMode sets reference leak check mode
	ReferenceLeak refs.LeakMode `flag:"ref-leak-mode"`

	// CPUNumFromQuota sets CPU number count to available CPU quota, using
	// least integer value greater than or equal to quota.
	//
	// E.g. 0.2 CPU quota will result in 1, and 1.9 in 2.
	CPUNumFromQuota bool `flag:"cpu-num-from-quota"`

	// Enables VFS2.
	VFS2 bool `flag:"vfs2"`

	// Enable lisafs.
	Lisafs bool `flag:"lisafs"`

	// Enables FUSE usage.
	FUSE bool `flag:"fuse"`

	// Allows overriding of flags in OCI annotations.
	AllowFlagOverride bool `flag:"allow-flag-override"`

	// Enables seccomp inside the sandbox.
	OCISeccomp bool `flag:"oci-seccomp"`

	// Mounts the cgroup filesystem backed by the sentry's cgroupfs.
	Cgroupfs bool `flag:"cgroupfs"`

	// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
	// tests. It allows runsc to start the sandbox process as the current
	// user, and without chrooting the sandbox process. This can be
	// necessary in test environments that have limited capabilities. When
	// disabling chroot, the container root path should not have a symlink.
	TestOnlyAllowRunAsCurrentUserWithoutChroot bool `flag:"TESTONLY-unsafe-nonroot"`

	// TestOnlyTestNameEnv should only be used in tests. It looks up for the
	// test name in the container environment variables and adds it to the debug
	// log file name. This is done to help identify the log with the test when
	// multiple tests are run in parallel, since there is no way to pass
	// parameters to the runtime from docker.
	TestOnlyTestNameEnv string `flag:"TESTONLY-test-name-env"`
}

func (c *Config) validate() error {
	if c.FileAccess == FileAccessShared && c.Overlay {
		return fmt.Errorf("overlay flag is incompatible with shared file access")
	}
	if c.NumNetworkChannels <= 0 {
		return fmt.Errorf("num_network_channels must be > 0, got: %d", c.NumNetworkChannels)
	}
	// Require profile flags to explicitly opt-in to profiling with
	// -profile rather than implying it since these options have security
	// implications.
	if c.ProfileBlock != "" && !c.ProfileEnable {
		return fmt.Errorf("profile-block flag requires enabling profiling with profile flag")
	}
	if c.ProfileCPU != "" && !c.ProfileEnable {
		return fmt.Errorf("profile-cpu flag requires enabling profiling with profile flag")
	}
	if c.ProfileHeap != "" && !c.ProfileEnable {
		return fmt.Errorf("profile-heap flag requires enabling profiling with profile flag")
	}
	if c.ProfileMutex != "" && !c.ProfileEnable {
		return fmt.Errorf("profile-mutex flag requires enabling profiling with profile flag")
	}
	return nil
}

// FileAccessType tells how the filesystem is accessed.
type FileAccessType int

const (
	// FileAccessExclusive gives the sandbox exclusive access over files and
	// directories in the filesystem. No external modifications are permitted and
	// can lead to undefined behavior.
	//
	// Exclusive filesystem access enables more aggressive caching and offers
	// significantly better performance. This is the default mode for the root
	// volume.
	FileAccessExclusive FileAccessType = iota

	// FileAccessShared is used for volumes that can have external changes. It
	// requires revalidation on every filesystem access to detect external
	// changes, and reduces the amount of caching that can be done. This is the
	// default mode for non-root volumes.
	FileAccessShared
)

func fileAccessTypePtr(v FileAccessType) *FileAccessType {
	return &v
}

// Set implements flag.Value.
func (f *FileAccessType) Set(v string) error {
	switch v {
	case "shared":
		*f = FileAccessShared
	case "exclusive":
		*f = FileAccessExclusive
	default:
		return fmt.Errorf("invalid file access type %q", v)
	}
	return nil
}

// Get implements flag.Value.
func (f *FileAccessType) Get() interface{} {
	return *f
}

// String implements flag.Value.
func (f FileAccessType) String() string {
	switch f {
	case FileAccessShared:
		return "shared"
	case FileAccessExclusive:
		return "exclusive"
	}
	panic(fmt.Sprintf("Invalid file access type %d", f))
}

// NetworkType tells which network stack to use.
type NetworkType int

const (
	// NetworkSandbox uses internal network stack, isolated from the host.
	NetworkSandbox NetworkType = iota

	// NetworkHost redirects network related syscalls to the host network.
	NetworkHost

	// NetworkNone sets up just loopback using netstack.
	NetworkNone
)

func networkTypePtr(v NetworkType) *NetworkType {
	return &v
}

// Set implements flag.Value.
func (n *NetworkType) Set(v string) error {
	switch v {
	case "sandbox":
		*n = NetworkSandbox
	case "host":
		*n = NetworkHost
	case "none":
		*n = NetworkNone
	default:
		return fmt.Errorf("invalid network type %q", v)
	}
	return nil
}

// Get implements flag.Value.
func (n *NetworkType) Get() interface{} {
	return *n
}

// String implements flag.Value.
func (n NetworkType) String() string {
	switch n {
	case NetworkSandbox:
		return "sandbox"
	case NetworkHost:
		return "host"
	case NetworkNone:
		return "none"
	}
	panic(fmt.Sprintf("Invalid network type %d", n))
}

// QueueingDiscipline is used to specify the kind of Queueing Discipline to
// apply for a give FDBasedLink.
type QueueingDiscipline int

const (
	// QDiscNone disables any queueing for the underlying FD.
	QDiscNone QueueingDiscipline = iota

	// QDiscFIFO applies a simple fifo based queue to the underlying FD.
	QDiscFIFO
)

func queueingDisciplinePtr(v QueueingDiscipline) *QueueingDiscipline {
	return &v
}

// Set implements flag.Value.
func (q *QueueingDiscipline) Set(v string) error {
	switch v {
	case "none":
		*q = QDiscNone
	case "fifo":
		*q = QDiscFIFO
	default:
		return fmt.Errorf("invalid qdisc %q", v)
	}
	return nil
}

// Get implements flag.Value.
func (q *QueueingDiscipline) Get() interface{} {
	return *q
}

// String implements flag.Value.
func (q QueueingDiscipline) String() string {
	switch q {
	case QDiscNone:
		return "none"
	case QDiscFIFO:
		return "fifo"
	}
	panic(fmt.Sprintf("Invalid qdisc %d", q))
}

// controlConfig represents control endpoints.
type controlConfig struct {
	Controls *controlpb.ControlConfig
}

// Set implements flag.Value.
func (c *controlConfig) Set(v string) error {
	controls := strings.Split(v, ",")
	var controlList []controlpb.ControlConfig_Endpoint
	for _, control := range controls {
		switch control {
		case "EVENTS":
			controlList = append(controlList, controlpb.ControlConfig_EVENTS)
		case "FS":
			controlList = append(controlList, controlpb.ControlConfig_FS)
		case "LIFECYCLE":
			controlList = append(controlList, controlpb.ControlConfig_LIFECYCLE)
		case "LOGGING":
			controlList = append(controlList, controlpb.ControlConfig_LOGGING)
		case "PROFILE":
			controlList = append(controlList, controlpb.ControlConfig_PROFILE)
		case "USAGE":
			controlList = append(controlList, controlpb.ControlConfig_USAGE)
		case "PROC":
			controlList = append(controlList, controlpb.ControlConfig_PROC)
		case "STATE":
			controlList = append(controlList, controlpb.ControlConfig_STATE)
		case "DEBUG":
			controlList = append(controlList, controlpb.ControlConfig_DEBUG)
		default:
			return fmt.Errorf("invalid control %q", control)
		}
	}
	c.Controls.AllowedControls = controlList
	return nil
}

// Get implements flag.Value.
func (c *controlConfig) Get() interface{} {
	return *c
}

// String implements flag.Value.
func (c *controlConfig) String() string {
	v := ""
	for _, control := range c.Controls.GetAllowedControls() {
		if len(v) > 0 {
			v += ","
		}
		switch control {
		case controlpb.ControlConfig_EVENTS:
			v += "EVENTS"
		case controlpb.ControlConfig_FS:
			v += "FS"
		case controlpb.ControlConfig_LIFECYCLE:
			v += "LIFECYCLE"
		case controlpb.ControlConfig_LOGGING:
			v += "LOGGING"
		case controlpb.ControlConfig_PROFILE:
			v += "PROFILE"
		case controlpb.ControlConfig_USAGE:
			v += "USAGE"
		case controlpb.ControlConfig_PROC:
			v += "PROC"
		case controlpb.ControlConfig_STATE:
			v += "STATE"
		case controlpb.ControlConfig_DEBUG:
			v += "DEBUG"
		default:
			panic(fmt.Sprintf("Invalid control %d", control))
		}
	}
	return v
}

func defaultControlConfig() *controlConfig {
	c := controlConfig{}
	c.Controls = &controlpb.ControlConfig{}
	c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_EVENTS)
	c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_FS)
	c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_LIFECYCLE)
	c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_LOGGING)
	c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_PROFILE)
	c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_USAGE)
	c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_PROC)
	c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_STATE)
	c.Controls.AllowedControls = append(c.Controls.AllowedControls, controlpb.ControlConfig_DEBUG)
	return &c
}

func leakModePtr(v refs.LeakMode) *refs.LeakMode {
	return &v
}

func watchdogActionPtr(v watchdog.Action) *watchdog.Action {
	return &v
}