// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kernel

import (
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/refs"
	"gvisor.dev/gvisor/pkg/sentry/arch"
	"gvisor.dev/gvisor/pkg/syserror"
)

// SessionID is the public identifier of a Session. It has the same underlying
// representation as ThreadID.
type SessionID ThreadID

// ProcessGroupID is the public identifier of a ProcessGroup. It has the same
// underlying representation as ThreadID.
type ProcessGroupID ThreadID

// Session contains a leader threadgroup and a list of ProcessGroups.
//
// +stateify savable
type Session struct {
	// refs is the reference count of the Session. When the count is
	// released, decRef removes the Session from the PID namespace
	// translation tables and from the TaskSet's list of sessions.
	refs refs.AtomicRefCount

	// leader is the originator of the Session.
	//
	// Note that this may no longer be running (and may be reaped), so the
	// ID is cached upon initial creation. The leader is still required
	// however, since its PIDNamespace defines the scope of the Session.
	//
	// The leader is immutable.
	leader *ThreadGroup

	// id is the cached identifier in the leader's namespace.
	//
	// The id is immutable.
	id SessionID

	// foreground is the foreground process group.
	//
	// This is protected by TaskSet.mu.
	foreground *ProcessGroup

	// processGroups is the list of process groups in this Session. This is
	// protected by TaskSet.mu.
	processGroups processGroupList

	// sessionEntry is the embedded list entry that links this Session into
	// TaskSet.sessions. This is protected by TaskSet.mu.
	sessionEntry
}

// incRef takes an additional reference on the Session. Each call is expected
// to be balanced by a later call to decRef.
func (s *Session) incRef() {
	s.refs.IncRef()
}

// decRef releases one reference on the Session. When the reference count is
// released, the Session is unregistered: its ID translations are removed from
// the leader's PID namespace and every ancestor namespace, and it is unlinked
// from the TaskSet's list of sessions.
//
// Precondition: callers must hold TaskSet.mu for writing.
func (s *Session) decRef() {
	unregister := func(context.Context) {
		// Walk from the leader's namespace up through its ancestors,
		// dropping both directions of the Session <-> ID mapping.
		for pidns := s.leader.pidns; pidns != nil; pidns = pidns.parent {
			sid := pidns.sids[s]
			delete(pidns.sessions, sid)
			delete(pidns.sids, s)
		}

		// Unlink from the global list of Sessions.
		s.leader.pidns.owner.sessions.Remove(s)
	}
	s.refs.DecRefWithDestructor(nil, unregister)
}

// ProcessGroup contains an originator threadgroup and a parent Session.
//
// +stateify savable
type ProcessGroup struct {
	refs refs.AtomicRefCount // reference count; not exported.

	// originator is the originator of the group.
	//
	// See note re: leader in Session. The same applies here.
	//
	// The originator is immutable.
	originator *ThreadGroup

	// id is the cached identifier in the originator's namespace.
	//
	// The id is immutable.
	id ProcessGroupID

	// session is the parent Session.
	//
	// The session is immutable.
	session *Session

	// ancestors is the number of thread groups in this process group whose
	// parent is in a different process group in the same session.
	//
	// The name is derived from the fact that process groups where
	// ancestors is zero are considered "orphans".
	//
	// ancestors is protected by TaskSet.mu.
	ancestors uint32

	// processGroupEntry is the embedded list entry that links this group
	// into Session.processGroups. This is protected by TaskSet.mu.
	processGroupEntry
}

// Originator returns the originator of the process group, i.e. the
// ThreadGroup recorded in the immutable originator field. No lock is taken.
func (pg *ProcessGroup) Originator() *ThreadGroup {
	return pg.originator
}

// IsOrphan reports whether this process group is an orphan, that is, whether
// its ancestors count is zero. The count is read with the originator's
// TaskSet.mu held for reading.
func (pg *ProcessGroup) IsOrphan() bool {
	ts := pg.originator.TaskSet()
	ts.mu.RLock()
	defer ts.mu.RUnlock()

	orphaned := pg.ancestors == 0
	return orphaned
}

// incRefWithParent takes a reference on pg on behalf of a ThreadGroup, tg,
// that is being associated with it, and updates the ancestors count.
//
// parentPG is the ProcessGroup of tg's parent ThreadGroup. If tg is init,
// then parentPG may be nil.
//
// Precondition: callers must hold TaskSet.mu for writing.
func (pg *ProcessGroup) incRefWithParent(parentPG *ProcessGroup) {
	// A parent in this very group never counts as an ancestor.
	if parentPG != pg {
		// A nil parent means the associated process is init, which can
		// never be orphaned, so it is counted as always having an
		// ancestor. Otherwise the parent only counts when it is in
		// the same session.
		if parentPG == nil || parentPG.session == pg.session {
			pg.ancestors++
		}
	}

	pg.refs.IncRef()
}

// decRefWithParent releases the reference that a ThreadGroup held on pg and
// undoes that ThreadGroup's contribution to the ancestors count.
//
// parentPG has the same meaning as for incRefWithParent: it is the
// ProcessGroup of the ThreadGroup's parent, or nil for init.
//
// If the reference count is released, pg is removed from the PID namespace
// translation tables and from its Session's list, and the Session reference
// it held is dropped. Otherwise pg is re-checked for orphaned stopped jobs via
// handleOrphan.
//
// Precondition: callers must hold TaskSet.mu for writing.
func (pg *ProcessGroup) decRefWithParent(parentPG *ProcessGroup) {
	// Mirror of the accounting in incRefWithParent, including the rule
	// that a nil parent counts as an ancestor.
	if parentPG != pg {
		if parentPG == nil || parentPG.session == pg.session {
			pg.ancestors--
		}
	}

	destroyed := false
	pg.refs.DecRefWithDestructor(nil, func(context.Context) {
		// The group is going away; there is nothing left to signal.
		destroyed = true

		// Drop the ID translations in the originator's namespace and
		// each of its ancestors.
		for pidns := pg.originator.pidns; pidns != nil; pidns = pidns.parent {
			pgid := pidns.pgids[pg]
			delete(pidns.processGroups, pgid)
			delete(pidns.pgids, pg)
		}

		// Unlink from the owning Session and release our hold on it.
		pg.session.processGroups.Remove(pg)
		pg.session.decRef()
	})
	if destroyed {
		return
	}
	pg.handleOrphan()
}

// parentPG returns the process group of the thread group that contains the
// parent of tg's leader, or nil if the leader has no parent.
//
// Precondition: callers must hold TaskSet.mu.
func (tg *ThreadGroup) parentPG() *ProcessGroup {
	parent := tg.leader.parent
	if parent == nil {
		return nil
	}
	return parent.tg.processGroup
}

// handleOrphan checks whether the process group is an orphan and has any
// stopped jobs. If both hold, SIGHUP followed by SIGCONT is sent to every
// thread group within the process group.
//
// Each member thread group's signal mutex is taken while it is inspected or
// signalled. The results of sendSignalLocked are not checked.
//
// Precondition: callers must hold TaskSet.mu for writing.
func (pg *ProcessGroup) handleOrphan() {
	// Nothing to do unless the group is an orphan.
	if pg.ancestors != 0 {
		return
	}

	// eachMember runs fn, with the thread group's signal mutex held, for
	// every thread group whose process group is pg.
	eachMember := func(fn func(*ThreadGroup)) {
		pg.originator.pidns.owner.forEachThreadGroupLocked(func(tg *ThreadGroup) {
			if tg.processGroup != pg {
				return
			}
			tg.signalHandlers.mu.Lock()
			fn(tg)
			tg.signalHandlers.mu.Unlock()
		})
	}

	// First pass: look for at least one member with a completed group
	// stop.
	anyStopped := false
	eachMember(func(tg *ThreadGroup) {
		if tg.groupStopComplete {
			anyStopped = true
		}
	})
	if !anyStopped {
		return
	}

	// Second pass: hang up and then continue every member.
	eachMember(func(tg *ThreadGroup) {
		tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGHUP), true /* group */)
		tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGCONT), true /* group */)
	})
}

// Session returns the process group's session without taking a reference.
//
// The session field is immutable, so no lock is taken to read it.
func (pg *ProcessGroup) Session() *Session {
	return pg.session
}

// SendSignal sends a signal to all processes inside the process group. It is
// analogous to kernel/signal.c:kill_pgrp.
//
// Every thread group in the root PID namespace whose process group is pg is
// signalled through its leader, each with its own copy of info. TaskSet.mu is
// held for reading throughout, and each thread group's signal mutex is held
// while it is signalled.
//
// Delivery continues past failures. The returned error is the one from the
// most recent failing delivery (thread groups are visited in map iteration
// order), or nil if every delivery succeeded.
func (pg *ProcessGroup) SendSignal(info *arch.SignalInfo) error {
	ts := pg.originator.TaskSet()
	ts.mu.RLock()
	defer ts.mu.RUnlock()

	var result error
	for member := range ts.Root.tgids {
		if member.processGroup != pg {
			continue
		}

		member.signalHandlers.mu.Lock()
		// Hand each thread group a private copy so that one delivery
		// cannot alter the info seen by the next.
		perGroup := *info
		err := member.leader.sendSignalLocked(&perGroup, true /*group*/)
		member.signalHandlers.mu.Unlock()

		if err != nil {
			result = err
		}
	}
	return result
}

// CreateSession creates a new Session, with the ThreadGroup as the leader.
//
// It takes TaskSet.mu for writing and then the ThreadGroup's signal mutex,
// and delegates to createSession with both held.
//
// EPERM may be returned if either the given ThreadGroup is already a Session
// leader, or a ProcessGroup already exists for the ThreadGroup's ID.
func (tg *ThreadGroup) CreateSession() error {
	ts := tg.pidns.owner
	ts.mu.Lock()
	defer ts.mu.Unlock()

	sigMu := &tg.signalHandlers.mu
	sigMu.Lock()
	defer sigMu.Unlock()

	err := tg.createSession()
	return err
}

// createSession creates a new Session led by tg, together with a new
// ProcessGroup originated by tg inside that Session, and moves tg into the
// new ProcessGroup. Both take the ID that tg has in its own PID namespace.
// Finally tg is disconnected from its controlling terminal.
//
// EPERM is returned, before anything is modified, if tg already leads a
// Session, or if one of the Sessions whose leader is in tg's PID namespace
// already uses that ID or contains a ProcessGroup using it.
//
// Precondition: callers must hold TaskSet.mu and the signal mutex for writing.
func (tg *ThreadGroup) createSession() error {
	// Get the ID for this thread in the current namespace.
	id := tg.pidns.tgids[tg]

	// Check if this ThreadGroup already leads a Session, or
	// if the proposed group is already taken.
	for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() {
		// Only Sessions scoped to tg's own PID namespace can collide.
		if s.leader.pidns != tg.pidns {
			continue
		}
		if s.leader == tg {
			return syserror.EPERM
		}
		if s.id == SessionID(id) {
			return syserror.EPERM
		}
		for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() {
			if pg.id == ProcessGroupID(id) {
				return syserror.EPERM
			}
		}
	}

	// Create a new Session, with a single reference.
	s := &Session{
		id:     SessionID(id),
		leader: tg,
	}
	s.refs.EnableLeakCheck("kernel.Session")

	// Create a new ProcessGroup, belonging to that Session.
	// This also has a single reference (assigned below).
	//
	// Note that since this is a new session and a new process group, there
	// will be zero ancestors for this process group. (It is an orphan at
	// this point.)
	pg := &ProcessGroup{
		id:         ProcessGroupID(id),
		originator: tg,
		session:    s,
		ancestors:  0,
	}
	pg.refs.EnableLeakCheck("kernel.ProcessGroup")

	// Tie them and return the result.
	s.processGroups.PushBack(pg)
	tg.pidns.owner.sessions.PushBack(s)

	// Leave the current group, and assign the new one.
	if tg.processGroup != nil {
		oldParentPG := tg.parentPG()
		// Re-evaluate every child's ancestor accounting now that its
		// parent (tg) is moving to pg.
		//
		// NOTE(review): JoinProcessGroup passes tg's own previous
		// process group to the children's decRefWithParent, whereas
		// this passes the process group of tg's parent. Confirm which
		// of the two is intended.
		tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) {
			childTG.processGroup.incRefWithParent(pg)
			childTG.processGroup.decRefWithParent(oldParentPG)
		})
		// If tg.processGroup is an orphan, decRefWithParent will lock
		// the signal mutex of each thread group in tg.processGroup.
		// However, tg's signal mutex may already be locked at this
		// point. We change tg's process group before calling
		// decRefWithParent to avoid locking tg's signal mutex twice.
		oldPG := tg.processGroup
		tg.processGroup = pg
		oldPG.decRefWithParent(oldParentPG)
	} else {
		// The current process group may be nil only in the case of an
		// unparented thread group (i.e. the init process). This would
		// not normally occur, but we allow it for the convenience of
		// CreateSession working from that point. There will be no
		// child processes. We always say that the very first group
		// created has ancestors (avoids checks elsewhere).
		//
		// Note that this mirrors the parentPG == nil logic in
		// incRefWithParent and decRefWithParent, which counts nil as
		// an ancestor.
		tg.processGroup = pg
		tg.processGroup.ancestors++
	}

	// Ensure a translation is added to all namespaces: in each namespace
	// from tg's own up through its ancestors, the Session and the
	// ProcessGroup are both registered under tg's ID in that namespace.
	for ns := tg.pidns; ns != nil; ns = ns.parent {
		local := ns.tgids[tg]
		ns.sids[s] = SessionID(local)
		ns.sessions[SessionID(local)] = s
		ns.pgids[pg] = ProcessGroupID(local)
		ns.processGroups[ProcessGroupID(local)] = pg
	}

	// Disconnect from the controlling terminal.
	tg.tty = nil

	return nil
}

// CreateProcessGroup creates a new process group, originated by tg, in tg's
// current Session and moves tg into it. The new group takes the ID that tg
// has in its own PID namespace, and a translation for the group is added to
// that namespace and each of its ancestors.
//
// ESRCH is returned if tg no longer has an ID in its PID namespace. EPERM is
// returned if tg is a Session leader or a process group with tg's ID already
// exists in a Session whose leader is in tg's PID namespace. Nothing is
// modified when an error is returned.
//
// TaskSet.mu is held for writing for the duration of the call.
func (tg *ThreadGroup) CreateProcessGroup() error {
	tg.pidns.owner.mu.Lock()
	defer tg.pidns.owner.mu.Unlock()

	// Get the ID for this thread group in the current namespace. A missing
	// entry would otherwise read as ID zero and be registered below as a
	// bogus process group, so refuse it up front.
	id, ok := tg.pidns.tgids[tg]
	if !ok {
		return syserror.ESRCH
	}

	// Per above, check for a Session leader or existing group.
	for s := tg.pidns.owner.sessions.Front(); s != nil; s = s.Next() {
		// Only Sessions scoped to tg's own PID namespace can collide.
		if s.leader.pidns != tg.pidns {
			continue
		}
		if s.leader == tg {
			return syserror.EPERM
		}
		for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() {
			if pg.id == ProcessGroupID(id) {
				return syserror.EPERM
			}
		}
	}

	// Create a new ProcessGroup, belonging to the current Session. The
	// group holds its own reference on the Session, released again by
	// decRefWithParent when the group is destroyed.
	tg.processGroup.session.incRef()
	pg := &ProcessGroup{
		id:         ProcessGroupID(id),
		originator: tg,
		session:    tg.processGroup.session,
	}
	pg.refs.EnableLeakCheck("kernel.ProcessGroup")

	// The new group's single reference belongs to tg. We manually adjust
	// the ancestors for tg if its parent is in the same session.
	if tg.leader.parent != nil && tg.leader.parent.tg.processGroup.session == pg.session {
		pg.ancestors++
	}

	// Assign the new process group; adjust children.
	//
	// NOTE(review): JoinProcessGroup passes tg's own previous process
	// group to the children's decRefWithParent, whereas this passes the
	// process group of tg's parent. Confirm which of the two is intended.
	oldParentPG := tg.parentPG()
	tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) {
		childTG.processGroup.incRefWithParent(pg)
		childTG.processGroup.decRefWithParent(oldParentPG)
	})
	tg.processGroup.decRefWithParent(oldParentPG)
	tg.processGroup = pg

	// Add the new process group to the session.
	pg.session.processGroups.PushBack(pg)

	// Ensure this translation is added to all namespaces.
	for ns := tg.pidns; ns != nil; ns = ns.parent {
		local := ns.tgids[tg]
		ns.pgids[pg] = ProcessGroupID(local)
		ns.processGroups[ProcessGroupID(local)] = pg
	}

	return nil
}

// JoinProcessGroup moves tg into the existing process group that has ID pgid
// in PID namespace pidns.
//
// If checkExec is set, then the join is not permitted after the process has
// executed exec at least once.
//
// Errors, in the order they are checked:
//   - ESRCH if tg no longer has an ID in its own PID namespace.
//   - EPERM if no process group with ID pgid exists in pidns.
//   - EACCES if checkExec is set and tg has performed an exec.
//   - EPERM if the target group is not in tg's current Session.
//
// Nothing is modified when an error is returned. pidns.owner.mu is held for
// writing for the duration of the call.
func (tg *ThreadGroup) JoinProcessGroup(pidns *PIDNamespace, pgid ProcessGroupID, checkExec bool) error {
	pidns.owner.mu.Lock()
	defer pidns.owner.mu.Unlock()

	// Check that the thread group is still registered. Without this, a
	// thread group that has lost its ID would still be attached to the
	// target group and adjust its reference and ancestor counts.
	if _, ok := tg.pidns.tgids[tg]; !ok {
		return syserror.ESRCH
	}

	// Lookup the ProcessGroup.
	pg := pidns.processGroups[pgid]
	if pg == nil {
		return syserror.EPERM
	}

	// Disallow the join if an execve has been performed, per POSIX.
	if checkExec && tg.execed {
		return syserror.EACCES
	}

	// See if it's in the same session as ours.
	if pg.session != tg.processGroup.session {
		return syserror.EPERM
	}

	// Join the group; adjust children. The reference on the new group is
	// taken before the one on the old group is dropped. Each child's
	// ancestor accounting is re-evaluated against tg's new group and
	// released against tg's previous one.
	parentPG := tg.parentPG()
	pg.incRefWithParent(parentPG)
	tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) {
		childTG.processGroup.incRefWithParent(pg)
		childTG.processGroup.decRefWithParent(tg.processGroup)
	})
	tg.processGroup.decRefWithParent(parentPG)
	tg.processGroup = pg

	return nil
}

// Session returns the Session of the ThreadGroup's current process group,
// read with TaskSet.mu held for reading.
//
// A reference is not taken on the session.
func (tg *ThreadGroup) Session() *Session {
	ts := tg.pidns.owner
	ts.mu.RLock()
	defer ts.mu.RUnlock()

	sess := tg.processGroup.session
	return sess
}

// IDOfSession returns the SessionID assigned to s in PID namespace pidns,
// looked up with TaskSet.mu held for reading.
//
// If s has no ID in this namespace, zero is returned. It is the caller's
// responsibility to check for that before using the result.
func (pidns *PIDNamespace) IDOfSession(s *Session) SessionID {
	ts := pidns.owner
	ts.mu.RLock()
	defer ts.mu.RUnlock()

	sid := pidns.sids[s]
	return sid
}

// SessionWithID returns the Session that has the given ID in PID namespace
// pidns, or nil if that ID is not defined in this namespace. The lookup is
// performed with TaskSet.mu held for reading.
//
// A reference is not taken on the session.
func (pidns *PIDNamespace) SessionWithID(id SessionID) *Session {
	ts := pidns.owner
	ts.mu.RLock()
	defer ts.mu.RUnlock()

	sess := pidns.sessions[id]
	return sess
}

// ProcessGroup returns the ThreadGroup's current ProcessGroup, read with
// TaskSet.mu held for reading.
//
// A reference is not taken on the process group.
func (tg *ThreadGroup) ProcessGroup() *ProcessGroup {
	ts := tg.pidns.owner
	ts.mu.RLock()
	defer ts.mu.RUnlock()

	current := tg.processGroup
	return current
}

// IDOfProcessGroup returns the ProcessGroupID assigned to pg in PID namespace
// pidns, looked up with TaskSet.mu held for reading.
//
// If pg has no ID in this namespace, zero is returned. It is the caller's
// responsibility to check for that before using the result.
func (pidns *PIDNamespace) IDOfProcessGroup(pg *ProcessGroup) ProcessGroupID {
	ts := pidns.owner
	ts.mu.RLock()
	defer ts.mu.RUnlock()

	pgid := pidns.pgids[pg]
	return pgid
}

// ProcessGroupWithID returns the ProcessGroup that has the given ID in PID
// namespace pidns, or nil if that ID is not defined in this namespace. The
// lookup is performed with TaskSet.mu held for reading.
//
// A reference is not taken on the process group.
func (pidns *PIDNamespace) ProcessGroupWithID(id ProcessGroupID) *ProcessGroup {
	ts := pidns.owner
	ts.mu.RLock()
	defer ts.mu.RUnlock()

	group := pidns.processGroups[id]
	return group
}