diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/abi/linux/socket.go | 16 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/BUILD | 3 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/net.go | 126 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/proc.go | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/kernel.go | 58 | ||||
-rw-r--r-- | pkg/sentry/socket/socket.go | 10 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/unix.go | 2 |
7 files changed, 211 insertions, 6 deletions
diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go index 929814752..a5f78506a 100644 --- a/pkg/abi/linux/socket.go +++ b/pkg/abi/linux/socket.go @@ -191,6 +191,15 @@ const ( SO_TXTIME = 61 ) +// enum socket_state, from uapi/linux/net.h. +const ( + SS_FREE = 0 // Not allocated. + SS_UNCONNECTED = 1 // Unconnected to any socket. + SS_CONNECTING = 2 // In process of connecting. + SS_CONNECTED = 3 // Connected to socket. + SS_DISCONNECTING = 4 // In process of disconnecting. +) + // SockAddrMax is the maximum size of a struct sockaddr, from // uapi/linux/socket.h. const SockAddrMax = 128 @@ -343,3 +352,10 @@ const SizeOfControlMessageRight = 4 // SCM_MAX_FD is the maximum number of FDs accepted in a single sendmsg call. // From net/scm.h. const SCM_MAX_FD = 253 + +// SO_ACCEPTCON is defined as __SO_ACCEPTCON in +// include/uapi/linux/net.h, which represents a listening socket +// state. Note that this is distinct from SO_ACCEPTCONN, which is a +// socket option for querying whether a socket is in a listening +// state. +const SO_ACCEPTCON = 1 << 16 diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index f6bc90634..666b0ab3a 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -30,6 +30,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/log", "//pkg/sentry/context", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", @@ -43,6 +44,8 @@ go_library( "//pkg/sentry/kernel/time", "//pkg/sentry/mm", "//pkg/sentry/socket/rpcinet", + "//pkg/sentry/socket/unix", + "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", "//pkg/sentry/usermem", "//pkg/syserror", diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index 219eea7f8..55a958f9e 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -15,19 +15,24 @@ package proc import ( + "bytes" "fmt" "time" "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/sentry/context" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs" "gvisor.googlesource.com/gvisor/pkg/sentry/inet" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix" + "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" ) // newNet creates a new proc net entry. -func (p *proc) newNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { +func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSource) *fs.Inode { var contents map[string]*fs.Inode if s := p.k.NetworkStack(); s != nil { contents = map[string]*fs.Inode{ @@ -52,6 +57,8 @@ func (p *proc) newNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { "tcp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode")), "udp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops")), + + "unix": seqfile.NewSeqFileInode(ctx, &netUnix{k: k}, msrc), } if s.SupportsIPv6() { @@ -182,3 +189,120 @@ func (n *netDev) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se return data, 0 } + +// netUnix implements seqfile.SeqSource for /proc/net/unix. +// +// +stateify savable +type netUnix struct { + k *kernel.Kernel +} + +// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate. +func (*netUnix) NeedsUpdate(generation int64) bool { + return true +} + +// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. +func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) { + if h != nil { + return []seqfile.SeqData{}, 0 + } + + var buf bytes.Buffer + // Header + fmt.Fprintf(&buf, "Num RefCount Protocol Flags Type St Inode Path\n") + + // Entries + for _, sref := range n.k.ListSockets(linux.AF_UNIX) { + s := sref.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", sref) + continue + } + sfile := s.(*fs.File) + sops, ok := sfile.FileOperations.(*unix.SocketOperations) + if !ok { + panic(fmt.Sprintf("Found non-unix socket file in unix socket table: %+v", sfile)) + } + + addr, err := sops.Endpoint().GetLocalAddress() + if err != nil { + log.Warningf("Failed to retrieve socket name from %+v: %v", sfile, err) + addr.Addr = "<unknown>" + } + + sockFlags := 0 + if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok { + if ce.Listening() { + // For unix domain sockets, linux reports a single flag + // value if the socket is listening, of __SO_ACCEPTCON. + sockFlags = linux.SO_ACCEPTCON + } + } + + var sockState int + switch sops.Endpoint().Type() { + case linux.SOCK_DGRAM: + sockState = linux.SS_CONNECTING + // Unlike Linux, we don't have unbound connection-less sockets, + // so no SS_DISCONNECTING. + + case linux.SOCK_SEQPACKET: + fallthrough + case linux.SOCK_STREAM: + // Connectioned. + if sops.Endpoint().(transport.ConnectingEndpoint).Connected() { + sockState = linux.SS_CONNECTED + } else { + sockState = linux.SS_UNCONNECTED + } + } + + // In the socket entry below, the value for the 'Num' field requires + // some consideration. Linux prints the address to the struct + // unix_sock representing a socket in the kernel, but may redact the + // value for unprivileged users depending on the kptr_restrict + // sysctl. + // + // One use for this field is to allow a privileged user to + // introspect into the kernel memory to determine information about + // a socket not available through procfs, such as the socket's peer. + // + // On gvisor, returning a pointer to our internal structures would + // be pointless, as it wouldn't match the memory layout for struct + // unix_sock, making introspection difficult. We could populate a + // struct unix_sock with the appropriate data, but even that + // requires consideration for which kernel version to emulate, as + // the definition of this struct changes over time. + // + // For now, we always redact this pointer. + fmt.Fprintf(&buf, "%#016p: %08X %08X %08X %04X %02X %5d", + (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct. + sfile.ReadRefs()-1, // RefCount, don't count our own ref. + 0, // Protocol, always 0 for UDS. + sockFlags, // Flags. + sops.Endpoint().Type(), // Type. + sockState, // State. + sfile.InodeID(), // Inode. + ) + + // Path + if len(addr.Addr) != 0 { + if addr.Addr[0] == 0 { + // Abstract path. + fmt.Fprintf(&buf, " @%s", string(addr.Addr[1:])) + } else { + fmt.Fprintf(&buf, " %s", string(addr.Addr)) + } + } + fmt.Fprintf(&buf, "\n") + + sfile.DecRef() + } + + data := []seqfile.SeqData{{ + Buf: buf.Bytes(), + Handle: (*netUnix)(nil), + }} + return data, 0 +} diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go index be04f94af..88018e707 100644 --- a/pkg/sentry/fs/proc/proc.go +++ b/pkg/sentry/fs/proc/proc.go @@ -85,7 +85,7 @@ func New(ctx context.Context, msrc *fs.MountSource) (*fs.Inode, error) { if _, ok := p.k.NetworkStack().(*rpcinet.Stack); ok { p.AddChild(ctx, "net", newRPCInetProcNet(ctx, msrc)) } else { - p.AddChild(ctx, "net", p.newNetDir(ctx, msrc)) + p.AddChild(ctx, "net", p.newNetDir(ctx, k, msrc)) } return newProcInode(p, msrc, fs.SpecialDirectory, nil), nil diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 43e9823cb..e7e5ff777 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -43,6 +43,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/cpuid" "gvisor.googlesource.com/gvisor/pkg/eventchannel" "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/refs" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/context" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" @@ -164,7 +165,7 @@ type Kernel struct { // nextInotifyCookie is a monotonically increasing counter used for // generating unique inotify event cookies. // - // nextInotifyCookie is mutable, and is accesed using atomic memory + // nextInotifyCookie is mutable, and is accessed using atomic memory // operations. nextInotifyCookie uint32 @@ -177,6 +178,10 @@ type Kernel struct { // danglingEndpoints is used to save / restore tcpip.DanglingEndpoints. danglingEndpoints struct{} `state:".([]tcpip.Endpoint)"` + + // socketTable is used to track all sockets on the system. Protected by + // extMu. + socketTable map[int]map[*refs.WeakRef]struct{} } // InitKernelArgs holds arguments to Init. @@ -266,6 +271,7 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.monotonicClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Monotonic} k.futexes = futex.NewManager() k.netlinkPorts = port.New() + k.socketTable = make(map[int]map[*refs.WeakRef]struct{}) return nil } @@ -1051,6 +1057,56 @@ func (k *Kernel) EmitUnimplementedEvent(ctx context.Context) { }) } +// socketEntry represents a socket recorded in Kernel.socketTable. It implements +// refs.WeakRefUser for sockets stored in the socket table. +// +// +stateify savable +type socketEntry struct { + k *Kernel + sock *refs.WeakRef + family int +} + +// WeakRefGone implements refs.WeakRefUser.WeakRefGone. +func (s *socketEntry) WeakRefGone() { + s.k.extMu.Lock() + // k.socketTable is guaranteed to point to a valid socket table for s.family + // at this point, since we made sure of the fact when we created this + // socketEntry, and we never delete socket tables. + delete(s.k.socketTable[s.family], s.sock) + s.k.extMu.Unlock() +} + +// RecordSocket adds a socket to the system-wide socket table for tracking. +// +// Precondition: Caller must hold a reference to sock. +func (k *Kernel) RecordSocket(sock *fs.File, family int) { + k.extMu.Lock() + table, ok := k.socketTable[family] + if !ok { + table = make(map[*refs.WeakRef]struct{}) + k.socketTable[family] = table + } + se := socketEntry{k: k, family: family} + se.sock = refs.NewWeakRef(sock, &se) + table[se.sock] = struct{}{} + k.extMu.Unlock() +} + +// ListSockets returns a snapshot of all sockets of a given family. +func (k *Kernel) ListSockets(family int) []*refs.WeakRef { + k.extMu.Lock() + socks := []*refs.WeakRef{} + if table, ok := k.socketTable[family]; ok { + socks = make([]*refs.WeakRef, 0, len(table)) + for s, _ := range table { + socks = append(socks, s) + } + } + k.extMu.Unlock() + return socks +} + type supervisorContext struct { context.NoopSleeper log.Logger diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go index e28d2c4fa..5ab423f3c 100644 --- a/pkg/sentry/socket/socket.go +++ b/pkg/sentry/socket/socket.go @@ -147,6 +147,7 @@ func New(t *kernel.Task, family int, stype transport.SockType, protocol int) (*f return nil, err } if s != nil { + t.Kernel().RecordSocket(s, family) return s, nil } } @@ -163,12 +164,15 @@ func Pair(t *kernel.Task, family int, stype transport.SockType, protocol int) (* } for _, p := range providers { - s, t, err := p.Pair(t, stype, protocol) + s1, s2, err := p.Pair(t, stype, protocol) if err != nil { return nil, nil, err } - if s != nil && t != nil { - return s, t, nil + if s1 != nil && s2 != nil { + k := t.Kernel() + k.RecordSocket(s1, family) + k.RecordSocket(s2, family) + return s1, s2, nil } } diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 19258e692..c857a0f33 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -219,6 +219,8 @@ func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, return 0, nil, 0, syserr.FromError(e) } + t.Kernel().RecordSocket(ns, linux.AF_UNIX) + return fd, addr, addrLen, nil } |