summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorgVisor bot <gvisor-bot@google.com>2019-09-05 02:17:36 +0000
committergVisor bot <gvisor-bot@google.com>2019-09-05 02:17:36 +0000
commitef0576778d7cf4ed1ac63830c75ba856dfbda73a (patch)
tree66d9f1b158eb520665e6f7be930e3ffe842c5825
parente8ab86cf153ab133a21c29a5b41ed1868cf2c279 (diff)
parentfbbb2f7ed6a2f735c8e8d48e8b5264d2057e93ad (diff)
Merge fbbb2f7e (automated)
-rwxr-xr-xpkg/sentry/platform/ring0/defs_impl.go6
-rw-r--r--pkg/tcpip/stack/nic.go63
-rw-r--r--pkg/tcpip/stack/stack.go32
-rw-r--r--pkg/tcpip/tcpip.go9
-rw-r--r--pkg/tcpip/transport/tcp/accept.go30
-rw-r--r--pkg/tcpip/transport/tcp/connect.go19
-rw-r--r--runsc/boot/loader.go49
7 files changed, 122 insertions, 86 deletions
diff --git a/pkg/sentry/platform/ring0/defs_impl.go b/pkg/sentry/platform/ring0/defs_impl.go
index d4bfc5a4a..8efc3825f 100755
--- a/pkg/sentry/platform/ring0/defs_impl.go
+++ b/pkg/sentry/platform/ring0/defs_impl.go
@@ -1,14 +1,14 @@
package ring0
import (
- "syscall"
-
"fmt"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
"io"
"reflect"
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
var (
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index ae56e0ffd..43719085e 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -36,13 +36,13 @@ type NIC struct {
demux *transportDemuxer
- mu sync.RWMutex
- spoofing bool
- promiscuous bool
- primary map[tcpip.NetworkProtocolNumber]*ilist.List
- endpoints map[NetworkEndpointID]*referencedNetworkEndpoint
- subnets []tcpip.Subnet
- mcastJoins map[NetworkEndpointID]int32
+ mu sync.RWMutex
+ spoofing bool
+ promiscuous bool
+ primary map[tcpip.NetworkProtocolNumber]*ilist.List
+ endpoints map[NetworkEndpointID]*referencedNetworkEndpoint
+ addressRanges []tcpip.Subnet
+ mcastJoins map[NetworkEndpointID]int32
stats NICStats
}
@@ -224,7 +224,17 @@ func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address t
// the caller or if the address is found in the NIC's subnets.
createTempEP := spoofingOrPromiscuous
if !createTempEP {
- for _, sn := range n.subnets {
+ for _, sn := range n.addressRanges {
+ // Skip the subnet address.
+ if address == sn.ID() {
+ continue
+ }
+ // For now just skip the broadcast address, until we support it.
+ // FIXME(b/137608825): Add support for sending/receiving directed
+ // (subnet) broadcast.
+ if address == sn.Broadcast() {
+ continue
+ }
if sn.Contains(address) {
createTempEP = true
break
@@ -381,45 +391,38 @@ func (n *NIC) Addresses() []tcpip.ProtocolAddress {
return addrs
}
-// AddSubnet adds a new subnet to n, so that it starts accepting packets
-// targeted at the given address and network protocol.
-func (n *NIC) AddSubnet(protocol tcpip.NetworkProtocolNumber, subnet tcpip.Subnet) {
+// AddAddressRange adds a range of addresses to n, so that it starts accepting
+// packets targeted at the given addresses and network protocol. The range is
+// given by a subnet address, and all addresses contained in the subnet are
+// used except for the subnet address itself and the subnet's broadcast
+// address.
+func (n *NIC) AddAddressRange(protocol tcpip.NetworkProtocolNumber, subnet tcpip.Subnet) {
n.mu.Lock()
- n.subnets = append(n.subnets, subnet)
+ n.addressRanges = append(n.addressRanges, subnet)
n.mu.Unlock()
}
-// RemoveSubnet removes the given subnet from n.
-func (n *NIC) RemoveSubnet(subnet tcpip.Subnet) {
+// RemoveAddressRange removes the given address range from n.
+func (n *NIC) RemoveAddressRange(subnet tcpip.Subnet) {
n.mu.Lock()
// Use the same underlying array.
- tmp := n.subnets[:0]
- for _, sub := range n.subnets {
+ tmp := n.addressRanges[:0]
+ for _, sub := range n.addressRanges {
if sub != subnet {
tmp = append(tmp, sub)
}
}
- n.subnets = tmp
+ n.addressRanges = tmp
n.mu.Unlock()
}
-// ContainsSubnet reports whether this NIC contains the given subnet.
-func (n *NIC) ContainsSubnet(subnet tcpip.Subnet) bool {
- for _, s := range n.Subnets() {
- if s == subnet {
- return true
- }
- }
- return false
-}
-
// Subnets returns the Subnets associated with this NIC.
-func (n *NIC) Subnets() []tcpip.Subnet {
+func (n *NIC) AddressRanges() []tcpip.Subnet {
n.mu.RLock()
defer n.mu.RUnlock()
- sns := make([]tcpip.Subnet, 0, len(n.subnets)+len(n.endpoints))
+ sns := make([]tcpip.Subnet, 0, len(n.addressRanges)+len(n.endpoints))
for nid := range n.endpoints {
sn, err := tcpip.NewSubnet(nid.LocalAddress, tcpip.AddressMask(strings.Repeat("\xff", len(nid.LocalAddress))))
if err != nil {
@@ -429,7 +432,7 @@ func (n *NIC) Subnets() []tcpip.Subnet {
}
sns = append(sns, sn)
}
- return append(sns, n.subnets...)
+ return append(sns, n.addressRanges...)
}
func (n *NIC) removeEndpointLocked(r *referencedNetworkEndpoint) {
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 1d5e84a8b..6beca6ae8 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -702,14 +702,14 @@ func (s *Stack) CheckNIC(id tcpip.NICID) bool {
}
// NICSubnets returns a map of NICIDs to their associated subnets.
-func (s *Stack) NICSubnets() map[tcpip.NICID][]tcpip.Subnet {
+func (s *Stack) NICAddressRanges() map[tcpip.NICID][]tcpip.Subnet {
s.mu.RLock()
defer s.mu.RUnlock()
nics := map[tcpip.NICID][]tcpip.Subnet{}
for id, nic := range s.nics {
- nics[id] = append(nics[id], nic.Subnets()...)
+ nics[id] = append(nics[id], nic.AddressRanges()...)
}
return nics
}
@@ -810,45 +810,35 @@ func (s *Stack) AddProtocolAddressWithOptions(id tcpip.NICID, protocolAddress tc
return nic.AddAddress(protocolAddress, peb)
}
-// AddSubnet adds a subnet range to the specified NIC.
-func (s *Stack) AddSubnet(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, subnet tcpip.Subnet) *tcpip.Error {
+// AddAddressRange adds a range of addresses to the specified NIC. The range is
+// given by a subnet address, and all addresses contained in the subnet are
+// used except for the subnet address itself and the subnet's broadcast
+// address.
+func (s *Stack) AddAddressRange(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, subnet tcpip.Subnet) *tcpip.Error {
s.mu.RLock()
defer s.mu.RUnlock()
if nic, ok := s.nics[id]; ok {
- nic.AddSubnet(protocol, subnet)
+ nic.AddAddressRange(protocol, subnet)
return nil
}
return tcpip.ErrUnknownNICID
}
-// RemoveSubnet removes the subnet range from the specified NIC.
-func (s *Stack) RemoveSubnet(id tcpip.NICID, subnet tcpip.Subnet) *tcpip.Error {
+// RemoveAddressRange removes the range of addresses from the specified NIC.
+func (s *Stack) RemoveAddressRange(id tcpip.NICID, subnet tcpip.Subnet) *tcpip.Error {
s.mu.RLock()
defer s.mu.RUnlock()
if nic, ok := s.nics[id]; ok {
- nic.RemoveSubnet(subnet)
+ nic.RemoveAddressRange(subnet)
return nil
}
return tcpip.ErrUnknownNICID
}
-// ContainsSubnet reports whether the specified NIC contains the specified
-// subnet.
-func (s *Stack) ContainsSubnet(id tcpip.NICID, subnet tcpip.Subnet) (bool, *tcpip.Error) {
- s.mu.RLock()
- defer s.mu.RUnlock()
-
- if nic, ok := s.nics[id]; ok {
- return nic.ContainsSubnet(subnet), nil
- }
-
- return false, tcpip.ErrUnknownNICID
-}
-
// RemoveAddress removes an existing network-layer address from the specified
// NIC.
func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) *tcpip.Error {
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 05aa42c98..418e771d2 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -219,6 +219,15 @@ func (s *Subnet) Mask() AddressMask {
return s.mask
}
+// Broadcast returns the subnet's broadcast address.
+func (s *Subnet) Broadcast() Address {
+ addr := []byte(s.address)
+ for i := range addr {
+ addr[i] |= ^s.mask[i]
+ }
+ return Address(addr)
+}
+
// NICID is a number that uniquely identifies a NIC.
type NICID int32
diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go
index e9c5099ea..0802e984e 100644
--- a/pkg/tcpip/transport/tcp/accept.go
+++ b/pkg/tcpip/transport/tcp/accept.go
@@ -143,6 +143,15 @@ func decSynRcvdCount() {
synRcvdCount.Unlock()
}
+// synCookiesInUse() returns true if the synRcvdCount is greater than
+// SynRcvdCountThreshold.
+func synCookiesInUse() bool {
+ synRcvdCount.Lock()
+ v := synRcvdCount.value
+ synRcvdCount.Unlock()
+ return v >= SynRcvdCountThreshold
+}
+
// newListenContext creates a new listen context.
func newListenContext(stk *stack.Stack, listenEP *endpoint, rcvWnd seqnum.Size, v6only bool, netProto tcpip.NetworkProtocolNumber) *listenContext {
l := &listenContext{
@@ -446,6 +455,27 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
return
}
+ if !synCookiesInUse() {
+ // Send a reset as this is an ACK for which there is no
+ // half open connections and we are not using cookies
+ // yet.
+ //
+ // The only time we should reach here when a connection
+ // was opened and closed really quickly and a delayed
+ // ACK was received from the sender.
+ replyWithReset(s)
+ return
+ }
+
+ // Since SYN cookies are in use this is potentially an ACK to a
+ // SYN-ACK we sent but don't have a half open connection state
+ // as cookies are being used to protect against a potential SYN
+ // flood. In such cases validate the cookie and if valid create
+ // a fully connected endpoint and deliver to the accept queue.
+ //
+ // If not, silently drop the ACK to avoid leaking information
+ // when under a potential syn flood attack.
+ //
// Validate the cookie.
data, ok := ctx.isCookieValid(s.id, s.ackNumber-1, s.sequenceNumber-1)
if !ok || int(data) >= len(mssTable) {
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 00d2ae524..21038a65a 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -720,13 +720,18 @@ func (e *endpoint) handleClose() *tcpip.Error {
return nil
}
-// resetConnectionLocked sends a RST segment and puts the endpoint in an error
-// state with the given error code. This method must only be called from the
-// protocol goroutine.
+// resetConnectionLocked puts the endpoint in an error state with the given
+// error code and sends a RST if and only if the error is not ErrConnectionReset
+// indicating that the connection is being reset due to receiving a RST. This
+// method must only be called from the protocol goroutine.
func (e *endpoint) resetConnectionLocked(err *tcpip.Error) {
- e.sendRaw(buffer.VectorisedView{}, header.TCPFlagAck|header.TCPFlagRst, e.snd.sndUna, e.rcv.rcvNxt, 0)
+ // Only send a reset if the connection is being aborted for a reason
+ // other than receiving a reset.
e.state = StateError
e.hardError = err
+ if err != tcpip.ErrConnectionReset {
+ e.sendRaw(buffer.VectorisedView{}, header.TCPFlagAck|header.TCPFlagRst, e.snd.sndUna, e.rcv.rcvNxt, 0)
+ }
}
// completeWorkerLocked is called by the worker goroutine when it's about to
@@ -806,7 +811,7 @@ func (e *endpoint) keepaliveTimerExpired() *tcpip.Error {
if e.keepalive.unacked >= e.keepalive.count {
e.keepalive.Unlock()
- return tcpip.ErrConnectionReset
+ return tcpip.ErrTimeout
}
// RFC1122 4.2.3.6: TCP keepalive is a dataless ACK with
@@ -1068,6 +1073,10 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error {
e.workMu.Lock()
if err := funcs[v].f(); err != nil {
e.mu.Lock()
+ // Ensure we release all endpoint registration and route
+ // references as the connection is now in an error
+ // state.
+ e.workerCleanup = true
e.resetConnectionLocked(err)
// Lock released below.
epilogue()
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 19b738705..823a34619 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -760,26 +760,34 @@ func (l *Loader) destroyContainer(cid string) error {
if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
return fmt.Errorf("sending SIGKILL to all container processes: %v", err)
}
+ // Wait for all processes that belong to the container to exit (including
+ // exec'd processes).
+ for _, t := range l.k.TaskSet().Root.Tasks() {
+ if t.ContainerID() == cid {
+ t.ThreadGroup().WaitExited()
+ }
+ }
+
+ // At this point, all processes inside of the container have exited,
+ // releasing all references to the container's MountNamespace and
+ // causing all submounts and overlays to be unmounted.
+ //
+ // Since the container's MountNamespace has been released,
+ // MountNamespace.destroy() will have executed, but that function may
+ // trigger async close operations. We must wait for those to complete
+ // before returning, otherwise the caller may kill the gofer before
+ // they complete, causing a cascade of failing RPCs.
+ fs.AsyncBarrier()
}
- // Remove all container thread groups from the map.
+ // No more failure from this point on. Remove all container thread groups
+ // from the map.
for key := range l.processes {
if key.cid == cid {
delete(l.processes, key)
}
}
- // At this point, all processes inside of the container have exited,
- // releasing all references to the container's MountNamespace and
- // causing all submounts and overlays to be unmounted.
- //
- // Since the container's MountNamespace has been released,
- // MountNamespace.destroy() will have executed, but that function may
- // trigger async close operations. We must wait for those to complete
- // before returning, otherwise the caller may kill the gofer before
- // they complete, causing a cascade of failing RPCs.
- fs.AsyncBarrier()
-
log.Debugf("Container destroyed %q", cid)
return nil
}
@@ -1037,21 +1045,8 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
// the signal is delivered. This prevents process leaks when SIGKILL is
// sent to the entire container.
l.k.Pause()
- if err := l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo}); err != nil {
- l.k.Unpause()
- return err
- }
- l.k.Unpause()
-
- // If SIGKILLing all processes, wait for them to exit.
- if linux.Signal(signo) == linux.SIGKILL {
- for _, t := range l.k.TaskSet().Root.Tasks() {
- if t.ContainerID() == cid {
- t.ThreadGroup().WaitExited()
- }
- }
- }
- return nil
+ defer l.k.Unpause()
+ return l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo})
}
// threadGroupFromID same as threadGroupFromIDLocked except that it acquires