| package libcontainer |
| |
| import ( |
| "bytes" |
| "errors" |
| "fmt" |
| "os" |
| "path/filepath" |
| "strconv" |
| |
| "github.com/opencontainers/runc/libcontainer/configs" |
| "github.com/opencontainers/runc/types" |
| "github.com/sirupsen/logrus" |
| "github.com/vishvananda/netlink" |
| "github.com/vishvananda/netlink/nl" |
| "github.com/vishvananda/netns" |
| |
| "golang.org/x/sys/unix" |
| ) |
| |
| var strategies = map[string]networkStrategy{ |
| "loopback": &loopback{}, |
| } |
| |
| // networkStrategy represents a specific network configuration for |
| // a container's networking stack |
| type networkStrategy interface { |
| create(*network, int) error |
| initialize(*network) error |
| detach(*configs.Network) error |
| attach(*configs.Network) error |
| } |
| |
| // getStrategy returns the specific network strategy for the |
| // provided type. |
| func getStrategy(tpe string) (networkStrategy, error) { |
| s, exists := strategies[tpe] |
| if !exists { |
| return nil, fmt.Errorf("unknown strategy type %q", tpe) |
| } |
| return s, nil |
| } |
| |
| // Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. |
| func getNetworkInterfaceStats(interfaceName string) (*types.NetworkInterface, error) { |
| out := &types.NetworkInterface{Name: interfaceName} |
| // This can happen if the network runtime information is missing - possible if the |
| // container was created by an old version of libcontainer. |
| if interfaceName == "" { |
| return out, nil |
| } |
| type netStatsPair struct { |
| // Where to write the output. |
| Out *uint64 |
| // The network stats file to read. |
| File string |
| } |
| // Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container. |
| netStats := []netStatsPair{ |
| {Out: &out.RxBytes, File: "tx_bytes"}, |
| {Out: &out.RxPackets, File: "tx_packets"}, |
| {Out: &out.RxErrors, File: "tx_errors"}, |
| {Out: &out.RxDropped, File: "tx_dropped"}, |
| |
| {Out: &out.TxBytes, File: "rx_bytes"}, |
| {Out: &out.TxPackets, File: "rx_packets"}, |
| {Out: &out.TxErrors, File: "rx_errors"}, |
| {Out: &out.TxDropped, File: "rx_dropped"}, |
| } |
| for _, netStat := range netStats { |
| data, err := readSysfsNetworkStats(interfaceName, netStat.File) |
| if err != nil { |
| return nil, err |
| } |
| *(netStat.Out) = data |
| } |
| return out, nil |
| } |
| |
// readSysfsNetworkStats reads a single counter from
// /sys/class/net/<ethInterface>/statistics/<statsFile> and parses it as an
// unsigned base-10 integer after trimming surrounding whitespace.
//
// It returns 0 and the read error if the file cannot be read; a parse
// failure is reported with the error from strconv.ParseUint.
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
	statPath := filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)
	raw, err := os.ReadFile(statPath)
	if err != nil {
		return 0, err
	}
	text := string(bytes.TrimSpace(raw))
	return strconv.ParseUint(text, 10, 64)
}
| |
| // loopback is a network strategy that provides a basic loopback device |
| type loopback struct{} |
| |
| func (l *loopback) create(n *network, nspid int) error { |
| return nil |
| } |
| |
| func (l *loopback) initialize(config *network) error { |
| return netlink.LinkSetUp(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: "lo"}}) |
| } |
| |
| func (l *loopback) attach(n *configs.Network) (err error) { |
| return nil |
| } |
| |
| func (l *loopback) detach(n *configs.Network) (err error) { |
| return nil |
| } |
| |
| // devChangeNetNamespace allows to move a device given by name to a network namespace given by nsPath |
| // and optionally change the device name. |
| // The device name will be kept the same if device.Name is the zero value. |
| // This function ensures that the move and rename operations occur atomically. |
| // It preserves existing interface attributes, including global IP addresses. |
| func devChangeNetNamespace(name, nsPath string, device configs.LinuxNetDevice) error { |
| logrus.Debugf("attaching network device %s with attrs %+v to network namespace %s", name, device, nsPath) |
| link, err := netlink.LinkByName(name) |
| // recover same behavior on vishvananda/netlink@1.2.1 and do not fail when the kernel returns NLM_F_DUMP_INTR. |
| if err != nil && !errors.Is(err, netlink.ErrDumpInterrupted) { |
| return fmt.Errorf("link not found for interface %s on runtime namespace: %w", name, err) |
| } |
| |
| // Set the interface link state to DOWN before modifying attributes like namespace or name. |
| // This prevents potential conflicts or disruptions on the host network during the transition, |
| // particularly if other host components depend on this specific interface or its properties. |
| err = netlink.LinkSetDown(link) |
| if err != nil { |
| return fmt.Errorf("fail to set link down: %w", err) |
| } |
| |
| // Get the existing IP addresses on the interface. |
| addresses, err := netlink.AddrList(link, netlink.FAMILY_ALL) |
| // recover same behavior on vishvananda/netlink@1.2.1 and do not fail when the kernel returns NLM_F_DUMP_INTR. |
| if err != nil && !errors.Is(err, netlink.ErrDumpInterrupted) { |
| return fmt.Errorf("fail to get ip addresses: %w", err) |
| } |
| |
| // Do interface rename and namespace change in the same operation to avoid |
| // possible conflicts with the interface name. |
| // NLM_F_REQUEST: "It must be set on all request messages." |
| // NLM_F_ACK: "Request for an acknowledgment on success." |
| // netlink(7) man page: https://man7.org/linux/man-pages/man7/netlink.7.html |
| flags := unix.NLM_F_REQUEST | unix.NLM_F_ACK |
| req := nl.NewNetlinkRequest(unix.RTM_NEWLINK, flags) |
| |
| // Get a netlink socket in current namespace |
| nlSock, err := nl.GetNetlinkSocketAt(netns.None(), netns.None(), unix.NETLINK_ROUTE) |
| if err != nil { |
| return fmt.Errorf("could not get network namespace handle: %w", err) |
| } |
| defer nlSock.Close() |
| |
| req.Sockets = map[int]*nl.SocketHandle{ |
| unix.NETLINK_ROUTE: {Socket: nlSock}, |
| } |
| |
| // Set the interface index. |
| msg := nl.NewIfInfomsg(unix.AF_UNSPEC) |
| msg.Index = int32(link.Attrs().Index) |
| req.AddData(msg) |
| |
| // Set the interface name, also rename if requested. |
| newName := name |
| if device.Name != "" { |
| newName = device.Name |
| } |
| nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(newName)) |
| req.AddData(nameData) |
| |
| // Get the new network namespace. |
| ns, err := netns.GetFromPath(nsPath) |
| if err != nil { |
| return fmt.Errorf("could not get network namespace from path %s for network device %s : %w", nsPath, name, err) |
| } |
| defer ns.Close() |
| |
| val := nl.Uint32Attr(uint32(ns)) |
| attr := nl.NewRtAttr(unix.IFLA_NET_NS_FD, val) |
| req.AddData(attr) |
| |
| _, err = req.Execute(unix.NETLINK_ROUTE, 0) |
| // recover same behavior on vishvananda/netlink@1.2.1 and do not fail when the kernel returns NLM_F_DUMP_INTR. |
| if err != nil && !errors.Is(err, netlink.ErrDumpInterrupted) { |
| return fmt.Errorf("fail to move network device %s to network namespace %s: %w", name, nsPath, err) |
| } |
| |
| // To avoid us the husle with goroutines when joining a netns, |
| // we let the library create the socket in the namespace for us. |
| nhNs, err := netlink.NewHandleAt(ns) |
| if err != nil { |
| return err |
| } |
| defer nhNs.Close() |
| |
| nsLink, err := nhNs.LinkByName(newName) |
| // recover same behavior on vishvananda/netlink@1.2.1 and do not fail when the kernel returns NLM_F_DUMP_INTR. |
| if err != nil && !errors.Is(err, netlink.ErrDumpInterrupted) { |
| return fmt.Errorf("link not found for interface %s on namespace %s : %w", newName, nsPath, err) |
| } |
| |
| // Re-add the original IP addresses to the interface in the new namespace. |
| // The kernel removes IP addresses when an interface is moved between network namespaces. |
| for _, address := range addresses { |
| logrus.Debugf("processing address %s from network device %s", address.String(), name) |
| // Only move permanent IP addresses configured by the user, dynamic addresses are excluded because |
| // their validity may rely on the original network namespace's context and they may have limited |
| // lifetimes and are not guaranteed to be available in a new namespace. |
| // Ref: https://www.ietf.org/rfc/rfc3549.txt |
| if address.Flags&unix.IFA_F_PERMANENT == 0 { |
| logrus.Debugf("skipping address %s from network device %s: not a permanent address", address.String(), name) |
| continue |
| } |
| // Only move IP addresses with global scope because those are not host-specific, auto-configured, |
| // or have limited network scope, making them unsuitable inside the container namespace. |
| // Ref: https://www.ietf.org/rfc/rfc3549.txt |
| if address.Scope != unix.RT_SCOPE_UNIVERSE { |
| logrus.Debugf("skipping address %s from network device %s: not an address with global scope", address.String(), name) |
| continue |
| } |
| // Remove the interface attribute of the original address |
| // to avoid issues when the interface is renamed. |
| err = nhNs.AddrAdd(nsLink, &netlink.Addr{IPNet: address.IPNet}) |
| if err != nil { |
| return fmt.Errorf("fail to set up address %s on namespace %s: %w", address.String(), nsPath, err) |
| } |
| } |
| |
| err = nhNs.LinkSetUp(nsLink) |
| if err != nil { |
| return fmt.Errorf("fail to set up interface %s on namespace %s: %w", nsLink.Attrs().Name, nsPath, err) |
| } |
| |
| return nil |
| } |