Ssh-server somewhat broken since Rocky 9

Perhaps I have expressed myself in a misleading way. It is not the number of CPUs that is relevant, but the speed of the CPU. This part of the code is not threaded, so it’s irrelevant whether you have 1 or 128 CPUs. The CPU clock is relevant!

I used a fresh, unchanged installation via bootimage. Installation: small server. Changed nothing. Reboot after install. Logged in via ssh. That’s it. DNS is ok and no, there is no AD or any other thing involved. It’s just pure ssh (password or key authentication doesn’t matter). IP via DHCP (IPv4 only - no global IPv6 address - behavior just out of the box).
Do you want to have an etc.tar.gz containing the complete configuration? But this shouldn’t be exciting, because it’s just the output of the installer.

Works like a charm. But anyway, it’s not involved at all during login.

Let’s take a look at the strace and the code responsible for this behavior (PAM - pam_modutil_sanitize.c):

/*
 * Close every file descriptor numbered above stderr.
 *
 * The highest descriptor to close is derived from the hard RLIMIT_NOFILE
 * limit, clamped between a fixed floor and a fixed ceiling.  Return values
 * of close() are ignored: most of the probed descriptors are not open.
 */
static void
close_fds(void)
{
        enum {
                /* Arbitrary cap applied to the RLIMIT_NOFILE hard limit. */
                FD_CEILING = 65535,
                /* Floor; same value as _POSIX_OPEN_MAX. */
                FD_FLOOR = 20
        };

        struct rlimit limits;
        /* Used as-is when getrlimit() fails or reports more than the cap. */
        int top = FD_CEILING;

        if (getrlimit(RLIMIT_NOFILE, &limits) == 0 &&
            limits.rlim_max <= (rlim_t)FD_CEILING) {
                if (limits.rlim_max < (rlim_t)FD_FLOOR)
                        top = FD_FLOOR;
                else
                        top = (int)limits.rlim_max - 1;
        }

        /* Walk downwards and stop before touching stdin/stdout/stderr. */
        while (top > STDERR_FILENO) {
                close(top);
                top--;
        }
}

For each login, 65535 fds are closed after the PAM-module is loaded by ssh via clone. 65535 is an arbitrary value. Why not 10000000? Or even more? MIN_FD_NO would be enough!

Excerpt from strace:

1660  write(2, "debug3: PAM: sshpam_passwd_conv "..., 56) = 56 (ssh - auth_pam.c sshpam_passwd_conv)
1660  newfstatat(AT_FDCWD, "/etc/nsswitch.conf", {st_mode=S_IFREG|0644, st_size=2124, ...}, 0) = 0
1660  openat(AT_FDCWD, "/var/lib/sss/mc/passwd", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
1660  openat(AT_FDCWD, "/var/lib/sss/mc/passwd", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
1660  getpid()                          = 1660
1660  socket(AF_UNIX, SOCK_STREAM, 0)   = 4
1660  fcntl(4, F_GETFL)                 = 0x2 (flags O_RDWR)
1660  fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
1660  fcntl(4, F_GETFD)                 = 0
1660  fcntl(4, F_SETFD, FD_CLOEXEC)     = 0
1660  connect(4, {sa_family=AF_UNIX, sun_path="/var/lib/sss/pipes/nss"}, 110) = -1 ENOENT (No such file or directory)
1660  close(4)                          = 0
1660  openat(AT_FDCWD, "/etc/passwd", O_RDONLY|O_CLOEXEC) = 4
1660  newfstatat(4, "", {st_mode=S_IFREG|0644, st_size=1571, ...}, AT_EMPTY_PATH) = 0
1660  lseek(4, 0, SEEK_SET)             = 0
1660  read(4, "root:x:0:0:root:/root:/bin/bash\n"..., 4096) = 1571
1660  close(4)                          = 0
1660  geteuid()                         = 0
1660  pipe([4, 7])                      = 0
1660  rt_sigaction(SIGCHLD, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f3405454df0}, {sa_handler=SIG_DFL, sa_mask=~[KILL STOP RTMIN RT_1], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f3405454df0}, 8) = 0
1660  clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f3405610050) = 1664
1660  write(7, "passwd", 8)           = 8
1660  write(7, "\0", 1 <unfinished ...>
1664  set_robust_list(0x7f3405610060, 24 <unfinished ...>
1660  <... write resumed>)              = 1
1664  <... set_robust_list resumed>)    = 0
1660  close(4)                          = 0
1660  close(7)                          = 0
1664  dup2(4, 0 <unfinished ...>
1660  wait4(1664,  <unfinished ...>
1664  <... dup2 resumed>)               = 0
1664  pipe([8, 9])                      = 0
1664  close(9)                          = 0
1664  dup2(8, 1)                        = 1
1664  close(8)                          = 0
1664  dup2(1, 2)                        = 2
1664  prlimit64(0, RLIMIT_NOFILE, NULL, {rlim_cur=1024, rlim_max=512*1024}) = 0
1664  close(65535)                      = -1 EBADF (Bad file descriptor)
...
1664  close(10)                         = -1 EBADF (Bad file descriptor)
1664  close(9)                          = -1 EBADF (Bad file descriptor)
1664  close(8)                          = -1 EBADF (Bad file descriptor)
1664  close(7)                          = 0
1664  close(6)                          = 0
1664  close(5)                          = 0
1664  close(4)                          = 0
1664  close(3)                          = 0
1664  geteuid()                         = 0
1664  setuid(0)                         = 0
1664  execve("/usr/sbin/unix_chkpwd", ["/usr/sbin/unix_chkpwd", "andreas", "nonull"], 0x7f34050ff040 /* 0 vars */) = 0
1664  close(3)                          = 0
1664  getuid()                          = 0
1664  exit_group(0)                     = ?
1664  +++ exited with 0 +++
1660  <... wait4 resumed>[{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 1664
1660  --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=1664, si_uid=0, si_status=0, si_utime=0, si_stime=1} ---
1660  rt_sigaction(SIGCHLD, {sa_handler=SIG_DFL, sa_mask=~[KILL STOP RTMIN RT_1], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f3405454df0}, NULL, 8) = 0
1660  socket(AF_NETLINK, SOCK_RAW, NETLINK_AUDIT) = 4
1660  fcntl(4, F_SETFD, FD_CLOEXEC)     = 0
1660  socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 7
1660  bind(7, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0
1660  getsockname(7, {sa_family=AF_NETLINK, nl_pid=1660, nl_groups=00000000}, [12]) = 0
1660  sendto(7, [{nlmsg_len=20, nlmsg_type=RTM_GETADDR, nlmsg_flags=NLM_F_REQUEST|NLM_F_DUMP, nlmsg_seq=1690995697, nlmsg_pid=0}, {ifa_family=AF_UNSPEC, ...}], 20, 0, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 20
1660  recvmsg(7, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=76, nlmsg_type=RTM_NEWADDR, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1690995697, nlmsg_pid=1660}, {ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")}, [[{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")], [{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")], [{nla_len=7, nla_type=IFA_LABEL}, "lo"], [{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT], [{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=50, tstamp=50}]]], [{nlmsg_len=88, nlmsg_type=RTM_NEWADDR, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1690995697, nlmsg_pid=1660}, {ifa_family=AF_INET, ifa_prefixlen=24, ifa_flags=0, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("ens3")}, [[{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("192.168.1.76")], [{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("192.168.1.76")], [{nla_len=8, nla_type=IFA_BROADCAST}, inet_addr("192.168.1.255")], [{nla_len=9, nla_type=IFA_LABEL}, "ens3"], [{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_NOPREFIXROUTE], [{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=1727819, ifa_valid=1727819, cstamp=370, tstamp=370}]]]], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 164
1660  recvmsg(7, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=72, nlmsg_type=RTM_NEWADDR, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1690995697, nlmsg_pid=1660}, {ifa_family=AF_INET6, ifa_prefixlen=128, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")}, [[{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "::1")], [{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=50, tstamp=50}], [{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT]]], [{nlmsg_len=72, nlmsg_type=RTM_NEWADDR, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1690995697, nlmsg_pid=1660}, {ifa_family=AF_INET6, ifa_prefixlen=64, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_LINK, ifa_index=if_nametoindex("ens3")}, [[{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "fe80::5054:ff:fe3e:ec59")], [{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=367, tstamp=367}], [{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT|IFA_F_NOPREFIXROUTE]]]], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 144
1660  recvmsg(7, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=20, nlmsg_type=NLMSG_DONE, nlmsg_flags=NLM_F_MULTI, nlmsg_seq=1690995697, nlmsg_pid=1660}, 0], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
1660  close(7)                          = 0
1660  readlink("/proc/self/exe", "/usr/sbin/sshd", 4096) = 14
1660  sendto(4, [{nlmsg_len=156, nlmsg_type=0x44c /* NLMSG_??? */, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK, nlmsg_seq=3, nlmsg_pid=0}, "\x6f\x70\x3d\x50\x41\x4d\x3a\x61\x75\x74\x68\x65\x6e\x74\x69\x63\x61\x74\x69\x6f\x6e\x20\x67\x72\x61\x6e\x74\x6f\x72\x73\x3d\x70"...], 156, 0, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 156
1660  poll([{fd=4, events=POLLIN}], 1, 500) = 1 ([{fd=4, revents=POLLIN}])
1660  recvfrom(4, [{nlmsg_len=36, nlmsg_type=NLMSG_ERROR, nlmsg_flags=NLM_F_CAPPED, nlmsg_seq=3, nlmsg_pid=1660}, {error=0, msg={nlmsg_len=156, nlmsg_type=AUDIT_FIRST_USER_MSG, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK, nlmsg_seq=3, nlmsg_pid=0}}], 8988, MSG_PEEK|MSG_DONTWAIT, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, [12]) = 36
1660  recvfrom(4, [{nlmsg_len=36, nlmsg_type=NLMSG_ERROR, nlmsg_flags=NLM_F_CAPPED, nlmsg_seq=3, nlmsg_pid=1660}, {error=0, msg={nlmsg_len=156, nlmsg_type=AUDIT_FIRST_USER_MSG, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK, nlmsg_seq=3, nlmsg_pid=0}}], 8988, MSG_DONTWAIT, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, [12]) = 36
1660  close(4)                          = 0
1660  getpid()                          = 1660
1660  write(2, "debug1: PAM: password authentica"..., 59) = 59

Yes, you are right, the major problem is: why is this code path used at all? That’s what I’m wondering. No other distribution uses this code path. I additionally tested Ubuntu Server 22.04, for example - it behaves as it should, too. It’s just Rocky 9 and Alma 9 (at least, they are bug compatible :slight_smile: ).

I “fixed” the bug for myself (it is really a workaround), and now it can be used on slow machines, too. The implementation above is highly questionable, and for me it is especially questionable why this code path is used at all. From my point of view, it shouldn’t be used at all. Normally, the closefrom() function from ssh should be used (this at least is the behavior of Rocky 8 and CentOS 7).

Thanks!
laola23