It looks like the error can have different intensities. But when it starts, it won’t go away till the next reboot. We are still searching for a way to reliably reproduce the error. Some clients seem to have a greater tendency to get the problem ~days instead of ~weeks. And it looks like compiling code can trigger the problem since one of the most affected NFS clients is a compile only server, which does not execute the code it compiles.
The pattern is: the client runs fine till perf top
shows the 4 symbols:
x.xx% [k] nfs_mark_test_expired_all_delegations
x.xx% [k] nfs_server_reap_expired_delegations
x.xx% [k] nfs_server_return_marked_delegations
x.xx% [k] nfs_expire_unreferenced_delegations
And NFS access gets slower. When the percentage of nfs_mark_test_expired_all_delegations
is around ~10% the system is a little sluggish. It can get better. echo 3 > /proc/sys/vm/drop_caches
always helps, echo 2 > /proc/sys/vm/drop_caches
helps in most cases, echo 1 > /proc/sys/vm/drop_caches
helps in some cases. But nfs_mark_test_expired_all_delegations
does not go completely away, once it was there. Only a reboot makes this, for some days.
If we could reproduce the problem, we could test if it started with a certain kernel if a newer one would fix the issue and what is the cause behind it. Like: 2176575 – intermittent severe NFS client performance drop via nfs_server_reap_expired_delegations looping?
Some details about the kernel symbols.
nfs_mark_test_expired_all_delegations:
Percent│ nop
│ push %r13
│ push %r12
│ push %rbp
│ push %rbx
│ mov 0xc8(%rdi),%rax
│ lea 0xc8(%rdi),%r13
│ cmp %rax,%r13
│ ↓ je 5c
│ lea -0x8(%rax),%rbp
│22: mov 0x300(%rbp),%rbx
0.04 │ lea 0x300(%rbp),%r12
│ cmp %r12,%rbx
│ ↓ je 4f
│35: mov 0x30(%rbx),%eax
98.61 │ test %eax,%eax
│ ↓ je 47
│ mov %rbx,%rsi
│ mov %rbp,%rdi
0.01 │ → callq nfs_mark_test_expired_delegation.isra.16.part.17
│47: mov (%rbx),%rbx
1.29 │ cmp %r12,%rbx
0.02 │ ↑ jne 35
│4f: mov 0x8(%rbp),%rax
0.04 │ lea -0x8(%rax),%rbp
│ cmp %rax,%r13
│ ↑ jne 22
│5c: pop %rbx
│ pop %rbp
│ pop %r12
nfs_server_reap_expired_delegations:
Percent│ lea 0x300(%rdi),%r13
│ push %r12
│ push %rbx
│ and $0xfffffffffffffff0,%rsp
│ sub $0x30,%rsp
│ mov %gs:0x28,%rax
│ mov %rax,0x28(%rsp)
│ xor %eax,%eax
│ mov 0x320(%rdi),%rax
0.01 │ lea 0x1(%rax),%r12
│ mov %r12,0x320(%rdi)
│ 46: mov 0x300(%r14),%rbx
0.03 │ cmp %r13,%rbx
│ ↓ je 168
│ 56: mov 0x50(%rbx),%rax
80.48 │ test $0x80,%al
0.03 │ ↓ jne 15c
0.39 │ mov 0x50(%rbx),%rax
2.03 │ test $0x10,%al
0.32 │ ↓ jne 15c
│ mov 0x50(%rbx),%rax
1.07 │ test $0x40,%al
0.17 │ ↓ je 15c
0.03 │ cmp 0x48(%rbx),%r12
0.28 │ ↓ je 15c
0.04 │ mov %rbx,%rdi
│ → callq nfs_delegation_grab_inode
│ mov %rax,%r15
│ test %rax,%rax
│ ↑ je 46
nfs_server_return_marked_delegations:
Percent│ pop %r14
│ pop %r15
│ → jmpq 0xffffffffb5626840
│ b3: lea 0x5c(%rbx),%rax
│ mov %rdx,0x10(%rsp)
│ mov %rax,%rdi
│ mov %rax,0x8(%rsp)
│ → callq _raw_spin_lock
│ mov 0x18(%rbx),%rsi
0.53 │ xor %r8d,%r8d
│ mov 0x8(%rsp),%rax
│ mov 0x10(%rsp),%rdx
│ test %rsi,%rsi
│ ↓ je ee
│ mov -0x78(%rsi),%rdi
41.26 │ sub $0x78,%rsi
│ cmp %rdi,%rsi
│ sete %r8b
│ ee: mov %rax,%rdi
│ movb $0x0,(%rdi)
│ nop
│ test %r8b,%r8b
│ ↑ je 70
│101: lock andb $0xfb,(%rdx)
5.37 │ mov 0x50(%rbx),%rax
1.81 │ mov $0x1,%r15d
│ test $0x10,%al
│ ↑ jne 7c
│117: mov 0x50(%rbx),%rax
│ test $0x1,%ah
nfs_expire_unreferenced_delegations:
Percent│
│ Disassembly of section load0:
│
│ ffffffffc0c3b200 <load0>:
│ nop
│ mov 0xc8(%rdi),%rax
│ lea 0xc8(%rdi),%r8
│ lea -0x8(%rax),%rdx
│ cmp %rax,%r8
│ ↓ je 5d
│1c: mov 0x300(%rdx),%rax
│ lea 0x300(%rdx),%rsi
│ cmp %rsi,%rax
│ ↓ je 50
│2f: lock btrq $0x3,0x50(%rax)
78.58 │ ┌──jb 48
│ │ lock orb $0x4,0x50(%rax)
9.85 │ │ mov (%rdx),%rcx
│ │ lock orb $0x20,0x130(%rcx)
10.14 │48:└─→mov (%rax),%rax
1.43 │ cmp %rsi,%rax
│ ↑ jne 2f
│50: mov 0x8(%rdx),%rax
│ lea -0x8(%rax),%rdx
│ cmp %rax,%r8
│ ↑ jne 1c
│5d: mov 0x130(%rdi),%rax
│ test $0x20,%al
│ ↓ jne 6d
│ → jmpq 0xffffffffb5626840