Univention Bugzilla – Bug 52477
Cups frequently stops working with 100% cpu usage
Last modified: 2021-01-19 16:44:31 CET
cupsd frequently stops working. Once happened this can be obsesrved: The cupsd had 1511 sockets open. Using strace, it was possible to trace that the cupsd was listening on a very large number (probably 750; the socket numbers were all even) of sockets and, as shown below in the strace output (output is from dthe server, where the problem occurred exactly the same way with far fewer sockets), looked at one byte from all the sockets without taking it out of the socket, only to do the same again afterwards. The code is waiting in httpWait() according to gdb traceback: #0 0x00007fb3166518a0 in __poll_nocancel () at ../sysdeps/unix/syscall-template.S:84 #1 0x00007fb316dac287 in _httpWait () from /usr/lib/x86_64-linux-gnu/libcups.so.2 #2 0x0000557e072d4617 in ?? () #3 0x0000557e072fbf5e in ?? () #4 0x0000557e072cbfa2 in ?? () #5 0x00007fb3165922e1 in __libc_start_main (main=0x557e072cb5e0, argc=2, argv=0x7ffdda3320d8, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7ffdda3320c8) at ../csu/libc-start.c:291 #6 0x0000557e072cd2ba in ?? ()
Steps to reproduce: If the following command is executed once for each printer (→ replace $PRINTER), it is possible to bring the cupsd to 100% CPU state even with fewer connections without reaching the limit of 100/300 clients: univention-lpadmin -E -p "$PRINTER" -o printer-error-policy=abort-job -o PageSize=A4 Output of "strace -tt -p $PID": [...] epoll_wait(4, [{EPOLLIN, {u32=3990127824, u64=94334356849872}}, {EPOLLIN, {u32=3990246576, u64=94334356968624}}, {EPOLLIN, {u32=3992490816, u64=94334359212864}}, {EPOLLIN, {u32=3989978896, u64=94334356700944}}, {EPOLLIN, {u32=3991067152, u64=94334357789200}}, {EPOLLIN, {u32=3990201632, u64=94334356923680}}, {EPOLLIN, {u32=3991840592, u64=94334358562640}}, {EPOLLIN, {u32=3991052464, u64=94334357774512}}, {EPOLLIN, {u32=3991161280, u64=94334357883328}}, {EPOLLIN, {u32=3992422736, u64=94334359144784}}, {EPOLLIN, {u32=3992028576, u64=94334358750624}}], 4096, 1000) = 11 recvfrom(13, "\230", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=13, events=POLLIN}], 1, 0) = 1 ([{fd=13, revents=POLLIN}]) recvfrom(14, "\304", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=14, events=POLLIN}], 1, 0) = 1 ([{fd=14, revents=POLLIN}]) recvfrom(16, "\"", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=16, events=POLLIN}], 1, 0) = 1 ([{fd=16, revents=POLLIN}]) recvfrom(19, "J", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=19, events=POLLIN}], 1, 0) = 1 ([{fd=19, revents=POLLIN}]) recvfrom(21, "\250", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=21, events=POLLIN}], 1, 0) = 1 ([{fd=21, revents=POLLIN}]) recvfrom(23, "\350", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=23, events=POLLIN}], 1, 0) = 1 ([{fd=23, revents=POLLIN}]) recvfrom(25, "\201", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=25, events=POLLIN}], 1, 0) = 1 ([{fd=25, revents=POLLIN}]) recvfrom(27, "T", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=27, events=POLLIN}], 1, 0) = 1 ([{fd=27, revents=POLLIN}]) recvfrom(29, "X", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=29, events=POLLIN}], 1, 0) = 1 ([{fd=29, revents=POLLIN}]) recvfrom(31, "\337", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=31, events=POLLIN}], 1, 0) = 1 ([{fd=31, revents=POLLIN}]) recvfrom(33, "\331", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=33, events=POLLIN}], 1, 0) = 1 ([{fd=33, revents=POLLIN}]) epoll_wait(4, [{EPOLLIN, {u32=3990127824, u64=94334356849872}}, {EPOLLIN, {u32=3990246576, u64=94334356968624}}, {EPOLLIN, {u32=3992490816, u64=94334359212864}}, {EPOLLIN, {u32=3989978896, u64=94334356700944}}, {EPOLLIN, {u32=3991067152, u64=94334357789200}}, {EPOLLIN, {u32=3990201632, u64=94334356923680}}, {EPOLLIN, {u32=3991840592, u64=94334358562640}}, {EPOLLIN, {u32=3991052464, u64=94334357774512}}, {EPOLLIN, {u32=3991161280, u64=94334357883328}}, {EPOLLIN, {u32=3992422736, u64=94334359144784}}, {EPOLLIN, {u32=3992028576, u64=94334358750624}}], 4096, 1000) = 11 recvfrom(13, "\230", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=13, events=POLLIN}], 1, 0) = 1 ([{fd=13, revents=POLLIN}]) recvfrom(14, "\304", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=14, events=POLLIN}], 1, 0) = 1 ([{fd=14, revents=POLLIN}]) recvfrom(16, "\"", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=16, events=POLLIN}], 1, 0) = 1 ([{fd=16, revents=POLLIN}]) recvfrom(19, "J", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=19, events=POLLIN}], 1, 0) = 1 ([{fd=19, revents=POLLIN}]) recvfrom(21, "\250", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=21, events=POLLIN}], 1, 0) = 1 ([{fd=21, revents=POLLIN}]) recvfrom(23, "\350", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=23, events=POLLIN}], 1, 0) = 1 ([{fd=23, revents=POLLIN}]) recvfrom(25, "\201", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=25, events=POLLIN}], 1, 0) = 1 ([{fd=25, revents=POLLIN}]) recvfrom(27, "T", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=27, events=POLLIN}], 1, 0) = 1 ([{fd=27, revents=POLLIN}]) recvfrom(29, "X", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=29, events=POLLIN}], 1, 0) = 1 ([{fd=29, revents=POLLIN}]) recvfrom(31, "\337", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=31, events=POLLIN}], 1, 0) = 1 ([{fd=31, revents=POLLIN}]) recvfrom(33, "\331", 1, MSG_PEEK, NULL, NULL) = 1 poll([{fd=33, events=POLLIN}], 1, 0) = 1 ([{fd=33, revents=POLLIN}]) epoll_wait(4, [{EPOLLIN, {u32=3990127824, u64=94334356849872}}, {EPOLLIN, {u32=3990246576, u64=94334356968624}}, {EPOLLIN, {u32=3992490816, u64=94334359212864}}, {EPOLLIN, {u32=3989978896, u64=94334356700944}}, {EPOLLIN, {u32=3991067152, u64=94334357789200}}, {EPOLLIN, {u32=3990201632, u64=94334356923680}}, {EPOLLIN, {u32=3991840592, u64=94334358562640}}, {EPOLLIN, {u32=3991052464, u64=94334357774512}}, {EPOLLIN, {u32=3991161280, u64=94334357883328}}, {EPOLLIN, {u32=3992422736, u64=94334359144784}}, {EPOLLIN, {u32=3992028576, u64=94334358750624}}], 4096, 1000) = 11 [...]
Customer opened a ticket again. At some schools the problem occurs often and if it is not possible to print at that school.