← Back to team overview

kernel-packages team mailing list archive

[Bug 1541326] [NEW] WARNING: at /build/linux-lts-wily-W0lTWH/linux-lts-wily-4.2.0/net/core/skbuff.c:4174 (Travis IB)

 

You have been subscribed to a public bug:

== Comment: #0 - Manvanthara B. Puttashankar <mputtash@xxxxxxxxxx> - 2016-01-03 10:52:39 ==
---Problem Description---
WARNING: at /build/linux-lts-wily-W0lTWH/linux-lts-wily-4.2.0/net/core/skbuff.c:4174
 
Contact Information = mputtash@xxxxxxxxxx 
 
---uname output---
Linux ltcalpine-lp4 4.2.0-21-generic #25~14.04.1-Ubuntu SMP Thu Dec 3 13:55:42 UTC 2015 ppc64le ppc64le ppc64le GNU/Linux
 
Machine Type = 8408-E8E 
 
---Debugger---
A debugger is not configured
 
---Steps to Reproduce---
 The trace was found in syslog within 30 minutes while running fsstress on the Travis IB controller.


NFS server:

root@ubuntu140404:~# mkdir -p /home/share
root@ubuntu140404:~#  /sbin/rpcbind start
rpcbind: another rpcbind is already running. Aborting
root@ubuntu140404:~# /sbin/rpcbind  status
rpcbind: another rpcbind is already running. Aborting
root@ubuntu140404:~# rpcinfo
   program version netid     address                service    owner
    100000    4    tcp6      ::.0.111               portmapper superuser
    100000    3    tcp6      ::.0.111               portmapper superuser
    100000    4    udp6      ::.0.111               portmapper superuser
    100000    3    udp6      ::.0.111               portmapper superuser
    100000    4    tcp       0.0.0.0.0.111          portmapper superuser
    100000    3    tcp       0.0.0.0.0.111          portmapper superuser
    100000    2    tcp       0.0.0.0.0.111          portmapper superuser
    100000    4    udp       0.0.0.0.0.111          portmapper superuser
    100000    3    udp       0.0.0.0.0.111          portmapper superuser
    100000    2    udp       0.0.0.0.0.111          portmapper superuser
    100000    4    local     /run/rpcbind.sock      portmapper superuser
    100000    3    local     /run/rpcbind.sock      portmapper superuser
    100024    1    udp       0.0.0.0.146.239        status     104
    100024    1    tcp       0.0.0.0.175.171        status     104
    100024    1    udp6      ::.235.98              status     104
    100024    1    tcp6      ::.215.65              status     104
root@ubuntu140404:~# cat /etc/exports 
# /etc/exports: the access control list for filesystems which may be exported
#		to NFS clients.  See exports(5).
#
# Example for NFSv2 and NFSv3:
# /srv/homes       hostname1(rw,sync,no_subtree_check) hostname2(ro,sync,no_subtree_check)
#
# Example for NFSv4:
# /srv/nfs4        gss/krb5i(rw,sync,fsid=0,crossmnt,no_subtree_check)
# /srv/nfs4/homes  gss/krb5i(rw,sync,no_subtree_check)
#
/home/share 12.12.12.13(rw,sync,no_subtree_check)
root@ubuntu140404:~# /etc/init.d/nfs-kernel-server  start
 * Exporting directories for NFS kernel daemon...
   ...done.
 * Starting NFS kernel daemon
   ...done.
root@ubuntu140404:~# exportfs -av
exporting 12.12.12.13:/home/share
root@ubuntu140404:~# rpcinfo
   program version netid     address                service    owner
    100000    4    tcp6      ::.0.111               portmapper superuser
    100000    3    tcp6      ::.0.111               portmapper superuser
    100000    4    udp6      ::.0.111               portmapper superuser
    100000    3    udp6      ::.0.111               portmapper superuser
    100000    4    tcp       0.0.0.0.0.111          portmapper superuser
    100000    3    tcp       0.0.0.0.0.111          portmapper superuser
    100000    2    tcp       0.0.0.0.0.111          portmapper superuser
    100000    4    udp       0.0.0.0.0.111          portmapper superuser
    100000    3    udp       0.0.0.0.0.111          portmapper superuser
    100000    2    udp       0.0.0.0.0.111          portmapper superuser
    100000    4    local     /run/rpcbind.sock      portmapper superuser
    100000    3    local     /run/rpcbind.sock      portmapper superuser
    100024    1    udp       0.0.0.0.146.239        status     104
    100024    1    tcp       0.0.0.0.175.171        status     104
    100024    1    udp6      ::.235.98              status     104
    100024    1    tcp6      ::.215.65              status     104
    100003    2    tcp       0.0.0.0.8.1            nfs        superuser
    100003    3    tcp       0.0.0.0.8.1            nfs        superuser
    100003    4    tcp       0.0.0.0.8.1            nfs        superuser
    100227    2    tcp       0.0.0.0.8.1            -          superuser
    100227    3    tcp       0.0.0.0.8.1            -          superuser
    100003    2    udp       0.0.0.0.8.1            nfs        superuser
    100003    3    udp       0.0.0.0.8.1            nfs        superuser
    100003    4    udp       0.0.0.0.8.1            nfs        superuser
    100227    2    udp       0.0.0.0.8.1            -          superuser
    100227    3    udp       0.0.0.0.8.1            -          superuser
    100003    2    tcp6      ::.8.1                 nfs        superuser
    100003    3    tcp6      ::.8.1                 nfs        superuser
    100003    4    tcp6      ::.8.1                 nfs        superuser
    100227    2    tcp6      ::.8.1                 -          superuser
    100227    3    tcp6      ::.8.1                 -          superuser
    100003    2    udp6      ::.8.1                 nfs        superuser
    100003    3    udp6      ::.8.1                 nfs        superuser
    100003    4    udp6      ::.8.1                 nfs        superuser
    100227    2    udp6      ::.8.1                 -          superuser
    100227    3    udp6      ::.8.1                 -          superuser
    100021    1    udp       0.0.0.0.191.68         nlockmgr   superuser
    100021    3    udp       0.0.0.0.191.68         nlockmgr   superuser
    100021    4    udp       0.0.0.0.191.68         nlockmgr   superuser
    100021    1    tcp       0.0.0.0.174.157        nlockmgr   superuser
    100021    3    tcp       0.0.0.0.174.157        nlockmgr   superuser
    100021    4    tcp       0.0.0.0.174.157        nlockmgr   superuser
    100021    1    udp6      ::.168.136             nlockmgr   superuser
    100021    3    udp6      ::.168.136             nlockmgr   superuser
    100021    4    udp6      ::.168.136             nlockmgr   superuser
    100021    1    tcp6      ::.138.214             nlockmgr   superuser
    100021    3    tcp6      ::.138.214             nlockmgr   superuser
    100021    4    tcp6      ::.138.214             nlockmgr   superuser
    100005    1    udp       0.0.0.0.147.176        mountd     superuser
    100005    1    tcp       0.0.0.0.228.2          mountd     superuser
    100005    1    udp6      ::.144.73              mountd     superuser
    100005    1    tcp6      ::.205.71              mountd     superuser
    100005    2    udp       0.0.0.0.232.199        mountd     superuser
    100005    2    tcp       0.0.0.0.150.167        mountd     superuser
    100005    2    udp6      ::.149.148             mountd     superuser
    100005    2    tcp6      ::.178.116             mountd     superuser
    100005    3    udp       0.0.0.0.223.102        mountd     superuser
    100005    3    tcp       0.0.0.0.213.191        mountd     superuser
    100005    3    udp6      ::.169.30              mountd     superuser
    100005    3    tcp6      ::.233.128             mountd     superuser
root@ubuntu140404:~# 



--------------------------------------------------------


NFS client:
root@ltcalpine-lp4:~# cat /proc/cpuinfo 
processor	: 0
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 1
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 2
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 3
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 4
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 5
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 6
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 7
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 8
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 9
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 10
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 11
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 12
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 13
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 14
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

processor	: 15
cpu		: POWER8 (architected), altivec supported
clock		: 3026.000000MHz
revision	: 2.1 (pvr 004b 0201)

timebase	: 512000000
platform	: pSeries
model		: IBM,8408-E8E
machine		: CHRP IBM,8408-E8E
root@ltcalpine-lp4:~# 
root@ltcalpine-lp4:~# free -g
             total       used       free     shared    buffers     cached
Mem:            49          0         49          0          0          0
-/+ buffers/cache:          0         49
Swap:            2          0          2

root@ltcalpine-lp4:~# apt-get install nfs-common
root@ltcalpine-lp4:~# mount -t nfs 12.12.12.12:/home/share /mnt
root@ltcalpine-lp4:~/ltp-full-20150420/testcases/kernel/fs/fsstress# echo "==============GAN fsstress" >> /var/log/syslog
root@ltcalpine-lp4:~/ltp-full-20150420/testcases/kernel/fs/fsstress# 
root@ltcalpine-lp4:~/ltp-full-20150420/testcases/kernel/fs/fsstress# nohup  ./fsstress  -d /mnt -l 0  -n 300 -p 300 -r &
[1] 2400
root@ltcalpine-lp4:~/ltp-full-20150420/testcases/kernel/fs/fsstress# nohup: ignoring input and appending output to 'nohup.out'


root@ltcalpine-lp4:~# netstat -i
Kernel Interface table
Iface   MTU Met   RX-OK RX-ERR RX-DRP RX-OVR    TX-OK TX-ERR TX-DRP TX-OVR Flg
eth0       1500 0     26078      0      0 0           199      0      0      0 BMRU
ib0        2044 0    169370      0      0 0         60577      0      7      0 BMRU
lo        65536 0        23      0      0 0            23      0      0      0 LRU



 root@ltcalpine-lp4:~# tail -f /var/log/syslog
Jan  3 10:20:57 ltcalpine-lp4 kernel: [  260.938162] Key type id_legacy registered
Jan  3 10:21:01 ltcalpine-lp4 CRON[2302]: (nobody) CMD (if [ -x /usr/bin/php5 ] && [ -f /usr/share/serverstats/update.php ]; then /usr/bin/php5 /usr/share/serverstats/update.php > /dev/null; fi)
Jan  3 10:21:01 ltcalpine-lp4 CRON[2301]: (CRON) info (No MTA installed, discarding output)
Jan  3 10:22:01 ltcalpine-lp4 CRON[2335]: (nobody) CMD (if [ -x /usr/bin/php5 ] && [ -f /usr/share/serverstats/update.php ]; then /usr/bin/php5 /usr/share/serverstats/update.php > /dev/null; fi)
Jan  3 10:22:01 ltcalpine-lp4 CRON[2334]: (CRON) info (No MTA installed, discarding output)
Jan  3 10:23:01 ltcalpine-lp4 CRON[2367]: (nobody) CMD (if [ -x /usr/bin/php5 ] && [ -f /usr/share/serverstats/update.php ]; then /usr/bin/php5 /usr/share/serverstats/update.php > /dev/null; fi)
Jan  3 10:23:01 ltcalpine-lp4 CRON[2366]: (CRON) info (No MTA installed, discarding output)
==============GAN fsstress
Jan  3 10:24:01 ltcalpine-lp4 CRON[2758]: (nobody) CMD (if [ -x /usr/bin/php5 ] && [ -f /usr/share/serverstats/update.php ]; then /usr/bin/php5 /usr/share/serverstats/update.php > /dev/null; fi)
Jan  3 10:24:01 ltcalpine-lp4 CRON[2757]: (CRON) info (No MTA installed, discarding output)
Jan  3 10:25:01 ltcalpine-lp4 CRON[2864]: (nobody) CMD (if [ -x /usr/bin/php5 ] && [ -f /usr/share/serverstats/update.php ]; then /usr/bin/php5 /usr/share/serverstats/update.php > /dev/null; fi)
Jan  3 10:25:01 ltcalpine-lp4 CRON[2863]: (CRON) info (No MTA installed, discarding output)
Jan  3 10:26:01 ltcalpine-lp4 CRON[2981]: (nobody) CMD (if [ -x /usr/bin/php5 ] && [ -f /usr/share/serverstats/update.php ]; then /usr/bin/php5 /usr/share/serverstats/update.php > /dev/null; fi)
Jan  3 10:26:01 ltcalpine-lp4 CRON[2980]: (CRON) info (No MTA installed, discarding output)
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229293] ------------[ cut here ]------------
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229301] WARNING: at /build/linux-lts-wily-W0lTWH/linux-lts-wily-4.2.0/net/core/skbuff.c:4174
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229303] Modules linked in: rpcsec_gss_krb5 nfsv4 nfsd auth_rpcgss nfs_acl nfs lockd grace sunrpc fscache mlx4_ib ib_ipoib rdma_ucm rdma_cm iw_cm ib_umad ib_ucm ib_cm ib_sa ib_mad ib_uverbs ib_core ib_addr pseries_rng rtc_generic mlx4_en vxlan ip6_udp_tunnel udp_tunnel bnx2x mlx4_core mdio libcrc32c
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229338] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.2.0-21-generic #25~14.04.1-Ubuntu
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229342] task: c000000001480b30 ti: c000000c7ffdc000 task.ti: c0000000014f8000
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229344] NIP: c0000000008f484c LR: c000000000981844 CTR: c0000000009d40e0
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229347] REGS: c000000c7ffdf2d0 TRAP: 0700   Not tainted  (4.2.0-21-generic)
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229349] MSR: 8000000000029033 <SF,EE,ME,IR,DR,RI,LE>  CR: 28008048  XER: 00000000
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] CFAR: c0000000008f46a4 SOFTE: 1 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR00: 0000000000000000 c000000c7ffdf550 c0000000014fdf00 c00000000145df00 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR04: c000000bfa4fec00 0000000000000000 c000000c2aeedb00 c000000bfa4fdb00 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR08: 0000000000000001 c000000c2aeeec00 0000000000000005 0000000bfa4f0000 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR12: 0000000000002200 c00000000e7e0000 0000000000000001 000000000000012c 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR16: 0000000000000000 0000000000000001 c000000c7ffdfd90 0000000000000000 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR20: 000000000000dd86 0000000000000000 c000000000d83070 c000000000d83048 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR24: c000000c6f6c0098 0000000000000234 c000000c7ffdf5d0 0000000000000458 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR28: 0000000000000458 c000000c7ffdf660 c000000c40cf0d00 c000000c40cf9200 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229390] NIP [c0000000008f484c] skb_try_coalesce+0x40c/0x450
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229393] LR [c000000000981844] tcp_try_coalesce+0x94/0x140
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229395] Call Trace:
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229398] [c000000c7ffdf550] [c000000c7ffdf5b0] 0xc000000c7ffdf5b0 (unreliable)
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229402] [c000000c7ffdf5b0] [c000000000981844] tcp_try_coalesce+0x94/0x140
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229405] [c000000c7ffdf600] [c000000000983358] tcp_data_queue+0x3f8/0xf90
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229408] [c000000c7ffdf6d0] [c0000000009861ac] tcp_rcv_established+0x1cc/0x7a0
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229412] [c000000c7ffdf730] [c000000000992104] tcp_v4_do_rcv+0x1b4/0x490
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229415] [c000000c7ffdf790] [c000000000995f60] tcp_v4_rcv+0xb40/0xb60
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229418] [c000000c7ffdf870] [c000000000963eb8] ip_local_deliver_finish+0x178/0x350
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229422] [c000000c7ffdf8c0] [c0000000009646d4] ip_local_deliver+0x54/0x130
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229424] [c000000c7ffdf930] [c0000000009641e4] ip_rcv_finish+0x154/0x420
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229427] [c000000c7ffdf9b0] [c000000000964a3c] ip_rcv+0x28c/0x430
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229432] [c000000c7ffdfa40] [c00000000090ad44] __netif_receive_skb_core+0x754/0xd30
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229435] [c000000c7ffdfb20] [c00000000090e3f4] netif_receive_skb_internal+0x34/0xd0
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229438] [c000000c7ffdfb60] [c00000000090efe4] dev_gro_receive+0x2d4/0x430
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229441] [c000000c7ffdfbd0] [c00000000090f408] napi_gro_receive+0x48/0x1b0
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229447] [c000000c7ffdfc10] [d00000000d9846c0] ipoib_ib_handle_rx_wc+0x180/0x310 [ib_ipoib]
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229452] [c000000c7ffdfcb0] [d00000000d98571c] ipoib_poll+0x16c/0x250 [ib_ipoib]
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229455] [c000000c7ffdfd30] [c00000000090ebb8] net_rx_action+0x2d8/0x430
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229459] [c000000c7ffdfe40] [c0000000000ba124] __do_softirq+0x174/0x390
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229463] [c000000c7ffdff40] [c0000000000ba6c8] irq_exit+0xc8/0x100
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229466] [c000000c7ffdff60] [c0000000000111ec] __do_irq+0x8c/0x190
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229470] [c000000c7ffdff90] [c000000000024278] call_do_irq+0x14/0x24
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229473] [c0000000014fb980] [c000000000011390] do_IRQ+0xa0/0x120
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229477] [c0000000014fb9e0] [c000000000002594] hardware_interrupt_common+0x114/0x180
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229483] --- interrupt: 501 at plpar_hcall_norets+0x1c/0x28
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229483]     LR = check_and_cede_processor+0x34/0x50
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229488] [c0000000014fbcd0] [c0000000008a8d90] check_and_cede_processor+0x20/0x50 (unreliable)
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229492] [c0000000014fbd30] [c0000000008a8fb8] shared_cede_loop+0x68/0x170
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229495] [c0000000014fbd70] [c0000000008a615c] cpuidle_enter_state+0xbc/0x350
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229499] [c0000000014fbdd0] [c000000000110f3c] call_cpuidle+0x7c/0xd0
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229502] [c0000000014fbe10] [c0000000001112d0] cpu_startup_entry+0x340/0x450
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229506] [c0000000014fbee0] [c00000000000bdcc] rest_init+0xac/0xc0
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229510] [c0000000014fbf00] [c000000000e13e9c] start_kernel+0x54c/0x568
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229514] [c0000000014fbf90] [c000000000008c6c] start_here_common+0x20/0xa8
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229515] Instruction dump:
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229517] 61290008 913f0090 4bfffe44 7fa3eb78 4b93441d 60000000 2fa30000 40feff14 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229523] 4bfffefc 3c62fff6 89032f80 69080001 <0b080000> 2fa80000 41fefe54 39000001 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229529] ---[ end trace 990311d357d2b2ab ]---
Jan  3 10:27:01 ltcalpine-lp4 CRON[3091]: (nobody) CMD (if [ -x /usr/bin/php5 ] && [ -f /usr/share/serverstats/update.php ]; then /usr/bin/php5 /usr/share/serverstats/update.php > /dev/null; fi)
Jan  3 10:27:01 ltcalpine-lp4 CRON[3090]: (CRON) info (No MTA installed, discarding output)
Jan  3 10:28:01 ltcalpine-lp4 CRON[3218]: (nobody) CMD (if [ -x /usr/bin/php5 ] && [ -f /usr/share/serverstats/update.php ]; then /usr/bin/php5 /usr/share/serverstats/update.php > /dev/null; fi)
Jan  3 10:28:01 ltcalpine-lp4 CRON[3217]: (CRON) info (No MTA installed, discarding output)
Jan  3 10:29:01 ltcalpine-lp4 CRON[3339]: (nobody) CMD (if [ -x /usr/bin/php5 ] && [ -f /usr/share/serverstats/update.php ]; then /usr/bin/php5 /usr/share/serverstats/update.php > /dev/null; fi)
Jan  3 10:29:01 ltcalpine-lp4 CRON[3338]: (CRON) info (No MTA installed, discarding output)
Jan  3 10:30:01 ltcalpine-lp4 CRON[3445]: (nobody) CMD (if [ -x /usr/bin/php5 ] && [ -f /usr/share/serverstats/update.php ]; then /usr/bin/php5 /usr/share/serverstats/update.php > /dev/null; fi)
Jan  3 10:30:02 ltcalpine-lp4 CRON[3444]: (CRON) info (No MTA installed, discarding output)
Jan  3 10:31:01 ltcalpine-lp4 CRON[3560]: (nobody) CMD (if [ -x /usr/bin/php5 ] && [ -f /usr/share/serverstats/update.php ]; then /usr/bin/php5 /usr/share/serverstats/update.php > /dev/null; fi)
Jan  3 10:31:01 ltcalpine-lp4 CRON[3559]: (CRON) info (No MTA installed, discarding output)

 
Stack trace output:
 Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229293] ------------[ cut here ]------------
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229301] WARNING: at /build/linux-lts-wily-W0lTWH/linux-lts-wily-4.2.0/net/core/skbuff.c:4174
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229303] Modules linked in: rpcsec_gss_krb5 nfsv4 nfsd auth_rpcgss nfs_acl nfs lockd grace sunrpc fscache mlx4_ib ib_ipoib rdma_ucm rdma_cm iw_cm ib_umad ib_ucm ib_cm ib_sa ib_mad ib_uverbs ib_core ib_addr pseries_rng rtc_generic mlx4_en vxlan ip6_udp_tunnel udp_tunnel bnx2x mlx4_core mdio libcrc32c
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229338] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.2.0-21-generic #25~14.04.1-Ubuntu
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229342] task: c000000001480b30 ti: c000000c7ffdc000 task.ti: c0000000014f8000
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229344] NIP: c0000000008f484c LR: c000000000981844 CTR: c0000000009d40e0
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229347] REGS: c000000c7ffdf2d0 TRAP: 0700   Not tainted  (4.2.0-21-generic)
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229349] MSR: 8000000000029033 <SF,EE,ME,IR,DR,RI,LE>  CR: 28008048  XER: 00000000
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] CFAR: c0000000008f46a4 SOFTE: 1 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR00: 0000000000000000 c000000c7ffdf550 c0000000014fdf00 c00000000145df00 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR04: c000000bfa4fec00 0000000000000000 c000000c2aeedb00 c000000bfa4fdb00 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR08: 0000000000000001 c000000c2aeeec00 0000000000000005 0000000bfa4f0000 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR12: 0000000000002200 c00000000e7e0000 0000000000000001 000000000000012c 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR16: 0000000000000000 0000000000000001 c000000c7ffdfd90 0000000000000000 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR20: 000000000000dd86 0000000000000000 c000000000d83070 c000000000d83048 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR24: c000000c6f6c0098 0000000000000234 c000000c7ffdf5d0 0000000000000458 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229357] GPR28: 0000000000000458 c000000c7ffdf660 c000000c40cf0d00 c000000c40cf9200 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229390] NIP [c0000000008f484c] skb_try_coalesce+0x40c/0x450
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229393] LR [c000000000981844] tcp_try_coalesce+0x94/0x140
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229395] Call Trace:
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229398] [c000000c7ffdf550] [c000000c7ffdf5b0] 0xc000000c7ffdf5b0 (unreliable)
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229402] [c000000c7ffdf5b0] [c000000000981844] tcp_try_coalesce+0x94/0x140
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229405] [c000000c7ffdf600] [c000000000983358] tcp_data_queue+0x3f8/0xf90
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229408] [c000000c7ffdf6d0] [c0000000009861ac] tcp_rcv_established+0x1cc/0x7a0
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229412] [c000000c7ffdf730] [c000000000992104] tcp_v4_do_rcv+0x1b4/0x490
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229415] [c000000c7ffdf790] [c000000000995f60] tcp_v4_rcv+0xb40/0xb60
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229418] [c000000c7ffdf870] [c000000000963eb8] ip_local_deliver_finish+0x178/0x350
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229422] [c000000c7ffdf8c0] [c0000000009646d4] ip_local_deliver+0x54/0x130
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229424] [c000000c7ffdf930] [c0000000009641e4] ip_rcv_finish+0x154/0x420
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229427] [c000000c7ffdf9b0] [c000000000964a3c] ip_rcv+0x28c/0x430
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229432] [c000000c7ffdfa40] [c00000000090ad44] __netif_receive_skb_core+0x754/0xd30
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229435] [c000000c7ffdfb20] [c00000000090e3f4] netif_receive_skb_internal+0x34/0xd0
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229438] [c000000c7ffdfb60] [c00000000090efe4] dev_gro_receive+0x2d4/0x430
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229441] [c000000c7ffdfbd0] [c00000000090f408] napi_gro_receive+0x48/0x1b0
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229447] [c000000c7ffdfc10] [d00000000d9846c0] ipoib_ib_handle_rx_wc+0x180/0x310 [ib_ipoib]
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229452] [c000000c7ffdfcb0] [d00000000d98571c] ipoib_poll+0x16c/0x250 [ib_ipoib]
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229455] [c000000c7ffdfd30] [c00000000090ebb8] net_rx_action+0x2d8/0x430
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229459] [c000000c7ffdfe40] [c0000000000ba124] __do_softirq+0x174/0x390
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229463] [c000000c7ffdff40] [c0000000000ba6c8] irq_exit+0xc8/0x100
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229466] [c000000c7ffdff60] [c0000000000111ec] __do_irq+0x8c/0x190
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229470] [c000000c7ffdff90] [c000000000024278] call_do_irq+0x14/0x24
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229473] [c0000000014fb980] [c000000000011390] do_IRQ+0xa0/0x120
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229477] [c0000000014fb9e0] [c000000000002594] hardware_interrupt_common+0x114/0x180
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229483] --- interrupt: 501 at plpar_hcall_norets+0x1c/0x28
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229483]     LR = check_and_cede_processor+0x34/0x50
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229488] [c0000000014fbcd0] [c0000000008a8d90] check_and_cede_processor+0x20/0x50 (unreliable)
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229492] [c0000000014fbd30] [c0000000008a8fb8] shared_cede_loop+0x68/0x170
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229495] [c0000000014fbd70] [c0000000008a615c] cpuidle_enter_state+0xbc/0x350
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229499] [c0000000014fbdd0] [c000000000110f3c] call_cpuidle+0x7c/0xd0
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229502] [c0000000014fbe10] [c0000000001112d0] cpu_startup_entry+0x340/0x450
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229506] [c0000000014fbee0] [c00000000000bdcc] rest_init+0xac/0xc0
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229510] [c0000000014fbf00] [c000000000e13e9c] start_kernel+0x54c/0x568
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229514] [c0000000014fbf90] [c000000000008c6c] start_here_common+0x20/0xa8
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229515] Instruction dump:
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229517] 61290008 913f0090 4bfffe44 7fa3eb78 4b93441d 60000000 2fa30000 40feff14 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229523] 4bfffefc 3c62fff6 89032f80 69080001 <0b080000> 2fa80000 41fefe54 39000001 
Jan  3 10:26:06 ltcalpine-lp4 kernel: [  569.229529] ---[ end trace 990311d357d2b2ab ]---
 
Oops output:
 no
 
System Dump Info:
  The system was configured to capture a dump, however a dump was not produced.
 
== Comment: #4 - Pradeep Satyanarayana <pradeep@xxxxxxxxxx> - 2016-01-19 18:51:44 ==
Is this just a warning that was printed, or was there a crash as well?

If it is just a warning, it is likely not very serious. It is indicating
that IPoIB (in this case) is not computing the memory consumption
correctly. Hence the warning. Of course that could end up as OOM in case
of a heavily loaded system.

Took a quick look and believe it is coming from

WARN_ON_ONCE(delta < len); in function skb_try_coalesce().

This was supposed to be fixed. Please see :

http://www.spinics.net/lists/netdev/msg204071.html

Is that fix not present? If so, maybe we should ask Canonical to pick it
up.

== Comment: #7 - Carol L. Soto <clsoto@xxxxxxxxxx> - 2016-01-19 22:52:30 ==
The problem is that after the patch Pradeep pointed to, there is another patch that touches
the truesize value:
http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/patch/drivers/infiniband/ulp/ipoib/ipoib_ib.c?id=a44878d100630a34a44f54960115b81e449858db
From a44878d100630a34a44f54960115b81e449858db Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@xxxxxxxxxxxx>
Date: Thu, 2 Apr 2015 13:39:00 +0300
Subject: IB/ipoib: Use one linear skb in RX flow

it looks like maybe UD path will hit it but maybe not connected mode.
(tester can verify this)

SKB_TRUESIZE(skb->len) may work OK on x86, but on Power (ppc64) the cache line size is different from x86, so maybe that is why the warning appears.
MOFED has the line below, but I have not run UD in a long time with MOFED because the new IB cards do not use UD mode.
skb->truesize = skb->len + sizeof(struct sk_buff);

== Comment: #13 - David Z. Dai <zdai@xxxxxxxxxx> - 2016-01-20 13:38:07 ==
The test box is actually using Ubuntu-wily release, with kernel at 4.2.0-21-generic.

Regarding the "WARN_ON_ONCE(delta < len);" warning messages, there are 2 related patches:
1) The first one is what Pradeep pointed out:
commit b28ba72665356438e3a6e3be365c3c3071496840
Author: Eric Dumazet <edumazet@xxxxxxxxxx>
Date:   Tue Jul 10 10:03:41 2012 +0000

    IPoIB: fix skb truesize underestimatiom

    Or Gerlitz reported triggering of WARN_ON_ONCE(delta < len); in
    skb_try_coalesce()
    This warning tracks drivers that incorrectly set skb->truesize

    IPoIB indeed allocates a full page to store a fragment, but only
    accounts in skb->truesize the used part of the page (frame length)

    This patch fixes skb truesize underestimation, and
    also fixes a performance issue, because RX skbs have not enough tailroom
    to allow IP and TCP stacks to pull their header in skb linear part
    without an expensive call to pskb_expand_head()

This patch is included in Ubuntu-vivid release with kernel at 3.19.

The main change to fix the WARN_ON_ONCE() warning message is in function ipoib_ud_skb_put_frags():
@@ -123,7 +123,7 @@ static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
        if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
                unsigned int size;
                /*
                 * There is only two buffers needed for max_payload = 4K,
                 * first buf size is IPOIB_UD_HEAD_SIZE
                 */
                skb->tail += IPOIB_UD_HEAD_SIZE;
                skb->len  += length;

                size = length - IPOIB_UD_HEAD_SIZE;

                skb_frag_size_set(frag, size);
-               skb->truesize += size;
+               skb->truesize += PAGE_SIZE;
        } else
                skb_put(skb, length);

2) Next, the new Ubuntu-wily release includes the patch Carol pointed out:
commit a44878d100630a34a44f54960115b81e449858db  (in wily)
Author: Erez Shitrit <erezsh@xxxxxxxxxxxx>
Date:   Thu Apr 2 13:39:00 2015 +0300

    IB/ipoib: Use one linear skb in RX flow

    The current code in the RX flow uses two sg entries for each incoming
    packet, the first one was for the IB headers and the second for the rest
    of the data, that causes two  dma map/unmap and two allocations, and few
    more actions that were done at the data path.

    Use only one linear skb on each incoming packet, for the data (IB
    headers and payload), that reduces the packet processing in the
    data-path (only one skb, no frags, the first frag was not used anyway,
    less memory allocations) and the dma handling (only one dma map/unmap
    over each incoming packet instead of two map/unmap per each incoming packet).

    After commit 73d3fe6d1c6d ("gro: fix aggregation for skb using frag_list") from
    Eric Dumazet, we will get full aggregation for large packets.

    When running bandwidth tests before and after the (over the card's numa node),
    using "netperf -H 1.1.1.3 -T -t TCP_STREAM", the results before are ~12Gbs before
    and after ~16Gbs on my setup (Mellanox's ConnectX3).

Looking at the code change in this patch, it completely removes the functions ipoib_ud_skb_put_frags() and ipoib_ud_need_sg(),
and just calls skb_put() directly in ipoib_ib_handle_rx_wc(), followed by "skb->truesize = SKB_TRUESIZE(skb->len);":
static int ipoib_ib_post_receive(struct net_device *dev, int id)
@@ -156,18 +126,11 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
        ipoib_ud_dma_unmap_rx(priv, mapping);
-       ipoib_ud_skb_put_frags(priv, skb, wc->byte_len);
+
+       skb_put(skb, wc->byte_len);
        ...
        skb_pull(skb, IPOIB_ENCAP_LEN);

+       skb->truesize = SKB_TRUESIZE(skb->len);

/* include/linux/skbuff.h */
/* return minimum truesize of one skb containing X bytes of data */
#define SKB_TRUESIZE(X) ((X) +                                          \
                         SKB_DATA_ALIGN(sizeof(struct sk_buff)) +       \
                         SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

Looks like the 2nd patch broke the fix in the 1st patch related to the
warning message.

One thought is to keep the 1st patch's code change related to skb->truesize on top of the 2nd patch. 
In ipoib_alloc_rx_skb():
        skb_put(skb, wc->byte_len);
        ...
+       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
+               skb->truesize += PAGE_SIZE;
+       else
                skb->truesize = SKB_TRUESIZE(skb->len);

Will discuss with Pradeep/Carol on the best approach for the fix.
********************************************************************
Also, on this Ubuntu-wily test box, please verify Carol's suggestion by setting IPoIB to "connected" mode:
# echo 'connected' > /sys/class/net/ib0/mode
(you might need to bring down the test interface ib0/ib1 ip configuration first before running above command)
After you set it to connected mode, rerun the same stress test, most likely you will not see the warning message any more.
Please update the test result with the "connected" mode.

** Affects: linux (Ubuntu)
     Importance: High
     Assignee: Canonical Kernel Team (canonical-kernel-team)
         Status: Triaged


** Tags: architecture-ppc64le bot-comment bugnameltc-134417 severity-high targetmilestone-inin14044
-- 
WARNING: at /build/linux-lts-wily-W0lTWH/linux-lts-wily-4.2.0/net/core/skbuff.c:4174 (Travis IB)
https://bugs.launchpad.net/bugs/1541326
You received this bug notification because you are a member of Kernel Packages, which is subscribed to linux in Ubuntu.