rsyslog 简介及故障处理一例
简介
在我们日常生活中,我们经常需要回顾以前发生的一些事情;或者,当出现了一些问题的时候,可以从某些地方去查找原因,寻找发生问题的痕迹。无可避免需要用到文字的、图像的等等不同形式的记录。用计算机的术语表达,就是 LOG,或日志。
日志,对于任何系统来说都是及其重要的组成部分。
实际讲解
比如我们安装keepalvied, 日志规范存放在 /var/log/keepalived.log
vim /etc/rsyslog.conf
# keepalived -S 0
local0.* /var/log/keepalived.log
重启服务生效:
systemctl restart rsyslog
我们手工创建日志
logger -tLAB -plocal0.info haha-bbc-classic
看看效果:
tail /var/log/keepalived.log
Sep 3 11:40:12 CNSZ147001 LAB: haha-bbc-classic
如果你写三次,日志也会打印三次
# keepalived -S 0
local0.* /var/log/keepalived.log
# keepalived -S 0
local0.* /var/log/keepalived.log
# keepalived -S 0
local0.* /var/log/keepalived.log
Sep 3 11:40:12 CNSZ147001 LAB: haha-bbc-classic
Sep 3 11:40:12 CNSZ147001 LAB: haha-bbc-classic
Sep 3 11:40:12 CNSZ147001 LAB: haha-bbc-classic
我们发现messages 也会打印local0的日志,如何避免重复写入呢?
不将 local0 类型的日志写入messages,加上 local0.none 即可:
*.info;mail.none;authpriv.none;cron.none;local0.none /var/log/messages
local0-local7, 每种类型又包含日志级别: emerg-0, alert-1, crit-2, err-3, warn-4, notice-5, info-6, debug-7 (数字越小告警级别越高)
故障处理一例
有一天碰到一个应用输出日志类型为local1和local2, 我按照上述方式配置好,启动应用竟然没有任何日志输出。
检查服务状态,重启服务,检查配置文件,一顿操作下来,毫无进展。 设置local0,使用logger手工生成日志,依然没有日志输出。但message中有日志输出。配置文件没有特殊字符,服务也重启了,就是没有日志输出,问题出在哪里呢?我还特意请教了专家日志输出就这样配置,没什么特别的,又不是输出到远程服务器,研究了半天,无果。好在日志输出的问题不急,手头又有其他事情忙。
在一台全新的虚拟机做测试,同样的配置,输出日志一切正常。
使用绝招: 跟踪写入日志过程,看看问题到底出在哪里。
正常的:
# strace logger -tLAB -plocal0.info haha-bbc-classic
execve("/bin/logger", ["logger", "-tLAB", "-plocal0.info", "haha-bbc-classic"], [/* 21 vars */]) = 0
brk(NULL) = 0x18e9000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2af3be48c000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=27687, ...}) = 0
mmap(NULL, 27687, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2af3be48d000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\35\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2127336, ...}) = 0
mmap(NULL, 3940800, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2af3be68f000
mprotect(0x2af3be847000, 2097152, PROT_NONE) = 0
mmap(0x2af3bea47000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b8000) = 0x2af3bea47000
mmap(0x2af3bea4d000, 16832, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2af3bea4d000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2af3be494000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2af3be495000
arch_prctl(ARCH_SET_FS, 0x2af3be494b80) = 0
mprotect(0x2af3bea47000, 16384, PROT_READ) = 0
mprotect(0x605000, 4096, PROT_READ) = 0
mprotect(0x2af3be68c000, 4096, PROT_READ) = 0
munmap(0x2af3be48d000, 27687) = 0
brk(NULL) = 0x18e9000
brk(0x190a000) = 0x190a000
brk(NULL) = 0x190a000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=106070960, ...}) = 0
mmap(NULL, 106070960, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2af3bea52000
close(3) = 0
open("/etc/localtime", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=388, ...}) = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=388, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2af3be48d000
read(3, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2\0\0\0\2\0\0\0\0"..., 4096) = 388
lseek(3, -240, SEEK_CUR) = 148
read(3, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\3\0\0\0\3\0\0\0\0"..., 4096) = 240
close(3) = 0
munmap(0x2af3be48d000, 4096) = 0
socket(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 3
connect(3, {sa_family=AF_LOCAL, sun_path="/dev/log"}, 110) = 0
sendto(3, "<134>Sep 3 15:27:16 LAB: haha-b"..., 42, MSG_NOSIGNAL, NULL, 0) = 42
close(3) = 0
close(1) = 0
close(2) = 0
exit_group(0) = ?
+++ exited with 0 +++
异常的:
# strace logger -tLAB -plocal0.info haha-bbc-classic
execve("/bin/logger", ["logger", "-tLAB", "-plocal0.info", "haha-bbc-classic"], [/* 20 vars */]) = 0
brk(NULL) = 0xad6000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b86a4894000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=23500, ...}) = 0
mmap(NULL, 23500, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2b86a4895000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240%\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2151672, ...}) = 0
mmap(NULL, 3981792, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b86a4a96000
mprotect(0x2b86a4c58000, 2097152, PROT_NONE) = 0
mmap(0x2b86a4e58000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c2000) = 0x2b86a4e58000
mmap(0x2b86a4e5e000, 16864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b86a4e5e000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b86a489b000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b86a489c000
arch_prctl(ARCH_SET_FS, 0x2b86a489bb80) = 0
mprotect(0x2b86a4e58000, 16384, PROT_READ) = 0
mprotect(0x605000, 4096, PROT_READ) = 0
mprotect(0x2b86a4a93000, 4096, PROT_READ) = 0
munmap(0x2b86a4895000, 23500) = 0
brk(NULL) = 0xad6000
brk(0xaf7000) = 0xaf7000
brk(NULL) = 0xaf7000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=106075056, ...}) = 0
mmap(NULL, 106075056, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2b86a4e63000
close(3) = 0
open("/etc/localtime", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=528, ...}) = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=528, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b86a4895000
read(3, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2\0\0\0\2\0\0\0\0"..., 4096) = 528
lseek(3, -330, SEEK_CUR) = 198
read(3, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\3\0\0\0\3\0\0\0\0"..., 4096) = 330
close(3) = 0
munmap(0x2b86a4895000, 4096) = 0
socket(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 3
connect(3, {sa_family=AF_LOCAL, sun_path="/dev/log"}, 110) = -1 ENOENT (No such file or directory)
close(3) = 0
close(1) = 0
close(2) = 0
exit_group(0) = ?
+++ exited with 0 +++
对比发现,异常主机没有 /dev/log, 这是个什么东东? 我们看看:
# ls -lh /dev/log
srw-rw-rw- 1 root root 0 Sep 3 09:09 /dev/log
# file /dev/log
/dev/log: socket
谁在使用这个socket呢?
# lsof | grep /dev/log
systemd 1 root 46u unix 0xffff91bd3fc3e400 0t0 84348749 /dev/log
systemd-j 30018 root 5u unix 0xffff91bd3fc3e400 0t0 84348749 /dev/log
在CentOS 7 systemd-journald取代了rsyslogd很大一部分功能。也许两者在某些情况下会产生冲突,导致 /dev/log 被删除。然后就出现了前面所说的不打印日志的诡异现象。
通过lsof知道是systemd-journald在使用/dev/log,重启服务会重新创建该socket。
systemctl restart systemd-journald.socket
然后再使用logger创建日志,一切恢复正常。搞定,收工。
消息限制
如果在message 看到这样的信息,说明有消息被丢弃了,原因是 journald 默认30秒内允许1000条消息
Nov 6 14:56:13 VRA journal: Suppressed 12546 messages from /user.slice/user-602.slice
/etc/systemd/journald.conf 这两个参数可以根据需要调整,如果要禁用消息限制,可以都改成0,实测改成0在一段时间都没有日志输出……
建议根据实际情况调大限制。
#RateLimitInterval=30s
#RateLimitBurst=1000
重启服务生效。
systemctl restart systemd-journald