Linux SIGCHLD信号处理与僵尸进程清理实验
七月 23, 2025
次阅读
实验背景
在Unix/Linux系统中,当子进程终止时,如果父进程没有及时调用wait或waitpid回收,子进程会变成僵尸进程(Zombie)。本实验通过两种方式验证如何有效清理僵尸进程:
- 自定义SIGCHLD信号处理函数(可移植方案)
- 直接忽略SIGCHLD信号(将处理动作显式设为SIG_IGN;POSIX.1-2001起已有规定,Linux可靠支持,但早期UNIX系统行为不一致)
实验一:通过信号处理函数清理僵尸进程(改进版)
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#define CHILD_NUM 20
#define DURATION 20

/*
 * Number of children that have not been reaped yet.
 * It is decremented inside the SIGCHLD handler and polled by main(), so it
 * is declared volatile sig_atomic_t: the only object type that may portably
 * be shared between a signal handler and the interrupted code.
 */
volatile sig_atomic_t child_cnt = CHILD_NUM;

/*
 * Appends the NUL-terminated string src to buf starting at offset pos,
 * never writing at or beyond index cap. Returns the new offset.
 * Uses no library calls, so it is safe to run inside a signal handler.
 */
static size_t sig_append_str(char *buf, size_t pos, size_t cap, const char *src)
{
    while (*src != '\0' && pos < cap) {
        buf[pos++] = *src++;
    }
    return pos;
}

/*
 * Appends the decimal representation of value to buf starting at offset pos,
 * never writing at or beyond index cap. Returns the new offset.
 * Uses no library calls, so it is safe to run inside a signal handler.
 */
static size_t sig_append_num(char *buf, size_t pos, size_t cap, long value)
{
    char digits[24];
    size_t count = 0;
    /* Work on the magnitude as unsigned so that LONG_MIN does not overflow. */
    unsigned long magnitude =
        value < 0 ? 0UL - (unsigned long)value : (unsigned long)value;

    do {
        digits[count++] = (char)('0' + (int)(magnitude % 10UL));
        magnitude /= 10UL;
    } while (magnitude != 0UL);

    if (value < 0 && pos < cap) {
        buf[pos++] = '-';
    }
    while (count > 0 && pos < cap) {
        buf[pos++] = digits[--count];
    }
    return pos;
}

/*
 * SIGCHLD handler: reaps every child that has already terminated, decrements
 * child_cnt once per reaped child and reports how each child ended.
 *
 * signo: the delivered signal number (unused).
 *
 * The report is assembled by hand and emitted with write(2) rather than
 * printf(): stdio is not async-signal-safe, and this handler can interrupt
 * main() in the middle of its own printf(). Because write() bypasses the
 * stdio buffer, handler lines may appear ahead of still-buffered stdio output
 * when stdout is not a terminal.
 */
void sigchld_handler(int signo) {
    /* waitpid() ends the loop by failing with ECHILD once no child is left;
     * that must not leak into whatever code this handler interrupted. */
    int saved_errno = errno;
    int status;
    pid_t pid;

    (void)signo;

    /* Several children may exit while a single SIGCHLD is pending, so keep
     * reaping without blocking until nothing more is ready. */
    while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
        char line[96];
        size_t len = 0;
        ssize_t written;

        --child_cnt;

        len = sig_append_str(line, len, sizeof line, "[Parent] Child ");
        len = sig_append_num(line, len, sizeof line, (long)pid);
        if (WIFEXITED(status)) {
            len = sig_append_str(line, len, sizeof line, " exited with status ");
            len = sig_append_num(line, len, sizeof line, (long)WEXITSTATUS(status));
        } else {
            len = sig_append_str(line, len, sizeof line, " terminated abnormally");
        }
        len = sig_append_str(line, len, sizeof line, "\n\n");

        /* Nothing useful can be done about a failed write from in here. */
        written = write(STDOUT_FILENO, line, len);
        (void)written;
    }

    errno = saved_errno;
}
int main() {
srand(time(NULL));
struct sigaction sa;
sa.sa_handler = sigchld_handler;
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_RESTART | SA_NOCLDSTOP;
if (sigaction(SIGCHLD, &sa, NULL) == -1) {
perror("sigaction");
exit(EXIT_FAILURE);
}
printf("[Parent] PID=%d will create %d children in %d seconds\n\n",
getpid(), CHILD_NUM, DURATION);
// 在20秒内随机创建20个子进程
for (int i = 0; i < CHILD_NUM; i++) {
sleep(rand() % 3); // 随机延迟0-2秒
pid_t pid = fork();
if (pid == 0) {
// 子进程代码
int sleep_time = 1 + rand() % 5;
printf("[Child] PID=%d started (will exit after %ds)\n\n",
getpid(), sleep_time);
sleep(sleep_time);
exit(rand() % 100); // 随机退出状态
} else if (pid < 0) {
perror("fork");
}
}
// 父进程持续工作
while (1) {
if(child_cnt == 0) break;
printf("[Parent] I am working...\n\n");
sleep(1);
}
return 0;
}
1. 程序结构设计
- 父进程:创建20个子进程,每个子进程随机延时启动,随机生存时间
- 子进程:执行简单任务后以随机状态退出
- 信号处理:通过SIGCHLD信号异步回收子进程
2. 关键组件分析
信号处理函数(核心机制)
// Excerpt of the SIGCHLD handler: reaps terminated children and keeps the
// global child_cnt in step with the number of children still outstanding.
// signo is the delivered signal number; it is not used in the body.
// NOTE(review): printf is not on the POSIX async-signal-safe list; calling it
// from a handler is tolerable for a demo only.
void sigchld_handler(int signo) {
int status;
pid_t pid;
// WNOHANG keeps waitpid from blocking; the loop repeats for as long as a
// terminated child is returned (pid > 0), so several exits are handled in one
// handler invocation.
while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
// One fewer child left to wait for.
--child_cnt;
// Only normal exits are reported in this excerpt.
if (WIFEXITED(status)) {
printf("[Parent] Child %d exited with status %d\n\n",
pid, WEXITSTATUS(status));
}
}
}
技术亮点:
- 使用`waitpid(-1, &status, WNOHANG)`非阻塞回收任意子进程
- `WIFEXITED`和`WEXITSTATUS`宏规范处理退出状态
- 全局计数器`child_cnt`实现优雅终止
进程创建逻辑
// Excerpt of the child-creation loop: forks CHILD_NUM children one at a time.
// Only the child branch is shown; the parent simply proceeds to the next
// iteration.
for (int i = 0; i < CHILD_NUM; i++) {
sleep(rand() % 3); // random delay of 0-2 seconds before each fork
pid_t pid = fork();
if (pid == 0) {
// Child: pick a lifetime of 1-5 seconds, announce it, sleep, then exit.
int sleep_time = 1 + rand() % 5;
printf("[Child] PID=%d started (will exit after %ds)\n\n",
getpid(), sleep_time);
sleep(sleep_time);
exit(rand() % 100); // random exit status in 0-99
}
}
设计特点:
- 随机延迟创建(0-2秒)模拟真实场景
- 子进程随机生存时间(1-5秒)
- 随机退出状态(0-99)测试状态捕获
3. 执行流程优化点
信号处理配置:
`sa.sa_flags = SA_RESTART | SA_NOCLDSTOP;`
- `SA_RESTART`:自动重启被信号中断的系统调用
- `SA_NOCLDSTOP`:子进程停止(或恢复运行)时不产生SIGCHLD信号,只在子进程终止时通知
父进程工作循环:
```c
while (1) {
    if(child_cnt == 0) break;
    printf("[Parent] I am working...\n\n");
    sleep(1);
}
```
父进程每秒检查一次全局计数器,所有子进程都被回收后退出循环,既避免了忙等待,又实现了优雅退出。
4. 可能的改进建议
错误处理增强:
```c
if (pid < 0) {
    if (errno == EAGAIN) {
        delay = 1;
        sleep(delay);
        continue;
    }
    perror("fork");
    exit(EXIT_FAILURE);
}
```
日志记录优化:
```c
#define LOG(fmt, ...) \
    printf("[%s] PID=%d " fmt "\n", \
           __func__, getpid(), ##__VA_ARGS__)
```
信号安全处理:
```c
void sigchld_handler(int signo) {
    // 使用原子操作修改全局变量
    __sync_fetch_and_sub(&child_cnt, 1);
}
```
(以上仅示意计数器的更新方式:`waitpid`回收循环仍需保留,并且处理函数中应避免调用`printf`等非异步信号安全的函数。)
实验结果如下:
╭─ljx@VM-16-15-debian ~/linux_review/sign
╰─➤ ./signal_child.o
[Parent] PID=710610 will create 20 children in 20 seconds
[Child] PID=710611 started (will exit after 2s)
[Child] PID=710623 started (will exit after 1s)
[Parent] Child 710611 exited with status 45
[Child] PID=710624 started (will exit after 1s)
[Parent] Child 710623 exited with status 20
[Parent] Child 710624 exited with status 84
[Child] PID=710634 started (will exit after 5s)
[Child] PID=710635 started (will exit after 4s)
[Child] PID=710644 started (will exit after 2s)
[Child] PID=710645 started (will exit after 4s)
[Child] PID=710646 started (will exit after 1s)
[Child] PID=710648 started (will exit after 5s)
[Parent] Child 710646 exited with status 39
[Child] PID=710649 started (will exit after 1s)
[Parent] Child 710635 exited with status 21
[Parent] Child 710644 exited with status 3
[Child] PID=710653 started (will exit after 2s)
[Parent] Child 710649 exited with status 41
[Child] PID=710654 started (will exit after 2s)
[Parent] Child 710634 exited with status 93
[Child] PID=710662 started (will exit after 3s)
[Child] PID=710663 started (will exit after 1s)
[Child] PID=710664 started (will exit after 1s)
[Parent] Child 710645 exited with status 55
[Parent] Child 710653 exited with status 21
[Parent] Child 710663 exited with status 30
[Parent] Child 710664 exited with status 13
[Child] PID=710666 started (will exit after 4s)
[Child] PID=710667 started (will exit after 2s)
[Parent] Child 710654 exited with status 92
[Child] PID=710668 started (will exit after 2s)
[Parent] Child 710662 exited with status 55
[Parent] Child 710648 exited with status 75
[Child] PID=710671 started (will exit after 1s)
[Parent] I am working...
[Child] PID=710672 started (will exit after 5s)
[Parent] Child 710667 exited with status 36
[Parent] I am working...
[Parent] Child 710668 exited with status 35
[Parent] I am working...
[Parent] Child 710671 exited with status 99
[Parent] I am working...
[Parent] Child 710666 exited with status 86
[Parent] I am working...
[Parent] I am working...
[Parent] I am working...
[Parent] Child 710672 exited with status 57
实验二:通过SIG_IGN自动清理僵尸进程(保持原方案)
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <sys/wait.h>
#include <sys/types.h>
int main() {
pid_t pid;
// 设置SIGCHLD的处理动作为SIG_IGN
struct sigaction sa;
sa.sa_handler = SIG_IGN;
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_RESTART | SA_NOCLDSTOP;
if (sigaction(SIGCHLD, &sa, NULL) == -1) {
perror("sigaction");
exit(EXIT_FAILURE);
}
// 创建子进程
if ((pid = fork()) < 0) {
perror("fork");
exit(EXIT_FAILURE);
} else if (pid == 0) {
// 子进程
printf("Child process %d started\n", getpid());
sleep(2); // 模拟子进程工作
printf("Child process %d exiting\n", getpid());
exit(2); // 子进程以状态2退出
}
// 父进程继续自己的工作
printf("Parent process %d continues working\n", getpid());
for (int i = 0; i < 10; i++) {
printf("Parent working...\n");
sleep(1);
// 检查子进程状态
if (waitpid(pid, NULL, WNOHANG) == -1) {
printf("Child process %d has been automatically reaped\n", pid);
}
}
return 0;
}
关键验证点
- 子进程终止并被系统自动回收后,`waitpid(pid, NULL, WNOHANG)`会立即返回-1(errno=ECHILD);子进程仍在运行时则返回0
- 使用`ps aux | grep defunct`确认无僵尸进程存在
实验结果如下:
╭─ljx@VM-16-15-debian ~/linux_review/sign
╰─➤ ./signal_child.o
Parent process 711693 continues working
Parent working...
Child process 711694 started
Parent working...
Child process 711694 exiting
Parent working...
Child process 711694 has been automatically reaped
Parent working...
Child process 711694 has been automatically reaped
Parent working...
Child process 711694 has been automatically reaped
Parent working...
Child process 711694 has been automatically reaped
Parent working...
Child process 711694 has been automatically reaped
Parent working...
Child process 711694 has been automatically reaped
Parent working...
Child process 711694 has been automatically reaped
Parent working...
Child process 711694 has been automatically reaped
实验对比分析
| 特性 | 自定义信号处理函数 | SIG_IGN方式 |
|---|---|---|
| 子进程控制 | 可精确控制每个子进程的生命周期 | 完全由系统自动管理 |
| 状态获取 | 能获取每个子进程的退出状态 | 无法获取任何状态信息 |
| 系统负载 | 父进程需要处理信号 | 零开销 |
| 适用场景 | 需要监控子进程状态的场景 | 只关心任务是否完成的场景 |
| 可移植性 | 所有UNIX/Linux系统通用 | POSIX.1-2001起有规定,Linux可靠支持;早期BSD/System V行为不一致 |
实验结论
- 生产环境推荐使用自定义信号处理函数(方案一),兼具可移植性和可控性
- 临时任务可考虑`SIG_IGN`方案(方案二),但需注意:
  - 无法获取子进程退出状态
  - 可能干扰其他依赖SIGCHLD的库(如Apache)
- 两种方案都能有效避免僵尸进程,但设计哲学不同:
  - 方案一体现“显式管理”思想
  - 方案二体现“自动回收”思想
延伸思考
- 如果子进程数量极大(如1000+),信号处理函数可能成为性能瓶颈
- 在多线程环境中,信号处理需要额外注意线程安全问题
- 某些场景下可结合`epoll`+`signalfd`实现更优雅的子进程监控
查看评论