看我的測試代碼,似乎應該是在創建子進程之後再創建epoll的fd,否則程序將會有問題。試將代碼中兩個CreateWorker函數的調用分別啟用:一個在創建epoll fd之前,一個在創建epoll fd之後。先創建epoll fd、之後才調用CreateWorker的代碼會出問題,在我的機器上(linux內核2.6.26)表現的症狀就是所有進程的epoll_wait函數返回0,而客戶端似乎被阻塞了:
服務器端:
#include <iostream>
#include <sys/socket.h>
#include <sys/epoll.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>
using namespace std;
#define MAXLINE 5
#define OPEN_MAX 100
#define LISTENQ 20
#define SERV_PORT 5000
#define INFTIM 1000
// Per-connection state that travels with an epoll registration.
struct task_t
{
    int  fd;           // socket descriptor this task belongs to
    char buffer[100];  // payload storage for the connection
    int  n;            // byte count associated with `buffer`
};
// Forks `nWorker` worker processes and then supervises them from the parent.
//
// Parameters:
//   nWorker - number of workers to start; a value <= 0 makes the call a no-op.
//
// Returns:
//   0  immediately in the caller when nWorker <= 0, and in every forked child
//      (the child continues with whatever the caller does next);
//   -1 in the parent when fork() fails.
//
// When forking succeeds the parent does not return: it blocks in wait() and
// forks a replacement each time a worker terminates.
int CreateWorker(int nWorker)
{
    if (nWorker <= 0)
    {
        return 0;
    }

    while (true)
    {
        if (0 < nWorker)
        {
            // Still owe the pool at least one worker: fork it.
            const pid_t nPid = ::fork();
            if (0 == nPid)
            {
                // Child: leave the supervision loop and go do the real work.
                printf("create worker %d success!\n", static_cast<int>(::getpid()));
                return 0;
            }
            if (nPid < 0)
            {
                printf("fork error: %s\n", ::strerror(errno));
                return -1;
            }
            --nWorker;
        }
        else
        {
            int nStatus = 0;
            const pid_t nReaped = ::wait(&nStatus);
            if (nReaped > 0)
            {
                // A worker terminated: schedule one replacement.
                ++nWorker;
            }
            else if (errno != EINTR)
            {
                // wait() failed for a reason other than a signal (e.g. ECHILD,
                // no children left). Start a worker so the loop does not spin
                // on a failing wait() with nothing to supervise.
                ++nWorker;
            }
            // EINTR: interrupted by a signal, simply wait again.
        }
    }
}
// Adds O_NONBLOCK to the file status flags of `sock`.
// If either fcntl() call fails, a diagnostic is written with perror() and the
// process terminates with exit status 1.
void setnonblocking(int sock)
{
    const int currentFlags = fcntl(sock, F_GETFL);
    if (currentFlags < 0)
    {
        perror("fcntl(sock,GETFL)");
        exit(1);
    }

    const int wantedFlags = currentFlags | O_NONBLOCK;
    if (fcntl(sock, F_SETFL, wantedFlags) < 0)
    {
        perror("fcntl(sock,SETFL,opts)");
        exit(1);
    }
}
int main()
{
int i, maxi, listenfd, connfd, sockfd,epfd,nfds;
ssize_t n;
char line[MAXLINE];
socklen_t clilen;
struct epoll_event ev,events[20];
struct sockaddr_in clientaddr;
struct sockaddr_in serveraddr;
listenfd = socket(AF_INET, SOCK_STREAM, 0);
bzero(&serveraddr, sizeof(serveraddr));
serveraddr.sin_family = AF_INET;
char *local_addr="127.0.0.1";
inet_aton(local_addr,&(serveraddr.sin_addr));//htons(SERV_PORT);
serveraddr.sin_port=htons(SERV_PORT);
// 地址重用
int nOptVal = 1;
socklen_t nOptLen = sizeof(int);
if (-1 == ::setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, &nOptVal, nOptLen))
{
return -1;
}
setnonblocking(listenfd);
bind(listenfd,(sockaddr *)&serveraddr, sizeof(serveraddr));
listen(listenfd, LISTENQ);
CreateWorker(5);
//把socket設置為非阻塞方式
//生成用于處理accept的epoll專用的文件描述符
epfd=epoll_create(256);
//設置與要處理的事件相關的文件描述符
ev.data.fd=listenfd;
//設置要處理的事件類型
ev.events=EPOLLIN|EPOLLET;
//ev.events=EPOLLIN;
//注冊epoll事件
epoll_ctl(epfd,EPOLL_CTL_ADD,listenfd,&ev);
//CreateWorker(5);
maxi = 0;
task_t task;
task_t *ptask;
while(true)
{
//等待epoll事件的發生
nfds=epoll_wait(epfd,events,20,500);
//處理所發生的所有事件
for(i=0;i<nfds;++i)
{
if(events[i].data.fd==listenfd)
{
connfd = accept(listenfd,NULL, NULL);
if(connfd<0){
printf("connfd<0, listenfd = %d\n", listenfd);
printf("error = %s\n", strerror(errno));
exit(1);
}
setnonblocking(connfd);
//設置用于讀操作的文件描述符
memset(&task, 0, sizeof(task));
task.fd = connfd;
ev.data.ptr = &task;
//設置用于注冊的讀操作事件
ev.events=EPOLLIN|EPOLLET;
//ev.events=EPOLLIN;
//注冊ev
epoll_ctl(epfd,EPOLL_CTL_ADD,connfd,&ev);
}
else if(events[i].events&EPOLLIN)
{
cout << "EPOLLIN" << endl;
ptask = (task_t*)events[i].data.ptr;
sockfd = ptask->fd;
if ( (ptask->n = read(sockfd, ptask->buffer, 100)) < 0) {
if (errno == ECONNRESET) {
close(sockfd);
events[i].data.ptr = NULL;
} else
std::cout<<"readline error"<<std::endl;
} else if (ptask->n == 0) {
close(sockfd);
events[i].data.ptr = NULL;
}
ptask->buffer[ptask->n] = '\0';
cout << "read " << ptask->buffer << endl;
//設置用于寫操作的文件描述符
ev.data.ptr = ptask;
//設置用于注測的寫操作事件
ev.events=EPOLLOUT|EPOLLET;
//修改sockfd上要處理的事件為EPOLLOUT
epoll_ctl(epfd,EPOLL_CTL_MOD,sockfd,&ev);
}
else if(events[i].events&EPOLLOUT)
{
cout << "EPOLLOUT" << endl;
ptask = (task_t*)events[i].data.ptr;
sockfd = ptask->fd;
write(sockfd, ptask->buffer, ptask->n);
//設置用于讀操作的文件描述符
ev.data.ptr = ptask;
//修改sockfd上要處理的事件為EPOLIN
epoll_ctl(epfd,EPOLL_CTL_DEL,sockfd,&ev);
cout << "write " << ptask->buffer;
memset(ptask, 0, sizeof(*ptask));
close(sockfd);
}
}
}
return 0;
}
測試客戶端:
#!/usr/bin/perl
use strict;
use Socket;
use IO::Handle;
# Opens one TCP connection to 127.0.0.1:5000, sends a single "hello world"
# line, prints the first line read back from the server, then closes the
# socket. Dies if the socket cannot be created or connected.
sub echoclient
{
    my $proto   = getprotobyname("TCP");
    my $address = sockaddr_in(5000, inet_aton("127.0.0.1"));

    socket(SOCK, AF_INET, SOCK_STREAM, $proto) or die "socket() failed: $!";
    connect(SOCK, $address) or die "connect() failed: $!";
    # Unbuffered writes so the request is sent as soon as it is printed.
    SOCK->autoflush(1);

    my $request = "hello world\n";
    print "out = ", $request;
    print SOCK $request;

    my $reply = <SOCK>;
    print "in = ", $reply;
    close SOCK;
}
# Single-shot variant, left disabled:
#&echoclient;
#exit(0);
# Run the client 9999 times back to back, one connection per call.
echoclient() for 1 .. 9999;
我查看了lighttpd的實現,也是在創建完子進程之后才創建的epoll的fd.
請問誰知道哪里有講解這個的文檔?
這是美麗的分割線:
-----------------------------------------------------------------------
感謝luke, 他幫我解釋了這個問題的原因:
假如fd1是由A進程加入epfd的,而且用的是ET模式,那麼假如被通知的是進程B,顯然B進程不會對fd1進行處理,所以以後fd1的事件再也不會通知,所以
經過幾次循環之後,所有的fd都沒有事件通知了,所以epoll_wait在timeout之後就返回0了。而在客戶端的結果可想而知,只能是被阻塞。
也就是說, 這是一種發生在epoll fd上面的類似于"驚群"的現象.