Crossfire Archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

CF: Bug in server socket code...



Hi,

I was experiencing strange client crashes after setting up server
(0.94.3) in a Linux box. It took a while to trace the problem back
to the server and socket code in ericserver.c.... 

Here's my patch, which fixed the problems, and the server/client
system has run very reliably after fixing the socket writing code (plus
bunch of other 'bugs' in socket stuff).


-- 
-------------------------------------------------------111010-101101-101001---
 Timo Kokkonen                                email: tjko@iki.fi, tjko@jyu.fi
 University of Jyvaskyla, Finland.              URL: http://www.iki.fi/tjko/
-----------------------"Not a plucky hero until one reaches the Great Wall"---
*** crossfire-0.94.3/server/ericserver.c	Sat Aug  1 13:23:56 1998
--- crossfire-0.94.3-timo/server/ericserver.c	Sun Sep 27 14:50:14 1998
***************
*** 120,134 ****
   */
  static void Write_To_Socket(int cnum, unsigned char *buf, int len)
  {
!     int amt=0;
      unsigned char *pos=buf;
  
  /*    fprintf(stderr,"Trying to write %d bytes to socket %d\n", len, cnum);*/
      if (cs_sockets[cnum].status == Ns_Dead || !buf)
  	return;
      /* If we manage to write more than we wanted, take it as a bonus */
      while (len>0) {
! 	amt=write(cs_sockets[cnum].fd, pos, len);
  	if (amt < 0) { /* We got an error */
  	    if (errno != EWOULDBLOCK) {
  		LOG(llevError,"New socket %d write failed (%d: %s).\n", cnum,
--- 120,158 ----
   */
  static void Write_To_Socket(int cnum, unsigned char *buf, int len)
  {
!     int amt=0, r=0;
      unsigned char *pos=buf;
+     fd_set tmpwrite;
  
  /*    fprintf(stderr,"Trying to write %d bytes to socket %d\n", len, cnum);*/
      if (cs_sockets[cnum].status == Ns_Dead || !buf)
  	return;
      /* If we manage to write more than we wanted, take it as a bonus */
      while (len>0) {
!         /* call first select to see if we can write to the socket...
!            we have to do this before calling write, otherwise we run
!            into trouble if socket output buffer fills (this is the
! 	   case in Linux, at least) causing weird errors (core dumps) on
! 	   client...  -- Timo <tjko@iki.fi> 28-Sep-98 */
!  	FD_ZERO(&tmpwrite);
! 	FD_SET(cs_sockets[cnum].fd,&tmpwrite);
! 	/* initialize timeout always since after call to select() we cannot
! 	   trust it anymore... */
! 	timeout.tv_sec = 0;
! 	timeout.tv_usec = 0;
! 	r=select(cs_sockets[cnum].fd+1,NULL,&tmpwrite,NULL,&timeout);
! 	if (r<0) {
! 	  LOG(llevError,"New socket %d select() failed (%d: %s).\n", cnum,
! 	      errno, strerror_local(errno));
! 	  cs_sockets[cnum].status=Ns_Dead;
! 	  return;
! 	}
! 	else if (r==0) continue;
! 
!         do {
! 	  amt=write(cs_sockets[cnum].fd, pos, len);
! 	} while ((amt<0) && (errno==EINTR));
! 
  	if (amt < 0) { /* We got an error */
  	    if (errno != EWOULDBLOCK) {
  		LOG(llevError,"New socket %d write failed (%d: %s).\n", cnum,
***************
*** 136,151 ****
  		cs_sockets[cnum].status=Ns_Dead;
  		return;
  	    }
! 	    else { /* It would block - run another select (0 timeout should
! 		    * should already be set.)
! 		    */
! 		fd_set tmpwrite;
! 		FD_ZERO(&tmpwrite);
! 		FD_SET(cs_sockets[cnum].fd,&tmpwrite);
! 		select(cs_sockets[cnum].fd,NULL,&tmpwrite,NULL,&timeout);
! 		/* So len & pos processing below are correct */
! 		amt=0;
! 	    }
  	}
  	/* amt gets set to 0 above in blocking code, so we do this as
  	 * an else if to make sure we don't reprocess it.
--- 160,168 ----
  		cs_sockets[cnum].status=Ns_Dead;
  		return;
  	    }
! 
! 	    /* So len & pos processing below are correct */
! 	    amt=0;
  	}
  	/* amt gets set to 0 above in blocking code, so we do this as
  	 * an else if to make sure we don't reprocess it.
***************
*** 1065,1071 ****
  	FD_SET(cs_sockets[i].fd, &tmp_read);
  	FD_SET(cs_sockets[i].fd, &tmp_exceptions);
      }
!     pollret= select(max_filedescriptors, &tmp_read, NULL, &tmp_exceptions, &timeout);
      if (pollret==-1) {
  	perror("doeric_serover: error on select");
  	LOG(llevError,"doeric_server: error on select\n");
--- 1082,1094 ----
  	FD_SET(cs_sockets[i].fd, &tmp_read);
  	FD_SET(cs_sockets[i].fd, &tmp_exceptions);
      }
! 
!     /* initialize timeout always since after call to select() we cannot
!        trust it anymore... */
!     timeout.tv_sec = 0;
!     timeout.tv_usec = 0;
!     pollret= select(max_filedescriptors, &tmp_read, NULL, &tmp_exceptions, 
! 		    &timeout);
      if (pollret==-1) {
  	perror("doeric_serover: error on select");
  	LOG(llevError,"doeric_server: error on select\n");
*** crossfire-0.94.3/server/newsocket.c	Sat Aug  1 13:23:56 1998
--- crossfire-0.94.3-timo/server/newsocket.c	Sat Sep 26 18:34:23 1998
***************
*** 112,118 ****
  
      /* We already have a partial packet */
      if (sl->len<2) {
! 	stat=read(fd, sl->buf + sl->len, 2-sl->len);
  	if (stat<0) {
  	    /* In non blocking mode, EAGAIN is set when there is no
  	     * data available.
--- 112,120 ----
  
      /* We already have a partial packet */
      if (sl->len<2) {
!         do {
! 	  stat=read(fd, sl->buf + sl->len, 2-sl->len);
! 	} while ((stat<0) && (errno==EINTR));
  	if (stat<0) {
  	    /* In non blocking mode, EAGAIN is set when there is no
  	     * data available.
***************
*** 142,148 ****
  	return -1;
      }
      do {
! 	stat = read(fd, sl->buf+ sl->len, toread);
  	if (stat<0) {
  	    if (errno!=EAGAIN && errno!=EWOULDBLOCK) {
  		perror("ReadPacket got an error.");
--- 144,152 ----
  	return -1;
      }
      do {
!         do {
! 	  stat=read(fd, sl->buf+ sl->len, toread);
! 	} while ((stat<0) && (errno==EINTR));
  	if (stat<0) {
  	    if (errno!=EAGAIN && errno!=EWOULDBLOCK) {
  		perror("ReadPacket got an error.");
***************
*** 172,188 ****
   */
  static int write_socket(int fd, unsigned char *buf, int len)
  {
!     int amt=0;
      unsigned char *pos=buf;
  
      /* If we manage to write more than we wanted, take it as a bonus */
      while (len>0) {
! 	amt=write(fd, pos, len);
! 	if (amt < 0) { /* We got an error */
! 	    LOG(llevError,"New socket (fd=%d) write failed.\n", fd);
! 	    return -1;
  	}
! 	if (amt==0) {
  	    LOG(llevError,"Write_To_Socket: No data written out.\n");
  	}
  	len =- amt;
--- 176,216 ----
   */
  static int write_socket(int fd, unsigned char *buf, int len)
  {
!     int amt=0,r=0;
      unsigned char *pos=buf;
+     static fd_set tmpwrite;
+     static struct timeval timeout;
+ 
  
      /* If we manage to write more than we wanted, take it as a bonus */
      while (len>0) {
!         /* call first select to see if we can write to the socket... */
!  	FD_ZERO(&tmpwrite);
! 	FD_SET(fd,&tmpwrite);
! 	/* initialize timeout always since after call to select() we cannot
! 	   trust it anymore... */
! 	timeout.tv_sec = 0;
! 	timeout.tv_usec = 0;
! 	r=select(fd+1,NULL,&tmpwrite,NULL,&timeout);
!         if (r<0) {
! 	  LOG(llevError,"New socket (fd=%d) select() failed.\n", fd);
!           return -1;
  	}
!         if (r==0) continue;
! 
!         do {
! 	  amt=write(fd, pos, len);
! 	} while ((amt<0) && (errno==EINTR));
! 	if (amt < 0) { /* We got an error */
!            if (errno!=EWOULDBLOCK) {
!               LOG(llevError,"New socket (fd=%d) write failed.\n", fd);
! 	      return -1;
!            }
! 
! 	   /* So len & pos processing below are correct */
!            amt=0;
! 	} 
! 	else if (amt==0) {
  	    LOG(llevError,"Write_To_Socket: No data written out.\n");
  	}
  	len =- amt;
***************
*** 205,211 ****
      sbuf[0] = ((uint32)(msg.len) >> 8) & 0xFF;
      sbuf[1] = ((uint32)(msg.len)) & 0xFF;
  
!     write_socket(fd, sbuf, 2);
      return write_socket(fd, msg.buf, msg.len);
  }
  
--- 233,239 ----
      sbuf[0] = ((uint32)(msg.len) >> 8) & 0xFF;
      sbuf[1] = ((uint32)(msg.len)) & 0xFF;
  
!     if (write_socket(fd, sbuf, 2) < 0) return -1;
      return write_socket(fd, msg.buf, msg.len);
  }
  
*** crossfire-0.94.3/server/socket.c	Sat Aug  1 13:23:56 1998
--- crossfire-0.94.3-timo/server/socket.c	Sat Sep 26 16:12:32 1998
***************
*** 154,160 ****
    }
    FD_SET(acceptfd,&tmp_read);
  
!   pollret = select(max_filedescriptors,&tmp_read,NULL,&tmp_exceptions,&timeout);
    if (pollret == (-1)) {
      perror("select");
      return;
--- 154,165 ----
    }
    FD_SET(acceptfd,&tmp_read);
  
!   /* initialize timeout always since after call to select we cannot
!      trust it to anymore... */
!   timeout.tv_sec = 0;
!   timeout.tv_usec = 0;
!   pollret = select(max_filedescriptors,&tmp_read,NULL,&tmp_exceptions,
! 		   &timeout);
    if (pollret == (-1)) {
      perror("select");
      return;
***************
*** 182,188 ****
        int readct;
        char buf[SOCKET_BUFLEN+1];
        /* Have input */
!       readct = read(active_socket->fd,buf,SOCKET_BUFLEN);
        if(readct == (-1)) {
          perror("read");
          continue;
--- 187,195 ----
        int readct;
        char buf[SOCKET_BUFLEN+1];
        /* Have input */
!       do {
! 	readct = read(active_socket->fd,buf,SOCKET_BUFLEN);
!       } while ((readct<0) && (errno==EINTR));
        if(readct == (-1)) {
          perror("read");
          continue;
***************
*** 310,316 ****
    str = buf;
  
    while ((len = strlen(str))) {
!     i=write(fd,str,len);
      if(i < 0) {
        perror("draw_socket");
        break;
--- 317,325 ----
    str = buf;
  
    while ((len = strlen(str))) {
!     do {
!       i=write(fd,str,len);
!     } while ((i<0) && (errno==EINTR));
      if(i < 0) {
        perror("draw_socket");
        break;