diff options
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r-- | net/ipv4/ip_output.c | 289 |
1 files changed, 103 insertions, 186 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 63fbbfe1e..69179738e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -81,46 +81,24 @@ int sysctl_ip_dynaddr = 0; int ip_id_count = 0; -int ip_build_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr, - struct ip_options *opt) +/* Generate a checksum for an outgoing IP datagram. */ +__inline__ void ip_send_check(struct iphdr *iph) { - struct rtable *rt; - u32 final_daddr = daddr; + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); +} + +void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, + u32 saddr, u32 daddr, struct ip_options *opt) +{ + struct rtable *rt = (struct rtable *)skb->dst; struct iphdr *iph; - int err; - if (opt && opt->srr) - daddr = opt->faddr; - - err = ip_route_output(&rt, daddr, saddr, RT_TOS(sk->ip_tos) | - RTO_CONN | sk->localroute, sk->bound_dev_if); - if (err) - { - ip_statistics.IpOutNoRoutes++; - return err; - } - - if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { - ip_rt_put(rt); - ip_statistics.IpOutNoRoutes++; - return -ENETUNREACH; - } - - skb->dst = dst_clone(&rt->u.dst); - skb_reserve(skb, (rt->u.dst.dev->hard_header_len+15)&~15); - - /* - * Now build the IP header. - */ - - /* - * Build the IP addresses - */ - + /* Build the IP header. */ if (opt) - iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen); + iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen); else - iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr)); + iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr)); iph->version = 4; iph->ihl = 5; @@ -133,92 +111,19 @@ int ip_build_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr, iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; iph->protocol = sk->protocol; + iph->tot_len = htons(skb->len); + iph->id = htons(ip_id_count++); skb->nh.iph = iph; - skb->h.raw = (unsigned char*)(iph+1); - if (opt && opt->optlen) - { + if (opt && opt->optlen) { iph->ihl += opt->optlen>>2; - skb->h.raw += opt->optlen; - ip_options_build(skb, opt, final_daddr, rt, 0); + ip_options_build(skb, opt, daddr, rt, 0); } - - ip_rt_put(rt); - return 0; -} -/* - * This routine builds the appropriate hardware/IP headers for - * the routine. - */ -int ip_build_header(struct sk_buff *skb, struct sock *sk) -{ - struct rtable *rt; - struct ip_options *opt = sk->opt; - u32 daddr = sk->daddr; - u32 final_daddr = daddr; - struct iphdr *iph; - int err; - - if (opt && opt->srr) - daddr = opt->faddr; - - rt = (struct rtable*)sk->dst_cache; - - if (!rt || rt->u.dst.obsolete) { - sk->dst_cache = NULL; - ip_rt_put(rt); - err = ip_route_output(&rt, daddr, sk->saddr, RT_TOS(sk->ip_tos) | - RTO_CONN | sk->localroute, sk->bound_dev_if); - if (err) - return err; - sk->dst_cache = &rt->u.dst; - } - - if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { - sk->dst_cache = NULL; - ip_rt_put(rt); - ip_statistics.IpOutNoRoutes++; - return -ENETUNREACH; - } - - skb->dst = dst_clone(sk->dst_cache); - skb_reserve(skb, MAX_HEADER); - - /* - * Now build the IP header. - */ - - /* - * Build the IP addresses - */ - - if (opt) - iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen); - else - iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr)); - - iph->version = 4; - iph->ihl = 5; - iph->tos = sk->ip_tos; - iph->frag_off = 0; - if (sk->ip_pmtudisc == IP_PMTUDISC_WANT && - !(rt->u.dst.mxlock&(1<<RTAX_MTU))) - iph->frag_off |= htons(IP_DF); - iph->ttl = sk->ip_ttl; - iph->daddr = rt->rt_dst; - iph->saddr = rt->rt_src; - iph->protocol = sk->protocol; - skb->nh.iph = iph; - skb->h.raw = (unsigned char*)(iph+1); - - if (!opt || !opt->optlen) - return 0; - iph->ihl += opt->optlen>>2; - skb->h.raw += opt->optlen; - ip_options_build(skb, opt, final_daddr, rt, 0); + ip_send_check(iph); - return 0; + /* Send it out. */ + skb->dst->output(skb); } int __ip_finish_output(struct sk_buff *skb) @@ -322,78 +227,101 @@ int ip_acct_output(struct sk_buff *skb) } #endif -/* - * Generate a checksum for an outgoing IP datagram. - */ - -void ip_send_check(struct iphdr *iph) -{ - iph->check = 0; - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); -} - - - -/* - * Queues a packet to be sent, and starts the transmitter if necessary. +/* Queues a packet to be sent, and starts the transmitter if necessary. * This routine also needs to put in the total length and compute the - * checksum + * checksum. We use to do this in two stages, ip_build_header() then + * this, but that scheme created a mess when routes disappeared etc. + * So we do it all here, and the TCP send engine has been changed to + * match. (No more unroutable FIN disasters, etc. wheee...) This will + * most likely make other reliable transport layers above IP easier + * to implement under Linux. */ - void ip_queue_xmit(struct sk_buff *skb) { struct sock *sk = skb->sk; - struct rtable *rt = (struct rtable*)skb->dst; + struct ip_options *opt = sk->opt; + struct rtable *rt; struct device *dev; + struct iphdr *iph; unsigned int tot_len; - struct iphdr *iph = skb->nh.iph; - tot_len = skb->len; - iph->tot_len = htons(tot_len); - iph->id = htons(ip_id_count++); + /* Make sure we can route this packet. */ + rt = (struct rtable *) sk->dst_cache; + if(rt == NULL || rt->u.dst.obsolete) { + u32 daddr; - if (rt->u.dst.obsolete) { - /* Ugly... ugly... but what can I do? - Essentially it is "ip_reroute_output" function. --ANK - */ - struct rtable *nrt; - if (ip_route_output(&nrt, rt->key.dst, rt->key.src, - rt->key.tos | RTO_CONN, - sk?sk->bound_dev_if:0)) - goto drop; - skb->dst = &nrt->u.dst; + sk->dst_cache = NULL; ip_rt_put(rt); - rt = nrt; + + /* Use correct destination address if we have options. */ + daddr = sk->daddr; + if(opt && opt->srr) + daddr = opt->faddr; + + /* If this fails, retransmit mechanism of transport layer will + * keep trying until route appears or the connection times itself + * out. + */ + if(ip_route_output(&rt, daddr, sk->saddr, + RT_TOS(sk->ip_tos) | RTO_CONN | sk->localroute, + sk->bound_dev_if)) + goto drop; + sk->dst_cache = &rt->u.dst; + } + if(opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) + goto no_route; + + /* We have a route, so grab a reference. */ + skb->dst = dst_clone(sk->dst_cache); + + /* OK, we know where to send it, allocate and build IP header. */ + iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); + iph->version = 4; + iph->ihl = 5; + iph->tos = sk->ip_tos; + iph->frag_off = 0; + if(sk->ip_pmtudisc == IP_PMTUDISC_WANT && !(rt->u.dst.mxlock & (1 << RTAX_MTU))) + iph->frag_off |= __constant_htons(IP_DF); + iph->ttl = sk->ip_ttl; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; + iph->protocol = sk->protocol; + skb->nh.iph = iph; + /* Transport layer set skb->h.foo itself. */ + + if(opt && opt->optlen) { + iph->ihl += opt->optlen >> 2; + ip_options_build(skb, opt, sk->daddr, rt, 0); } + tot_len = skb->len; + iph->tot_len = htons(tot_len); + iph->id = htons(ip_id_count++); + dev = rt->u.dst.dev; - if (call_out_firewall(PF_INET, dev, iph, NULL,&skb) < FW_ACCEPT) + if (call_out_firewall(PF_INET, dev, iph, NULL, &skb) < FW_ACCEPT) goto drop; #ifdef CONFIG_NET_SECURITY - /* - * Add an IP checksum (must do this before SECurity because - * of possible tunneling) + /* Add an IP checksum (must do this before SECurity because + * of possible tunneling). */ - ip_send_check(iph); - - if (call_out_firewall(PF_SECURITY, NULL, NULL, (void *) 4, &skb)<FW_ACCEPT) + if (call_out_firewall(PF_SECURITY, NULL, NULL, (void *) 4, &skb) < FW_ACCEPT) goto drop; - iph = skb->nh.iph; - /* don't update tot_len, as the dev->mtu is already decreased */ + /* Don't update tot_len, as the dev->mtu is already decreased. */ #endif - + /* This can happen when the transport layer has segments queued + * with a cached route, and by the time we get here things are + * re-routed to a device with a different MTU than the original + * device. Sick, but we must cover it. + */ if (skb_headroom(skb) < dev->hard_header_len && dev->hard_header) { struct sk_buff *skb2; - /* ANK: It is almost impossible, but - * if you loaded module device with hh_len > MAX_HEADER, - * and if a route changed to this device, - * and if (uh...) TCP had segments queued on this route... - */ - skb2 = skb_realloc_headroom(skb, (dev->hard_header_len+15)&~15); + + skb2 = skb_realloc_headroom(skb, (dev->hard_header_len + 15) & ~15); kfree_skb(skb); if (skb2 == NULL) return; @@ -401,40 +329,35 @@ void ip_queue_xmit(struct sk_buff *skb) iph = skb->nh.iph; } - /* - * Do we need to fragment. Again this is inefficient. - * We need to somehow lock the original buffer and use - * bits of it. + /* Do we need to fragment. Again this is inefficient. We + * need to somehow lock the original buffer and use bits of it. */ - if (tot_len > rt->u.dst.pmtu) goto fragment; #ifndef CONFIG_NET_SECURITY - /* - * Add an IP checksum - */ - + /* Add an IP checksum. */ ip_send_check(iph); #endif - - if (sk) - skb->priority = sk->priority; + skb->priority = sk->priority; skb->dst->output(skb); return; fragment: - if ((iph->frag_off & htons(IP_DF))) - { + if ((iph->frag_off & htons(IP_DF)) != 0) { printk(KERN_DEBUG "sending pkt_too_big to self\n"); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(rt->u.dst.pmtu)); goto drop; } - ip_fragment(skb, skb->dst->output); return; +no_route: + sk->dst_cache = NULL; + ip_rt_put(rt); + ip_statistics.IpOutNoRoutes++; + /* Fall through... */ drop: kfree_skb(skb); } @@ -948,14 +871,7 @@ struct sk_buff * ip_reply(struct sk_buff *skb, int payload) reply->dst = &rt->u.dst; skb_reserve(reply, (rt->u.dst.dev->hard_header_len+15)&~15); - /* - * Now build the IP header. - */ - - /* - * Build the IP addresses - */ - + /* Now build the IP header. */ reply->nh.iph = iph = (struct iphdr *)skb_put(reply, iphlen); iph->version = 4; @@ -966,6 +882,7 @@ struct sk_buff * ip_reply(struct sk_buff *skb, int payload) iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; iph->protocol = skb->nh.iph->protocol; + iph->id = htons(ip_id_count++); ip_options_build(reply, &replyopts.opt, daddr, rt, 0); |