diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index ed1440ea4c91..d2c390eff1b2 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -269,6 +269,8 @@ u32 ethtool_op_get_tso(struct net_device *dev); int ethtool_op_set_tso(struct net_device *dev, u32 data); int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data); +u32 ethtool_op_get_ufo(struct net_device *dev); +int ethtool_op_set_ufo(struct net_device *dev, u32 data); /** * ðtool_ops - Alter and report network device settings @@ -298,6 +300,8 @@ int ethtool_op_get_perm_addr(struct net_device *dev, * set_sg: Turn scatter-gather on or off * get_tso: Report whether TCP segmentation offload is enabled * set_tso: Turn TCP segmentation offload on or off + * get_ufo: Report whether UDP fragmentation offload is enabled + * set_ufo: Turn UDP fragmentation offload on or off * self_test: Run specified self-tests * get_strings: Return a set of strings that describe the requested objects * phys_id: Identify the device @@ -364,6 +368,8 @@ struct ethtool_ops { int (*get_perm_addr)(struct net_device *, struct ethtool_perm_addr *, u8 *); int (*begin)(struct net_device *); void (*complete)(struct net_device *); + u32 (*get_ufo)(struct net_device *); + int (*set_ufo)(struct net_device *, u32); }; /* CMDs currently supported */ @@ -400,6 +406,8 @@ struct ethtool_ops { #define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ #define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ #define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */ +#define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */ +#define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a9281b24c40b..c6efce4a04a4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -308,6 +308,7 @@ struct net_device #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ #define NETIF_F_LLTX 4096 /* LockLess TX */ +#define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/ struct net_device *next_sched; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b756935da9c8..4286d832166f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -137,6 +137,8 @@ struct skb_shared_info { unsigned int nr_frags; unsigned short tso_size; unsigned short tso_segs; + unsigned short ufo_size; + unsigned int ip6_frag_id; struct sk_buff *frag_list; skb_frag_t frags[MAX_SKB_FRAGS]; }; @@ -341,6 +343,11 @@ extern void skb_over_panic(struct sk_buff *skb, int len, extern void skb_under_panic(struct sk_buff *skb, int len, void *here); +extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, + int getfrag(void *from, char *to, int offset, + int len,int odd, struct sk_buff *skb), + void *from, int length); + struct skb_seq_state { __u32 lower_offset; diff --git a/net/core/dev.c b/net/core/dev.c index a44eeef24edf..8d1541595277 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2717,6 +2717,20 @@ int register_netdevice(struct net_device *dev) dev->name); dev->features &= ~NETIF_F_TSO; } + if (dev->features & NETIF_F_UFO) { + if (!(dev->features & NETIF_F_HW_CSUM)) { + printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " + "NETIF_F_HW_CSUM feature.\n", + dev->name); + dev->features &= ~NETIF_F_UFO; + } + if (!(dev->features & NETIF_F_SG)) { + printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " + "NETIF_F_SG feature.\n", + dev->name); + dev->features &= ~NETIF_F_UFO; + } + } /* * nil rebuild_header routine, diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 404b761e82ce..0350586e9195 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -93,6 +93,20 @@ int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *a } +u32 ethtool_op_get_ufo(struct net_device *dev) +{ + return (dev->features & NETIF_F_UFO) != 0; +} + +int ethtool_op_set_ufo(struct net_device *dev, u32 data) +{ + if (data) + dev->features |= NETIF_F_UFO; + else + dev->features &= ~NETIF_F_UFO; + return 0; +} + /* Handlers for each ethtool command */ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) @@ -483,6 +497,11 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) return err; } + if (!data && dev->ethtool_ops->set_ufo) { + err = dev->ethtool_ops->set_ufo(dev, 0); + if (err) + return err; + } return dev->ethtool_ops->set_sg(dev, data); } @@ -569,6 +588,32 @@ static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tso(dev, edata.data); } +static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GTSO }; + + if (!dev->ethtool_ops->get_ufo) + return -EOPNOTSUPP; + edata.data = dev->ethtool_ops->get_ufo(dev); + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} +static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata; + + if (!dev->ethtool_ops->set_ufo) + return -EOPNOTSUPP; + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + if (edata.data && !(dev->features & NETIF_F_SG)) + return -EINVAL; + if (edata.data && !(dev->features & NETIF_F_HW_CSUM)) + return -EINVAL; + return dev->ethtool_ops->set_ufo(dev, edata.data); +} + static int ethtool_self_test(struct net_device *dev, char __user *useraddr) { struct ethtool_test test; @@ -854,6 +899,12 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_GPERMADDR: rc = ethtool_get_perm_addr(dev, useraddr); break; + case ETHTOOL_GUFO: + rc = ethtool_get_ufo(dev, useraddr); + break; + case ETHTOOL_SUFO: + rc = ethtool_set_ufo(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } @@ -882,3 +933,5 @@ EXPORT_SYMBOL(ethtool_op_set_sg); EXPORT_SYMBOL(ethtool_op_set_tso); EXPORT_SYMBOL(ethtool_op_set_tx_csum); EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); +EXPORT_SYMBOL(ethtool_op_set_ufo); +EXPORT_SYMBOL(ethtool_op_get_ufo); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ef9d46b91eb9..95501e40100e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -176,6 +176,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb_shinfo(skb)->tso_size = 0; skb_shinfo(skb)->tso_segs = 0; skb_shinfo(skb)->frag_list = NULL; + skb_shinfo(skb)->ufo_size = 0; + skb_shinfo(skb)->ip6_frag_id = 0; out: return skb; nodata: @@ -1696,6 +1698,78 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, return textsearch_find(config, state); } +/** + * skb_append_datato_frags: - append the user data to a skb + * @sk: sock structure + * @skb: skb structure to be appened with user data. + * @getfrag: call back function to be used for getting the user data + * @from: pointer to user message iov + * @length: length of the iov message + * + * Description: This procedure append the user data in the fragment part + * of the skb if any page alloc fails user this procedure returns -ENOMEM + */ +int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length) +{ + int frg_cnt = 0; + skb_frag_t *frag = NULL; + struct page *page = NULL; + int copy, left; + int offset = 0; + int ret; + + do { + /* Return error if we don't have space for new frag */ + frg_cnt = skb_shinfo(skb)->nr_frags; + if (frg_cnt >= MAX_SKB_FRAGS) + return -EFAULT; + + /* allocate a new page for next frag */ + page = alloc_pages(sk->sk_allocation, 0); + + /* If alloc_page fails just return failure and caller will + * free previous allocated pages by doing kfree_skb() + */ + if (page == NULL) + return -ENOMEM; + + /* initialize the next frag */ + sk->sk_sndmsg_page = page; + sk->sk_sndmsg_off = 0; + skb_fill_page_desc(skb, frg_cnt, page, 0, 0); + skb->truesize += PAGE_SIZE; + atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); + + /* get the new initialized frag */ + frg_cnt = skb_shinfo(skb)->nr_frags; + frag = &skb_shinfo(skb)->frags[frg_cnt - 1]; + + /* copy the user data to page */ + left = PAGE_SIZE - frag->page_offset; + copy = (length > left)? left : length; + + ret = getfrag(from, (page_address(frag->page) + + frag->page_offset + frag->size), + offset, copy, 0, skb); + if (ret < 0) + return -EFAULT; + + /* copy was successful so update the size parameters */ + sk->sk_sndmsg_off += copy; + frag->size += copy; + skb->len += copy; + skb->data_len += copy; + offset += copy; + length -= copy; + + } while (length > 0); + + return 0; +} + void __init skb_init(void) { skbuff_head_cache = kmem_cache_create("skbuff_head_cache", @@ -1747,3 +1821,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read); EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); +EXPORT_SYMBOL(skb_append_datato_frags); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 87e350069abb..17758234a3e3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -275,7 +275,8 @@ int ip_output(struct sk_buff *skb) { IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); - if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->tso_size) + if (skb->len > dst_mtu(skb->dst) && + !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) return ip_fragment(skb, ip_finish_output); else return ip_finish_output(skb); @@ -688,6 +689,60 @@ csum_page(struct page *page, int offset, int copy) return csum; } +inline int ip_ufo_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int hh_len, int fragheaderlen, + int transhdrlen, int mtu,unsigned int flags) +{ + struct sk_buff *skb; + int err; + + /* There is support for UDP fragmentation offload by network + * device, so create one single skb packet containing complete + * udp datagram + */ + if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + skb = sock_alloc_send_skb(sk, + hh_len + fragheaderlen + transhdrlen + 20, + (flags & MSG_DONTWAIT), &err); + + if (skb == NULL) + return err; + + /* reserve space for Hardware header */ + skb_reserve(skb, hh_len); + + /* create space for UDP/IP header */ + skb_put(skb,fragheaderlen + transhdrlen); + + /* initialize network header pointer */ + skb->nh.raw = skb->data; + + /* initialize protocol header pointer */ + skb->h.raw = skb->data + fragheaderlen; + + skb->ip_summed = CHECKSUM_HW; + skb->csum = 0; + sk->sk_sndmsg_off = 0; + } + + err = skb_append_datato_frags(sk,skb, getfrag, from, + (length - transhdrlen)); + if (!err) { + /* specify the length of each IP datagram fragment*/ + skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); + __skb_queue_tail(&sk->sk_write_queue, skb); + + return 0; + } + /* There is not enough support do UFO , + * so follow normal path + */ + kfree_skb(skb); + return err; +} + /* * ip_append_data() and ip_append_page() can make one large IP datagram * from many pieces of data. Each pieces will be holded on the socket @@ -777,6 +832,15 @@ int ip_append_data(struct sock *sk, csummode = CHECKSUM_HW; inet->cork.length += length; + if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && + (rt->u.dst.dev->features & NETIF_F_UFO)) { + + if(ip_ufo_append_data(sk, getfrag, from, length, hh_len, + fragheaderlen, transhdrlen, mtu, flags)) + goto error; + + return 0; + } /* So, what's going on in the loop below? * @@ -1008,14 +1072,23 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, return -EINVAL; inet->cork.length += size; + if ((sk->sk_protocol == IPPROTO_UDP) && + (rt->u.dst.dev->features & NETIF_F_UFO)) + skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); + while (size > 0) { int i; - /* Check if the remaining data fits into current packet. */ - len = mtu - skb->len; - if (len < size) - len = maxfraglen - skb->len; + if (skb_shinfo(skb)->ufo_size) + len = size; + else { + + /* Check if the remaining data fits into current packet. */ + len = mtu - skb->len; + if (len < size) + len = maxfraglen - skb->len; + } if (len <= 0) { struct sk_buff *skb_prev; char *data; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 563b442ffab8..614296a920c6 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -147,7 +147,8 @@ static int ip6_output2(struct sk_buff *skb) int ip6_output(struct sk_buff *skb) { - if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst)) + if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) || + dst_allfrag(skb->dst)) return ip6_fragment(skb, ip6_output2); else return ip6_output2(skb); @@ -768,6 +769,65 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) *dst = NULL; return err; } +inline int ip6_ufo_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int hh_len, int fragheaderlen, + int transhdrlen, int mtu,unsigned int flags) + +{ + struct sk_buff *skb; + int err; + + /* There is support for UDP large send offload by network + * device, so create one single skb packet containing complete + * udp datagram + */ + if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + skb = sock_alloc_send_skb(sk, + hh_len + fragheaderlen + transhdrlen + 20, + (flags & MSG_DONTWAIT), &err); + if (skb == NULL) + return -ENOMEM; + + /* reserve space for Hardware header */ + skb_reserve(skb, hh_len); + + /* create space for UDP/IP header */ + skb_put(skb,fragheaderlen + transhdrlen); + + /* initialize network header pointer */ + skb->nh.raw = skb->data; + + /* initialize protocol header pointer */ + skb->h.raw = skb->data + fragheaderlen; + + skb->ip_summed = CHECKSUM_HW; + skb->csum = 0; + sk->sk_sndmsg_off = 0; + } + + err = skb_append_datato_frags(sk,skb, getfrag, from, + (length - transhdrlen)); + if (!err) { + struct frag_hdr fhdr; + + /* specify the length of each IP datagram fragment*/ + skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) - + sizeof(struct frag_hdr); + ipv6_select_ident(skb, &fhdr); + skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + __skb_queue_tail(&sk->sk_write_queue, skb); + + return 0; + } + /* There is not enough support do UPD LSO, + * so follow normal path + */ + kfree_skb(skb); + + return err; +} int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), @@ -860,6 +920,15 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, */ inet->cork.length += length; + if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && + (rt->u.dst.dev->features & NETIF_F_UFO)) { + + if(ip6_ufo_append_data(sk, getfrag, from, length, hh_len, + fragheaderlen, transhdrlen, mtu, flags)) + goto error; + + return 0; + } if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) goto alloc_new_skb;