Re: Packet mmap: TX RING and zero copy

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Johann Baudy
Date: Wednesday, September 3, 2008 - 3:38 am

Hi Evgeniy,

I'm not able to exceed 15 MB/s, even with the vmsplice/splice duo.

Due to some issues:
- I didn't manage to adjust the size of the packets sent over the
network (it seems to be aligned to the page size), and the maximum
packet size seems to be the page size (4096).
- I need approximately two system calls (vmsplice and splice) for at
most ~4096*8 bytes, which may be a limit of the pipe.
- I'm still going through packet_sendmsg() (packet socket) which
allocates a sk_buff and copies all data inside.

As a reference, with my "patch": I need to send more than 32 packets of
7200 bytes (PC network card limit) in one system call (send()) and
without an sk_buff data copy (to reach 85 MB/s).

Please find below my test program for vmsplice/splice:

Best regards,
Johann

/* Must be defined before the first #include, otherwise <fcntl.h> does
 * not declare splice() and vmsplice(). */
#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <poll.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <sys/ioctl.h>

#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>


/* Fallback values for the splice flags, used only when the libc headers
 * do not provide them. */
#ifndef SPLICE_F_MOVE
#define SPLICE_F_MOVE           0x01
#endif
#ifndef SPLICE_F_NONBLOCK
#define SPLICE_F_NONBLOCK       0x02
#endif
#ifndef SPLICE_F_MORE
#define SPLICE_F_MORE           0x04
#endif
#ifndef SPLICE_F_GIFT
#define SPLICE_F_GIFT           0x08
#endif

/* Tunables of the test. */
enum
{
	MTU_BYTES        = 7200,	/* MTU requested for the interface */
	BUFFER_BYTES     = 8000,	/* size of the user buffer */
	CHUNK_BYTES      = 4096,	/* bytes handed to each vmsplice() call */
	CHUNKS_PER_BURST = 7,		/* chunks queued before the pipe is drained */
	BURST_COUNT      = 500000	/* number of queue/drain rounds */
};

/*
 * vmsplice/splice test on a packet socket.
 *
 * Opens a raw PF_PACKET socket, sets the MTU of "eth0" to MTU_BYTES and
 * binds the socket to that interface.  Then, BURST_COUNT times, it
 * queues at least CHUNKS_PER_BURST * CHUNK_BYTES bytes of a user buffer
 * into a pipe with vmsplice() and drains the pipe into the socket with
 * splice().
 *
 * Returns EXIT_SUCCESS when every burst went through, EXIT_FAILURE as
 * soon as one call fails (the failing call is reported on stderr).
 */
int main (void)
{
	struct sockaddr_ll my_addr;
	struct ifreq s_ifr;
	struct iovec iov;
	char buffer[BUFFER_BYTES];
	int filedes[2] = { -1, -1 };
	int fd_socket;
	int i_ifindex;
	int i;
	int rc = EXIT_FAILURE;
	size_t k;
	size_t to_write;
	ssize_t ret;

	fd_socket = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd_socket == -1)
	{
		perror("socket");
		return EXIT_FAILURE;
	}

	/* start socket config: device and mtu */

	/* zero the whole request so the name is always NUL-terminated and
	 * no uninitialised bytes are handed to the kernel */
	memset(&s_ifr, 0, sizeof s_ifr);
	snprintf(s_ifr.ifr_name, sizeof s_ifr.ifr_name, "%s", "eth0");

	/* look up the interface index */
	if (ioctl(fd_socket, SIOCGIFINDEX, &s_ifr) == -1)
	{
		perror("ioctl(SIOCGIFINDEX)");
		goto out;
	}
	/* save the index before the request structure is reused for the MTU */
	i_ifindex = s_ifr.ifr_ifindex;

	/* update the mtu through ioctl */
	s_ifr.ifr_mtu = MTU_BYTES;
	if (ioctl(fd_socket, SIOCSIFMTU, &s_ifr) == -1)
	{
		perror("ioctl(SIOCSIFMTU)");
		goto out;
	}

	/* set sockaddr info; sll_protocol is given in network byte order,
	 * like the protocol argument of socket() above */
	memset(&my_addr, 0, sizeof my_addr);
	my_addr.sll_family = AF_PACKET;
	my_addr.sll_protocol = htons(ETH_P_ALL);
	my_addr.sll_ifindex = i_ifindex;

	/* bind the socket to the interface */
	if (bind(fd_socket, (struct sockaddr *)&my_addr, sizeof my_addr) == -1)
	{
		perror("bind");
		goto out;
	}

	if (pipe(filedes) == -1)
	{
		perror("pipe");
		goto out;
	}

	iov.iov_base = buffer;
	iov.iov_len = CHUNK_BYTES;

	printf("fd = %d %d %d %p\n", fd_socket, filedes[0], filedes[1], iov.iov_base);

	/* fill the payload with a repeating byte pattern */
	for (k = 0; k < sizeof buffer; k++)
	{
		buffer[k] = (char) k;
	}

	for (i = 0; i < BURST_COUNT; i++)
	{
		/* queue the same chunk into the pipe until a full burst is pending */
		to_write = 0;
		while (to_write < (size_t) CHUNK_BYTES * CHUNKS_PER_BURST)
		{
			ret = vmsplice(filedes[1], &iov, 1, SPLICE_F_MOVE | SPLICE_F_MORE);
			if (ret < 0)
			{
				perror("vmsplice");
				goto out;
			}
			if (ret == 0)
			{
				/* no progress: stop instead of spinning forever */
				fprintf(stderr, "vmsplice: no data queued\n");
				goto out;
			}
			to_write += (size_t) ret;
		}

		/* drain everything that was queued into the socket */
		while (to_write > 0)
		{
			ret = splice(filedes[0], NULL, fd_socket, NULL, to_write,
				     SPLICE_F_MOVE | SPLICE_F_MORE);
			if (ret < 0)
			{
				perror("splice");
				goto out;
			}
			if (ret == 0)
			{
				/* no progress: stop instead of spinning forever */
				fprintf(stderr, "splice: no data moved\n");
				goto out;
			}
			to_write -= (size_t) ret;
		}
	}

	rc = EXIT_SUCCESS;

out:
	if (filedes[0] != -1)
	{
		close(filedes[0]);
	}
	if (filedes[1] != -1)
	{
		close(filedes[1]);
	}
	close(fd_socket);

	return rc;
}

On Wed, Sep 3, 2008 at 9:56 AM, Johann Baudy <johaahn@gmail.com> wrote:



-- 
Johann Baudy
johaahn@gmail.com
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
Packet mmap: TX RING and zero copy, Johann Baudy, (Tue Sep 2, 11:27 am)
Re: Packet mmap: TX RING and zero copy, Evgeniy Polyakov, (Tue Sep 2, 12:46 pm)
Re: Packet mmap: TX RING and zero copy, Johann Baudy, (Wed Sep 3, 12:56 am)
Re: Packet mmap: TX RING and zero copy, Johann Baudy, (Wed Sep 3, 3:38 am)
Re: Packet mmap: TX RING and zero copy, David Miller, (Wed Sep 3, 4:06 am)
Re: Packet mmap: TX RING and zero copy, Johann Baudy, (Wed Sep 3, 6:05 am)
Re: Packet mmap: TX RING and zero copy, Evgeniy Polyakov, (Wed Sep 3, 6:27 am)
Re: Packet mmap: TX RING and zero copy, Christoph Lameter, (Wed Sep 3, 7:57 am)
Re: Packet mmap: TX RING and zero copy, Johann Baudy, (Wed Sep 3, 8:00 am)
Re: Packet mmap: TX RING and zero copy, Evgeniy Polyakov, (Wed Sep 3, 8:13 am)
Re: Packet mmap: TX RING and zero copy, Johann Baudy, (Wed Sep 3, 8:58 am)
Re: Packet mmap: TX RING and zero copy, Evgeniy Polyakov, (Wed Sep 3, 9:43 am)
Re: Packet mmap: TX RING and zero copy, Johann Baudy, (Wed Sep 3, 1:30 pm)
Re: Packet mmap: TX RING and zero copy, Evgeniy Polyakov, (Wed Sep 3, 3:03 pm)
Re: Packet mmap: TX RING and zero copy, Johann Baudy, (Thu Sep 4, 7:44 am)
Re: Packet mmap: TX RING and zero copy, Evgeniy Polyakov, (Fri Sep 5, 12:17 am)
Re: Packet mmap: TX RING and zero copy, Robert Iakobashvili, (Fri Sep 5, 3:28 am)
Re: Packet mmap: TX RING and zero copy, Johann Baudy, (Fri Sep 5, 6:06 am)