Hello!
I have done a little testing on my own. My result is that memcmp is
many times faster, even with aligned data.
I am testing in an ordinary console program. I am including the code below.
If I'm doing something wrong, please tell me so.
As you can see, I am not using the same data declarations as the kernel,
but I'm testing the algorithm here, not the data. By testing various
data and types of data, I can make sure the algorithm behaves correctly
in all situations.
The data member 'd' in flowi is not part of the comparison, but
changing it into an 'unsigned int' makes it part of the comparison.
/*
 * Word type used for the word-wise key comparison: flow_key_compare()
 * walks a key in units of this type, and flow_key_compare2() rounds its
 * byte count down to a multiple of its size.
 */
typedef unsigned flow_compare_t;

/*
 * Number of times main() calls each comparison routine in its benchmark
 * loops (0x7FFFFFFF).
 */
const int NUM_REP = 2147483647;
/*
 * Test key compared by flow_key_compare() and flow_key_compare2().
 *
 * Both routines compare only the whole flow_compare_t-sized words that
 * fit into sizeof(struct flowi); any remaining tail bytes are ignored.
 * NOTE(review): whether 'd' (and any padding that follows it) falls
 * inside the compared words depends on how the compiler pads this
 * struct -- verify on the target ABI.
 */
struct flowi {
    unsigned int a;
    unsigned int b;
    unsigned int c;
    unsigned char d;
};
/* I hear what you're saying, use memcmp. But memcmp cannot make
* important assumptions that we can here, such as alignment and
* constant size.
*/
static int flow_key_compare(struct flowi *key1, struct flowi *key2)
{
flow_compare_t *k1, *k1_lim, *k2;
const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
k1 = (flow_compare_t *) key1;
k1_lim = k1 + n_elem;
k2 = (flow_compare_t *) key2;
do {
if (*k1++ != *k2++)
return 1;
} while (k1 < k1_lim);
return 0;
}
/*
 * memcmp-based counterpart of flow_key_compare().
 *
 * Compares the same span of bytes: sizeof(struct flowi) rounded down to
 * a whole number of flow_compare_t words.
 *
 * Returns memcmp's result unchanged, i.e. zero when the compared bytes
 * are equal and a non-zero value (of either sign) otherwise -- not a
 * normalized 0/1.
 */
static int flow_key_compare2(struct flowi *key1, struct flowi *key2)
{
    return memcmp(key1, key2,
                  sizeof(struct flowi)
                  - (sizeof(struct flowi) % sizeof(flow_compare_t)));
}
/*
 * Round p down to the previous 16-byte boundary.
 *
 * Only the low four address bits are taken from the integer conversion;
 * the result is formed by pointer arithmetic on p itself. Converting
 * the whole pointer to 'int' and back (as an earlier version did)
 * truncates the address on targets where pointers are wider than int.
 */
static char *align_down_16(char *p)
{
    return p - ((unsigned long) p & 0xFUL);
}

/*
 * Benchmark driver: copies two keys to 16-byte-aligned positions inside
 * a local buffer, calls each comparison routine NUM_REP times, then
 * checks that both routines agree on equal/not-equal.
 *
 * Prints "compare1.." and "compare2.." before the respective loops and
 * finally "ok" or "error". Always returns 0.
 */
int main(void)
{
    struct flowi key1 = {0,1,2,3};
    struct flowi key2 = {0,1,2,0};
    char str[300];
    /*
     * Every result is stored here so the calls are not simply dead code
     * that an optimizing compiler may drop. NOTE(review): this does not
     * stop the compiler from hoisting the comparison out of the loop;
     * check the generated code before trusting the timings.
     */
    volatile int sink = 0;
    int agree;
    int i;

    /*
     * Put the data at aligned addresses. Rounding down moves each
     * pointer back by at most 15 bytes, so both keys stay inside str
     * and do not overlap.
     */
    struct flowi *k1 = (struct flowi *) align_down_16(&str[100]);
    struct flowi *k2 = (struct flowi *) align_down_16(&str[200]);

    memcpy(k1, &key1, sizeof(struct flowi));
    memcpy(k2, &key2, sizeof(struct flowi));

    /* Compare data */
    printf("compare1..\n");
    for (i = 0; i < NUM_REP; i++)
        sink = flow_key_compare(k1, k2);

    printf("compare2..\n");
    for (i = 0; i < NUM_REP; i++)
        sink = flow_key_compare2(k1, k2);

    (void) sink;

    /*
     * flow_key_compare() returns 0/1 while memcmp may return any
     * non-zero value, so normalize the latter before comparing.
     */
    agree = flow_key_compare(k1, k2) == (flow_key_compare2(k1, k2) ? 1 : 0);
    printf("%s", agree ? "ok\n" : "error\n");

    return 0;
}
2007/1/1, Daniel Marjamäki <daniel.marjamaki@gmail.com>:
-
| Christoph Lameter | Re: [RFC 00/15] x86_64: Optimize percpu accesses |
| Linus Torvalds | Re: [Patch v2] Make PCI extended config space (MMCONFIG) a driver opt-in |
| Greg Kroah-Hartman | [PATCH 005/196] Chinese: add translation of SubmittingDrivers |
| Bart Van Assche | Integration of SCST in the mainstream Linux kernel |
git: | |
| David Miller | [GIT]: Networking |
| David Miller | Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_lock(). |
| Christoph Hellwig | Re: [PATCH 06/32] IGET: Mark iget() and read_inode() as being obsolete [try #2] |
| Gerrit Renker | [PATCH 26/37] dccp: Integration of dynamic feature activation - part 1 (socket set... |
