When below program is executed i got kernel panic.
Please anybody check and let me know my mistake
I compiled and executed in kernel 2.6.18.5
-----------------snip starts----------------
typedef struct queue_test
{
struct timer_list cache_timer;
} q_test;
static q_test Q;
static int timer_starts = 0;
static int first_time =1;
static int global_value = 0;
DECLARE_WAIT_QUEUE_HEAD(queue_test_waitq);
static void queue_test_timer_handler(unsigned long a_cache)
{
printk("Timer Handler function called\n");
if (!a_cache)
{
printk("Invalid entry \n");
return;
}
printk("Wake up all queued packets\n");
wake_up(&queue_test_waitq);
printk("timer stopped\n");
printk("============================\n");
//timer_starts = 0;
global_value = 1;
}
static unsigned int queue_test_prerouting(unsigned int hook,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
printk("Prerouting skb: %p\n", (*pskb));
return NF_ACCEPT;
}
static int check_global_value()
{
if (global_value == 1)
return 1;
else
return 0;
}
static unsigned int queue_test_forwarding(unsigned int hook,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
u32 genid;
genid = atomic_read(&flow_cache_genid);
if (first_time)
{
first_time = 0;
printk("====================================\n");
printk("First Packet: skb: %p \n", (*pskb));
init_timer(&Q.cache_timer);
Q.cache_timer.data = 1;
Q.cache_timer.function = queue_test_timer_handler;
Q.cache_timer.expires = jiffies + (60 * 1) * HZ;
add_timer(&Q.cache_timer);
}
if (check_global_value() == 0)
{
DECLARE_WAITQUEUE(wait, current);
add_wait_queue(&queue_test_waitq, &wait);
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(5);
set_current_state(TASK_RUNNING);
remove_wait_queue(&queue_test_waitq, &wait);
printk("Packet from queue\n");
}
return NF_ACCEPT;
}
static unsigned int queue_test_postrouting(unsigned int hook,
struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
printk("Post Routing \n");
printk("Packet received: skb: %p \n", (*pskb));
return NF_ACCEPT;
}
static struct nf_hook_ops queue_test_ops[] =
{
{
.hook = queue_test_prerouting,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_IP_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK + 1,
},
{
.hook = queue_test_forwarding,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_IP_FORWARD,
.priority = NF_IP_PRI_FILTER + 1,
},
{
.hook = queue_test_postrouting,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_IP_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
};
int __init queue_test_init(void)
{
int index;
int ret;
for (index = 0; index < ARRAY_SIZE(queue_test_ops); index++)
{
if ((ret = nf_register_hook(&queue_test_ops[index])) < 0)
{
return -1;
}
}
return 0;
}
void __exit queue_test_exit(void)
{
int index;
for (index = 0; index < ARRAY_SIZE(queue_test_ops); index++)
{
nf_unregister_hook(&queue_test_ops[index]);
}
}
MODULE_LICENSE("GPL") ;
module_init(queue_test_init) ;
module_exit(queue_test_exit) ;
--------------snip ends---------------
Thanks in advance..
Sathish
Reply
/* DECLARE_WAITQUEUE(wait,
November 21, 2008 - 8:42am
gat3way (not verified)
/*
DECLARE_WAITQUEUE(wait, current);
add_wait_queue(&queue_test_waitq, &wait);
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(5);
set_current_state(TASK_RUNNING);
remove_wait_queue(&queue_test_waitq, &wait);
*/
From what context do you think your nf hook function is being called? Why don't you check where does "current" point to prior to doing that magic above ? :)
Reply
Re: /* DECLARE_WAITQUEUE(wait,
November 24, 2008 - 5:26am
sathishkumarjo
Thanks for the response.
Still i am not clear.
The context is process context.
Can't we do the above magic in processor context?
I find the code in the following
linux-2.6.18.8\net\xfrm\xfrm_policy.c
Line: No: 919
Does it return error or panic?
Thanks,
Sat
Reply
Well, no, it's not being
November 24, 2008 - 6:57am
gat3way (not verified)
Well, no, it's not being called from a process context.
You see your hook is called from ip_forward() (see net/ipv4/ip_forward.c).
See the flow of events when an ethernet frame is received and the IP packet is being routed:
When your ethernet device receives an ethernet frame, an IRQ is being raised.
The IRQ handler calls netif_rx(), puts the packet on the receive queue, later on the softirq is raised, it calls netif_receive_skb, checks src/dst addresses, determine the packet has to be routed, makes some checks and finally call the ip_forward() function.
Your nf hook is being called from ip_forward(). Everything up to this moment happens from a softirq context. You do not have a backing process, thus "current" (the backing process' task_struct) should be NULL. When you try to pass a NULL task_struct to add_wait_queue or smth like that, you'll most certainly shoot the system down making it access invalid memory.
Reply
Re: Well, no, it's not being
November 25, 2008 - 1:52am
sathishkumarjo
Yes you are right.
We get the same context (that is SOFTIRQ) in xfrm_policy also
For forwarding packets, we call xfrm_lookup (which has the schedule function call) from __xfrm_route_forward.
Case 1
---- snip from xfrm_lookup function------
nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
if (unlikely(nx<0)) {
err = nx;
if (err == -EAGAIN && flags) {
DECLARE_WAITQUEUE(wait, current);
add_wait_queue(&km_waitq, &wait);
set_current_state(TASK_INTERRUPTIBLE);
schedule();
set_current_state(TASK_RUNNING);
remove_wait_queue(&km_waitq, &wait);
nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
---- snip from xfrm_lookup function------
The packet is from ethernet device and puts in the queue and raises the SOFTIRQ.
In SOFTIRQ context we call ip_forward which calls the xfrm4_route_forward.
Function calls:
ip_rcv->ip_rcv_finish->routing->ip_forward->xfrm4_route_forward
Here we may reach the above snip portion (case -when SA is not available but still negotiating)
My doubt is
The above case1 will work but our sample program reached panic.
(I didn't test and confirm whether above case1 is reached or not)
Thanks for your reply
Reply
Hmmm....OTOH I am not right,
November 24, 2008 - 7:17am
gat3way (not verified)
Hmmm....OTOH I am not right, ksoftirqd should have its own task struct so current should not be NULL.
That's weird...can you please paste the kernel panic message?
Reply
Hmmm....OTOH I am not right,
November 24, 2008 - 7:17am
gat3way (not verified)
Hmmm....OTOH I am not right, ksoftirqd should have its own task struct so current should not be NULL.
That's weird...can you please paste the kernel panic message?
Reply
I use VMWare for testing so i cant take the snap of the kernel panic.
Can we get once we restarted the system? If so please tell me how?Thanks :-)
debugging
can't you configure VMware to show the message or use another testing environment just for this test, like UML (user mode linux), which prints its console message to a normal terminal on the host, which of course survives every crash? or a real machine?
you could try removing (commenting out) more of the code until it doesn't crash anymore and then adding back line by line until it crashes.
and please post indented code next time, use the magic <pre> tag