2016年9月28日 星期三

Linux:驅動程式除錯技術

Kernel Module不同於Application,需用其他方式的除錯方法,沒有printf可用,需用printk,甚至有可能讓Linux系統當機,於是比Application更難除錯,底下介紹幾種除錯技術。

Debugging Support in the Kernel

Kernel Configuration裡有個選項「Kernel hacking」供開發者勾選需要的除錯功能。

LDD3建議啟用下列選項(不同Kernel版本與不同CPU架構會有不同的選項)


以上都有附上說明的網址,若有需要請自行研究。


Debugging by Printing

應用程式所使用的printf是輸出到standard output(buffered I/O),但有時程式會在訊息輸出前就掛掉,而程式裡的buffer尚未印出來,因此要記得印到standard error。但Kernel Module不能使用printf輸出訊息到STDOUT,要改用printk。

printk的輸出訊息是分等級的,如下表:
Loglevel
Description
KERN_EMERG
An emergency condition; the system is probably dead
KERN_ALERT
A problem that requires immediate attention
KERN_CRIT
A critical condition
KERN_ERR
An error
KERN_WARNING
A warning
KERN_NOTICE
A normal, but perhaps noteworthy, condition
KERN_INFO
An informational message
KERN_DEBUG
A debug message; typically superfluous


Redirecting Console Messages

使用ioctl(TIOCLINUX)可將Kernel Message輸出到/dev/console裡,程式碼請見github

設定Console的位置
./setconsole 2

載入含有Kernel Message的module,例如hello
sudo insmod hello.ko
sudo rmmod hello

這樣printk的訊息也會輸出到tty2裡。
而檔案/proc/kmsg存有Kernel Message,於是可以用cat /proc/kmsg來觀察訊息。


Turning the Messages On and Off

可用macro的寫法,例如在scull.h裡:
/*
 * Debug-print helpers.
 *
 * PDEBUG(fmt, ...) prints only when SCULL_DEBUG is defined:
 *   - in kernel space it goes through printk() at KERN_DEBUG with a
 *     "scull: " prefix;
 *   - in user space it goes to stderr.
 * Without SCULL_DEBUG it expands to nothing.
 */
#undef PDEBUG /* drop any earlier definition, just in case */
#if defined(SCULL_DEBUG) && defined(__KERNEL__)
#  define PDEBUG(fmt, ...) printk( KERN_DEBUG "scull: " fmt, ## __VA_ARGS__)
#elif defined(SCULL_DEBUG)
#  define PDEBUG(fmt, ...) fprintf(stderr, fmt, ## __VA_ARGS__)
#else
#  define PDEBUG(fmt, ...) /* not debugging: nothing */
#endif

/* PDEBUGG is a placeholder that always expands to nothing. */
#undef PDEBUGG
#define PDEBUGG(fmt, ...) /* nothing: it's a placeholder */

Makefile中:
# Comment/uncomment the following line to disable/enable debugging.
DEBUG = y

# Choose the extra compiler flags for the selected mode.
ifeq ($(DEBUG),y)
  # "-O" is needed to expand inlines; -DSCULL_DEBUG defines SCULL_DEBUG for the sources.
  DEBFLAGS = -O -g -DSCULL_DEBUG
else
  DEBFLAGS = -O2
endif

# Per-directory flags for a kbuild Makefile go in ccflags-y; kbuild rejects
# Makefiles that modify CFLAGS directly.
ccflags-y += $(DEBFLAGS)

Debugging by Querying

Debugging by Querying的方法如下:
  1. /proc檔案系統中建立檔案。
  2. ioctl API函式。
  3. 透過sysfs輸出特徵。
ioctl與sysfs不在此紀錄說明。

/proc檔案系統中建立檔案

此範例使用seq_file 介面,需寫四個function(start, next, stop, show)來使用seq_file,程式碼如下:

/*
 * The scullmem proc implementation.
 *
 * seq_file show callback (registered through single_open()). For every
 * scull device it prints a summary line, then one line per list item, and
 * for the last item only, each non-NULL pointer stored in its data array.
 * Output stops early once the buffer is within 80 bytes of its size.
 *
 * Returns 0 on success, or -ERESTARTSYS if waiting for a device mutex was
 * interrupted.
 */
int scull_read_procmem(struct seq_file *s, void *v)
{
	int i, j;
	int limit = s->size - 80; /* Don't print more characters than this. */

	for (i = 0; i < scull_nr_devs && s->count <= limit; i++) {
		struct scull_dev *d = &scull_devices[i];
		struct scull_qset *qs;

		if (mutex_lock_interruptible(&d->mutex))
			return -ERESTARTSYS;

		/*
		 * Fetch the list head only after the mutex is held, so the
		 * pointer cannot be changed between the read and the walk.
		 */
		qs = d->data;

		seq_printf(s, "\nDevice %i: qset %i, q %i, sz %li\n",
			   i, d->qset, d->quantum, d->size);
		for (; qs && s->count <= limit; qs = qs->next) { /* Scan the list. */
			seq_printf(s, " item at %p, qset at %p\n",
				   qs, qs->data);
			if (qs->data && !qs->next) { /* Dump only the last item. */
				for (j = 0; j < d->qset; j++) {
					if (qs->data[j])
						seq_printf(s, " % 4i: %8p\n",
							   j, qs->data[j]);
				}
			}
		}
		mutex_unlock(&d->mutex);
	}
	return 0;
}

/*
 * open() handler for the scullmem proc file: binds the file to
 * scull_read_procmem() through single_open(), with no private data.
 * Returns whatever single_open() returns.
 */
static int scullmem_proc_open(struct inode *inode, struct file *filp)
{
	int ret;

	ret = single_open(filp, scull_read_procmem, NULL);
	return ret;
}

/*
 * File operations for the scullmem proc file. Opening goes through
 * scullmem_proc_open(); reading, seeking and releasing are delegated to
 * the seq_file / single_open helpers.
 */
struct file_operations scullmem_proc_ops = {
	.owner   = THIS_MODULE,
	.open    = scullmem_proc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

/* The scullseq proc implementation. */

/*
 * seq_file start callback: map the position *pos to a scull device.
 * Returns a pointer to scull_devices[*pos], or NULL when *pos is at or
 * beyond scull_nr_devs (nothing more to read).
 */
static void *scull_seq_start(struct seq_file *s, loff_t *pos)
{
	loff_t idx = *pos;

	return (idx < scull_nr_devs) ? &scull_devices[idx] : NULL;
}

/*
 * seq_file next callback: advance *pos by one and return the device at
 * the new position, or NULL once the position reaches scull_nr_devs.
 */
static void *scull_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	loff_t idx = ++(*pos);

	if (idx < scull_nr_devs)
		return &scull_devices[idx];
	return NULL;
}

/*
 * seq_file stop callback. The start/next callbacks above only hand out
 * pointers into scull_devices, so there is nothing to release here.
 */
static void scull_seq_stop(struct seq_file *s, void *v)
{
}

static int scull_seq_show(struct seq_file *s, void *v)
{
struct scull_dev *dev = (struct scull_dev *) v;
struct scull_qset *d;
int i;

if (mutex_lock_interruptible(&dev->mutex))
return -ERESTARTSYS;
seq_printf(s, "\nDevice %i: qset %i, q %i, sz %li\n",
(int) (dev - scull_devices), dev->qset,
dev->quantum, dev->size);
for (d = dev->data; d; d = d->next) { /* Scan the list. */
seq_printf(s, " item at %p, qset at %p\n", d, d->data);
if (d->data && !d->next) /* Dump only the last item. */
for (i = 0; i < dev->qset; i++) {
if (d->data[i])
seq_printf(s, " % 4i: %8p\n",
i, d->data[i]);
}
}
mutex_unlock(&dev->mutex);
return 0;
}
/* Iterator callbacks handed to seq_open() for the scullseq proc file. */
static struct seq_operations scull_seq_ops = {
	.start = scull_seq_start,
	.stop  = scull_seq_stop,
	.next  = scull_seq_next,
	.show  = scull_seq_show,
};

/*
 * open() handler for the scullseq proc file: attaches the scull_seq_ops
 * iterator to the file via seq_open() and returns its result.
 */
static int scullseq_proc_open(struct inode *inode, struct file *filp)
{
	int ret;

	ret = seq_open(filp, &scull_seq_ops);
	return ret;
}

/*
 * File operations for the scullseq proc file. Opening goes through
 * scullseq_proc_open(); reading, seeking and releasing are delegated to
 * the seq_file helpers.
 */
static struct file_operations scullseq_proc_ops = {
	.owner   = THIS_MODULE,
	.open    = scullseq_proc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

/* Set up and remove the proc entries */

/*
 * Register the two proc entries, "scullmem" and "scullseq", each with
 * default mode (0), no parent directory and no client data.
 *
 * The results of proc_create_data() are not checked, so a failed
 * registration goes unnoticed here.
 */
static void scull_create_proc(void)
{
	proc_create_data("scullmem",
			 0,			/* default mode */
			 NULL,			/* parent dir */
			 &scullmem_proc_ops,
			 NULL);			/* client data */

	proc_create_data("scullseq",
			 0,			/* default mode */
			 NULL,			/* parent dir */
			 &scullseq_proc_ops,
			 NULL);			/* client data */
}

/*
 * Remove the "scullmem" and "scullseq" proc entries (no parent directory).
 * The calls are unconditional, whether or not creation succeeded.
 * NOTE(review): the original comment says removing an unregistered entry
 * is harmless; confirm for the target kernel version.
 */
static void scull_remove_proc(void)
{
	remove_proc_entry("scullmem", NULL);
	remove_proc_entry("scullseq", NULL);
}

若還不清楚seq_file介面怎麼用,請參考底下範例
/*
* seq_file interface sample.
*
*/

#include <linux/init.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>

/*
 * seq_file start callback: allocate a private copy of the position.
 *
 * Returns a kmalloc'ed loff_t holding *pos, or NULL if the allocation
 * fails. The buffer is released by holan_seq_stop(). Apart from
 * allocation failure this never returns NULL, whatever *pos is.
 */
static void *holan_seq_start(struct seq_file *s, loff_t *pos)
{
	loff_t *cursor;

	cursor = kmalloc(sizeof(*cursor), GFP_KERNEL);
	if (cursor != NULL)
		*cursor = *pos;
	return cursor;
}

/*
 * seq_file next callback: increment the private position held in v,
 * copy the new value into *pos and return the same buffer. It never
 * returns NULL.
 */
static void *holan_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	loff_t *cursor = v;

	*cursor += 1;
	*pos = *cursor;
	return cursor;
}

/* seq_file stop callback: free the position buffer allocated by holan_seq_start(). */
static void holan_seq_stop(struct seq_file *s, void *v)
{
	kfree(v);
}

/*
 * seq_file show callback: print the current position, one decimal number
 * per line. v is the loff_t buffer handed out by the start/next callbacks.
 * Always returns 0.
 */
static int holan_seq_show(struct seq_file *s, void *v)
{
	loff_t *spos = v;

	/* %lld with an explicit cast replaces the non-standard "%Ld" modifier. */
	seq_printf(s, "%lld\n", (long long) *spos);
	return 0;
}

/* Iterator callbacks handed to seq_open() by holan_open(). */
static struct seq_operations holan_seq_ops = {
	.start = holan_seq_start,
	.stop  = holan_seq_stop,
	.next  = holan_seq_next,
	.show  = holan_seq_show,
};


/*
 * open() handler for the proc file: attaches the holan_seq_ops iterator
 * to the file via seq_open() and returns its result.
 */
static int holan_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &holan_seq_ops);
}

/*
 * File operations for the "holan_seq" proc entry. Opening goes through
 * holan_open(); reading, seeking and releasing are delegated to the
 * seq_file helpers.
 */
static struct file_operations holan_file_ops = {
	.owner   = THIS_MODULE,
	.open    = holan_open,
	.llseek  = seq_lseek,
	.read    = seq_read,
	.release = seq_release,
};
/*
 * Module init: create the "holan_seq" proc entry backed by holan_file_ops.
 *
 * Returns 0 on success, or -ENOMEM if proc_create() fails, so the module
 * load is reported as failed instead of succeeding without its proc file.
 */
static int holan_init(void)
{
	if (!proc_create("holan_seq", 0, NULL, &holan_file_ops))
		return -ENOMEM;
	return 0;
}

/* Module exit: remove the "holan_seq" proc entry created by holan_init(). */
static void holan_exit(void)
{
	remove_proc_entry("holan_seq", NULL /* parent dir */);
}

/* Module metadata. */
MODULE_AUTHOR("Holan Liao");
/*
 * NOTE(review): "Free Sample" does not look like one of the license
 * strings the kernel recognizes; loading the module presumably marks the
 * kernel as tainted. Confirm against the MODULE_LICENSE documentation.
 */
MODULE_LICENSE("Free Sample");

/* Register holan_init() and holan_exit() as the module's load/unload hooks. */
module_init(holan_init);
module_exit(holan_exit);

Oops Messages

faulty.c程式可產生Oops Messages,此例子是因為null pointer的緣故,這種訊息通常很難除錯,下圖可看到錯誤訊息是從faulty_write所產生的。



System Hangs

此種狀況有個工具可以用—SysRq組合鍵,SysRq在鍵盤上的位置如下圖,
SysRq需搭配Alt和第三個指令按鍵,例如SysRq + Alt + b會將系統重開機,其它的指令按鍵請參考這兒