2009年7月3日 星期五

Linux Interrupts and Exceptions - 4

當 I/O Interrupt 發生時,CPU 從 IDT 表中找出該中斷的位置,並取得記錄於 IDT 的 Segment Selector 及 Offset
再利用這兩個值將目前指令跳至 Interrupt Handler,如下圖所示:


Interrupt Handler 的動作即是呼叫 do_IRQ() 函式來處理該中斷,其 IRQ Number 由引數 struct pt_regs 傳入。

定義於 include/asm-i386/ptrace.h

/* this struct defines the way the registers are stored on the
stack during a system call. */
/*
 * do_IRQ() also receives a pointer to this structure and reads the IRQ
 * number from the low 8 bits of orig_eax.
 *
 * NOTE(review): because this mirrors a stack layout, member order is
 * presumably significant -- confirm against the low-level entry code
 * before reordering or resizing any member.
 */
struct pt_regs {
  long ebx;
  long ecx;
  long edx;
  long esi;
  long edi;
  long ebp;
  long eax;
  int xds;
  int xes;
  long orig_eax;  /* do_IRQ() masks this with 0xff to obtain the IRQ number */
  long eip;
  int xcs;
  long eflags;
  long esp;
  int xss;
};

定義於 arch/i386/kernel/irq.c

/*
 * do_IRQ handles all normal device IRQ's (the special
 * SMP cross-CPU interrupts have their own specific
 * handlers).
 *
 * regs: saved register frame of the interrupted context. The IRQ number
 *       is taken from the low 8 bits of regs->orig_eax.
 *
 * The real work is done by __do_IRQ(), bracketed by irq_enter() and
 * irq_exit(). With CONFIG_4KSTACKS the call is made on the per-CPU
 * hardirq stack unless we are already running on it.
 *
 * Always returns 1.
 */
fastcall unsigned int do_IRQ(struct pt_regs *regs)
{
  /* high bits used in ret_from_ code */
  int irq = regs->orig_eax & 0xff;
#ifdef CONFIG_4KSTACKS
  union irq_ctx *curctx, *irqctx;
  u32 *isp;
#endif

  irq_enter();
#ifdef CONFIG_DEBUG_STACKOVERFLOW
  /* Debugging check for stack overflow: is there less than 1KB free? */
  {
    long esp;

    /* esp = %esp & (THREAD_SIZE - 1), i.e. the offset within the stack area */
    __asm__ __volatile__("andl %%esp,%0" :
          "=r" (esp) : "0" (THREAD_SIZE - 1));
    if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
      printk("do_IRQ: stack overflow: %ld\n",
        esp - sizeof(struct thread_info));
      dump_stack();
    }
  }
#endif

#ifdef CONFIG_4KSTACKS
  curctx = (union irq_ctx *) current_thread_info();
  irqctx = hardirq_ctx[smp_processor_id()];

  /*
   * this is where we switch to the IRQ stack. However, if we are
   * already using the IRQ stack (because we interrupted a hardirq
   * handler) we can't do that and just have to keep using the
   * current stack (which is the irq stack already after all)
   */
  if (curctx != irqctx) {
    int arg1, arg2, ebx;

    /* build the stack frame on the IRQ stack */
    isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
    irqctx->tinfo.task = curctx->tinfo.task;
    irqctx->tinfo.previous_esp = current_stack_pointer;

    /*
     * %ebx is loaded with isp and swapped with %esp, so the call runs
     * on the IRQ stack while %ebx keeps the old stack pointer; %esp is
     * restored from %ebx afterwards. irq is passed in %eax and regs in
     * %edx (presumably the fastcall register convention -- confirm).
     */
    asm volatile(
      "  xchgl %%ebx,%%esp  \n"
      "  call __do_IRQ   \n"
      "  movl %%ebx,%%esp  \n"
      : "=a" (arg1), "=d" (arg2), "=b" (ebx)
      : "0" (irq), "1" (regs), "2" (isp)
      : "memory", "cc", "ecx"
    );
  } else
#endif
    /*
     * Body of the "else" above when CONFIG_4KSTACKS is set;
     * otherwise an unconditional call.
     */
    __do_IRQ(irq, regs);

  irq_exit();

  return 1;
}

定義於 kernel/irq/handle.c

/*
* do_IRQ handles all normal device IRQ's (the special
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
{
  irq_desc_t *desc = irq_desc + irq;
  struct irqaction * action;
  unsigned int status;

  kstat_this_cpu.irqs[irq]++;
  if (desc->status & IRQ_PER_CPU) {
    irqreturn_t action_ret;

    /*
     * No locking required for CPU-local interrupts:
     */
    desc->handler->ack(irq);
    action_ret = handle_IRQ_event(irq, regs, desc->action);
    if (!noirqdebug)
      note_interrupt(irq, desc, action_ret);
    desc->handler->end(irq);
    return 1;
  }

  spin_lock(&desc->lock);
  desc->handler->ack(irq);
  /*
   * REPLAY is when Linux resends an IRQ that was dropped earlier
   * WAITING is used by probe to mark irqs that are being tested
   */
  status = desc->status & ~(IRQ_REPLAY IRQ_WAITING);
  status = IRQ_PENDING; /* we _want_ to handle it */

  /*
   * If the IRQ is disabled for whatever reason, we cannot
   * use the action we have.
   */
  action = NULL;
  if (likely(!(status & (IRQ_DISABLED IRQ_INPROGRESS)))) {
    action = desc->action;
    status &= ~IRQ_PENDING; /* we commit to handling */
    status = IRQ_INPROGRESS; /* we are handling it */
  }
  desc->status = status;

  /*
   * If there is no IRQ handler or it was disabled, exit early.
   * Since we set PENDING, if another processor is handling
   * a different instance of this same irq, the other processor
   * will take care of it.
   */
  if (unlikely(!action))
    goto out;

  /*
   * Edge triggered interrupts need to remember
   * pending events.
   * This applies to any hw interrupts that allow a second
   * instance of the same irq to arrive while we are in do_IRQ
   * or in the handler. But the code here only handles the _second_
   * instance of the irq, not the third or fourth. So it is mostly
   * useful for irq hardware that does not mask cleanly in an
   * SMP environment.
   */
  for (;;) {
    irqreturn_t action_ret;

    spin_unlock(&desc->lock);

    action_ret = handle_IRQ_event(irq, regs, action);

    spin_lock(&desc->lock);
    if (!noirqdebug)
      note_interrupt(irq, desc, action_ret);
    if (likely(!(desc->status & IRQ_PENDING)))
      break;
    desc->status &= ~IRQ_PENDING;
  }
  desc->status &= ~IRQ_INPROGRESS;

out:
  /*
   * The ->end() handler has to deal with interrupts which got
   * disabled while the handler was running.
   */
  desc->handler->end(irq);
  spin_unlock(&desc->lock);

  return 1;
}

2009年3月17日 星期二

Linux Interrupts and Exceptions - 3

I/O Interrupt (硬體裝置的中斷) 處理函式是以 request_irq() 來向核心註冊的,並且以 free_irq() 來清除

定義於 include/linux/interrupt.h

/*
 * Register an interrupt handler for an IRQ line.
 *
 * irq:      IRQ line to attach the handler to.
 * handler:  function to run for the interrupt; same signature as the
 *           handler member of struct irqaction.
 * irqflags: presumably stored in irqaction.flags -- TODO confirm.
 * devname:  presumably stored in irqaction.name -- TODO confirm.
 * dev_id:   presumably stored in irqaction.dev_id; free_irq() takes the
 *           same kind of value -- TODO confirm.
 *
 * NOTE(review): the meaning of the int return value is not visible
 * here; confirm against the implementation.
 */
int request_irq(
    unsigned int  irq,
    irqreturn_t   (*handler)(int, void *, struct pt_regs *),
    unsigned long  irqflags,
    const char   *devname,
    void      *dev_id
  );

/*
 * Remove an interrupt handler that was registered with request_irq().
 *
 * irq:    IRQ line the handler was registered on.
 * dev_id: presumably selects which registration to remove when several
 *         handlers share the line -- TODO confirm.
 */
void free_irq(
    unsigned int irq,
    void     *dev_id
  );

/*
 * One registered interrupt action. irq_desc.action points at a list of
 * these, and __do_IRQ() hands that list to handle_IRQ_event().
 */
struct irqaction {
  irqreturn_t (*handler)(int, void *, struct pt_regs *);  /* same signature as request_irq()'s handler argument */
  unsigned long flags;          /* presumably request_irq()'s irqflags -- TODO confirm */
  cpumask_t mask;
  const char *name;             /* presumably request_irq()'s devname -- TODO confirm */
  void *dev_id;                 /* presumably request_irq()'s dev_id -- TODO confirm */
  struct irqaction *next;       /* next action in the list (presumably others sharing the IRQ line) */
  int irq;
  struct proc_dir_entry *dir;
};

request_irq() 函式內部則是呼叫 setup_irq() 來進行 IRQ Line 的註冊
其動作是將要求中斷處理的資訊包裝成 struct irqaction 的結構,然後記錄於 irq_desc 陣列中的 IRQ Line 的位置
若有共用 IRQ Line 的中斷,則會以鏈結串列的方式掛在同一個陣列的位置上,如下圖所示

圖中的 hw_interrupt_type 是一個描述中斷控制器的資料結構,用來抽象化不同的中斷控制硬體
例如:舊式的 8259A,或新式的 APIC,也可能是無任何中斷控制器

定義於 include/linux/irq.h

/*
* Interrupt controller descriptor. This is all we need
* to describe about the low-level hardware.
*
* Each irq_desc entry points at one of these through its handler
* member. Every callback takes the IRQ number it should act on.
*/
struct hw_interrupt_type {
  const char * typename;                    /* controller name, e.g. "XT-PIC" */
  unsigned int (*startup)(unsigned int irq);
  void (*shutdown)(unsigned int irq);
  void (*enable)(unsigned int irq);
  void (*disable)(unsigned int irq);
  void (*ack)(unsigned int irq);            /* __do_IRQ() calls this before running the actions */
  void (*end)(unsigned int irq);            /* __do_IRQ() calls this after the actions have run */
  void (*set_affinity)(unsigned int irq, cpumask_t dest);  /* may be NULL (see i8259A_irq_type, no_irq_type) */
};

/* Alias used for the handler member of struct irq_desc. */
typedef struct hw_interrupt_type hw_irq_controller;

定義於 arch/i386/kernel/i8259.c

static struct hw_interrupt_type i8259A_irq_type = {
  "XT-PIC",
  startup_8259A_irq,
  shutdown_8259A_irq,
  enable_8259A_irq,
  disable_8259A_irq,
  mask_and_ack_8259A,
  end_8259A_irq,
  NULL
};

定義於 arch/i386/kernel/io_apic.c

/*
* Level and edge triggered IO-APIC interrupts need different handling,
* so we use two separate IRQ descriptors. Edge triggered IRQs can be
* handled with the level-triggered descriptor, but that one has slightly
* more overhead. Level-triggered interrupts cannot be handled with the
* edge-triggered handler, without risking IRQ storms and other ugly
* races.
*/
/* Controller descriptor for edge-triggered IO-APIC interrupts. */
static struct hw_interrupt_type ioapic_edge_type = {
  .typename  = "IO-APIC-edge",
  .startup   = startup_edge_ioapic,
  .shutdown  = shutdown_edge_ioapic,
  .enable    = enable_edge_ioapic,
  .disable   = disable_edge_ioapic,
  .ack     = ack_edge_ioapic,
  .end    = end_edge_ioapic,
  .set_affinity = set_ioapic_affinity,
};

/*
 * Controller descriptor for level-triggered IO-APIC interrupts.
 * Its ack callback is mask_and_ack_level_ioapic, where the edge
 * descriptor uses ack_edge_ioapic.
 */
static struct hw_interrupt_type ioapic_level_type = {
  .typename   = "IO-APIC-level",
  .startup   = startup_level_ioapic,
  .shutdown   = shutdown_level_ioapic,
  .enable    = enable_level_ioapic,
  .disable   = disable_level_ioapic,
  .ack     = mask_and_ack_level_ioapic,
  .end     = end_level_ioapic,
  .set_affinity = set_ioapic_affinity,
};

定義於 kernel/irq/handle.c

/*
 * Controller descriptor named "none". Every irq_desc[] entry starts out
 * pointing at it, and init_ISA_irqs() assigns it to IRQs 16 and above.
 * It has no set_affinity callback.
 */
struct hw_interrupt_type no_irq_type = {
  .typename = "none",
  .startup = startup_none,
  .shutdown = shutdown_none,
  .enable = enable_none,
  .disable = disable_none,
  .ack = ack_none,
  .end = end_none,
  .set_affinity = NULL
};

2009年3月6日 星期五

Linux Interrupts and Exceptions - 2

下圖為 x86 處理器的 256 個中斷配置,其中 vector 編號 0-19 是前面提過的 Exception
編號 32-127 是硬體裝置的 I/O Interrupt,而編號 128 則是由 Linux 核心作為 System Call 之用


在 PC 架構中,有幾個裝置的中斷訊號必須分配到中斷控制器的固定 IRQ Line
如下圖的 IRQ 0、2 及 13
IRQ 欄位表示硬體的 IRQ Line,INT 欄位表示 Linux 核心使用的 IRQ Number

上述的 I/O Interrupt 及其處理函式被儲存於 Linux 核心的 irq_desc 陣列中
變數型態為 struct irq_desc (或 irq_desc_t),陣列大小為 NR_IRQS

定義於 kernel/irq/handle.c

/*
 * Table of IRQ descriptors, one per IRQ number; __do_IRQ() indexes it
 * with the irq argument. The GNU range designator gives every entry the
 * same initial state: the no_irq_type controller and an unlocked
 * spinlock.
 */
irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
  [0 ... NR_IRQS-1] = {
    .handler = &no_irq_type,
    .lock = SPIN_LOCK_UNLOCKED
  }
};


定義於 include/linux/irq.h

/*
 * Per-IRQ descriptor; the irq_desc[] array holds NR_IRQS of these.
 */
typedef struct irq_desc {
  hw_irq_controller *handler;  /* controller callbacks; __do_IRQ() calls ->ack() and ->end() */
  void *handler_data;
  struct irqaction *action;  /* IRQ action list */
  unsigned int status;    /* IRQ status */
  unsigned int depth;    /* nested irq disables */
  unsigned int irq_count;  /* For detecting broken interrupts */
  unsigned int irqs_unhandled;
  spinlock_t lock;  /* held by __do_IRQ() while it updates status for non-per-CPU IRQs */
} ____cacheline_aligned irq_desc_t;


定義於 include/asm-i386/mach-default/irq_vectors_limits.h

/*
 * NR_IRQS:        number of entries in irq_desc[].
 * NR_IRQ_VECTORS: NR_IRQS, or 32 * NR_CPUS when an IO-APIC is configured
 *                 and that product is larger.
 *
 * Without CONFIG_X86_IO_APIC both are 16; with it NR_IRQS is 224.
 */
#ifndef CONFIG_X86_IO_APIC
# define NR_IRQS 16
# define NR_IRQ_VECTORS NR_IRQS
#else
# define NR_IRQS 224
# if (NR_IRQS >= 32 * NR_CPUS)
#  define NR_IRQ_VECTORS NR_IRQS
# else
#  define NR_IRQ_VECTORS (32 * NR_CPUS)
# endif
#endif


以上可看出,若系統使用 APIC 來管理裝置中斷時,IRQ 的個數為 224 個,否則為 16 個
當系統初始化時,核心呼叫 init_IRQ() 函式將 irq_desc 陣列中的 status 欄位設定為 IRQ_DISABLED
並且呼叫 set_intr_gate() 函式設定 CPU 的 Interrupt Gate,這個動作就是設定中斷發生時的處理函式

定義於 arch/i386/kernel/i8259.c

/*
 * Install an interrupt gate for each external vector that has an IRQ
 * behind it, then run the remaining interrupt-related setup calls:
 * intr_init_hook(), setup_pit_timer(), the optional FPU IRQ and
 * irq_ctx_init() for the current CPU.
 */
void __init init_IRQ(void)
{
  int idx;

  /* all the set up before the call gates are initialised */
  pre_intr_init_hook();

  /*
   * Walk the vector space from FIRST_EXTERNAL_VECTOR upwards, stopping
   * at whichever comes first: the end of the vector space or NR_IRQS
   * entries. (some of these will be overridden and become 'special'
   * SMP interrupts)
   */
  for (idx = 0;
       idx < (NR_VECTORS - FIRST_EXTERNAL_VECTOR) && idx < NR_IRQS;
       idx++) {
    int vec = FIRST_EXTERNAL_VECTOR + idx;

    /* the system-call vector does not get an interrupt gate here */
    if (vec == SYSCALL_VECTOR)
      continue;

    set_intr_gate(vec, interrupt[idx]);
  }

  /*
   * setup after call gates are initialised (usually add in
   * the architecture specific gates)
   */
  intr_init_hook();

  /* Set the clock to HZ Hz, we already have a valid vector now. */
  setup_pit_timer();

  /*
   * External FPU? Set up irq13 if so, for
   * original braindamaged IBM FERR coupling.
   */
  if (boot_cpu_data.hard_math && !cpu_has_fpu)
    setup_irq(FPU_IRQ, &fpu_irq);

  irq_ctx_init(smp_processor_id());
}


/*
 * Initialise the interrupt controller(s) and reset every irq_desc[]
 * entry: disabled, no action, disable depth 1. The first 16 entries
 * use the 8259A controller descriptor, the rest use no_irq_type.
 */
void __init init_ISA_irqs (void)
{
  int irq;

#ifdef CONFIG_X86_LOCAL_APIC
  init_bsp_APIC();
#endif
  init_8259A(0);

  for (irq = 0; irq < NR_IRQS; irq++) {
    irq_desc_t *desc = irq_desc + irq;

    desc->status = IRQ_DISABLED;
    desc->action = NULL;
    desc->depth = 1;

    /*
     * 16 old-style INTA-cycle interrupts get the 8259A descriptor;
     * 'high' PCI IRQs are filled in on demand.
     */
    desc->handler = (irq < 16) ? &i8259A_irq_type : &no_irq_type;
  }
}

2009年2月15日 星期日

Linux Interrupts and Exceptions - 1

Intel x86 處理器將中斷分成 同步的 Exception 以及 非同步的 Interrupt 兩類
並且給與 0-255 的識別號碼,稱之為 vector,在 Linux 核心則被稱為 IRQ Number

• Exceptions
由 CPU 產生的中斷,例如:執行指令時發生錯誤,或是使用者程式的系統呼叫
目前 x86 處理器定義了 20 個 Exception,即 IRQ Number 0-19,如下表所示:


• Interrupts
由其他硬體裝置或計時器發出的中斷,通常是將裝置的 IRQ Line 全部接到一顆
Programmable Interrupt Controller (PIC) 的控制晶片上,當裝置發出中斷時,由這顆 PIC 來通知 CPU
舊型的 PC 會使用兩個 Intel 8259 來串接成為中斷控制器,可處理 15 個硬體裝置的中斷要求
x86 處理器的 Interrupt 則是定義為 IRQ Number 32-255,如下圖所示:

對於多處理器的系統,則是使用新型的 Advanced Programmable Interrupt Controller (APIC)
來代替 8259 管理裝置的中斷,並具有單處理器系統的相容性

APIC 是由 Local APIC (LAPIC) 及 I/O APIC (IOAPIC) 兩部份所組成,LAPIC 位於 CPU 端,IOAPIC 位於南橋晶片上
多處理器系統的每個 CPU 會有一個 LAPIC,搭配一個連接硬體裝置的 IOAPIC,如下圖:



此外,x86 處理器使用 8 bytes 的 IDT descriptors 來描述中斷,分為三種型別:
1. Task Gate
2. Interrupt Gate
3. Trap Gate


Linux 則使用 256*8 = 2048 bytes 的空間,來儲存 CPU 全部 256 個中斷的 IDT 值
並且以 Interrupt Gate 來處理 Interrupt 的中斷,以 Trap Gate 來處理 Exception 的中斷,如下:

/*
 * One descriptor table entry, stored as two unsigned long words
 * (8 bytes in total where unsigned long is 32 bits wide).
 */
struct desc_struct {
  unsigned long a;
  unsigned long b;
};

定義於 include/asm-i386/processor.h


/*
 * The interrupt descriptor table: 256 entries, all zero-initialised,
 * placed in the ".data.idt" section.
 */
struct desc_struct idt_table[256]
  __attribute__((__section__(".data.idt"))) = {
  { 0, 0 },
};

定義於 arch/i386/kernel/traps.c

2009年1月11日 星期日

單車遊記 - 三坑自行車道

在2008年的最後一個假日來到 三坑自行車道 探路,是一條由大溪通往龍潭三坑老街的自行車道
根據路線圖的指示,可以由大溪橋出發走主線到三坑老街,再走其他支線繞一圈回到大溪橋


這條自行車道頗具特色:從大溪橋開始會經過韭菜田,到了三坑便是老街的風景
走另一條支線的話,可以看到石門大圳的水渠道。此外,大漢溪旁的自然生態公園相當別緻
是一個不錯的休息點,走上大漢溪的河堤還可以看到遠方的石門水庫呢~




騎完三坑自行車道後,亦可順道一遊大溪老街
全部的照片在此:
http://picasaweb.google.com/yichung.shen/20081228