2010年8月15日 星期日

Timing Measurements - 2

Linux 核心依據 HZ 的值設定 PIT 裝置(Intel 8254)發出中斷的頻率,並且初始化 IRQ 0 為 Timer Interrupt,其中 1193182 是 8254 晶片內部的頻率。
每當時間中斷產生時,就更新變數 jiffies_64 與 xtime 之值。以下列出核心的相關程序的原始碼:

setup_pit_timer()     設定 PIT
time_init_hook()    初始化 IRQ 0
do_timer_interrupt()  時間中斷的處理函式

定義於 include/asm-i386/timex.h

/*
 * Frequency of the clock that drives the interval timer chip.
 * The AMD Elan uses a different value from the standard PC one.
 */
#ifndef CONFIG_X86_ELAN
#define CLOCK_TICK_RATE 1193182 /* Underlying HZ (standard PC value) */
#else
#define CLOCK_TICK_RATE 1189200 /* AMD Elan has different frequency! */
#endif


定義於 include/linux/jiffies.h

/*
 * LATCH is used in the interval timer and ftape setup.
 *
 * It is CLOCK_TICK_RATE divided by HZ. Adding HZ/2 before the integer
 * division rounds the quotient to the nearest integer instead of
 * truncating it (for positive integer values of both macros).
 */
#define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */


定義於 arch/i386/kernel/timers/timer_pit.c

/*
 * setup_pit_timer - program PIT channel 0 with the LATCH divider.
 *
 * Writes the mode byte 0x34 to PIT_MODE, then the low byte and the high
 * byte of LATCH to PIT_CH0, with a udelay(10) after each of the first two
 * writes. The whole sequence runs with i8253_lock taken through
 * spin_lock_irqsave() and released through spin_unlock_irqrestore().
 */
void setup_pit_timer(void)
{
  extern spinlock_t i8253_lock;
  unsigned long flags;

  spin_lock_irqsave(&i8253_lock, flags);
  outb_p(0x34,PIT_MODE);  /* binary, mode 2, LSB/MSB, ch 0 */
  udelay(10);
  outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
  udelay(10);
  outb(LATCH >> 8 , PIT_CH0); /* MSB */
  spin_unlock_irqrestore(&i8253_lock, flags);
}


定義於 arch/i386/mach-default/setup.c

/*
 * Action descriptor handed to setup_irq() for IRQ 0.
 * The initializer is positional: timer_interrupt, SA_INTERRUPT,
 * CPU_MASK_NONE, the string "timer", and two NULLs.
 * NOTE(review): presumably these map to handler, flags, CPU mask, name and
 * two pointer fields of struct irqaction -- confirm against its definition.
 */
static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL};

/**
* time_init_hook - do any specific initialisations for the system timer.
*
* Description:
*  Must plug the system timer interrupt source at HZ into the IRQ listed
*  in irq_vectors.h:TIMER_IRQ
*
*  This implementation passes the irq0 descriptor to setup_irq() for
*  interrupt number 0; any result of setup_irq() is ignored.
**/
void __init time_init_hook(void)
{
  setup_irq(0, &irq0);
}


定義於 include/asm-i386/mach-default/do_timer.h

/**
 * do_timer_interrupt_hook - hook into timer tick
 * @regs:  standard registers from interrupt
 *
 * Description:
 *  Called immediately after the timer interrupt has been ack'd.
 *  Its primary purpose is to let architectures without individual
 *  per-CPU clocks (such as the ones the CPU APICs supply) broadcast the
 *  timer interrupt as a means of triggering reschedules etc.
 **/
static inline void do_timer_interrupt_hook(struct pt_regs *regs)
{
  do_timer(regs);

#if !defined(CONFIG_SMP)
  update_process_times(user_mode(regs));
#endif

  /*
   * With a local APIC configured, profiling is normally driven by the
   * local APIC timer interrupt. The exception is SMP mode simulated on
   * a uniprocessor system: there the local interrupt handler has to be
   * called from here. Without a local APIC, profile directly.
   */
#if defined(CONFIG_X86_LOCAL_APIC)
  if (!using_apic_timer) {
    smp_local_timer_interrupt(regs);
  }
#else
  profile_tick(CPU_PROFILING, regs);
#endif
}


定義於 kernel/timer.c (update_times、do_timer) 與 arch/i386/kernel/time.c (do_timer_interrupt、timer_interrupt)

/*
 * update_times - account for the ticks not yet applied to wall time.
 *
 * Invoked from the timer interrupt path; the timer IRQ must already
 * hold xtime_lock when this runs.
 */
static inline void update_times(void)
{
  /* Ticks counted in jiffies but not yet reflected in wall_jiffies. */
  unsigned long elapsed = jiffies - wall_jiffies;

  if (elapsed != 0) {
    wall_jiffies += elapsed;
    update_wall_time(elapsed);
  }

  /* The load calculation is fed the tick count even when it is zero. */
  calc_load(elapsed);
}

/*
* The 64-bit jiffies value is not atomic - you MUST NOT read it
* without sampling the sequence number in xtime_lock.
* jiffies is defined in the linker script...
*/

/*
 * do_timer - count one more tick and refresh the derived time values.
 * @regs: not referenced in this function body.
 */
void do_timer(struct pt_regs *regs)
{
  jiffies_64++;  /* one tick elapsed */
  update_times();
}

/*
 * do_timer_interrupt - per-tick work performed for timer_interrupt().
 * @irq:    interrupt number; its value is only overwritten and reused as
 *          scratch storage in the MCA path below
 * @dev_id: not referenced in this function
 * @regs:   registers at interrupt time, forwarded to
 *          do_timer_interrupt_hook()
 *
 * timer_interrupt() needs to keep up the real-time clock,
 * as well as call the "do_timer()" routine every clocktick.
 */
static inline void do_timer_interrupt(int irq, void *dev_id,
          struct pt_regs *regs)
{
#ifdef CONFIG_X86_IO_APIC
  if (timer_ack) {
    /*
     * Subtle, when I/O APICs are used we have to ack timer IRQ
     * manually to reset the IRR bit for do_slow_gettimeoffset().
     * This will also deassert NMI lines for the watchdog if run
     * on an 82489DX-based system.
     */
    spin_lock(&i8259A_lock);
    outb(0x0c, PIC_MASTER_OCW3);
    /* Ack the IRQ; AEOI will end it automatically. */
    inb(PIC_MASTER_POLL);
    spin_unlock(&i8259A_lock);
  }
#endif

  do_timer_interrupt_hook(regs);

  /*
   * If we have an externally synchronized Linux clock, then update
   * CMOS clock accordingly every ~11 minutes (660 s). Set_rtc_mmss() has
   * to be called as close as possible to 500 ms before the new second
   * starts.
   */
  if ((time_status & STA_UNSYNC) == 0 &&
   xtime.tv_sec > last_rtc_update + 660 &&
   (xtime.tv_nsec / 1000)
      >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
   (xtime.tv_nsec / 1000)
      <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) {
    /* horrible...FIXME */
    /*
     * On success remember the time of this update. On failure back-date
     * last_rtc_update by 600 s so the 660 s test above passes again
     * about 60 s from now.
     */
    if (efi_enabled) {
      if (efi_set_rtc_mmss(xtime.tv_sec) == 0) {
        last_rtc_update = xtime.tv_sec;
      } else {
        last_rtc_update = xtime.tv_sec - 600;
      }
    } else if (set_rtc_mmss(xtime.tv_sec) == 0) {
      last_rtc_update = xtime.tv_sec;
    } else {
      last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
    }
  }

  if (MCA_bus) {
    /* The PS/2 uses level-triggered interrupts. You can't
    turn them off, nor would you want to (any attempt to
    enable edge-triggered interrupts usually gets intercepted by a
    special hardware circuit). Hence we have to acknowledge
    the timer interrupt. Through some incredibly stupid
    design idea, the reset for IRQ 0 is done by setting the
    high bit of the PPI port B (0x61). Note that some PS/2s,
    notably the 55SX, work fine if this is removed. */

    irq = inb_p( 0x61 ); /* read the current state */
    /* OR in bit 7 so the other bits of port 0x61 are written back unchanged. */
    outb_p( irq | 0x80, 0x61 ); /* reset the IRQ */
  }
}

/*
* This is the same as the above, except we _also_ save the current
* Time Stamp Counter value at the time of the timer interrupt, so that
* we later on can estimate the time of day more exactly.
*
* Handler installed for the timer IRQ. It takes xtime_lock for writing,
* calls cur_timer->mark_offset(), runs do_timer_interrupt() and releases
* the lock. It always returns IRQ_HANDLED.
*/
irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
  /*
   * Here we are in the timer irq handler. We just have irqs locally
   * disabled but we don't know if the timer_bh is running on the other
   * CPU. We need to avoid an SMP race with it. NOTE: we don't need
   * the irq version of write_lock because, as just said, we have irqs
   * locally disabled. -arca
   */
  write_seqlock(&xtime_lock);

  cur_timer->mark_offset();

  /* dev_id is not forwarded; NULL is passed in its place. */
  do_timer_interrupt(irq, NULL, regs);

  write_sequnlock(&xtime_lock);
  return IRQ_HANDLED;
}

2010年8月14日 星期六

Timing Measurements - 1

System Time and Date

Linux 的系統時間與日期是靠一顆 Real Time Clock (RTC) 的晶片所維護的,應用程式可經由 /dev/rtc 來控制晶片,
核心則是透過 I/O port 0x70 與 0x71 來控制。


Time Measurement

x86 處理器有一名為 Time Stamp Counter (TSC) 的暫存器,每當 CPU 的 clock 腳位有訊號時,TSC 就會遞增。
若 clock 的頻率是 1 GHz,則 TSC 會每 1 nanosecond 做加 1 的動作。Linux 核心使用 TSC 的值便可做出精準的時間量測。


Timer Interrupt

PC 硬體上會有至少一個的 Programmable Interval Timer (PIT) 裝置,如 Intel 的 8254 晶片,提供定時的功能,
並且於 timeout 時對 CPU 發出中斷通知,也就是 Timer Interrupt - IRQ 0。
PIT 可以透過 I/O port 0x40 ~ 0x43 的 4 個埠口來控制。


tick, jiffies and xtime

Linux 核心設定 PIT 產生頻率為 1000 Hz 的 Timer Interrupt,即每 1 millisecond 會產生一次中斷,
此 1 millisecond 的時間稱為一個 "tick",該值存於變數 tick_nsec 之中,而頻率值以巨集 HZ 表示。

愈短的 tick 值會得到快速的 I/O 多工的反應時間,但亦會拖慢應用程式的執行速度。
通常較慢的機器會設定 10 millisecond 的 tick 值,而較快的機器會設定 1 millisecond 的 tick 值。

定義於 kernel/timer.c

/*
* Timekeeping variables
*
* Both hold a tick period and are initialized from the TICK_USEC and
* TICK_NSEC macros, which are defined outside this excerpt.
*/
unsigned long tick_usec = TICK_USEC;  /* USER_HZ period (usec) */
unsigned long tick_nsec = TICK_NSEC;  /* ACTHZ period (nsec) */

/*
* The current time
* wall_to_monotonic is what we need to add to xtime (or xtime corrected
* for sub-jiffy times) to get to monotonic time. Monotonic is pegged
* at zero at system boot time, so wall_to_monotonic will be negative,
* however, we will ALWAYS keep the tv_nsec part positive so we can use
* the usual normalization.
*
* Both variables are declared with 16-byte alignment.
*/
struct timespec xtime __attribute__ ((aligned (16)));
struct timespec wall_to_monotonic __attribute__ ((aligned (16)));

/* Only xtime is exported here; wall_to_monotonic is not. */
EXPORT_SYMBOL(xtime);


定義於 include/asm-i386/param.h

/* Tick-rate constants seen by kernel code. */
#if defined(__KERNEL__)
#define HZ 1000                   /* Internal kernel timer frequency */
#define USER_HZ 100               /* .. some user interfaces are in "ticks" */
#define CLOCKS_PER_SEC (USER_HZ)  /* like times() */
#endif

/* Fallback used whenever HZ was not defined above. */
#if !defined(HZ)
#define HZ 100
#endif

核心變數 jiffies 用來儲存自系統啟動以來,共經過了多少次 tick 的值。故每次的 tick 就會使 jiffies 遞增。
由於 jiffies 的長度只有 32-bit,對於 tick = 1 millisecond 的機器,經過約 50 天就會讓 jiffies 溢位。
因此真正用來儲存 jiffies 值的是長度為 64-bit 的變數 jiffies_64。

定義於 include/linux/jiffies.h

/*
* The 64-bit value is not volatile - you MUST NOT read it
* without sampling the sequence number in xtime_lock.
* get_jiffies_64() will do this for you as appropriate.
*
* jiffies_64 is declared as a plain u64, while jiffies is declared as a
* volatile unsigned long; both carry the __jiffy_data attribute.
*/
extern u64 __jiffy_data jiffies_64;
extern unsigned long volatile __jiffy_data jiffies;

核心變數 xtime 用來記錄目前的時間與日期,其結構包含兩個欄位:
• tv_sec  儲存自 1970.01.01 以來,共經過多少秒 (second)
• tv_nsec 儲存自上一秒的時間以來,共經過多少的奈秒 (nanosecond)

定義於 include/linux/time.h

/*
 * A time value split into whole seconds plus a nanosecond remainder.
 * The _STRUCT_TIMESPEC guard keeps the structure from being defined
 * twice when it has already been defined under the same guard.
 */
#ifndef _STRUCT_TIMESPEC
#define _STRUCT_TIMESPEC
struct timespec {
  time_t tv_sec;   /* seconds */
  long   tv_nsec;  /* nanoseconds */
};
#endif /* _STRUCT_TIMESPEC */