Busy loop calibration

Non-TSC capable systems rely on a busy loop that currently assumes 1000 loops
per 1 us.  On older systems such as a 486, this is a large overestimate.

Add code for busy loop calibration to provide a better number than 1000.
As we are not allowed to use interrupts, the precision is not perfect,
but should provide a number that is close enough, for example 7 on my
486 DX4-120.
This commit is contained in:
01e3 2022-07-21 10:42:13 -07:00
parent cebdf67d2c
commit ea1aadc4e0
4 changed files with 86 additions and 6 deletions

View File

@ -22,8 +22,8 @@ void usleep(unsigned int usec)
__builtin_ia32_pause();
} while ((get_tsc() - t0) < cycles);
} else {
// This will be highly inaccurate, but should give at least the requested delay.
volatile uint64_t count = (uint64_t)usec * 1000;
// This will be inaccurate, but should be close enough to the requested delay.
volatile uint64_t count = (uint64_t)usec * loops_per_usec;
while (count > 0) {
count--;
}

View File

@ -55,6 +55,7 @@ uint32_t ram_speed = 0;
bool no_temperature = false;
uint32_t clks_per_msec = 0;
uint32_t loops_per_usec = 0;
//------------------------------------------------------------------------------
// Private Functions

View File

@ -117,6 +117,11 @@ extern bool no_temperature;
*/
extern uint32_t clks_per_msec;
/**
* Number of loops required for 1us delay. Not very accurate but close enough.
*/
extern uint32_t loops_per_usec;
/**
* Determines the CPU info and stores it in the exported variables.
*/

View File

@ -24,6 +24,23 @@
// Private Functions
//------------------------------------------------------------------------------
/**
 * Programs PIT channel 2 with a count of PIT_TICKS_50mS in mode 0.
 *
 * After this call, callers in this file read bit 0x20 of port 0x61 to find
 * out whether the programmed interval has elapsed.
 */
static inline void setup_pit(void)
{
    /* Port 0x61: set bit 0 (gate high) and clear bit 1 (speaker disabled). */
    outb((inb(0x61) & ~0x02) | 0x01, 0x61);

    /**
     * Command byte written to port 0x43:
     *
     * 10  = Channel #2
     * 11  = Access mode: lobyte/hibyte
     * 000 = Mode 0: Interrupt On Terminal Count
     * 0   = 16-bit binary
     *
     * 10110000 = 0xb0
     */
    outb(0xb0, 0x43);

    /* Load the 16-bit count: low byte first, then high byte. */
    outb(PIT_TICKS_50mS & 0xff, 0x42);
    outb(PIT_TICKS_50mS >> 8, 0x42);
}
static void correct_tsc(void)
{
uint32_t start_time, end_time, run_time, counter;
@ -62,10 +79,7 @@ static void correct_tsc(void)
}
// Use PIT Timer to find TSC correction factor if APIC not available
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
outb(0xb0, 0x43);
outb(PIT_TICKS_50mS & 0xff, 0x42);
outb(PIT_TICKS_50mS >> 8, 0x42);
setup_pit();
rdtscl(start_time);
@ -84,6 +98,60 @@ static void correct_tsc(void)
}
}
/**
 * Calibrates the busy loop and stores the result in loops_per_usec.
 *
 * The function measures how many iterations of a volatile 64-bit countdown
 * loop fit into the interval programmed by setup_pit() (PIT_TICKS_50mS), then
 * divides that by 50000 to get loops per microsecond. Bit 0x20 of port 0x61
 * is treated as "the PIT interval has elapsed".
 *
 * The search runs in two stages:
 *  1. A rough estimate obtained by counting iterations of a polling loop.
 *  2. A refinement that steps the candidate count up or down, halving the
 *     step each time the search direction flips, until the step is below
 *     25000 loops per 50ms (i.e. half a loop per microsecond).
 */
static void calculate_loops_per_usec(void) {
// For accuracy, uint64_t has to be used both here and in usleep().
uint64_t loops_per_50ms = 0;
uint64_t increment;
// Number of direction changes so far: even = stepping up, odd = stepping down.
uint32_t step = 0;
/**
* Start with an initial estimate. This is going to be significantly
* underestimated due to extra cycles taken by executing and evaluating
* inb() but should be a good starting point.
*/
setup_pit();
do {
loops_per_50ms++;
} while ((inb(0x61) & 0x20) == 0);
// Fast systems could benefit from a larger initial value.
// The scaled estimate is also used as the initial step size.
increment = loops_per_50ms = loops_per_50ms * 8;
/**
* Now continue incrementing count until we execute a loop that ends
* when PIT has already triggered. Then execute binary search until
* increment is lower than we care about.
*/
for(;;) {
// Timed loop under test; volatile keeps the countdown from being optimized away.
volatile uint64_t count = loops_per_50ms;
uint8_t pit_state;
setup_pit();
while (count > 0) {
count--;
}
pit_state = inb(0x61);
/*
 * Flip the search direction when the current candidate is on the wrong
 * side of the target: stepping up (even step) and the PIT has already
 * triggered, or stepping down (odd step) and it has not triggered yet.
 */
if (!(step % 2) ^ !(pit_state & 0x20)) {
increment = increment / 2;
// Stop if we are below the accuracy threshold.
if (increment < 25000)
break;
step++;
}
// Move the candidate by the current step in the current direction.
if (step % 2)
loops_per_50ms -= increment;
else
loops_per_50ms += increment;
}
// Calculate the loop count. Add 1 for rounding up.
// 50000 is the number of microseconds in the 50ms measurement interval.
loops_per_usec = (loops_per_50ms / 50000) + 1;
}
//------------------------------------------------------------------------------
// Public Functions
//------------------------------------------------------------------------------
@ -91,4 +159,10 @@ static void correct_tsc(void)
/**
 * Initializes the timing helpers: runs the TSC correction and, when no TSC
 * rate is known afterwards, calibrates the busy-loop fallback.
 */
void timers_init(void)
{
    correct_tsc();

    // A non-zero clks_per_msec means the TSC path is usable; nothing more to do.
    if (clks_per_msec != 0) {
        return;
    }

    // No TSC rate available: work out loops_per_usec for the busy loop.
    calculate_loops_per_usec();
}