Implemented a class uint128 with the basic arithmetic operations and

replaced the previous, somewhat complicated and inexact method of computing the TSC conversion factor using it. git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@34542 a95241bf-73f2-0310-859d-f6bbb57e9c96
2009-12-07 21:37:56 +00:00 · 2009-12-07 21:37:56 +00:00 · db905187d5
parent 559a6a891c
commit db905187d5
1 changed files with 105 additions and 78 deletions
--- a/src/system/boot/platform/bios_ia32/cpu.cpp
+++ b/src/system/boot/platform/bios_ia32/cpu.cpp
@ -1,4 +1,5 @@
 /*
+ * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
 * Distributed under the terms of the MIT License.
 *
@ -39,6 +40,107 @@ uint32 gTimeConversionFactor;
 #define RDTSC_FEATURE	(1UL << 4)


+struct uint128 {
+	uint128(uint64 low, uint64 high = 0)
+		:
+		low(low),
+		high(high)
+	{
+	}
+
+	bool operator<(const uint128& other) const
+	{
+		return high < other.high || (high == other.high && low < other.low);
+	}
+
+	bool operator<=(const uint128& other) const
+	{
+		return !(other < *this);
+	}
+
+	uint128 operator<<(int count) const
+	{
+		if (count == 0)
+			return *this;
+
+		if (count >= 128)
+			return 0;
+
+		if (count >= 64)
+			return uint128(0, low << (count - 64));
+
+		return uint128(low << count, (high << count) | (low >> (64 - count)));
+	}
+
+	uint128 operator>>(int count) const
+	{
+		if (count == 0)
+			return *this;
+
+		if (count >= 128)
+			return 0;
+
+		if (count >= 64)
+			return uint128(high >> (count - 64), 0);
+
+		return uint128((low >> count) | (high << (64 - count)), high >> count);
+	}
+
+	uint128 operator+(const uint128& other) const
+	{
+		uint64 resultLow = low + other.low;
+		return uint128(resultLow,
+			high + other.high + (resultLow < low ? 1 : 0));
+	}
+
+	uint128 operator-(const uint128& other) const
+	{
+		uint64 resultLow = low - other.low;
+		return uint128(resultLow,
+			high - other.high - (resultLow > low ? 1 : 0));
+	}
+
+	uint128 operator*(uint32 other) const
+	{
+		uint64 resultMid = (low >> 32) * other;
+		uint64 resultLow = (low & 0xffffffff) * other + (resultMid << 32);
+		return uint128(resultLow,
+			high * other + (resultMid >> 32)
+				+ (resultLow < resultMid << 32 ? 1 : 0));
+	}
+
+	uint128 operator/(const uint128& other) const
+	{
+		int shift = 0;
+		uint128 shiftedDivider = other;
+		while (shiftedDivider.high >> 63 == 0 && shiftedDivider < *this) {
+			shiftedDivider = shiftedDivider << 1;
+			shift++;
+		}
+
+		uint128 result = 0;
+		uint128 temp = *this;
+		for (; shift >= 0; shift--, shiftedDivider = shiftedDivider >> 1) {
+			if (shiftedDivider <= temp) {
+				result = result + (uint128(1) << shift);
+				temp = temp - shiftedDivider;
+			}
+		}
+
+		return result;
+	}
+
+	operator uint64() const
+	{
+		return low;
+	}
+
+private:
+	uint64	low;
+	uint64	high;
+};
+
+
 static void
 calculate_cpu_conversion_factor()
 {
@ -124,83 +226,8 @@ not_so_quick_sample:
 	expired = ((s_high << 8) | s_low) - ((high << 8) | low);
 	p3 *= TIMER_CLKNUM_HZ;

-	/*
-	 * cv_factor contains time in usecs per CPU cycle * 2^32
-	 *
-	 * The code below is a bit fancy. Originally Michael Noistering
-	 * had it like:
-	 *
-	 *     cv_factor = ((uint64)1000000<<32) * expired / p3;
-	 *
-	 * whic is perfect, but unfortunately 1000000ULL<<32*expired
-	 * may overflow in fast cpus with the long sampling period
-	 * i put there for being as accurate as possible under
-	 * vmware.
-	 *
-	 * The below calculation is based in that we are trying
-	 * to calculate:
-	 *
-	 *     (C*expired)/p3 -> (C*(x0<<k + x1))/p3 ->
-	 *     (C*(x0<<k))/p3 + (C*x1)/p3
-	 *
-	 * Now the term (C*(x0<<k))/p3 is rewritten as:
-	 *
-	 *     (C*(x0<<k))/p3 -> ((C*x0)/p3)<<k + reminder
-	 *
-	 * where reminder is:
-	 *
-	 *     floor((1<<k)*decimalPart((C*x0)/p3))
-	 *
-	 * which is approximated as:
-	 *
-	 *     floor((1<<k)*decimalPart(((C*x0)%p3)/p3)) ->
-	 *     (((C*x0)%p3)<<k)/p3
-	 *
-	 * So the final expression is:
-	 *
-	 *     ((C*x0)/p3)<<k + (((C*x0)%p3)<<k)/p3 + (C*x1)/p3
-	 */
-	 /*
-	 * To get the highest accuracy with this method
-	 * x0 should have the 12 most significant bits of expired
-	 * to minimize the error upon <<k.
-	 */
-	 /*
-	 * Of course, you are not expected to understand any of this.
-	 */
-	{
-		unsigned i;
-		unsigned k;
-		uint64 C;
-		uint64 x0;
-		uint64 x1;
-		uint64 a, b, c;
-
-		/* first calculate k*/
-		k = 0;
-		for (i = 12; i < 16; i++) {
-			if (expired & (1<<i))
-				k = i - 11;
-		}
-
-		C = 1000000ULL << 32;
-		x0 = expired >> k;
-		x1 = expired & ((1 << k) - 1);
-
-		a = ((C * x0) / p3) << k;
-		b = (((C * x0) % p3) << k) / p3;
-		c = (C * x1) / p3;
-#if 0
-		dprintf("a=%Ld\n", a);
-		dprintf("b=%Ld\n", b);
-		dprintf("c=%Ld\n", c);
-		dprintf("%d %Ld\n", expired, p3);
-#endif
-		gTimeConversionFactor = a + b + c;
-#if 0
-		dprintf("cvf=%Ld\n", cv_factor);
-#endif
-	}
+	gTimeConversionFactor = ((uint128(expired) * uint32(1000000)) << 32)
+		/ uint128(p3);

 #ifdef TRACE_CPU
 	if (p3 / expired / 1000000000LL)
@ -210,7 +237,7 @@ not_so_quick_sample:
 #endif

 	gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor;
-	gKernelArgs.arch_args.cpu_clock_speed = p3/expired;
+	gKernelArgs.arch_args.cpu_clock_speed = p3 / expired;
 }