mersenneforum.org  

Go Back   mersenneforum.org > Factoring Projects > Cunningham Tables

Reply
 
Thread Tools
Old 2010-10-02, 00:09   #12
R.D. Silverman
 
R.D. Silverman's Avatar
 
Nov 2003

2^2·5·373 Posts
Default

Quote:
Originally Posted by R.D. Silverman View Post
Instead, I am just going to kick out the assembler code and look at it.
I looked at the assembler output. Nothing obvious stands out except
that the get_time() call is in-lined.

Code:
; 5650 : {   /* start of prepare_bounds */

	push	ebp
	mov	ebp, esp
	sub	esp, 20					; 00000014H

; 5651 : int do_nothing(char *x);
; 5652 : double m_over_a1, n_over_b1, b0_over_b1, a0_over_a1;
; 5653 : double a0, a1, b0, b1, one_over_a1, one_over_b1;
; 5654 : double l1and4, l2and3, l1and3, determ, inv_determ;
; 5655 : double stime;
; 5656 : double temp;
; 5657 : 
; 5658 : 
; 5659 : if (TIME_STATS) stime = get_time();

	DB	15					; 0000000fH
	DB	49					; 00000031H
	mov	DWORD PTR _a$89577[ebp], eax
	mov	DWORD PTR _b$89578[ebp], edx

; 5660 : 
; 5661 : a0 = (double)v1[0];
; 5662 : a1 = (double)v2[0];
; 5663 : b0 = (double)v1[1];
; 5664 : b1 = (double)v2[1];
; 5665 : 
; 5666 : one_over_a1 = 1.0/a1;      
; 5667 : a0_over_a1 = a0 * one_over_a1;
; 5668 : one_over_b1 = 1.0/b1;      
; 5669 : b0_over_b1 = b0 * one_over_b1;
; 5670 : 
; 5671 : /*  We have a parallelogram.  One point is always (0,0).  f is the   */
; 5672 : /*  vertical axis,  e the horizontal.  Compute emin & emax           */
; 5673 : /*   Also, compute intersections and boundary slopes                 */
; 5674 : /*   Note that determ = p (up to sign) so could precompute: but it   */
; 5675 : /*   would require xtra storage to hold the sign bit                 */
; 5676 : 
; 5677 : determ = (a0 * b1 - a1 * b0);  
; 5678 : inv_determ = 1.0/determ;                        
; 5679 : 
; 5680 : if (SHOW_PREP)
; 5681 :    {
; 5682 :    (void) printf("Prep: a0,a1,b0,b1 = %g %g %g %g\n",a0,a1,b0,b1);
; 5683 :    (void) printf("m_over_a1, n_over_b1 = %g %g\n",m_over_a1, n_over_b1);
; 5684 :    (void) printf("a0_over_a1 , b0_over_b1 = %g %g\n",a0_over_a1, b0_over_b1);
; 5685 :    (void) printf("determ, inv = %g %g\n",determ,inv_determ);
; 5686 :    }
; 5687 : 
; 5688 : if (sign == 1) {

	cmp	DWORD PTR _sign$[ebp], 1
	mov	ecx, DWORD PTR _a$89577[ebp]
	movsd	xmm2, QWORD PTR __real@3ff0000000000000
	movd	xmm5, DWORD PTR [edx+4]
	movd	xmm7, DWORD PTR [edx]
	mov	DWORD PTR _t$89576[ebp], ecx
	mov	ecx, DWORD PTR _b$89578[ebp]
	mov	DWORD PTR _t$89576[ebp+4], ecx
	fild	QWORD PTR _t$89576[ebp]
	mov	ecx, DWORD PTR [eax]
	mov	eax, DWORD PTR [eax+4]
	xorps	xmm4, xmm4
	cvtsi2sd xmm4, eax
	movapd	xmm1, xmm2
	divsd	xmm1, xmm4
	cvtdq2pd xmm5, xmm5
	cvtdq2pd xmm7, xmm7
	xorps	xmm6, xmm6
	cvtsi2sd xmm6, ecx
	movapd	xmm0, xmm2
	mulsd	xmm1, xmm5
	mulsd	xmm4, xmm7
	mulsd	xmm5, xmm6
	subsd	xmm4, xmm5
	divsd	xmm0, xmm6
	movapd	xmm3, xmm0
	divsd	xmm2, xmm4

; 5689 : 
; 5690 : if (a0 > 0  && a1 > 0)
R.D. Silverman is offline   Reply With Quote
Old 2010-10-02, 00:24   #13
retina
Undefined
 
retina's Avatar
 
"The unspeakable one"
Jun 2006
My evil lair

2^2×1,553 Posts
Default

Quote:
Originally Posted by R.D. Silverman View Post
Code:
	DB	15					; 0000000fH
	DB	49					; 00000031H
	mov	DWORD PTR _a$89577[ebp], eax
	mov	DWORD PTR _b$89578[ebp], edx

	cmp	DWORD PTR _sign$[ebp], 1
	mov	ecx, DWORD PTR _a$89577[ebp]
	movsd	xmm2, QWORD PTR __real@3ff0000000000000
	movd	xmm5, DWORD PTR [edx+4]
	movd	xmm7, DWORD PTR [edx]
RDTSC (0x0f,0x31) returns the counter in edx:eax
And then "movd xmm5, DWORD PTR [edx+4]" will randomly crash.

Why is edx never initialised to point to anything proper after reading the TSC? Did you really show all the compiled code for that section? If so then get a new compiler.
retina is online now   Reply With Quote
Old 2010-10-02, 00:38   #14
R.D. Silverman
 
R.D. Silverman's Avatar
 
Nov 2003

2^2·5·373 Posts
Default

Quote:
Originally Posted by retina View Post
RDTSC (0x0f,0x31) returns the counter in edx:eax
And then "movd xmm5, DWORD PTR [edx+4]" will randomly crash.

Why is edx never initialised to point to anything proper after reading the TSC? Did you really show all the compiled code for that section? If so then get a new compiler.
Yes. This is all the code. It grabs the clock counter, then converts it to a double.

The compiler is Microsoft Visual Studio 2010 (and VS 2008)
R.D. Silverman is offline   Reply With Quote
Old 2010-10-02, 00:45   #15
R.D. Silverman
 
R.D. Silverman's Avatar
 
Nov 2003

2^2×5×373 Posts
Default

Quote:
Originally Posted by retina View Post
RDTSC (0x0f,0x31) returns the counter in edx:eax
And then "movd xmm5, DWORD PTR [edx+4]" will randomly crash.

Why is edx never initialised to point to anything proper after reading the TSC? Did you really show all the compiled code for that section? If so then get a new compiler.
Nice catch. I failed to see it.

Here is the debug assembler. Note that it does not in-line the get_time()
call and subsequently does not use the edx register:

Code:
; 5650 : {   /* start of prepare_bounds */

	push	ebp
	mov	ebp, esp
	sub	esp, 228				; 000000e4H
	push	ebx
	push	esi
	push	edi

; 5651 : int do_nothing(char *x);
; 5652 : double m_over_a1, n_over_b1, b0_over_b1, a0_over_a1;
; 5653 : double a0, a1, b0, b1, one_over_a1, one_over_b1;
; 5654 : double l1and4, l2and3, l1and3, determ, inv_determ;
; 5655 : double stime;
; 5656 : double temp;
; 5657 : 
; 5658 : 
; 5659 : if (TIME_STATS) stime = get_time();

	mov	eax, 1
	test	eax, eax
	je	SHORT $LN21@prepare_bo
	call	_get_time
	fstp	QWORD PTR _stime$[ebp]
$LN21@prepare_bo:

; 5660 : 
; 5661 : a0 = (double)v1[0];

	mov	eax, DWORD PTR _v1$[ebp]
	fild	DWORD PTR [eax]
	fstp	QWORD PTR _a0$[ebp]

; 5662 : a1 = (double)v2[0];

	mov	eax, DWORD PTR _v2$[ebp]
	fild	DWORD PTR [eax]
	fstp	QWORD PTR _a1$[ebp]

; 5663 : b0 = (double)v1[1];

	mov	eax, DWORD PTR _v1$[ebp]
	fild	DWORD PTR [eax+4]
	fstp	QWORD PTR _b0$[ebp]

; 5664 : b1 = (double)v2[1];

	mov	eax, DWORD PTR _v2$[ebp]

etc.
R.D. Silverman is offline   Reply With Quote
Old 2010-10-02, 00:47   #16
retina
Undefined
 
retina's Avatar
 
"The unspeakable one"
Jun 2006
My evil lair

2^2×1,553 Posts
Default

Quote:
Originally Posted by R.D. Silverman View Post
Yes. This is all the code. It grabs the clock counter, then converts it to a double.
The double conversion is done further down, after transferring through ecx to another location - "fild QWORD PTR _t$89576[ebp]"
Quote:
Originally Posted by R.D. Silverman View Post
The compiler is Microsoft Visual Studio 2010 (and VS 2008)
Write a nice letter to MS and complain.
retina is online now   Reply With Quote
Old 2010-10-02, 01:04   #17
R.D. Silverman
 
R.D. Silverman's Avatar
 
Nov 2003

2^2·5·373 Posts
Default

Quote:
Originally Posted by retina View Post
The double conversion is done further down, after transferring through ecx to another location - "fild QWORD PTR _t$89576[ebp]" Write a nice letter to MS and complain.
get_time calls an assembler routine that samples the clock and returns
a 64 bit int. It converts the 64-bit int to a double and returns it.

I could reorganize the code inside get_time(), but I doubt it will help.

The problem is the misuse of the edx register AFTER get_time() returns.
I suspect it is a bug in the code optimizer when dealing with (the other) floating point code.
R.D. Silverman is offline   Reply With Quote
Old 2010-10-02, 01:47   #18
R.D. Silverman
 
R.D. Silverman's Avatar
 
Nov 2003

7460_{10} Posts
Default

Quote:
Originally Posted by R.D. Silverman View Post
get_time calls an assembler routine that samples the clock and returns
a 64 bit int. It converts the 64-bit int to a double and returns it.

I could reorganize the code inside get_time(), but I doubt it will help.

The problem is the misuse of the edx register AFTER get_time() returns.
I suspect it is a bug in the code optimizer when dealing with (the other) floating point code.
I changed the code so that the 64-bit routine that samples the clock
returns a double instead of an int64 and then called it directly.
The call is not inlined. Instead I just get

call _get_time1

However, the emitted code STILL mis-uses the edx register in the
middle of the floating computations that follow.

3 lines later it does:

"mov ecx, DWORD PTR [edx]", but without initializing where edx is pointing.

In fact, it is still pointing to whatever was placed in it by the clock
sample code.
R.D. Silverman is offline   Reply With Quote
Old 2010-10-02, 03:31   #19
axn
 
axn's Avatar
 
Jun 2003

2^2×3^3×47 Posts
Default

Quote:
Originally Posted by R.D. Silverman View Post
I changed the code so that the 64-bit routine that samples the clock
returns a double instead of an int64 and then called it directly.
The call is not inlined. Instead I just get

call _get_time1

However, the emitted code STILL mis-uses the edx register in the
middle of the floating computations that follow.

3 lines later it does:

"mov ecx, DWORD PTR [edx]", but without initializing where edx is pointing.

In fact, it is still pointing to whatever was placed in it by the clock
sample code.
I believe the first few parameters are passed via registers -- look at the place where it _calls_ your routine. I bet v1 and v2 are passed in eax and edx. These four lines use apparently uninitialized registers.

Code:
	movd	xmm5, DWORD PTR [edx+4]
	movd	xmm7, DWORD PTR [edx]

	mov	ecx, DWORD PTR [eax]
	mov	eax, DWORD PTR [eax+4]
axn is online now   Reply With Quote
Old 2010-10-02, 04:08   #20
retina
Undefined
 
retina's Avatar
 
"The unspeakable one"
Jun 2006
My evil lair

2^2×1,553 Posts
Default

Quote:
Originally Posted by axn View Post
I believe the first few parameters are passed via registers -- look at the place where it _calls_ your routine. I bet v1 and v2 are passed in eax and edx.
Unlikely, because the debug code shows the values being loaded from the stack. It would be extremely strange code that places pointers to the data on the stack AND loads eax/edx with pointers to the data and then calls the subroutine. There is no calling standard that defines that behaviour.

Last fiddled with by retina on 2010-10-02 at 04:10
retina is online now   Reply With Quote
Old 2010-10-02, 09:56   #21
Random Poster
 
Random Poster's Avatar
 
Dec 2008

179_{10} Posts
Default

Quote:
Originally Posted by R.D. Silverman View Post
More weirdness. If I replace printf("1") with do_nothing("1") where do_nothing is just a dummy routine the code STILL fails.
Did you define do_nothing in the same source file? If so, then the optimizer probably replaced the call to it by the contents of the function. Try defining do_nothing in a different source file.

Quote:
Originally Posted by R.D. Silverman View Post
How can the addition of a printf of a static string cure a core dump caused by a read access failure?
Values of general registers aren't expected to survive across function calls, so adding any call (that can't be inlined away) will force the compiler to reassign registers, and (as you noticed) this often works around optimization bugs where registers get garbled.
Random Poster is offline   Reply With Quote
Old 2010-10-02, 11:20   #22
R.D. Silverman
 
R.D. Silverman's Avatar
 
Nov 2003

16444_{8} Posts
Default

Quote:
Originally Posted by Random Poster View Post
Did you define do_nothing in the same source file? If so, then the optimizer probably replaced the call to it by the contents of the function. Try defining do_nothing in a different source file.



Values of general registers aren't expected to survive across function calls, so adding any call (that can't be inlined away) will force the compiler to reassign registers, and (as you noticed) this often works around optimization bugs where registers get garbled.
I found the following in an Intel development manual:

"As discussed in section 2.3, some compilers do not implicitly recognize the RDTSC and CPUID function in inline
assembly code. Compilers like Microsoft® Visual C++® 5.0 normally "guarantee" that any register affected by an
inline assembly code section will not affect the C code around it. When overriding the compiler by using the emit
statements, however, the compiler does not know those instructions are overwriting registers (RDTSC overwrites
EAX and EDX, and CPUID overwrites EAX, EBX, ECX, and EDX). Thus, the compiler may not properly store away
the affected registers, so this must be done manually by the programmer by pushing them onto the stack.
There are a few cases where this will not matter. If the code being time measured is a stand-alone section of code,
completely surrounded by the calls to RDTSC, then the register overwriting cannot affect the code around it. If the
measured code section is written in assembly, and the variables are actually used inside of this section, the compiler
will handle the stack allocation itself. Finally, it will not matter if affecting the correctness of the code around the
measured section is not an issue while cycle testing."

Note however, that I did a workaround such that the compiler does NOT
in-line the clock sample code, but instead calls a subroutine. The emitted
code is still mis-using the edx register a few lines later. It seems certain
that the clock sample subroutine is not restoring the edx register when it
returns.
R.D. Silverman is offline   Reply With Quote
Reply

Thread Tools


Similar Threads
Thread Thread Starter Forum Replies Last Post
mprime 28.10 compiler warning Explorer09 Software 1 2017-01-23 02:50
GCC/compiler warnings Dubslow Programming 2 2016-02-27 06:55
compiler/assembler optimizations possible? ixfd64 Software 7 2011-02-25 20:05
Linux32 -> Windows64 C compiler? geoff Programming 3 2007-09-26 03:09
/. Video processor compiler tha Hardware 17 2005-10-12 08:10

All times are UTC. The time now is 08:06.


Tue Jul 27 08:06:11 UTC 2021 up 4 days, 2:35, 0 users, load averages: 1.30, 1.63, 1.75

Powered by vBulletin® Version 3.8.11
Copyright ©2000 - 2021, Jelsoft Enterprises Ltd.

This forum has received and complied with 0 (zero) government requests for information.

Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.2 or any later version published by the Free Software Foundation.
A copy of the license is included in the FAQ.