Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inline Assembly version of a Pipeline #20

Open
ashvardanian opened this issue Jan 10, 2025 · 0 comments
Open

Inline Assembly version of a Pipeline #20

ashvardanian opened this issue Jan 10, 2025 · 0 comments
Labels
enhancement New feature or request good first issue Good for newcomers help wanted Extra attention is needed

Comments

@ashvardanian
Copy link
Owner

We can rewrite the whole "pipeline" as a single C 99 version:

void pipeline_kernel_c99(unsigned long start, unsigned long end, unsigned long *sum_out, unsigned long *count_out) {
    unsigned long sum = 0;
    unsigned long count = 0;
    unsigned long max_power_of_three = 12157665459056928801ull;

    for (unsigned long value = start; value <= end; ++value) {
        // Check if the value is a power of two
        if (value > 0 && (value & (value - 1)) == 0) continue;
        // Check if the value is a power of three
        if (value > 0 && max_power_of_three % value == 0) continue;

        // Prime factorization
        unsigned long n = value;
        unsigned long factor = 2;

        // Handle factor 2 separately
        while (n % 2 == 0) {
            sum += 2;
            count += 1;
            n /= 2;
        }

        // Handle odd factors
        factor = 3;
        while (factor * factor <= n) {
            while (n % factor == 0) {
                sum += factor;
                count += 1;
                n /= factor;
            }
            factor += 2;
        }

        // If n is still greater than 1, it is a prime factor
        if (n > 1) {
            sum += n;
            count += 1;
        }
    }

    *sum_out = sum;
    *count_out = count;
}

GCC 14.2 produces the following Assembly:

pipeline_kernel_c99(unsigned long, unsigned long, unsigned long*, unsigned long*):
        push    rbp
        mov     rbp, rcx
        push    rbx
        mov     rbx, rdx
        cmp     rsi, rdi
        jb      .L15
        mov     r9, rdi
        mov     r10, rsi
        xor     edi, edi
        xor     r8d, r8d
        movabs  r11, -6289078614652622815
        jmp     .L14
.L7:
        add     r9, 1
        cmp     r10, r9
        jb      .L2
.L14:
        xor     ecx, ecx
        test    r9, r9
        je      .L9
        lea     rax, [r9-1]
        test    rax, r9
        je      .L7
        mov     rax, r11
        xor     edx, edx
        div     r9
        test    rdx, rdx
        je      .L7
        mov     rcx, r9
        test    r9b, 1
        jne     .L39
.L9:
        shr     rcx
        add     r8, 2
        add     rdi, 1
        test    cl, 1
        je      .L9
        cmp     rcx, 8
        jbe     .L10
.L5:
        mov     esi, 3
.L11:
        mov     rax, rcx
        xor     edx, edx
        div     rsi
        test    rdx, rdx
        jne     .L13
.L12:
        mov     rax, rcx
        xor     edx, edx
        add     r8, rsi
        add     rdi, 1
        div     rsi
        xor     edx, edx
        mov     rcx, rax
        div     rsi
        test    rdx, rdx
        je      .L12
.L13:
        add     rsi, 2
        mov     rax, rsi
        imul    rax, rsi
        cmp     rcx, rax
        jnb     .L11
.L10:
        cmp     rcx, 1
        je      .L7
.L6:
        add     r9, 1
        add     r8, rcx
        add     rdi, 1
        cmp     r10, r9
        jnb     .L14
.L2:
        mov     QWORD PTR [rbx], r8
        mov     QWORD PTR [rbp+0], rdi
        pop     rbx
        pop     rbp
        ret
.L39:
        cmp     r9, 8
        ja      .L5
        jmp     .L6
.L15:
        xor     edi, edi
        xor     r8d, r8d
        mov     QWORD PTR [rbx], r8
        mov     QWORD PTR [rbp+0], rdi
        pop     rbx
        pop     rbp
        ret

It seems like this can be further optimized. Who's up for the task?

@ashvardanian ashvardanian added enhancement New feature or request help wanted Extra attention is needed good first issue Good for newcomers labels Jan 10, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request good first issue Good for newcomers help wanted Extra attention is needed
Projects
None yet
Development

No branches or pull requests

1 participant