odin-lang/Odin Issue #4502: global is misaligned
2024-11-19 20:57:29 bolhak
Odin: dev-2024-11:e6475fec4
OS: Windows 11 Professional (version: 23H2), build 22631.4460
CPU: AMD Ryzen 9 7950X3D 16-Core Processor
RAM: 64701 MiB
Backend: LLVM 18.1.8
Context
I started using Odin about a year ago, and I love it! I can't stop using it, it's so much better than C/C++. I use it whenever I can.
So far, I haven’t had many issues, but today my luck ran out when I tried to compile my code with AVX instructions. After a bit of debugging, I found that some of my global data was misaligned.
I’ve reduced the code to a small example to reproduce the problem.
Expected Behavior
For the global data to be correctly aligned.
Current Behavior
The global data is misaligned, and the AVX builds are blowing up in my face. :(
Steps to Reproduce
Run the code below. Add or remove the padding fields to make the struct misaligned.
package main
import "core:fmt"
import "core:math/rand"
// Globals is the layout under test: four f32 padding fields (16 bytes) placed
// ahead of a 4x4 f32 matrix. Uncommenting padding20..padding32 adds further
// 4-byte fields and so varies the layout in front of `mat`.
Globals :: struct
{
padding04: f32,
padding08: f32,
padding12: f32,
padding16: f32,
// Disabled padding fields; enable some or all of them to vary the layout.
// padding20: f32,
// padding24: f32,
// padding28: f32,
// padding32: f32,
// The member whose alignment matters: it is the left operand of the matrix multiply in main.
mat: matrix[4, 4]f32,
}
// The single global instance under test; main prints its address and its contents.
g: Globals = {} // <- struct is misaligned (seems to be stuck on 16 byte alignment)
// compiled with core-avx2
// NOTE(review): presumably built with `-microarch:core-avx2` — confirm the exact build flags with the reporter.
// Reproduction driver: fills the global matrix with constants and a local
// matrix with random values, prints the layout facts (sizes, alignments,
// address and raw contents of `g`), then multiplies the two matrices.
main :: proc()
{
// Constant, symmetric test matrix written into the global.
g.mat = {
2, -1.0, -1.0, -1.0,
-1.0, 2, -1.0, -1.0,
-1.0, -1.0, 2, -1.0,
-1.0, -1.0, -1.0, 2,
}
// Local right-hand operand; every element is overwritten with a random value below,
// so the printed product differs between runs.
mat: matrix[4, 4]f32
for i in 0..<4 {
for j in 0..<4 {
// Disabled: writing the random value straight into the global fails to compile.
// g.mat[i, j] = rand.float32_uniform(1.0, 10.0)
mat[i, j] = rand.float32_uniform(1.0, 10.0)
}
}
// Size and alignment the compiler reports for the matrix type and for the struct.
fmt.println(size_of(matrix[4, 4]f32))
fmt.println(align_of(matrix[4, 4]f32))
fmt.println(size_of(Globals))
fmt.println(align_of(Globals))
// Reinterpret the global as a run of f32 values so its address and raw contents
// (padding fields included) can be printed.
struct_memory_as_floats := cast([^]f32)&g
fmt.println("struct address =", rawptr(struct_memory_as_floats))
fmt.println(struct_memory_as_floats[:size_of(Globals) / size_of(f32)])
// The multiply reads g.mat; this is the statement the reporter sees faulting.
fmt.println(g.mat * mat) // CRASH: vmovaps ymm, m256 <- not 32 byte aligned
}

Comments (2)
2024-11-19 21:57:48 Kelimion
It doesn't crash for me (Win 10 Pro, AMD Ryzen 9 5950X).
But the last line does print "random" output every time you run it.
[0, 0, 0, 0, 0, 0, 0, 0, 2, -1, -1, -1, -1, 2, -1, -1, -1, -1, 2, -1, -1, -1, -1, 2] matrix[-6.1015005, -2.5927868, -5.497138, -4.8243656; 5.1538019, -19.57756, -7.1953697, -14.878971; -3.7303448, 1.4992466, -2.9034424, -2.4826412; -19.605844, -5.652091, -12.6513329, -3.7133465]
[0, 0, 0, 0, 0, 0, 0, 0, 2, -1, -1, -1, -1, 2, -1, -1, -1, -1, 2, -1, -1, -1, -1, 2] matrix[-13.136705, 1.1955423, -3.4499664, -10.6140099; -6.9107647, 1.10276508, 8.6884727, 1.2853293; 0.67079926, -9.7492828, -10.117503, -4.272027; 1.8718166, -10.0977879, -10.544214, -1.89500809]
etc.
2024-11-19 22:06:01 Kelimion
I did find a code gen bug by slightly changing the code:
package main
import "core:fmt"
import "core:math/rand"
// Variant of the reporter's struct with an explicit #align(128) request on the
// struct; the fields are otherwise the same: four f32 padding fields followed by
// a 4x4 f32 matrix.
Globals :: struct #align(128) {
padding04: f32,
padding08: f32,
padding12: f32,
padding16: f32,
// Disabled padding fields, kept from the original report.
// padding20: f32,
// padding24: f32,
// padding28: f32,
// padding32: f32,
// Written element-by-element in main and used as the left operand of the multiply.
mat: matrix[4, 4]f32,
}
// The single global instance under test. The trailing remarks below are carried
// over unchanged from the original report.
g := Globals{} // <- struct is misaligned (seems to be stuck on 16 byte alignment)
// compiled with core-avx2
// Variant of the reproduction driver: the random values are stored directly into
// the global matrix via an indexed write, instead of into the local `mat`.
// That indexed store is what triggers the LLVM code generation failure shown
// after this listing.
main :: proc() {
// Constant, symmetric test matrix written into the global.
g.mat = {
2, -1.0, -1.0, -1.0,
-1.0, 2, -1.0, -1.0,
-1.0, -1.0, 2, -1.0,
-1.0, -1.0, -1.0, 2,
}
// Local right-hand operand; never assigned in this variant (its assignment is commented out below).
mat: matrix[4, 4]f32
for i in 0..<4 {
for j in 0..<4 {
// Indexed store into the global matrix with run-time indices.
g.mat[i, j] = rand.float32_uniform(1.0, 10.0) // fails to compile
// mat[i, j] = 1 // rand.float32_uniform(1.0, 10.0)
}
}
// Size and alignment the compiler reports for the matrix type and for the struct.
fmt.println(size_of(matrix[4, 4]f32))
fmt.println(align_of(matrix[4, 4]f32))
fmt.println(size_of(Globals))
fmt.println(align_of(Globals))
// Reinterpret the global as a run of f32 values so its address and raw contents can be printed.
struct_memory_as_floats := cast([^]f32)&g
fmt.println("struct address =", rawptr(struct_memory_as_floats))
fmt.println(struct_memory_as_floats[:size_of(Globals) / size_of(f32)])
// Trailing remark carried over from the original report; this variant does not get past compilation.
fmt.println(g.mat * mat) // CRASH: vmovaps ymm, m256 <- not 32 byte aligned
}
Which results in:
LLVM CODE GEN FAILED FOR PROCEDURE: main.main
define void @main.main(ptr noalias nocapture nonnull %__.context_ptr) {
decls:
%mat = alloca [16 x float], align 64
%i = alloca i64, align 8
%0 = alloca i64, align 8
%i1 = alloca i64, align 8
%j = alloca i64, align 8
%1 = alloca i64, align 8
%j4 = alloca i64, align 8
%2 = alloca { ptr, i64 }, align 8
%3 = alloca [32 x i8], align 16
%4 = alloca i64, align 8
%5 = alloca %..any, align 8
%6 = alloca i64, align 8
%7 = alloca %..any, align 8
%8 = alloca i64, align 8
%9 = alloca %..any, align 8
%10 = alloca i64, align 8
%11 = alloca %..any, align 8
%struct_memory_as_floats = alloca ptr, align 8
%12 = alloca %..string, align 8
%13 = alloca %..any, align 8
%14 = alloca %..any, align 8
%15 = alloca { ptr, i64 }, align 8
%16 = alloca %..any, align 8
%17 = alloca [16 x float], align 64
%18 = alloca %..any, align 8
br label %entry
entry: ; preds = %decls
store [16 x float] [float 2.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 2.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 2.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 2.000000e+00], ptr getelementptr inbounds (%main.Globals, ptr @main.g, i64 0, i32 5), align 4
call void @llvm.memset.p0.i64(ptr %mat, i8 0, i64 64, i1 false)
store i64 0, ptr %i, align 8
store i64 0, ptr %0, align 8
br label %for.interval.loop
for.interval.loop: ; preds = %for.interval.post5, %entry
%19 = load i64, ptr %i, align 8
%20 = icmp slt i64 %19, 4
br i1 %20, label %for.interval.body, label %for.interval.done6
for.interval.body: ; preds = %for.interval.loop
%21 = load i64, ptr %i, align 8
%22 = load i64, ptr %0, align 8
store i64 %21, ptr %i1, align 8
store i64 0, ptr %j, align 8
store i64 0, ptr %1, align 8
br label %for.interval.loop2
for.interval.loop2: ; preds = %for.interval.post, %for.interval.body
%23 = load i64, ptr %j, align 8
%24 = icmp slt i64 %23, 4
br i1 %24, label %for.interval.body3, label %for.interval.done
for.interval.body3: ; preds = %for.interval.loop2
%25 = load i64, ptr %j, align 8
%26 = load i64, ptr %1, align 8
store i64 %25, ptr %j4, align 8
%27 = load i64, ptr %i1, align 8
%28 = load i64, ptr %j4, align 8
%29 = mul i64 %28, 4
%30 = add i64 %27, %29
call void @runtime.matrix_bounds_check_error(ptr @"ggv$3e", i32 31, i32 10, i64 %27, i64 %28, i64 4, i64 4)
%31 = getelementptr inbounds %runtime.Context, ptr %__.context_ptr, i32 0, i32 4
%32 = load %runtime.Random_Generator, ptr %31, align 8
%33 = call float @rand.float32_range(float 1.000000e+00, float 1.000000e+01, ptr %31, ptr %__.context_ptr)
store float %33, ptr getelementptr (%main.Globals, ptr @main.g, i64 0, i32 5, i64 %30), align 4
br label %for.interval.post
for.interval.post: ; preds = %for.interval.body3
%34 = load i64, ptr %j, align 8
%35 = add i64 %34, 1
store i64 %35, ptr %j, align 8
%36 = load i64, ptr %1, align 8
%37 = add i64 %36, 1
store i64 %37, ptr %1, align 8
br label %for.interval.loop2
for.interval.done: ; preds = %for.interval.loop2
br label %for.interval.post5
for.interval.post5: ; preds = %for.interval.done
%38 = load i64, ptr %i, align 8
%39 = add i64 %38, 1
store i64 %39, ptr %i, align 8
%40 = load i64, ptr %0, align 8
%41 = add i64 %40, 1
store i64 %41, ptr %0, align 8
br label %for.interval.loop
for.interval.done6: ; preds = %for.interval.loop
call void @llvm.memset.inline.p0.i64(ptr %2, i8 0, i64 16, i1 false)
call void @llvm.memset.inline.p0.i64(ptr %3, i8 0, i64 32, i1 false)
%42 = getelementptr [1 x %..any], ptr %3, i64 0, i64 0
store i64 64, ptr %4, align 8
%43 = getelementptr inbounds %..any, ptr %5, i32 0, i32 0
%44 = getelementptr inbounds %..any, ptr %5, i32 0, i32 1
store ptr %4, ptr %43, align 8
store i64 4683743612465315844, ptr %44, align 8
%45 = load %..any, ptr %5, align 8
store %..any %45, ptr %42, align 8
%46 = getelementptr [1 x %..any], ptr %3, i64 0, i64 0
%47 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 0
store ptr %46, ptr %47, align 8
%48 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 1
store i64 1, ptr %48, align 8
%49 = load { ptr, i64 }, ptr %2, align 8
%50 = call i64 @fmt.println(ptr %2, ptr @"ggv$46", i1 zeroext true, ptr %__.context_ptr)
%51 = getelementptr [1 x %..any], ptr %3, i64 0, i64 0
store i64 32, ptr %6, align 8
%52 = getelementptr inbounds %..any, ptr %7, i32 0, i32 0
%53 = getelementptr inbounds %..any, ptr %7, i32 0, i32 1
store ptr %6, ptr %52, align 8
store i64 4683743612465315844, ptr %53, align 8
%54 = load %..any, ptr %7, align 8
store %..any %54, ptr %51, align 8
%55 = getelementptr [1 x %..any], ptr %3, i64 0, i64 0
%56 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 0
store ptr %55, ptr %56, align 8
%57 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 1
store i64 1, ptr %57, align 8
%58 = load { ptr, i64 }, ptr %2, align 8
%59 = call i64 @fmt.println(ptr %2, ptr @"ggv$48", i1 zeroext true, ptr %__.context_ptr)
%60 = getelementptr [1 x %..any], ptr %3, i64 0, i64 0
store i64 128, ptr %8, align 8
%61 = getelementptr inbounds %..any, ptr %9, i32 0, i32 0
%62 = getelementptr inbounds %..any, ptr %9, i32 0, i32 1
store ptr %8, ptr %61, align 8
store i64 4683743612465315844, ptr %62, align 8
%63 = load %..any, ptr %9, align 8
store %..any %63, ptr %60, align 8
%64 = getelementptr [1 x %..any], ptr %3, i64 0, i64 0
%65 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 0
store ptr %64, ptr %65, align 8
%66 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 1
store i64 1, ptr %66, align 8
%67 = load { ptr, i64 }, ptr %2, align 8
%68 = call i64 @fmt.println(ptr %2, ptr @"ggv$4c", i1 zeroext true, ptr %__.context_ptr)
%69 = getelementptr [1 x %..any], ptr %3, i64 0, i64 0
store i64 128, ptr %10, align 8
%70 = getelementptr inbounds %..any, ptr %11, i32 0, i32 0
%71 = getelementptr inbounds %..any, ptr %11, i32 0, i32 1
store ptr %10, ptr %70, align 8
store i64 4683743612465315844, ptr %71, align 8
%72 = load %..any, ptr %11, align 8
store %..any %72, ptr %69, align 8
%73 = getelementptr [1 x %..any], ptr %3, i64 0, i64 0
%74 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 0
store ptr %73, ptr %74, align 8
%75 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 1
store i64 1, ptr %75, align 8
%76 = load { ptr, i64 }, ptr %2, align 8
%77 = call i64 @fmt.println(ptr %2, ptr @"ggv$4f", i1 zeroext true, ptr %__.context_ptr)
store ptr @main.g, ptr %struct_memory_as_floats, align 8
%78 = load ptr, ptr %struct_memory_as_floats, align 8
%79 = getelementptr [2 x %..any], ptr %3, i64 0, i64 0
store %..string { ptr @"csbs$bbd", i64 16 }, ptr %12, align 8
%80 = getelementptr inbounds %..any, ptr %13, i32 0, i32 0
%81 = getelementptr inbounds %..any, ptr %13, i32 0, i32 1
store ptr %12, ptr %80, align 8
store i64 432345564227567625, ptr %81, align 8
%82 = load %..any, ptr %13, align 8
store %..any %82, ptr %79, align 8
%83 = getelementptr [2 x %..any], ptr %3, i64 0, i64 1
call void @llvm.memset.inline.p0.i64(ptr %14, i8 0, i64 16, i1 false)
%84 = getelementptr inbounds %..any, ptr %14, i32 0, i32 0
%85 = getelementptr inbounds %..any, ptr %14, i32 0, i32 1
store ptr %struct_memory_as_floats, ptr %84, align 8
store i64 720575940379279361, ptr %85, align 8
%86 = load %..any, ptr %14, align 8
store %..any %86, ptr %83, align 8
%87 = getelementptr [2 x %..any], ptr %3, i64 0, i64 0
%88 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 0
store ptr %87, ptr %88, align 8
%89 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 1
store i64 2, ptr %89, align 8
%90 = load { ptr, i64 }, ptr %2, align 8
%91 = call i64 @fmt.println(ptr %2, ptr @"ggv$5c", i1 zeroext true, ptr %__.context_ptr)
%92 = load ptr, ptr %struct_memory_as_floats, align 8
call void @runtime.multi_pointer_slice_expr_error(ptr @"ggv$60", i32 44, i32 37, i64 0, i64 32)
%93 = getelementptr float, ptr %92, i64 0
%94 = getelementptr inbounds { ptr, i64 }, ptr %15, i32 0, i32 0
%95 = getelementptr inbounds { ptr, i64 }, ptr %15, i32 0, i32 1
store ptr %93, ptr %94, align 8
store i64 32, ptr %95, align 8
%96 = load { ptr, i64 }, ptr %15, align 8
%97 = getelementptr [1 x %..any], ptr %3, i64 0, i64 0
call void @llvm.memset.inline.p0.i64(ptr %16, i8 0, i64 16, i1 false)
%98 = getelementptr inbounds %..any, ptr %16, i32 0, i32 0
%99 = getelementptr inbounds %..any, ptr %16, i32 0, i32 1
store ptr %15, ptr %98, align 8
store i64 1152921504606846981, ptr %99, align 8
%100 = load %..any, ptr %16, align 8
store %..any %100, ptr %97, align 8
%101 = getelementptr [1 x %..any], ptr %3, i64 0, i64 0
%102 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 0
store ptr %101, ptr %102, align 8
%103 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 1
store i64 1, ptr %103, align 8
%104 = load { ptr, i64 }, ptr %2, align 8
%105 = call i64 @fmt.println(ptr %2, ptr @"ggv$67", i1 zeroext true, ptr %__.context_ptr)
%106 = load [16 x float], ptr getelementptr inbounds (%main.Globals, ptr @main.g, i64 0, i32 5), align 4
%107 = load [16 x float], ptr %mat, align 4
%108 = load <16 x float>, ptr getelementptr inbounds (%main.Globals, ptr @main.g, i64 0, i32 5), align 32
%109 = load <16 x float>, ptr %mat, align 32
%110 = shufflevector <16 x float> %108, <16 x float> undef, <4 x i32>
%111 = shufflevector <16 x float> %108, <16 x float> undef, <4 x i32>
%112 = shufflevector <16 x float> %108, <16 x float> undef, <4 x i32>
%113 = shufflevector <16 x float> %108, <16 x float> undef, <4 x i32>
%114 = shufflevector <16 x float> %109, <16 x float> undef, <4 x i32>
%115 = shufflevector <16 x float> %109, <16 x float> undef, <4 x i32>
%116 = shufflevector <16 x float> %109, <16 x float> undef, <4 x i32>
%117 = shufflevector <16 x float> %109, <16 x float> undef, <4 x i32>
call void @llvm.memset.p0.i64(ptr %17, i8 0, i64 64, i1 false)
%118 = fmul <4 x float> %110, %114
%119 = shufflevector <4 x float> %118, <4 x float> undef, <2 x i32>
%120 = shufflevector <4 x float> %118, <4 x float> undef, <2 x i32>
%121 = fadd <2 x float> %119, %120
%122 = shufflevector <2 x float> %121, <2 x float> undef, <1 x i32> zeroinitializer
%123 = shufflevector <2 x float> %121, <2 x float> undef, <1 x i32>
%124 = fadd <1 x float> %122, %123
%125 = extractelement <1 x float> %124, i32 0
%126 = getelementptr [16 x float], ptr %17, i64 0, i64 0
store float %125, ptr %126, align 4
%127 = fmul <4 x float> %110, %115
%128 = shufflevector <4 x float> %127, <4 x float> undef, <2 x i32>
%129 = shufflevector <4 x float> %127, <4 x float> undef, <2 x i32>
%130 = fadd <2 x float> %128, %129
%131 = shufflevector <2 x float> %130, <2 x float> undef, <1 x i32> zeroinitializer
%132 = shufflevector <2 x float> %130, <2 x float> undef, <1 x i32>
%133 = fadd <1 x float> %131, %132
%134 = extractelement <1 x float> %133, i32 0
%135 = getelementptr [16 x float], ptr %17, i64 0, i64 4
store float %134, ptr %135, align 4
%136 = fmul <4 x float> %110, %116
%137 = shufflevector <4 x float> %136, <4 x float> undef, <2 x i32>
%138 = shufflevector <4 x float> %136, <4 x float> undef, <2 x i32>
%139 = fadd <2 x float> %137, %138
%140 = shufflevector <2 x float> %139, <2 x float> undef, <1 x i32> zeroinitializer
%141 = shufflevector <2 x float> %139, <2 x float> undef, <1 x i32>
%142 = fadd <1 x float> %140, %141
%143 = extractelement <1 x float> %142, i32 0
%144 = getelementptr [16 x float], ptr %17, i64 0, i64 8
store float %143, ptr %144, align 4
%145 = fmul <4 x float> %110, %117
%146 = shufflevector <4 x float> %145, <4 x float> undef, <2 x i32>
%147 = shufflevector <4 x float> %145, <4 x float> undef, <2 x i32>
%148 = fadd <2 x float> %146, %147
%149 = shufflevector <2 x float> %148, <2 x float> undef, <1 x i32> zeroinitializer
%150 = shufflevector <2 x float> %148, <2 x float> undef, <1 x i32>
%151 = fadd <1 x float> %149, %150
%152 = extractelement <1 x float> %151, i32 0
%153 = getelementptr [16 x float], ptr %17, i64 0, i64 12
store float %152, ptr %153, align 4
%154 = fmul <4 x float> %111, %114
%155 = shufflevector <4 x float> %154, <4 x float> undef, <2 x i32>
%156 = shufflevector <4 x float> %154, <4 x float> undef, <2 x i32>
%157 = fadd <2 x float> %155, %156
%158 = shufflevector <2 x float> %157, <2 x float> undef, <1 x i32> zeroinitializer
%159 = shufflevector <2 x float> %157, <2 x float> undef, <1 x i32>
%160 = fadd <1 x float> %158, %159
%161 = extractelement <1 x float> %160, i32 0
%162 = getelementptr [16 x float], ptr %17, i64 0, i64 1
store float %161, ptr %162, align 4
%163 = fmul <4 x float> %111, %115
%164 = shufflevector <4 x float> %163, <4 x float> undef, <2 x i32>
%165 = shufflevector <4 x float> %163, <4 x float> undef, <2 x i32>
%166 = fadd <2 x float> %164, %165
%167 = shufflevector <2 x float> %166, <2 x float> undef, <1 x i32> zeroinitializer
%168 = shufflevector <2 x float> %166, <2 x float> undef, <1 x i32>
%169 = fadd <1 x float> %167, %168
%170 = extractelement <1 x float> %169, i32 0
%171 = getelementptr [16 x float], ptr %17, i64 0, i64 5
store float %170, ptr %171, align 4
%172 = fmul <4 x float> %111, %116
%173 = shufflevector <4 x float> %172, <4 x float> undef, <2 x i32>
%174 = shufflevector <4 x float> %172, <4 x float> undef, <2 x i32>
%175 = fadd <2 x float> %173, %174
%176 = shufflevector <2 x float> %175, <2 x float> undef, <1 x i32> zeroinitializer
%177 = shufflevector <2 x float> %175, <2 x float> undef, <1 x i32>
%178 = fadd <1 x float> %176, %177
%179 = extractelement <1 x float> %178, i32 0
%180 = getelementptr [16 x float], ptr %17, i64 0, i64 9
store float %179, ptr %180, align 4
%181 = fmul <4 x float> %111, %117
%182 = shufflevector <4 x float> %181, <4 x float> undef, <2 x i32>
%183 = shufflevector <4 x float> %181, <4 x float> undef, <2 x i32>
%184 = fadd <2 x float> %182, %183
%185 = shufflevector <2 x float> %184, <2 x float> undef, <1 x i32> zeroinitializer
%186 = shufflevector <2 x float> %184, <2 x float> undef, <1 x i32>
%187 = fadd <1 x float> %185, %186
%188 = extractelement <1 x float> %187, i32 0
%189 = getelementptr [16 x float], ptr %17, i64 0, i64 13
store float %188, ptr %189, align 4
%190 = fmul <4 x float> %112, %114
%191 = shufflevector <4 x float> %190, <4 x float> undef, <2 x i32>
%192 = shufflevector <4 x float> %190, <4 x float> undef, <2 x i32>
%193 = fadd <2 x float> %191, %192
%194 = shufflevector <2 x float> %193, <2 x float> undef, <1 x i32> zeroinitializer
%195 = shufflevector <2 x float> %193, <2 x float> undef, <1 x i32>
%196 = fadd <1 x float> %194, %195
%197 = extractelement <1 x float> %196, i32 0
%198 = getelementptr [16 x float], ptr %17, i64 0, i64 2
store float %197, ptr %198, align 4
%199 = fmul <4 x float> %112, %115
%200 = shufflevector <4 x float> %199, <4 x float> undef, <2 x i32>
%201 = shufflevector <4 x float> %199, <4 x float> undef, <2 x i32>
%202 = fadd <2 x float> %200, %201
%203 = shufflevector <2 x float> %202, <2 x float> undef, <1 x i32> zeroinitializer
%204 = shufflevector <2 x float> %202, <2 x float> undef, <1 x i32>
%205 = fadd <1 x float> %203, %204
%206 = extractelement <1 x float> %205, i32 0
%207 = getelementptr [16 x float], ptr %17, i64 0, i64 6
store float %206, ptr %207, align 4
%208 = fmul <4 x float> %112, %116
%209 = shufflevector <4 x float> %208, <4 x float> undef, <2 x i32>
%210 = shufflevector <4 x float> %208, <4 x float> undef, <2 x i32>
%211 = fadd <2 x float> %209, %210
%212 = shufflevector <2 x float> %211, <2 x float> undef, <1 x i32> zeroinitializer
%213 = shufflevector <2 x float> %211, <2 x float> undef, <1 x i32>
%214 = fadd <1 x float> %212, %213
%215 = extractelement <1 x float> %214, i32 0
%216 = getelementptr [16 x float], ptr %17, i64 0, i64 10
store float %215, ptr %216, align 4
%217 = fmul <4 x float> %112, %117
%218 = shufflevector <4 x float> %217, <4 x float> undef, <2 x i32>
%219 = shufflevector <4 x float> %217, <4 x float> undef, <2 x i32>
%220 = fadd <2 x float> %218, %219
%221 = shufflevector <2 x float> %220, <2 x float> undef, <1 x i32> zeroinitializer
%222 = shufflevector <2 x float> %220, <2 x float> undef, <1 x i32>
%223 = fadd <1 x float> %221, %222
%224 = extractelement <1 x float> %223, i32 0
%225 = getelementptr [16 x float], ptr %17, i64 0, i64 14
store float %224, ptr %225, align 4
%226 = fmul <4 x float> %113, %114
%227 = shufflevector <4 x float> %226, <4 x float> undef, <2 x i32>
%228 = shufflevector <4 x float> %226, <4 x float> undef, <2 x i32>
%229 = fadd <2 x float> %227, %228
%230 = shufflevector <2 x float> %229, <2 x float> undef, <1 x i32> zeroinitializer
%231 = shufflevector <2 x float> %229, <2 x float> undef, <1 x i32>
%232 = fadd <1 x float> %230, %231
%233 = extractelement <1 x float> %232, i32 0
%234 = getelementptr [16 x float], ptr %17, i64 0, i64 3
store float %233, ptr %234, align 4
%235 = fmul <4 x float> %113, %115
%236 = shufflevector <4 x float> %235, <4 x float> undef, <2 x i32>
%237 = shufflevector <4 x float> %235, <4 x float> undef, <2 x i32>
%238 = fadd <2 x float> %236, %237
%239 = shufflevector <2 x float> %238, <2 x float> undef, <1 x i32> zeroinitializer
%240 = shufflevector <2 x float> %238, <2 x float> undef, <1 x i32>
%241 = fadd <1 x float> %239, %240
%242 = extractelement <1 x float> %241, i32 0
%243 = getelementptr [16 x float], ptr %17, i64 0, i64 7
store float %242, ptr %243, align 4
%244 = fmul <4 x float> %113, %116
%245 = shufflevector <4 x float> %244, <4 x float> undef, <2 x i32>
%246 = shufflevector <4 x float> %244, <4 x float> undef, <2 x i32>
%247 = fadd <2 x float> %245, %246
%248 = shufflevector <2 x float> %247, <2 x float> undef, <1 x i32> zeroinitializer
%249 = shufflevector <2 x float> %247, <2 x float> undef, <1 x i32>
%250 = fadd <1 x float> %248, %249
%251 = extractelement <1 x float> %250, i32 0
%252 = getelementptr [16 x float], ptr %17, i64 0, i64 11
store float %251, ptr %252, align 4
%253 = fmul <4 x float> %113, %117
%254 = shufflevector <4 x float> %253, <4 x float> undef, <2 x i32>
%255 = shufflevector <4 x float> %253, <4 x float> undef, <2 x i32>
%256 = fadd <2 x float> %254, %255
%257 = shufflevector <2 x float> %256, <2 x float> undef, <1 x i32> zeroinitializer
%258 = shufflevector <2 x float> %256, <2 x float> undef, <1 x i32>
%259 = fadd <1 x float> %257, %258
%260 = extractelement <1 x float> %259, i32 0
%261 = getelementptr [16 x float], ptr %17, i64 0, i64 15
store float %260, ptr %261, align 4
%262 = load [16 x float], ptr %17, align 4
%263 = getelementptr [1 x %..any], ptr %3, i64 0, i64 0
call void @llvm.memset.inline.p0.i64(ptr %18, i8 0, i64 16, i1 false)
%264 = getelementptr inbounds %..any, ptr %18, i32 0, i32 0
%265 = getelementptr inbounds %..any, ptr %18, i32 0, i32 1
store ptr %17, ptr %264, align 8
store i64 1729382256910270472, ptr %265, align 8
%266 = load %..any, ptr %18, align 8
store %..any %266, ptr %263, align 8
%267 = getelementptr [1 x %..any], ptr %3, i64 0, i64 0
%268 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 0
store ptr %267, ptr %268, align 8
%269 = getelementptr inbounds { ptr, i64 }, ptr %2, i32 0, i32 1
store i64 1, ptr %269, align 8
%270 = load { ptr, i64 }, ptr %2, align 8
%271 = call i64 @fmt.println(ptr %2, ptr @"ggv$6f", i1 zeroext true, ptr %__.context_ptr)
ret void
}
Use of instruction is not an instruction!
%30 = add i64 %27, %29