2019-05-15

[llvm cookbook] IR优化

本文介绍如何使用opt工具优化llvm ir。

使用之前编写的代码 multiply.c

int mult() {
  int a = 5;
  int b = 3;
  int c = a * b;
  return c;
}

执行命令

clang -emit-llvm -S multiply.c -o multiply.ll

生成 multiply.ll

; ModuleID = 'multiply.c'
source_filename = "multiply.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"

; Function Attrs: noinline nounwind optnone ssp uwtable
define i32 @mult() #0 {
entry:
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  %c = alloca i32, align 4
  store i32 5, i32* %a, align 4
  store i32 3, i32* %b, align 4
  %0 = load i32, i32* %a, align 4
  %1 = load i32, i32* %b, align 4
  %mul = mul nsw i32 %0, %1
  store i32 %mul, i32* %c, align 4
  %2 = load i32, i32* %c, align 4
  ret i32 %2
}

attributes #0 = { noinline nounwind optnone ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{!"clang version 7.0.0 (trunk 324834)"}

执行命令：

opt -mem2reg -S multiply.ll -o multiply1.ll

输出 multiply1.ll

; ModuleID = 'multiply.ll'
source_filename = "multiply.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"

; Function Attrs: noinline nounwind optnone ssp uwtable
define i32 @mult() #0 {
entry:
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  %c = alloca i32, align 4
  store i32 5, i32* %a, align 4
  store i32 3, i32* %b, align 4
  %0 = load i32, i32* %a, align 4
  %1 = load i32, i32* %b, align 4
  %mul = mul nsw i32 %0, %1
  store i32 %mul, i32* %c, align 4
  %2 = load i32, i32* %c, align 4
  ret i32 %2
}

attributes #0 = { noinline nounwind optnone ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{!"clang version 7.0.0 (trunk 324834)"}

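我在mac上执行的，结果和书上的不一致，这个优化并没有生效。原因是：较新版本的 clang 在 -O0 下会给每个函数加上 optnone 属性（见上面 attributes #0 中的 optnone），带有该属性的函数会被 opt 的优化 pass 直接跳过。生成 IR 时改用 clang -Xclang -disable-O0-optnone -emit-llvm -S multiply.c -o multiply.ll，再执行 opt -mem2reg -S multiply.ll -o multiply1.ll，就可以看到 alloca/load/store 被提升为寄存器值，和书上的结果一致。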

其他的优化选项还有

adce: Aggressive Dead Code Elimination
bb-vectorize: Basic-Block Vectorization
constprop: Simple constant propagation
dce: Dead Code Elimination
deadargelim: Dead Argument Elimination
globaldce: Dead Global Elimination
globalopt: Global Variable Optimizer
gvn: Global Value Numbering
inline: Function Integration/Inlining
instcombine: Combine redundant instructions
licm: Loop Invariant Code Motion
loop-unswitch: Unswitch loops
loweratomic: Lower atomic intrinsics to non-atomic form
lowerinvoke: Lower invokes to calls, for unwindless code generators
lowerswitch: Lower SwitchInsts to branches
mem2reg: Promote Memory to Register
memcpyopt: MemCpy Optimization
simplifycfg: Simplify the CFG
sink: Code sinking
tailcallelim: Tail Call Elimination

可以在源码目录 test/Transforms/ 下找到测试代码。

[llvm cookbook] IR优化

最新文章

归档