From 28e3fc80336935bc8bed372e78616ef5be9f4908 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 27 Jul 2023 13:27:58 -0700 Subject: [PATCH 1/2] Don't zero out noreg operands A tail call may have $noreg operands. Fixes a crash. Reviewed By: xgupta Differential Revision: https://reviews.llvm.org/D156485 --- llvm/lib/CodeGen/PrologEpilogInserter.cpp | 9 +++++++-- llvm/test/CodeGen/X86/zero-call-used-regs.ll | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index e323aaaeefaf..49047719fdaa 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -1285,6 +1285,8 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { continue; MCRegister Reg = MO.getReg(); + if (!Reg) + continue; // This picks up sibling registers (e.q. %al -> %ah). for (MCRegUnit Unit : TRI.regunits(Reg)) @@ -1308,8 +1310,11 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { if (!MO.isReg()) continue; - for (const MCPhysReg &Reg : - TRI.sub_and_superregs_inclusive(MO.getReg())) + MCRegister Reg = MO.getReg(); + if (!Reg) + continue; + + for (const MCPhysReg Reg : TRI.sub_and_superregs_inclusive(Reg)) RegsToZero.reset(Reg); } } diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs.ll b/llvm/test/CodeGen/X86/zero-call-used-regs.ll index 63d51c916bb9..97ad5ce9c8cb 100644 --- a/llvm/test/CodeGen/X86/zero-call-used-regs.ll +++ b/llvm/test/CodeGen/X86/zero-call-used-regs.ll @@ -241,6 +241,20 @@ entry: ret i32 %x } +define dso_local void @tailcall(ptr %p) local_unnamed_addr #0 "zero-call-used-regs"="used-gpr" { +; I386-LABEL: tailcall: +; I386: # %bb.0: +; I386-NEXT: movl {{[0-9]+}}(%esp), %eax +; I386-NEXT: jmpl *(%eax) # TAILCALL +; +; X86-64-LABEL: tailcall: +; X86-64: # %bb.0: +; X86-64-NEXT: jmpq *(%rdi) # TAILCALL + %c = load ptr, ptr %p + tail call void %c() + ret void +} + ; Don't emit zeroing registers in "main" 
function. define dso_local i32 @main() local_unnamed_addr #1 { ; I386-LABEL: main: -- Gitee From 5d0b633cf6b41385cedb087295470780c590565e Mon Sep 17 00:00:00 2001 From: kom113 Date: Tue, 24 Sep 2024 13:15:39 +0800 Subject: [PATCH 2/2] feature: add stackanalyzer as a LLVM tool during OSPP 2024. --- llvm/lib/CodeGen/PrologEpilogInserter.cpp | 9 +- llvm/test/CMakeLists.txt | 1 + llvm/test/CodeGen/X86/zero-call-used-regs.ll | 14 - llvm/test/lit.cfg.py | 1 + .../AArch64/analysis_callback.ll | 65 ++ .../AArch64/analysis_no_callback.ll | 60 ++ .../AArch64/analysis_recursion.ll | 23 + .../AArch64/analysis_stacksize_config.ll | 48 ++ .../AArch64/callgraph_callback.ll | 80 +++ .../AArch64/callgraph_no_callback.ll | 93 +++ .../stackanalyzer/X86/analysis_callback.ll | 65 ++ .../stackanalyzer/X86/analysis_no_callback.ll | 56 ++ .../stackanalyzer/X86/analysis_recursion.ll | 23 + .../X86/analysis_stacksize_config.ll | 48 ++ .../stackanalyzer/X86/callgraph_callback.ll | 80 +++ .../X86/callgraph_no_callback.ll | 93 +++ llvm/test/tools/stackanalyzer/help.test | 21 + llvm/tools/stackanalyzer/CMakeLists.txt | 14 + llvm/tools/stackanalyzer/CallGraphGen.cpp | 584 ++++++++++++++++++ llvm/tools/stackanalyzer/CallGraphGen.h | 515 +++++++++++++++ llvm/tools/stackanalyzer/StackUsage.cpp | 297 +++++++++ llvm/tools/stackanalyzer/StackUsage.h | 92 +++ llvm/tools/stackanalyzer/stackanalyzer.cpp | 160 +++++ llvm/utils/gn/secondary/llvm/test/BUILD.gn | 1 + .../llvm/tools/stackanalyzer/BUILD.gn | 15 + .../llvm-project-overlay/llvm/BUILD.bazel | 18 + 26 files changed, 2455 insertions(+), 21 deletions(-) create mode 100644 llvm/test/tools/stackanalyzer/AArch64/analysis_callback.ll create mode 100644 llvm/test/tools/stackanalyzer/AArch64/analysis_no_callback.ll create mode 100644 llvm/test/tools/stackanalyzer/AArch64/analysis_recursion.ll create mode 100644 llvm/test/tools/stackanalyzer/AArch64/analysis_stacksize_config.ll create mode 100644 llvm/test/tools/stackanalyzer/AArch64/callgraph_callback.ll 
create mode 100644 llvm/test/tools/stackanalyzer/AArch64/callgraph_no_callback.ll create mode 100644 llvm/test/tools/stackanalyzer/X86/analysis_callback.ll create mode 100644 llvm/test/tools/stackanalyzer/X86/analysis_no_callback.ll create mode 100644 llvm/test/tools/stackanalyzer/X86/analysis_recursion.ll create mode 100644 llvm/test/tools/stackanalyzer/X86/analysis_stacksize_config.ll create mode 100644 llvm/test/tools/stackanalyzer/X86/callgraph_callback.ll create mode 100644 llvm/test/tools/stackanalyzer/X86/callgraph_no_callback.ll create mode 100644 llvm/test/tools/stackanalyzer/help.test create mode 100644 llvm/tools/stackanalyzer/CMakeLists.txt create mode 100644 llvm/tools/stackanalyzer/CallGraphGen.cpp create mode 100644 llvm/tools/stackanalyzer/CallGraphGen.h create mode 100644 llvm/tools/stackanalyzer/StackUsage.cpp create mode 100644 llvm/tools/stackanalyzer/StackUsage.h create mode 100644 llvm/tools/stackanalyzer/stackanalyzer.cpp create mode 100644 llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 49047719fdaa..e323aaaeefaf 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -1285,8 +1285,6 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { continue; MCRegister Reg = MO.getReg(); - if (!Reg) - continue; // This picks up sibling registers (e.q. %al -> %ah). 
for (MCRegUnit Unit : TRI.regunits(Reg)) @@ -1310,11 +1308,8 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { if (!MO.isReg()) continue; - MCRegister Reg = MO.getReg(); - if (!Reg) - continue; - - for (const MCPhysReg Reg : TRI.sub_and_superregs_inclusive(Reg)) + for (const MCPhysReg &Reg : + TRI.sub_and_superregs_inclusive(MO.getReg())) RegsToZero.reset(Reg); } } diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index 8cd77b72c987..71e0657fb065 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -142,6 +142,7 @@ set(LLVM_TEST_DEPENDS sancov sanstats split-file + stackanalyzer verify-uselistorder yaml-bench yaml2obj diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs.ll b/llvm/test/CodeGen/X86/zero-call-used-regs.ll index 97ad5ce9c8cb..63d51c916bb9 100644 --- a/llvm/test/CodeGen/X86/zero-call-used-regs.ll +++ b/llvm/test/CodeGen/X86/zero-call-used-regs.ll @@ -241,20 +241,6 @@ entry: ret i32 %x } -define dso_local void @tailcall(ptr %p) local_unnamed_addr #0 "zero-call-used-regs"="used-gpr" { -; I386-LABEL: tailcall: -; I386: # %bb.0: -; I386-NEXT: movl {{[0-9]+}}(%esp), %eax -; I386-NEXT: jmpl *(%eax) # TAILCALL -; -; X86-64-LABEL: tailcall: -; X86-64: # %bb.0: -; X86-64-NEXT: jmpq *(%rdi) # TAILCALL - %c = load ptr, ptr %p - tail call void %c() - ret void -} - ; Don't emit zeroing registers in "main" function. 
define dso_local i32 @main() local_unnamed_addr #1 { ; I386-LABEL: main: diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 9cc8520960c2..ff8df57f9f2f 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -241,6 +241,7 @@ tools.extend( "opt", "sancov", "sanstats", + "stackanalyzer", "llvm-remarkutil", ] ) diff --git a/llvm/test/tools/stackanalyzer/AArch64/analysis_callback.ll b/llvm/test/tools/stackanalyzer/AArch64/analysis_callback.ll new file mode 100644 index 000000000000..547e11594a16 --- /dev/null +++ b/llvm/test/tools/stackanalyzer/AArch64/analysis_callback.ll @@ -0,0 +1,65 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: stackanalyzer --analysis %t.bc --entry=main --anders --target=aarch64 | FileCheck %s + +@.str = private unnamed_addr constant [5 x i8] c"%hhu\00", align 1 + +define dso_local i32 @foo(i8 noundef zeroext %0, ptr noundef %1) #0 { + %3 = alloca i8, align 1 + %4 = alloca ptr, align 8 + %5 = alloca i8, align 1 + %6 = alloca [256 x i8], align 16 + store i8 %0, ptr %3, align 1 + store ptr %1, ptr %4, align 8 + %7 = load i8, ptr %5, align 1 + %8 = zext i8 %7 to i32 + %9 = call i32 @bar(i32 noundef %8) + %10 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef @.str, ptr noundef %5) + %11 = load ptr, ptr %4, align 8 + %12 = load i8, ptr %5, align 1 + %13 = zext i8 %12 to i32 + %14 = call i32 %11(i32 noundef %13) + ret i32 %14 +} + +declare i32 @__isoc99_scanf(ptr noundef, ...) #1 + +define dso_local i32 @baz(i32 noundef %0) #0 { + %2 = alloca i32, align 4 + %3 = alloca [1024 x i8], align 16 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + %5 = mul nsw i32 %4, 3 + ret i32 %5 +} + +define dso_local i32 @main() #0 { + %1 = alloca i32, align 4 + %2 = alloca i8, align 1 + %3 = alloca ptr, align 8 + store i32 0, ptr %1, align 4 + store ptr @baz, ptr %3, align 8 + %4 = call i32 (ptr, ...) 
@__isoc99_scanf(ptr noundef @.str, ptr noundef %2) + %5 = load i8, ptr %2, align 1 + %6 = load ptr, ptr %3, align 8 + %7 = call i32 @foo(i8 noundef zeroext %5, ptr noundef %6) + ret i32 0 +} + +define dso_local i32 @bar(i32 noundef %0) #0 { + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + store i32 %4, ptr %3, align 4 + %5 = load i32, ptr %3, align 4 + %6 = mul nsw i32 2, %5 + ret i32 %6 +} + +; CHECK: Potential stack overflow path found(limit:1024 bytes): +; CHECK-NEXT: CallStack: +; CHECK-NEXT: main +; CHECK-NEXT: foo +; CHECK-NEXT: baz +; CHECK-NEXT: Analysis: +; CHECK-NEXT: - Stack usage exceeds the limit along the call stack. \ No newline at end of file diff --git a/llvm/test/tools/stackanalyzer/AArch64/analysis_no_callback.ll b/llvm/test/tools/stackanalyzer/AArch64/analysis_no_callback.ll new file mode 100644 index 000000000000..b6e22da0d36a --- /dev/null +++ b/llvm/test/tools/stackanalyzer/AArch64/analysis_no_callback.ll @@ -0,0 +1,60 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: stackanalyzer --analysis %t.bc --entry=main --target=aarch64 | FileCheck %s --check-prefix=CHECK-MAIN +; RUN: stackanalyzer --analysis %t.bc --entry=foo --target=aarch64 | FileCheck %s --check-prefix=CHECK-FOO +; RUN: stackanalyzer --analysis %t.bc --entry=baz --target=aarch64 | FileCheck %s --check-prefix=CHECK-BAZ + +@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1 + +define dso_local i32 @baz(i32 noundef %0) { + %2 = alloca i32, align 4 + %3 = alloca [1024 x i8], align 16 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + %5 = mul nsw i32 %4, 3 + ret i32 %5 +} + +define dso_local i32 @foo(i32 noundef %0) { + %2 = alloca i32, align 4 + %3 = alloca [256 x i8], align 16 + %4 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + %5 = load i32, ptr %2, align 4 + %6 = call i32 @baz(i32 noundef %5) + store i32 %6, ptr %4, align 4 + %7 = load i32, ptr %4, align 4 + ret i32 %7 +} + +define 
dso_local i32 @main() { + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + store i32 0, ptr %1, align 4 + %3 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef @.str, ptr noundef %2) + %4 = load i32, ptr %2, align 4 + %5 = call i32 @foo(i32 noundef %4) + ret i32 0 +} + +declare i32 @__isoc99_scanf(ptr noundef, ...) + +; CHECK-MAIN: Potential stack overflow path found(limit:1024 bytes): +; CHECK-MAIN-NEXT: CallStack: +; CHECK-MAIN-NEXT: main +; CHECK-MAIN-NEXT: foo +; CHECK-MAIN-NEXT: baz +; CHECK-MAIN-NEXT: Analysis: +; CHECK-MAIN-NEXT: - Stack usage exceeds the limit along the call stack. + +; CHECK-FOO: Potential stack overflow path found(limit:1024 bytes): +; CHECK-FOO-NEXT: CallStack: +; CHECK-FOO-NEXT: foo +; CHECK-FOO-NEXT: baz +; CHECK-FOO-NEXT: Analysis: +; CHECK-FOO-NEXT: - Stack usage exceeds the limit along the call stack. + +; CHECK-BAZ: Potential stack overflow path found(limit:1024 bytes): +; CHECK-BAZ-NEXT: CallStack: +; CHECK-BAZ-NEXT: baz +; CHECK-BAZ-NEXT: Analysis: +; CHECK-BAZ-NEXT: - Stack usage exceeds the limit along the call stack. 
\ No newline at end of file diff --git a/llvm/test/tools/stackanalyzer/AArch64/analysis_recursion.ll b/llvm/test/tools/stackanalyzer/AArch64/analysis_recursion.ll new file mode 100644 index 000000000000..1a19e972eeb9 --- /dev/null +++ b/llvm/test/tools/stackanalyzer/AArch64/analysis_recursion.ll @@ -0,0 +1,23 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: stackanalyzer --analysis %t.bc --entry=recursive_function --target=aarch64 | FileCheck %s + +define void @recursive_function(i32 %n) { + %cmp = icmp eq i32 %n, 0 + br i1 %cmp, label %base_case, label %recursive_case + +base_case: + ret void + +recursive_case: + %dec = sub i32 %n, 1 + call void @recursive_function(i32 %dec) + ret void +} + +; CHECK: Potential stack overflow path found(limit:1024 bytes): +; CHECK-NEXT: CallStack: +; CHECK-NEXT: recursive_function +; CHECK-NEXT: recursive_function +; CHECK-NEXT: Analysis: +; CHECK-NEXT: - Recursive call without proper base case check. +; CHECK-NEXT: - Unbounded recursion may lead to stack overflow. 
\ No newline at end of file diff --git a/llvm/test/tools/stackanalyzer/AArch64/analysis_stacksize_config.ll b/llvm/test/tools/stackanalyzer/AArch64/analysis_stacksize_config.ll new file mode 100644 index 000000000000..d14ccdad348b --- /dev/null +++ b/llvm/test/tools/stackanalyzer/AArch64/analysis_stacksize_config.ll @@ -0,0 +1,48 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: stackanalyzer --analysis %t.bc --entry=main --stacksize=2048 --target=aarch64 | FileCheck %s --check-prefix=CHECK-2048 +; RUN: stackanalyzer --analysis %t.bc --entry=main --stacksize=512 --target=aarch64 | FileCheck %s --check-prefix=CHECK-512 + +@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1 + +define dso_local i32 @baz(i32 noundef %0) { + %2 = alloca i32, align 4 + %3 = alloca [1024 x i8], align 16 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + %5 = mul nsw i32 %4, 3 + ret i32 %5 +} + +define dso_local i32 @foo(i32 noundef %0) { + %2 = alloca i32, align 4 + %3 = alloca [256 x i8], align 16 + %4 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + %5 = load i32, ptr %2, align 4 + %6 = call i32 @baz(i32 noundef %5) + store i32 %6, ptr %4, align 4 + %7 = load i32, ptr %4, align 4 + ret i32 %7 +} + +define dso_local i32 @main() { + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + store i32 0, ptr %1, align 4 + %3 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef @.str, ptr noundef %2) + %4 = load i32, ptr %2, align 4 + %5 = call i32 @foo(i32 noundef %4) + ret i32 0 +} + +declare i32 @__isoc99_scanf(ptr noundef, ...) + +; CHECK-2048: No potential stack overflow path found(limit:2048 bytes). + +; CHECK-512: Potential stack overflow path found(limit:512 bytes): +; CHECK-512-NEXT: CallStack: +; CHECK-512-NEXT: main +; CHECK-512-NEXT: foo +; CHECK-512-NEXT: baz +; CHECK-512-NEXT: Analysis: +; CHECK-512-NEXT: - Stack usage exceeds the limit along the call stack. 
diff --git a/llvm/test/tools/stackanalyzer/AArch64/callgraph_callback.ll b/llvm/test/tools/stackanalyzer/AArch64/callgraph_callback.ll new file mode 100644 index 000000000000..78e40187f141 --- /dev/null +++ b/llvm/test/tools/stackanalyzer/AArch64/callgraph_callback.ll @@ -0,0 +1,80 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: stackanalyzer --callgraph %t.bc --entry=main --anders --target=aarch64 | FileCheck %s + +@.str = private unnamed_addr constant [5 x i8] c"%hhu\00", align 1 + +define dso_local i32 @foo(i8 noundef zeroext %0, ptr noundef %1) #0 { + %3 = alloca i8, align 1 + %4 = alloca ptr, align 8 + %5 = alloca i8, align 1 + %6 = alloca [256 x i8], align 16 + store i8 %0, ptr %3, align 1 + store ptr %1, ptr %4, align 8 + %7 = load i8, ptr %5, align 1 + %8 = zext i8 %7 to i32 + %9 = call i32 @bar(i32 noundef %8) + %10 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef @.str, ptr noundef %5) + %11 = load ptr, ptr %4, align 8 + %12 = load i8, ptr %5, align 1 + %13 = zext i8 %12 to i32 + %14 = call i32 %11(i32 noundef %13) + ret i32 %14 +} + +declare i32 @__isoc99_scanf(ptr noundef, ...) #1 + +define dso_local i32 @baz(i32 noundef %0) #0 { + %2 = alloca i32, align 4 + %3 = alloca [1024 x i8], align 16 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + %5 = mul nsw i32 %4, 3 + ret i32 %5 +} + +define dso_local i32 @main() #0 { + %1 = alloca i32, align 4 + %2 = alloca i8, align 1 + %3 = alloca ptr, align 8 + store i32 0, ptr %1, align 4 + store ptr @baz, ptr %3, align 8 + %4 = call i32 (ptr, ...) 
@__isoc99_scanf(ptr noundef @.str, ptr noundef %2) + %5 = load i8, ptr %2, align 1 + %6 = load ptr, ptr %3, align 8 + %7 = call i32 @foo(i8 noundef zeroext %5, ptr noundef %6) + ret i32 0 +} + +define dso_local i32 @bar(i32 noundef %0) #0 { + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + store i32 %4, ptr %3, align 4 + %5 = load i32, ptr %3, align 4 + %6 = mul nsw i32 2, %5 + ret i32 %6 +} + +; CHECK: Call graph node <>{{.*}} #uses=0 +; CHECK: CS calls function 'foo' +; CHECK: CS calls function '__isoc99_scanf' +; CHECK: CS calls function 'baz' +; CHECK: CS calls function 'main' +; CHECK: CS calls function 'bar' + +; CHECK: Call graph node for function: '__isoc99_scanf'{{.*}} #uses=3 +; CHECK: CS calls external node + +; CHECK: Call graph node for function: 'bar'{{.*}} #uses=2 + +; CHECK: Call graph node for function: 'baz'{{.*}} #uses=2 + +; CHECK: Call graph node for function: 'foo'{{.*}} #uses=2 +; CHECK: CS{{.*}} calls function 'bar' +; CHECK: CS{{.*}} calls function '__isoc99_scanf' +; CHECK: CS{{.*}} calls function 'baz' + +; CHECK: Call graph node for function: 'main'{{.*}} #uses=1 +; CHECK: CS{{.*}} calls function '__isoc99_scanf' +; CHECK: CS{{.*}} calls function 'foo' \ No newline at end of file diff --git a/llvm/test/tools/stackanalyzer/AArch64/callgraph_no_callback.ll b/llvm/test/tools/stackanalyzer/AArch64/callgraph_no_callback.ll new file mode 100644 index 000000000000..e0612d2a5698 --- /dev/null +++ b/llvm/test/tools/stackanalyzer/AArch64/callgraph_no_callback.ll @@ -0,0 +1,93 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: stackanalyzer --callgraph %t.bc --entry=main --target=aarch64 | FileCheck %s --check-prefix=CHECK-MAIN +; RUN: stackanalyzer --callgraph %t.bc --entry=foo --target=aarch64 | FileCheck %s --check-prefix=CHECK-FOO +; RUN: stackanalyzer --callgraph %t.bc --entry=baz --target=aarch64 | FileCheck %s --check-prefix=CHECK-BAZ + +@.str = private unnamed_addr constant [3 x i8] 
c"%d\00", align 1 + +define dso_local i32 @baz(i32 noundef %0) #0 { + %2 = alloca i32, align 4 + %3 = alloca [1024 x i8], align 16 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + %5 = mul nsw i32 %4, 3 + ret i32 %5 +} + +define dso_local i32 @foo(i32 noundef %0) #0 { + %2 = alloca i32, align 4 + %3 = alloca [256 x i8], align 16 + %4 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + %5 = load i32, ptr %2, align 4 + %6 = call i32 @baz(i32 noundef %5) + store i32 %6, ptr %4, align 4 + %7 = load i32, ptr %4, align 4 + ret i32 %7 +} + +define dso_local i32 @main() #0 { + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + store i32 0, ptr %1, align 4 + %3 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef @.str, ptr noundef %2) + %4 = load i32, ptr %2, align 4 + %5 = call i32 @foo(i32 noundef %4) + ret i32 0 +} + +declare i32 @__isoc99_scanf(ptr noundef, ...) #1 + +; CHECK-MAIN: Call graph node <>{{.*}} #uses=0 +; CHECK-MAIN: CS calls function 'baz' +; CHECK-MAIN: CS calls function 'foo' +; CHECK-MAIN: CS calls function 'main' +; CHECK-MAIN: CS calls function '__isoc99_scanf' + +; CHECK-MAIN: Call graph node for function: '__isoc99_scanf'{{.*}} #uses=2 +; CHECK-MAIN: CS calls external node + +; CHECK-MAIN: Call graph node for function: 'baz'{{.*}} #uses=2 + +; CHECK-MAIN: Call graph node for function: 'foo'{{.*}} #uses=2 +; CHECK-MAIN: CS{{.*}} calls function 'baz' + +; CHECK-MAIN: Call graph node for function: 'main'{{.*}} #uses=1 +; CHECK-MAIN: CS{{.*}} calls function '__isoc99_scanf' +; CHECK-MAIN: CS{{.*}} calls function 'foo' + +; CHECK-FOO: Call graph node <>{{.*}} #uses=0 +; CHECK-FOO: CS calls function 'baz' +; CHECK-FOO: CS calls function 'foo' +; CHECK-FOO: CS calls function 'main' +; CHECK-FOO: CS calls function '__isoc99_scanf' + +; CHECK-FOO: Call graph node for function: '__isoc99_scanf'{{.*}} #uses=2 +; CHECK-FOO: CS calls external node + +; CHECK-FOO: Call graph node for function: 'baz'{{.*}} #uses=2 + +; CHECK-FOO: Call graph 
node for function: 'foo'{{.*}} #uses=2 +; CHECK-FOO: CS{{.*}} calls function 'baz' + +; CHECK-FOO: Call graph node for function: 'main'{{.*}} #uses=1 +; CHECK-FOO: CS{{.*}} calls function '__isoc99_scanf' +; CHECK-FOO: CS{{.*}} calls function 'foo' + +; CHECK-BAZ: Call graph node <>{{.*}} #uses=0 +; CHECK-BAZ: CS calls function 'baz' +; CHECK-BAZ: CS calls function 'foo' +; CHECK-BAZ: CS calls function 'main' +; CHECK-BAZ: CS calls function '__isoc99_scanf' + +; CHECK-BAZ: Call graph node for function: '__isoc99_scanf'{{.*}} #uses=2 +; CHECK-BAZ: CS calls external node + +; CHECK-BAZ: Call graph node for function: 'baz'{{.*}} #uses=2 + +; CHECK-BAZ: Call graph node for function: 'foo'{{.*}} #uses=2 +; CHECK-BAZ: CS{{.*}} calls function 'baz' + +; CHECK-BAZ: Call graph node for function: 'main'{{.*}} #uses=1 +; CHECK-BAZ: CS{{.*}} calls function '__isoc99_scanf' +; CHECK-BAZ: CS{{.*}} calls function 'foo' \ No newline at end of file diff --git a/llvm/test/tools/stackanalyzer/X86/analysis_callback.ll b/llvm/test/tools/stackanalyzer/X86/analysis_callback.ll new file mode 100644 index 000000000000..3329d18d4507 --- /dev/null +++ b/llvm/test/tools/stackanalyzer/X86/analysis_callback.ll @@ -0,0 +1,65 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: stackanalyzer --analysis %t.bc --entry=main --anders --target=x86_64 | FileCheck %s + +@.str = private unnamed_addr constant [5 x i8] c"%hhu\00", align 1 + +define dso_local i32 @foo(i8 noundef zeroext %0, ptr noundef %1) #0 { + %3 = alloca i8, align 1 + %4 = alloca ptr, align 8 + %5 = alloca i8, align 1 + %6 = alloca [256 x i8], align 16 + store i8 %0, ptr %3, align 1 + store ptr %1, ptr %4, align 8 + %7 = load i8, ptr %5, align 1 + %8 = zext i8 %7 to i32 + %9 = call i32 @bar(i32 noundef %8) + %10 = call i32 (ptr, ...) 
@__isoc99_scanf(ptr noundef @.str, ptr noundef %5) + %11 = load ptr, ptr %4, align 8 + %12 = load i8, ptr %5, align 1 + %13 = zext i8 %12 to i32 + %14 = call i32 %11(i32 noundef %13) + ret i32 %14 +} + +declare i32 @__isoc99_scanf(ptr noundef, ...) #1 + +define dso_local i32 @baz(i32 noundef %0) #0 { + %2 = alloca i32, align 4 + %3 = alloca [1024 x i8], align 16 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + %5 = mul nsw i32 %4, 3 + ret i32 %5 +} + +define dso_local i32 @main() #0 { + %1 = alloca i32, align 4 + %2 = alloca i8, align 1 + %3 = alloca ptr, align 8 + store i32 0, ptr %1, align 4 + store ptr @baz, ptr %3, align 8 + %4 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef @.str, ptr noundef %2) + %5 = load i8, ptr %2, align 1 + %6 = load ptr, ptr %3, align 8 + %7 = call i32 @foo(i8 noundef zeroext %5, ptr noundef %6) + ret i32 0 +} + +define dso_local i32 @bar(i32 noundef %0) #0 { + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + store i32 %4, ptr %3, align 4 + %5 = load i32, ptr %3, align 4 + %6 = mul nsw i32 2, %5 + ret i32 %6 +} + +; CHECK: Potential stack overflow path found(limit:1024 bytes): +; CHECK-NEXT: CallStack: +; CHECK-NEXT: main +; CHECK-NEXT: foo +; CHECK-NEXT: baz +; CHECK-NEXT: Analysis: +; CHECK-NEXT: - Stack usage exceeds the limit along the call stack. 
\ No newline at end of file diff --git a/llvm/test/tools/stackanalyzer/X86/analysis_no_callback.ll b/llvm/test/tools/stackanalyzer/X86/analysis_no_callback.ll new file mode 100644 index 000000000000..179c83e7cbb4 --- /dev/null +++ b/llvm/test/tools/stackanalyzer/X86/analysis_no_callback.ll @@ -0,0 +1,56 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: stackanalyzer --analysis %t.bc --entry=main --target=x86_64 | FileCheck %s --check-prefix=CHECK-MAIN +; RUN: stackanalyzer --analysis %t.bc --entry=foo --target=x86_64 | FileCheck %s --check-prefix=CHECK-FOO +; RUN: stackanalyzer --analysis %t.bc --entry=baz --target=x86_64 | FileCheck %s --check-prefix=CHECK-BAZ + +@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1 + +define dso_local i32 @baz(i32 noundef %0) { + %2 = alloca i32, align 4 + %3 = alloca [1024 x i8], align 16 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + %5 = mul nsw i32 %4, 3 + ret i32 %5 +} + +define dso_local i32 @foo(i32 noundef %0) { + %2 = alloca i32, align 4 + %3 = alloca [256 x i8], align 16 + %4 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + %5 = load i32, ptr %2, align 4 + %6 = call i32 @baz(i32 noundef %5) + store i32 %6, ptr %4, align 4 + %7 = load i32, ptr %4, align 4 + ret i32 %7 +} + +define dso_local i32 @main() { + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + store i32 0, ptr %1, align 4 + %3 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef @.str, ptr noundef %2) + %4 = load i32, ptr %2, align 4 + %5 = call i32 @foo(i32 noundef %4) + ret i32 0 +} + +declare i32 @__isoc99_scanf(ptr noundef, ...) + +; CHECK-MAIN: Potential stack overflow path found(limit:1024 bytes): +; CHECK-MAIN-NEXT: CallStack: +; CHECK-MAIN-NEXT: main +; CHECK-MAIN-NEXT: foo +; CHECK-MAIN-NEXT: baz +; CHECK-MAIN-NEXT: Analysis: +; CHECK-MAIN-NEXT: - Stack usage exceeds the limit along the call stack. 
+ +; CHECK-FOO: Potential stack overflow path found(limit:1024 bytes): +; CHECK-FOO-NEXT: CallStack: +; CHECK-FOO-NEXT: foo +; CHECK-FOO-NEXT: baz +; CHECK-FOO-NEXT: Analysis: +; CHECK-FOO-NEXT: - Stack usage exceeds the limit along the call stack. + +; CHECK-BAZ: No potential stack overflow path found(limit:1024 bytes). diff --git a/llvm/test/tools/stackanalyzer/X86/analysis_recursion.ll b/llvm/test/tools/stackanalyzer/X86/analysis_recursion.ll new file mode 100644 index 000000000000..69b6329cd13b --- /dev/null +++ b/llvm/test/tools/stackanalyzer/X86/analysis_recursion.ll @@ -0,0 +1,23 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: stackanalyzer --analysis %t.bc --entry=recursive_function --target=x86_64 | FileCheck %s + +define void @recursive_function(i32 %n) { + %cmp = icmp eq i32 %n, 0 + br i1 %cmp, label %base_case, label %recursive_case + +base_case: + ret void + +recursive_case: + %dec = sub i32 %n, 1 + call void @recursive_function(i32 %dec) + ret void +} + +; CHECK: Potential stack overflow path found(limit:1024 bytes): +; CHECK-NEXT: CallStack: +; CHECK-NEXT: recursive_function +; CHECK-NEXT: recursive_function +; CHECK-NEXT: Analysis: +; CHECK-NEXT: - Recursive call without proper base case check. +; CHECK-NEXT: - Unbounded recursion may lead to stack overflow. 
\ No newline at end of file diff --git a/llvm/test/tools/stackanalyzer/X86/analysis_stacksize_config.ll b/llvm/test/tools/stackanalyzer/X86/analysis_stacksize_config.ll new file mode 100644 index 000000000000..9d876494e541 --- /dev/null +++ b/llvm/test/tools/stackanalyzer/X86/analysis_stacksize_config.ll @@ -0,0 +1,48 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: stackanalyzer --analysis %t.bc --entry=main --stacksize=2048 --target=x86_64 | FileCheck %s --check-prefix=CHECK-2048 +; RUN: stackanalyzer --analysis %t.bc --entry=main --stacksize=512 --target=x86_64 | FileCheck %s --check-prefix=CHECK-512 + +@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1 + +define dso_local i32 @baz(i32 noundef %0) { + %2 = alloca i32, align 4 + %3 = alloca [1024 x i8], align 16 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + %5 = mul nsw i32 %4, 3 + ret i32 %5 +} + +define dso_local i32 @foo(i32 noundef %0) { + %2 = alloca i32, align 4 + %3 = alloca [256 x i8], align 16 + %4 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + %5 = load i32, ptr %2, align 4 + %6 = call i32 @baz(i32 noundef %5) + store i32 %6, ptr %4, align 4 + %7 = load i32, ptr %4, align 4 + ret i32 %7 +} + +define dso_local i32 @main() { + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + store i32 0, ptr %1, align 4 + %3 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef @.str, ptr noundef %2) + %4 = load i32, ptr %2, align 4 + %5 = call i32 @foo(i32 noundef %4) + ret i32 0 +} + +declare i32 @__isoc99_scanf(ptr noundef, ...) + +; CHECK-2048: No potential stack overflow path found(limit:2048 bytes). + +; CHECK-512: Potential stack overflow path found(limit:512 bytes): +; CHECK-512-NEXT: CallStack: +; CHECK-512-NEXT: main +; CHECK-512-NEXT: foo +; CHECK-512-NEXT: baz +; CHECK-512-NEXT: Analysis: +; CHECK-512-NEXT: - Stack usage exceeds the limit along the call stack. 
diff --git a/llvm/test/tools/stackanalyzer/X86/callgraph_callback.ll b/llvm/test/tools/stackanalyzer/X86/callgraph_callback.ll new file mode 100644 index 000000000000..f73e72b745cb --- /dev/null +++ b/llvm/test/tools/stackanalyzer/X86/callgraph_callback.ll @@ -0,0 +1,80 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: stackanalyzer --callgraph %t.bc --entry=main --anders --target=x86_64 | FileCheck %s + +@.str = private unnamed_addr constant [5 x i8] c"%hhu\00", align 1 + +define dso_local i32 @foo(i8 noundef zeroext %0, ptr noundef %1) #0 { + %3 = alloca i8, align 1 + %4 = alloca ptr, align 8 + %5 = alloca i8, align 1 + %6 = alloca [256 x i8], align 16 + store i8 %0, ptr %3, align 1 + store ptr %1, ptr %4, align 8 + %7 = load i8, ptr %5, align 1 + %8 = zext i8 %7 to i32 + %9 = call i32 @bar(i32 noundef %8) + %10 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef @.str, ptr noundef %5) + %11 = load ptr, ptr %4, align 8 + %12 = load i8, ptr %5, align 1 + %13 = zext i8 %12 to i32 + %14 = call i32 %11(i32 noundef %13) + ret i32 %14 +} + +declare i32 @__isoc99_scanf(ptr noundef, ...) #1 + +define dso_local i32 @baz(i32 noundef %0) #0 { + %2 = alloca i32, align 4 + %3 = alloca [1024 x i8], align 16 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + %5 = mul nsw i32 %4, 3 + ret i32 %5 +} + +define dso_local i32 @main() #0 { + %1 = alloca i32, align 4 + %2 = alloca i8, align 1 + %3 = alloca ptr, align 8 + store i32 0, ptr %1, align 4 + store ptr @baz, ptr %3, align 8 + %4 = call i32 (ptr, ...) 
@__isoc99_scanf(ptr noundef @.str, ptr noundef %2) + %5 = load i8, ptr %2, align 1 + %6 = load ptr, ptr %3, align 8 + %7 = call i32 @foo(i8 noundef zeroext %5, ptr noundef %6) + ret i32 0 +} + +define dso_local i32 @bar(i32 noundef %0) #0 { + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + store i32 %4, ptr %3, align 4 + %5 = load i32, ptr %3, align 4 + %6 = mul nsw i32 2, %5 + ret i32 %6 +} + +; CHECK: Call graph node <>{{.*}} #uses=0 +; CHECK: CS calls function 'foo' +; CHECK: CS calls function '__isoc99_scanf' +; CHECK: CS calls function 'baz' +; CHECK: CS calls function 'main' +; CHECK: CS calls function 'bar' + +; CHECK: Call graph node for function: '__isoc99_scanf'{{.*}} #uses=3 +; CHECK: CS calls external node + +; CHECK: Call graph node for function: 'bar'{{.*}} #uses=2 + +; CHECK: Call graph node for function: 'baz'{{.*}} #uses=2 + +; CHECK: Call graph node for function: 'foo'{{.*}} #uses=2 +; CHECK: CS{{.*}} calls function 'bar' +; CHECK: CS{{.*}} calls function '__isoc99_scanf' +; CHECK: CS{{.*}} calls function 'baz' + +; CHECK: Call graph node for function: 'main'{{.*}} #uses=1 +; CHECK: CS{{.*}} calls function '__isoc99_scanf' +; CHECK: CS{{.*}} calls function 'foo' \ No newline at end of file diff --git a/llvm/test/tools/stackanalyzer/X86/callgraph_no_callback.ll b/llvm/test/tools/stackanalyzer/X86/callgraph_no_callback.ll new file mode 100644 index 000000000000..a75b41ff6eb8 --- /dev/null +++ b/llvm/test/tools/stackanalyzer/X86/callgraph_no_callback.ll @@ -0,0 +1,93 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: stackanalyzer --callgraph %t.bc --entry=main --target=x86_64 | FileCheck %s --check-prefix=CHECK-MAIN +; RUN: stackanalyzer --callgraph %t.bc --entry=foo --target=x86_64 | FileCheck %s --check-prefix=CHECK-FOO +; RUN: stackanalyzer --callgraph %t.bc --entry=baz --target=x86_64 | FileCheck %s --check-prefix=CHECK-BAZ + +@.str = private unnamed_addr constant [3 x i8] c"%d\00", 
align 1 + +define dso_local i32 @baz(i32 noundef %0) #0 { + %2 = alloca i32, align 4 + %3 = alloca [1024 x i8], align 16 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + %5 = mul nsw i32 %4, 3 + ret i32 %5 +} + +define dso_local i32 @foo(i32 noundef %0) #0 { + %2 = alloca i32, align 4 + %3 = alloca [256 x i8], align 16 + %4 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + %5 = load i32, ptr %2, align 4 + %6 = call i32 @baz(i32 noundef %5) + store i32 %6, ptr %4, align 4 + %7 = load i32, ptr %4, align 4 + ret i32 %7 +} + +define dso_local i32 @main() #0 { + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + store i32 0, ptr %1, align 4 + %3 = call i32 (ptr, ...) @__isoc99_scanf(ptr noundef @.str, ptr noundef %2) + %4 = load i32, ptr %2, align 4 + %5 = call i32 @foo(i32 noundef %4) + ret i32 0 +} + +declare i32 @__isoc99_scanf(ptr noundef, ...) #1 + +; CHECK-MAIN: Call graph node <>{{.*}} #uses=0 +; CHECK-MAIN: CS calls function 'baz' +; CHECK-MAIN: CS calls function 'foo' +; CHECK-MAIN: CS calls function 'main' +; CHECK-MAIN: CS calls function '__isoc99_scanf' + +; CHECK-MAIN: Call graph node for function: '__isoc99_scanf'{{.*}} #uses=2 +; CHECK-MAIN: CS calls external node + +; CHECK-MAIN: Call graph node for function: 'baz'{{.*}} #uses=2 + +; CHECK-MAIN: Call graph node for function: 'foo'{{.*}} #uses=2 +; CHECK-MAIN: CS{{.*}} calls function 'baz' + +; CHECK-MAIN: Call graph node for function: 'main'{{.*}} #uses=1 +; CHECK-MAIN: CS{{.*}} calls function '__isoc99_scanf' +; CHECK-MAIN: CS{{.*}} calls function 'foo' + +; CHECK-FOO: Call graph node <>{{.*}} #uses=0 +; CHECK-FOO: CS calls function 'baz' +; CHECK-FOO: CS calls function 'foo' +; CHECK-FOO: CS calls function 'main' +; CHECK-FOO: CS calls function '__isoc99_scanf' + +; CHECK-FOO: Call graph node for function: '__isoc99_scanf'{{.*}} #uses=2 +; CHECK-FOO: CS calls external node + +; CHECK-FOO: Call graph node for function: 'baz'{{.*}} #uses=2 + +; CHECK-FOO: Call graph node for 
function: 'foo'{{.*}} #uses=2 +; CHECK-FOO: CS{{.*}} calls function 'baz' + +; CHECK-FOO: Call graph node for function: 'main'{{.*}} #uses=1 +; CHECK-FOO: CS{{.*}} calls function '__isoc99_scanf' +; CHECK-FOO: CS{{.*}} calls function 'foo' + +; CHECK-BAZ: Call graph node <>{{.*}} #uses=0 +; CHECK-BAZ: CS calls function 'baz' +; CHECK-BAZ: CS calls function 'foo' +; CHECK-BAZ: CS calls function 'main' +; CHECK-BAZ: CS calls function '__isoc99_scanf' + +; CHECK-BAZ: Call graph node for function: '__isoc99_scanf'{{.*}} #uses=2 +; CHECK-BAZ: CS calls external node + +; CHECK-BAZ: Call graph node for function: 'baz'{{.*}} #uses=2 + +; CHECK-BAZ: Call graph node for function: 'foo'{{.*}} #uses=2 +; CHECK-BAZ: CS{{.*}} calls function 'baz' + +; CHECK-BAZ: Call graph node for function: 'main'{{.*}} #uses=1 +; CHECK-BAZ: CS{{.*}} calls function '__isoc99_scanf' +; CHECK-BAZ: CS{{.*}} calls function 'foo' \ No newline at end of file diff --git a/llvm/test/tools/stackanalyzer/help.test b/llvm/test/tools/stackanalyzer/help.test new file mode 100644 index 000000000000..0badb006adf5 --- /dev/null +++ b/llvm/test/tools/stackanalyzer/help.test @@ -0,0 +1,21 @@ +; RUN: stackanalyzer --help | FileCheck %s +; CHECK: USAGE: stackanalyzer [options] Input .bc file to be analyzed +; CHECK-EMPTY: +; CHECK: OPTIONS: +; CHECK-EMPTY: +; CHECK: Generic Options: +; CHECK-EMPTY: +; CHECK: --help - Display available options (--help-hidden for more) +; CHECK: --help-list - Display list of available options (--help-list-hidden for more) +; CHECK: --version - Display the version of this program +; CHECK-EMPTY: +; CHECK: StackAnalyzerCategory: +; CHECK-EMPTY: +; CHECK: --analysis - Output possible path of the callgraph which can possibly cause stack overflow +; CHECK: --anders - Use Anders analysis to analyze the call graph +; CHECK: --callgraph - Output the callgraph given the .bc file +; CHECK: --entry= - The name of the entry function for the callgraph +; CHECK: -o - Output callgraph in .dot 
format with stack cost information{{.*}}Should be used together with `analysis`. +; CHECK: --stacksize= - Max stack size of the limit of a path within the callgraph, given the .bc file. Should be used together with `analysis`. +; CHECK-NOT: --sufile +; CHECK-NOT: --debuginfo \ No newline at end of file diff --git a/llvm/tools/stackanalyzer/CMakeLists.txt b/llvm/tools/stackanalyzer/CMakeLists.txt new file mode 100644 index 000000000000..2f013759b3f2 --- /dev/null +++ b/llvm/tools/stackanalyzer/CMakeLists.txt @@ -0,0 +1,14 @@ +set(LLVM_LINK_COMPONENTS + Analysis + BitReader + Core + Passes + Support + ${LLVM_TARGETS_TO_BUILD} +) + +add_llvm_tool(stackanalyzer + CallGraphGen.cpp + stackanalyzer.cpp + StackUsage.cpp +) \ No newline at end of file diff --git a/llvm/tools/stackanalyzer/CallGraphGen.cpp b/llvm/tools/stackanalyzer/CallGraphGen.cpp new file mode 100644 index 000000000000..f866434626e1 --- /dev/null +++ b/llvm/tools/stackanalyzer/CallGraphGen.cpp @@ -0,0 +1,584 @@ +//===--- CallGraphGen.cpp - Analyze the callgraph of a LLVM bitcode file using +// pointer analysis ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CallGraphGen.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; + +std::unique_ptr +ConstraintGraph::createInitialConstraintNode() { + UndefValue *UndefValue = UndefValue::get(Type::getVoidTy(M.getContext())); + return std::make_unique(UndefValue, this); +} + +ConstraintGraph::ConstraintGraph(Module &Module) : M(Module) { + InitialConstraintNode = createInitialConstraintNode().get(); + for (auto &F : M) { + if (F.hasExternalLinkage()) + continue; + if (F.isIntrinsic()) + continue; + } +} + +ConstraintGraphNode *ConstraintGraph::getOrInsertConstraintNode(Value *V) { + auto &CSN = ConstraintGraphNodes[V]; + if (CSN) { + return CSN.get(); + } + CSN = std::make_unique(V, this); + return CSN.get(); +} + +Constraint *ConstraintGraph::getOrInsertConstraint(Value *Dst, Value *Src, + ConstraintKind Kind) { + for (auto &C : Constraints) { + if (C->Src->V == Src && C->Dst->V == Dst && C->Kind == Kind) { + return C.get(); + } + } + auto *SrcNode = getOrInsertConstraintNode(Src); + auto *DstNode = getOrInsertConstraintNode(Dst); + auto ConstraitEdge = std::make_unique(DstNode, SrcNode, Kind); + SrcNode->addConstraint(DstNode, Kind); + Constraint *ConstraitEdgePtr = ConstraitEdge.get(); + Constraints.push_back(std::move(ConstraitEdge)); + return ConstraitEdgePtr; +} + +Constraint *ConstraintGraph::insertConstraint(Value *Dst, Value *Src, + ConstraintKind Kind) { + auto *SrcNode = getOrInsertConstraintNode(Src); + auto *DstNode = getOrInsertConstraintNode(Dst); + auto ConstraitEdge = std::make_unique(DstNode, SrcNode, Kind); + SrcNode->addConstraint(DstNode, Kind); + Constraint *ConstraitEdgePtr = ConstraitEdge.get(); + Constraints.push_back(std::move(ConstraitEdge)); + return ConstraitEdgePtr; +} + +AnalysisKey PACallGraphAnalysis::Key; + 
+/// Build the call graph for \p M.
+///
+/// When Config.UseAnders is set, the constraint-based pointer analysis is run
+/// starting at the function named Config.EntryFunction and its results are
+/// used to extend the call graph. If no function with that name exists, the
+/// pointer analysis is skipped with a warning; previously the end iterator
+/// was dereferenced in that case. Redundant call edges are always removed
+/// before the graph is returned.
+PACallGraphAnalysis::Result PACallGraphAnalysis::run(Module &M,
+                                                     ModuleAnalysisManager &) {
+  // Locate the entry function by name.
+  Function *Entry = nullptr;
+  for (Function &F : M) {
+    if (F.getName() == Config.EntryFunction) {
+      Entry = &F;
+      break;
+    }
+  }
+
+  PointerAnalysisVisitor PAVisitor(M);
+  if (Config.UseAnders) {
+    if (Entry) {
+      DataflowResult<PAAnalysisDataflowFacts>::Type ResultFact;
+      PAAnalysisDataflowFacts InitFact;
+      compForwardDataflow(Entry, &PAVisitor, &ResultFact, InitFact);
+      PAVisitor.solveConstraint();
+    } else {
+      errs() << "warning: entry function '" << Config.EntryFunction
+             << "' not found; skipping pointer analysis\n";
+    }
+  }
+  PAVisitor.removeRedundantCallEdge();
+  if (Config.UseDebug) {
+    PAVisitor.printConstraintGraph(outs());
+    PAVisitor.printPointToSetMap(outs());
+  }
+  return std::move(PAVisitor.CG);
+}
+
+// for debugging purpose, reference:
+// https://github.com/SunnyWadkar/LLVM-DataFlow-Analysis/blob/master/Dataflow/available-support.cpp
+//
+// Produce a short printable name for V:
+//  - arguments:    the printed argument from its first space onwards,
+//                  followed by ":<function>"
+//  - instructions: the "%name" token followed by ":<function>", or the whole
+//                  printed instruction in quotes when no such token exists
+//  - constant ints: the signed decimal value
+//  - other values: the value's name, or its quoted printed form when unnamed
+//
+// The string streams live on the stack; the earlier version allocated one
+// with `new` on every call and never freed it.
+static std::string getShortValueName(const Value *V) {
+  if (const auto *Arg = dyn_cast<Argument>(V)) {
+    std::string Buffer;
+    raw_string_ostream Strm(Buffer);
+    Arg->print(Strm);
+    const std::string &ArgName = Strm.str();
+    const size_t Idx = ArgName.find(' ');
+    // substr(npos) would throw, so fall back to the full text if the printed
+    // form unexpectedly contains no space.
+    const std::string Short =
+        Idx == std::string::npos ? ArgName : ArgName.substr(Idx);
+    return Short + ":" + Arg->getParent()->getName().str();
+  }
+  if (const auto *InstV = dyn_cast<Instruction>(V)) {
+    std::string Buffer;
+    raw_string_ostream Strm(Buffer);
+    V->print(Strm);
+    const std::string &Inst = Strm.str();
+    const size_t Idx1 = Inst.find('%');
+    const size_t Idx2 = Inst.find(' ', Idx1);
+    if (Idx1 != std::string::npos && Idx2 != std::string::npos) {
+      return Inst.substr(Idx1, Idx2 - Idx1) + ":" +
+             InstV->getFunction()->getName().str();
+    }
+    return "\"" + Inst + "\"";
+  }
+  if (const auto *Cint = dyn_cast<ConstantInt>(V)) {
+    std::string Buffer;
+    raw_string_ostream Strm(Buffer);
+    Cint->getValue().print(Strm, true);
+    return Strm.str();
+  }
+  if (V->getName().str().length() > 0) {
+    return V->getName().str();
+  }
+  std::string Buffer;
+  raw_string_ostream Strm(Buffer);
+  V->print(Strm);
+  return "\"" + Strm.str() + "\"";
+}
+
+PointerAnalysisVisitor::PointerAnalysisVisitor(Module &InitModule) + : CSG(ConstraintGraph(InitModule)), CG(InitModule) {} + +void PointerAnalysisVisitor::merge(PAAnalysisDataflowFacts *Facts, + const PAAnalysisDataflowFacts &OtherFacts) { + Facts->insert(Facts->end(), OtherFacts.begin(), OtherFacts.end()); + std::sort(Facts->begin(), Facts->end()); + auto Last = std::unique(Facts->begin(), Facts->end()); + Facts->erase(Last, Facts->end()); +} + +void PointerAnalysisVisitor::compDFVal(Instruction *Inst, + PAAnalysisDataflowFacts *Dfval) { + switch (Inst->getOpcode()) { + case Instruction::Load: { + transfer(dyn_cast(Inst), Dfval); + break; + } + case Instruction::Store: { + transfer(dyn_cast(Inst), Dfval); + break; + } + case Instruction::Call: { + transfer(dyn_cast(Inst), Dfval); + break; + } + case Instruction::GetElementPtr: { + transfer(dyn_cast(Inst), Dfval); + break; + } + case Instruction::Ret: { + transfer(dyn_cast(Inst), Dfval); + break; + } + case Instruction::PHI: { + transfer(dyn_cast(Inst), Dfval); + break; + } + case Instruction::BitCast: { + transfer(dyn_cast(Inst), Dfval); + break; + } + case Instruction::Select: { + transfer(dyn_cast(Inst), Dfval); + break; + } + case Instruction::IntToPtr: { + transfer(dyn_cast(Inst), Dfval); + break; + } + } +} + +void PointerAnalysisVisitor::transfer(LoadInst *Inst, + PAAnalysisDataflowFacts *Dfval) { + auto *Addr = Inst->getPointerOperand(); + auto *Constraint = + CSG.getOrInsertConstraint(Inst, Addr, ConstraintKind::Load); + Dfval->push_back(Constraint); +} + +void PointerAnalysisVisitor::transfer(StoreInst *Inst, + PAAnalysisDataflowFacts *Dfval) { + auto *Addr = Inst->getPointerOperand(); + auto *Val = Inst->getValueOperand(); + Constraint *Cstrt; + if (isa(Val)) { + Cstrt = CSG.getOrInsertConstraint(Addr, Val, ConstraintKind::GetAddr); + } else { + Cstrt = CSG.getOrInsertConstraint(Addr, Val, ConstraintKind::Store); + } + Dfval->push_back(Cstrt); +} + +void 
PointerAnalysisVisitor::transfer(GetElementPtrInst *Inst, + PAAnalysisDataflowFacts *Dfval) { + auto *StructVal = Inst->getPointerOperand(); + auto *ConstraintFrom = + CSG.getOrInsertConstraint(Inst, StructVal, ConstraintKind::Subset); + auto *ConstraintTo = + CSG.getOrInsertConstraint(StructVal, Inst, ConstraintKind::Subset); + Dfval->push_back(ConstraintFrom); + Dfval->push_back(ConstraintTo); +} + +void PointerAnalysisVisitor::transfer(PHINode *Inst, + PAAnalysisDataflowFacts *Dfval) { + for (unsigned I = 0, NumOperands = Inst->getNumIncomingValues(); + I != NumOperands; ++I) { + auto *PHIArg = Inst->getIncomingValue(I); + auto *Constraint = + CSG.getOrInsertConstraint(Inst, PHIArg, ConstraintKind::Subset); + Dfval->push_back(Constraint); + } + if (Inst->getType()->isPointerTy()) { + for (unsigned I = 0, NumOperands = Inst->getNumIncomingValues(); + I != NumOperands; ++I) { + auto *PHIArg = Inst->getIncomingValue(I); + auto *ConstraintTo = + CSG.getOrInsertConstraint(PHIArg, Inst, ConstraintKind::Subset); + Dfval->push_back(ConstraintTo); + } + } +} + +void PointerAnalysisVisitor::transfer(BitCastInst *Inst, + PAAnalysisDataflowFacts *Dfval) { + auto *Src = Inst->getOperand(0); + auto *Constraint = + CSG.getOrInsertConstraint(Inst, Src, ConstraintKind::Subset); + Dfval->push_back(Constraint); + if (Src->getType()->isPointerTy()) { + auto *ConstraintTo = + CSG.getOrInsertConstraint(Src, Inst, ConstraintKind::Subset); + Dfval->push_back(ConstraintTo); + } +} + +void PointerAnalysisVisitor::transfer(SelectInst *Inst, + PAAnalysisDataflowFacts *Dfval) { + auto *TrueVal = Inst->getTrueValue(); + auto *FalseVal = Inst->getFalseValue(); + auto *ConstraintTrue = + CSG.getOrInsertConstraint(Inst, TrueVal, ConstraintKind::Subset); + + auto *ConstraintFalse = + CSG.getOrInsertConstraint(Inst, FalseVal, ConstraintKind::Subset); + + Dfval->push_back(ConstraintTrue); + Dfval->push_back(ConstraintFalse); + if (TrueVal->getType()->isPointerTy()) { + auto *ConstraintTo = + 
CSG.getOrInsertConstraint(TrueVal, Inst, ConstraintKind::Subset); + Dfval->push_back(ConstraintTo); + } + if (FalseVal->getType()->isPointerTy()) { + auto *ConstraintTo = + CSG.getOrInsertConstraint(FalseVal, Inst, ConstraintKind::Subset); + Dfval->push_back(ConstraintTo); + } +} + +void PointerAnalysisVisitor::transfer(IntToPtrInst *Inst, + PAAnalysisDataflowFacts *Dfval) { + auto *Src = Inst->getOperand(0); + auto *Constraint = + CSG.getOrInsertConstraint(Inst, Src, ConstraintKind::Subset); + Dfval->push_back(Constraint); +} + +void PointerAnalysisVisitor::transfer(CallInst *Inst, + PAAnalysisDataflowFacts *Dfval) { + auto *Callee = Inst->getCalledFunction(); + if (!Callee) { + auto *CalleeValue = Inst->getCalledOperand(); + auto *Constraint = + CSG.getOrInsertConstraint(Inst, CalleeValue, ConstraintKind::Unsolved); + for (unsigned I = 0, NumOperands = Inst->arg_size(); I != NumOperands; + ++I) { + auto *RArg = Inst->getArgOperand(I); + auto *CSGN = CSG.getOrInsertConstraintNode(RArg); + UnresolvedArgs[Constraint->Src].push_back(CSGN); + } + Dfval->push_back(Constraint); + ConstraintFunctionMap[Constraint] = CurrentFunction; + } else { + auto *PrevFunction = CurrentFunction; + if (Callee->isIntrinsic() || Callee->isDeclaration()) + return; + for (unsigned I = 0, NumOperands = Inst->arg_size(); I != NumOperands; + ++I) { + auto *RArg = Inst->getArgOperand(I); + auto *FArg = Callee->getArg(I); + Dfval->push_back( + CSG.getOrInsertConstraint(FArg, RArg, ConstraintKind::Subset)); + if (RArg->getType()->isPointerTy()) { + Dfval->push_back( + CSG.getOrInsertConstraint(RArg, FArg, ConstraintKind::Subset)); + } + } + DataflowResult::Type SubroutineResult; + PAAnalysisDataflowFacts SubroutineInitFact; + compForwardDataflow(Callee, this, &SubroutineResult, SubroutineInitFact); + for (auto *V : FunctionReturnValueMap[Callee]) { + Dfval->push_back( + CSG.getOrInsertConstraint(Inst, V, ConstraintKind::Subset)); + } + CurrentFunction = PrevFunction; + } +} + +void 
PointerAnalysisVisitor::transfer(ReturnInst *Inst, + PAAnalysisDataflowFacts *Dfval) { + auto *RetVal = Inst->getReturnValue(); + if (!RetVal) + return; + FunctionReturnValueMap[CurrentFunction].insert(RetVal); +} + +using ConstraintSolverFn = void (PointerAnalysisVisitor::*)( + const Constraint *Cstrt, ConstraintGraphNode *CSNode); + +static ConstraintSolverFn ConstraintSolvers[] = { + &PointerAnalysisVisitor::solveSubsetConstraint, + &PointerAnalysisVisitor::solveGetAddrConstraint, + &PointerAnalysisVisitor::solveLoadConstraint, + &PointerAnalysisVisitor::solveStoreConstraint, + &PointerAnalysisVisitor::solveUnsolvedConstraint, + nullptr // ConstraintKind::Init +}; + +void PointerAnalysisVisitor::solveConstraint() { + for (auto &CSNode : CSG) { + Worklist.push_back(CSNode.second.get()); + } + while (!Worklist.empty()) { + auto *CSNode = Worklist.front(); + Worklist.pop_front(); + auto CurrentConstraints = CSG.getConstraints(); + for (auto *Constraint : CSG.getConstraints()) { + if (Constraint->Src == CSNode) { + (this->*ConstraintSolvers[static_cast(Constraint->Kind)])( + Constraint, CSNode); + } + } + } +} + +void PointerAnalysisVisitor::solveLoadConstraint(const Constraint *Cstrt, + ConstraintGraphNode *CSNode) { + if (PointToSetMap[Cstrt->Src].empty() && + !CSG.hasConstraintEdge(Cstrt->Src->V, Cstrt->Dst->V, + ConstraintKind::Subset)) { + CSG.insertConstraint(Cstrt->Src->V, Cstrt->Dst->V, ConstraintKind::Subset); + Worklist.push_back(Cstrt->Dst); + return; + } + if (isa(Cstrt->Dst->V)) + return; + for (auto *V : PointToSetMap[Cstrt->Src]) { + auto *CSNodePointTo = CSG.getOrInsertConstraintNode(V); + if (!CSG.hasConstraintEdge(Cstrt->Dst->V, CSNodePointTo->V, + ConstraintKind::Subset)) { + CSG.insertConstraint(Cstrt->Dst->V, CSNodePointTo->V, + ConstraintKind::Subset); + Worklist.push_back(CSNodePointTo); + } + } +} + +void PointerAnalysisVisitor::solveStoreConstraint(const Constraint *Cstrt, + ConstraintGraphNode *CSNode) { + if 
(PointToSetMap[Cstrt->Dst].empty() && + !CSG.hasConstraintEdge(Cstrt->Dst->V, Cstrt->Src->V, + ConstraintKind::Subset)) { + CSG.insertConstraint(Cstrt->Dst->V, Cstrt->Src->V, ConstraintKind::Subset); + Worklist.push_back(Cstrt->Src); + return; + } + for (auto *V : PointToSetMap[Cstrt->Dst]) { + auto *CSNodePointTo = CSG.getOrInsertConstraintNode(V); + if (isa(CSNodePointTo->V)) + continue; + if (!CSG.hasConstraintEdge(CSNodePointTo->V, Cstrt->Src->V, + ConstraintKind::Subset)) { + CSG.insertConstraint(CSNodePointTo->V, Cstrt->Src->V, + ConstraintKind::Subset); + Worklist.push_back(Cstrt->Src); + } + } +} + +void PointerAnalysisVisitor::solveGetAddrConstraint( + const Constraint *Cstrt, ConstraintGraphNode *CSNode) { + auto PrevPointToSet = PointToSetMap[Cstrt->Src]; + PointToSetMap[Cstrt->Src].insert(CSNode->V); + CSG.getOrInsertConstraint(Cstrt->Dst->V, CSNode->V, ConstraintKind::Subset); + if (PrevPointToSet != PointToSetMap[Cstrt->Src]) { + Worklist.push_back(Cstrt->Src); + } +} + +void PointerAnalysisVisitor::solveSubsetConstraint( + const Constraint *Cstrt, ConstraintGraphNode *CSNode) { + auto PrevPointToSet = PointToSetMap[Cstrt->Dst]; + PointToSetMap[Cstrt->Dst].insert(PointToSetMap[Cstrt->Src].begin(), + PointToSetMap[Cstrt->Src].end()); + if (PrevPointToSet != PointToSetMap[Cstrt->Dst]) { + Worklist.push_back(Cstrt->Dst); + } +} + +void PointerAnalysisVisitor::solveUnsolvedConstraint( + const Constraint *Cstrt, ConstraintGraphNode *CSNode) { + auto *Call = dyn_cast(Cstrt->Dst->V); + assert(Call && "Dst should be a CallInst in a Unresolved constraint"); + for (auto *PointToValue : PointToSetMap[Cstrt->Src]) { + if (auto *Callee = dyn_cast(PointToValue)) { + DataflowResult::Type SubroutineResult; + PAAnalysisDataflowFacts SubroutineInitFact; + compForwardDataflow(Callee, this, &SubroutineResult, SubroutineInitFact); + for (auto *V : FunctionReturnValueMap[Callee]) { + CSG.getOrInsertConstraint(Cstrt->Dst->V, V, ConstraintKind::Subset); + 
Worklist.push_back(CSG.getOrInsertConstraintNode(V)); + } + if (Callee->getFunctionType()->getNumParams() == + UnresolvedArgs[Cstrt->Src].size()) { + for (unsigned I = 0, NumOperands = static_cast( + UnresolvedArgs[Cstrt->Src].size()); + I != NumOperands; ++I) { + auto *RArgNode = UnresolvedArgs[Cstrt->Src][I]; + auto *FArgNode = CSG.getOrInsertConstraintNode(Callee->getArg(I)); + CSG.getOrInsertConstraint(FArgNode->V, RArgNode->V, + ConstraintKind::Subset); + if (RArgNode->V->getType()->isPointerTy()) { + CSG.getOrInsertConstraint(RArgNode->V, FArgNode->V, + ConstraintKind::Subset); + } + Worklist.push_back(RArgNode); + } + } + auto *CGNode = CG[ConstraintFunctionMap[Cstrt]]; + bool Extend = false; + for (auto CI = CGNode->begin(), CE = CGNode->end(); CI != CE; CI++) { + auto *CallRecord = CI->second; + if (CallRecord->getFunction() == Callee) + Extend = true; + } + if (!Extend) { + CGNode->addCalledFunction(Call, CG.getOrInsertFunction(Callee)); + } + } + } +} + +/** + * Removes redundant call edges from the call graph. + */ +void PointerAnalysisVisitor::removeRedundantCallEdge() { + for (auto &Node : CG) { + auto *CGNode = Node.second.get(); + SmallMapVector CallCountsMap; + for (auto CI = CGNode->begin(), CE = CGNode->end(); CI != CE; CI++) { + auto *CallRecord = CI->second; + CallCountsMap[CallRecord->getFunction()] = 0; + } + for (auto CI = CGNode->begin(), CE = CGNode->end(); CI != CE; CI++) { + auto *CallRecord = CI->second; + CallCountsMap[CallRecord->getFunction()] += 1; + if (CallCountsMap[CallRecord->getFunction()] > 1) + CGNode->removeCallEdge(CI); + } + } +} + +std::array(ConstraintKind::Init)> + ConstraintKindToString = {"Subset", "GetAddr", "Load", "Store", + "Unresolve"}; + +/** * @brief Prints the constraint graph. + * + * This function prints the constraint graph in the DOT format. The graph + * represents the constraints between different values in the analysis. The + * constraints are grouped by function name and printed accordingly. 
If a value + * has function information, it is grouped under the respective function name. + * + * @param OS The output stream to which the graph will be printed. + */ +void PointerAnalysisVisitor::printConstraintGraph(raw_ostream &OS) { + OS << "digraph \"Constraint Graph\" {\n"; + + // Map to group constraints by function name + std::map> FunctionConstraints; + + for (const auto &Constraint : CSG.getConstraints()) { + std::string SrcName = getShortValueName(Constraint->Src->V); + std::string DstName = getShortValueName(Constraint->Dst->V); + + // Extract function names from source and destination + size_t SrcFunctionIdx = SrcName.find(":"); + size_t DstFunctionIdx = DstName.find(":"); + + std::string FunctionName; + + if (SrcFunctionIdx != std::string::npos) { + FunctionName = SrcName.substr(SrcFunctionIdx + 1); + } else if (DstFunctionIdx != std::string::npos) { + FunctionName = DstName.substr(DstFunctionIdx + 1); + } + + std::string ConstraintStr = + " \"" + SrcName + "\" -> \"" + DstName + "\" [label=\"" + + ConstraintKindToString[static_cast(Constraint->Kind)] + + "\"];\n"; + + if (SrcFunctionIdx != std::string::npos && + DstFunctionIdx != std::string::npos && + SrcName.substr(SrcFunctionIdx + 1) == + DstName.substr(DstFunctionIdx + 1)) { + // Both have function information and it's the same + FunctionConstraints[FunctionName].push_back(ConstraintStr); + } else if (!FunctionName.empty()) { + // At least one has function information, so group by that + FunctionConstraints[FunctionName].push_back(ConstraintStr); + } + } + + // Print grouped constraints by function + for (const auto &Entry : FunctionConstraints) { + OS << "// Function: " << Entry.first << "\n"; + for (const auto &Cstrt : Entry.second) { + OS << Cstrt; + } + } + + OS << "}\n"; +} + +void PointerAnalysisVisitor::printPointToSetMap(raw_ostream &OS) { + // Map to group PointToSet entries by function name + std::map>>> + FunctionPointSetMap; + + for (const auto &Map : PointToSetMap) { + std::string 
VarName = getShortValueName(Map.first->V); + + // Extract function name from the variable name + size_t FunctionIdx = VarName.find(":"); + if (FunctionIdx != std::string::npos) { + std::string FunctionName = VarName.substr(FunctionIdx + 1); + + // Store the variable and its PointToSet values in the corresponding + // function's group + FunctionPointSetMap[FunctionName].emplace_back( + VarName, std::vector(Map.second.begin(), Map.second.end())); + } + } + + // Print grouped PointToSet entries by function + for (const auto &Entry : FunctionPointSetMap) { + OS << "// Function: " << Entry.first << "\n"; + for (const auto &VarAndPointToSet : Entry.second) { + OS << VarAndPointToSet.first << ":"; + for (const auto &PointToValue : VarAndPointToSet.second) { + OS << " " << getShortValueName(PointToValue) << ""; + } + OS << "\n"; + } + } +} \ No newline at end of file diff --git a/llvm/tools/stackanalyzer/CallGraphGen.h b/llvm/tools/stackanalyzer/CallGraphGen.h new file mode 100644 index 000000000000..83e47c65cd2d --- /dev/null +++ b/llvm/tools/stackanalyzer/CallGraphGen.h @@ -0,0 +1,515 @@ +//===--- CallGraphGen.h - Analyze the callgraph of a LLVM bitcode file using +// pointer analysis ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_STACKANALYZER_CALLGRAPHGEN_H +#define LLVM_TOOLS_STACKANALYZER_CALLGRAPHGEN_H + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +namespace llvm { + +/// Base dataflow visitor class, defines the dataflow function +template class DataflowVisitor { +public: + virtual ~DataflowVisitor() {} + + std::set ReachableFunctions; + Function *CurrentFunction; + /** + * @brief Dataflow Function invoked for each basic block. + * + * @param Block The Basic Block. + * @param Dfval The input dataflow value. + * @param Forward True to compute dfval forward, otherwise backward. + */ + void compDFVal(BasicBlock *Block, T *Dfval) { + for (BasicBlock::iterator II = Block->begin(), IE = Block->end(); II != IE; + ++II) { + Instruction *Inst = &*II; + compDFVal(Inst, Dfval); + } + } + + /** + * @brief Dataflow Function invoked for each instruction. + * + * @param Inst The Instruction. + * @param Dfval The input dataflow value. + */ + virtual void compDFVal(Instruction *Inst, T *Dfval) = 0; + + /** + * @brief Merge of two dfvals, dest will be the merged result. + * + * @param Dest The destination dataflow value. + * @param Src The source dataflow value. + */ + virtual void merge(T *Dest, const T &Src) = 0; +}; + +/** + * @brief Dummy class to provide a typedef for the detailed result set. + * For each basicblock, we compute its input dataflow val and its output + * dataflow val. + */ +template struct DataflowResult { + typedef typename std::map> Type; +}; + +/** + * @brief Compute a forward iterated fixedpoint dataflow function, using a + * user-supplied visitor function. 
+ * + * Note that the caller must ensure that the function is in fact a monotone + * function, as otherwise the fixedpoint may not terminate. + * + * @param Fn The function. + * @param Visitor A function to compute dataflow vals. + * @param Result The results of the dataflow. + * @param Initval The initial dataflow value. + */ +template +void compForwardDataflow(Function *Fn, DataflowVisitor *Visitor, + typename DataflowResult::Type *Result, + const T &Initval) { + if (Visitor->ReachableFunctions.count(Fn)) { + return; + } + Visitor->ReachableFunctions.insert(Fn); + Visitor->CurrentFunction = Fn; + std::set Worklist; + + // Initialize the worklist with all exit blocks + for (Function::iterator BI = Fn->begin(); BI != Fn->end(); ++BI) { + BasicBlock *BB = &*BI; + Result->insert(std::make_pair(BB, std::make_pair(Initval, Initval))); + Worklist.insert(BB); + } + + // Iteratively compute the dataflow result + while (!Worklist.empty()) { + BasicBlock *BB = *Worklist.begin(); + Worklist.erase(Worklist.begin()); + + // Merge all incoming value + T BBEnterval = (*Result)[BB].first; + for (auto PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + BasicBlock *Pred = *PI; + Visitor->merge(&BBEnterval, (*Result)[Pred].second); + } + + (*Result)[BB].first = BBEnterval; + + Visitor->compDFVal(BB, &BBEnterval); + + // If outgoing value changed, propagate it along the CFG + if (BBEnterval == (*Result)[BB].second) + continue; + (*Result)[BB].second = BBEnterval; + + for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { + Worklist.insert(*SI); + } + } +} + +/** + * @brief Enum representing different kinds of constraints. + */ +enum class ConstraintKind { + Subset = 0, + GetAddr, + Load, + Store, + Unsolved, + Init, +}; + +class ConstraintGraph; + +/** + * @brief Class representing a node in the constraint graph. + */ +class ConstraintGraphNode { +public: + using ConstraintRecord = std::pair; + + /** + * @brief Constructor for ConstraintGraphNode. 
+ * + * @param VI The value associated with the node. + * @param CSG The constraint graph. + */ + ConstraintGraphNode(Value *VI, ConstraintGraph *CSG) : CSG(CSG), V(VI) {} + ConstraintGraphNode(const ConstraintGraphNode &) = delete; + ConstraintGraphNode &operator=(const ConstraintGraphNode &) = delete; + + using iterator = std::vector::iterator; + using const_iterator = std::vector::const_iterator; + + inline iterator begin() { return ConstraintedSuccs.begin(); } + inline iterator end() { return ConstraintedSuccs.end(); } + inline const_iterator begin() const { return ConstraintedSuccs.begin(); } + inline const_iterator end() const { return ConstraintedSuccs.end(); } + inline bool empty() const { return ConstraintedSuccs.empty(); } + inline unsigned size() const { + return static_cast(ConstraintedSuccs.size()); + } + + ConstraintGraphNode *operator[](unsigned Idx) const { + return ConstraintedSuccs[Idx].first; + } + + /** + * @brief Add a constraint to the node. + * + * @param Succ The successor node. + * @param Kind The kind of constraint. + */ + void addConstraint(ConstraintGraphNode *Succ, ConstraintKind Kind) { + ConstraintedSuccs.push_back(std::make_pair(Succ, Kind)); + } + + void removeConstraintEdge(iterator I) { + *I = ConstraintedSuccs.back(); + ConstraintedSuccs.pop_back(); + } + + /** + * Returns the constraint predecessors of the current object. + * + * @return A vector of ConstraintRecord objects representing the constraint + * predecessors. + */ + +private: + friend class ConstraintGraph; + friend class PointerAnalysisVisitor; + + ConstraintGraph *CSG; + + std::vector ConstraintedSuccs; + + Value *V; + +public: + unsigned Index; +}; + +/** + * @brief Struct representing a constraint. + */ +struct Constraint { + ConstraintGraphNode *Dst; + ConstraintGraphNode *Src; + ConstraintKind Kind; + +public: + /** + * @brief Constructor for Constraint. + * + * @param CSDst The destination node. + * @param CSSrc The source node. 
+   * @param CSKind The kind of constraint.
+   */
+  Constraint(ConstraintGraphNode *CSDst, ConstraintGraphNode *CSSrc,
+             ConstraintKind CSKind)
+      : Dst(CSDst), Src(CSSrc), Kind(CSKind) {}
+
+  /// Two constraints are equal when source, destination and kind all match.
+  bool operator==(const Constraint &Other) const {
+    return Src == Other.Src && Dst == Other.Dst && Kind == Other.Kind;
+  }
+
+  /// Lexicographic order on (Src->Index, Dst->Index, Kind).
+  ///
+  /// Each key is only consulted when all earlier keys compare equal, which
+  /// makes this a strict weak ordering suitable for ordered containers and
+  /// sorting. Kind takes part in the order so that it agrees with operator==.
+  bool operator<(const Constraint &Other) const {
+    if (Src->Index != Other.Src->Index)
+      return Src->Index < Other.Src->Index;
+    if (Dst->Index != Other.Dst->Index)
+      return Dst->Index < Other.Dst->Index;
+    return Kind < Other.Kind;
+  }
+};
+
+/**
+ * @brief Class representing the constraint graph.
+ */
+class ConstraintGraph {
+  Module &M;
+
+  using ConstraintNodeMap =
+      std::map>;
+
+  // All nodes of the graph; the map owns them.
+  ConstraintNodeMap ConstraintGraphNodes;
+
+  // Node returned by getInitialConstraintNode().
+  ConstraintGraphNode *InitialConstraintNode;
+
+  /**
+   * @brief Helper function to get or create a constraint node for
+   * initialization.
+   *
+   * @return A unique pointer to the created constraint node.
+   */
+  std::unique_ptr createInitialConstraintNode();
+
+  // All constraints recorded so far; the vector owns them.
+  std::vector> Constraints;
+
+public:
+  /**
+   * @brief Constructor for ConstraintGraph.
+   *
+   * @param Module The module.
+   */
+  explicit ConstraintGraph(Module &Module);
+
+  using iterator = ConstraintNodeMap::iterator;
+  using const_iterator = ConstraintNodeMap::const_iterator;
+
+  // Iteration over the node map.
+  inline iterator begin() { return ConstraintGraphNodes.begin(); }
+  inline iterator end() { return ConstraintGraphNodes.end(); }
+  inline const_iterator begin() const { return ConstraintGraphNodes.begin(); }
+  inline const_iterator end() const { return ConstraintGraphNodes.end(); }
+
+  /**
+   * @brief Get the initial constraint node.
+   *
+   * @return The initial constraint node.
+   */
+  ConstraintGraphNode *getInitialConstraintNode() const {
+    return InitialConstraintNode;
+  }
+
+  /**
+   * @brief Get or insert a constraint node.
+   *
+   * @param V The value.
+   * @return The constraint node.
+   */
+  ConstraintGraphNode *getOrInsertConstraintNode(Value *V);
+
+  /**
+   * @brief Get or insert a constraint.
+   *
+   * @param Dst The destination value.
+   * @param Src The source value.
+   * @param Kind The kind of constraint.
+   * @return The constraint.
+   */
+  Constraint *getOrInsertConstraint(Value *Dst, Value *Src,
+                                    ConstraintKind Kind);
+
+  Constraint *insertConstraint(Value *Dst, Value *Src, ConstraintKind Kind);
+
+  /// Returns true if a constraint with exactly this destination value, source
+  /// value and kind has been recorded. Linear in the number of constraints.
+  bool hasConstraintEdge(const Value *Dst, const Value *Src,
+                         ConstraintKind Kind) const {
+    for (auto &C : Constraints) {
+      if (C->Src->V == Src && C->Dst->V == Dst && C->Kind == Kind) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /// Returns the node registered for \p V, or nullptr if there is none.
+  ///
+  /// The lookup uses find() rather than the map's operator[] so that a miss
+  /// does not insert an entry holding a null node into the graph.
+  ConstraintGraphNode *operator[](const Value *V) {
+    auto It = ConstraintGraphNodes.find(V);
+    return It == ConstraintGraphNodes.end() ? nullptr : It->second.get();
+  }
+
+  /// Returns non-owning pointers to all recorded constraints, in insertion
+  /// order. The graph keeps ownership.
+  std::vector getConstraints() const {
+    std::vector ConstraintVec;
+    for (auto &C : Constraints) {
+      ConstraintVec.push_back(C.get());
+    }
+    return ConstraintVec;
+  }
+};
+
+// Node-level graph traits: children of a node are the successors stored in
+// its (successor, kind) edge records.
+template <> struct GraphTraits {
+  using NodeRef = const ConstraintGraphNode *;
+  using CSNPairTy = ConstraintGraphNode::ConstraintRecord;
+  using EdgeRef = const ConstraintGraphNode::ConstraintRecord;
+
+  static NodeRef getEntryNode(const ConstraintGraphNode *CSN) { return CSN; }
+  // Projects an edge record onto its successor node.
+  static const ConstraintGraphNode *CSNGetValue(CSNPairTy P) { return P.first; }
+
+  using ChildIteratorType = mapped_iterator;
+  using ChildEdgeIteratorType = ConstraintGraphNode::const_iterator;
+
+  static ChildIteratorType child_begin(NodeRef N) {
+    return ChildIteratorType(N->begin(), &CSNGetValue);
+  }
+
+  static ChildIteratorType child_end(NodeRef N) {
+    return ChildIteratorType(N->end(), &CSNGetValue);
+  }
+
+  static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+    return N->begin();
+  }
+
+  static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); }
+
+  static NodeRef edge_dest(EdgeRef E) { return E.first; }
+};
+
+// Graph-level traits: the entry node is the graph's initial constraint node
+// and the node range is the graph's node map.
+template <> struct GraphTraits {
+  using PairTy =
+      std::pair>;
+  using NodeRef = const ConstraintGraphNode *;
+  using EdgeRef = const ConstraintGraphNode::ConstraintRecord;
+
+  static NodeRef getEntryNode(const ConstraintGraph *CSG) {
+    return CSG->getInitialConstraintNode();
+  }
+
+  
using nodes_iterator = ConstraintGraph::const_iterator;
+
+  // The node range is the graph's own begin()/end(), i.e. its node map.
+  static nodes_iterator nodes_begin(const ConstraintGraph *CSG) {
+    return CSG->begin();
+  }
+
+  static nodes_iterator nodes_end(const ConstraintGraph *CSG) {
+    return CSG->end();
+  }
+};
+
+// Graph-level traits layered on top of another GraphTraits specialisation:
+// entry node and node range come from the ConstraintGraph, everything else
+// is inherited from the base.
+template <>
+struct GraphTraits
+    : public GraphTraits {
+  using PairTy =
+      std::pair>;
+
+  static NodeRef getEntryNode(const ConstraintGraph *CSG) {
+    return CSG->getInitialConstraintNode();
+  }
+
+  using nodes_iterator = ConstraintGraph::const_iterator;
+
+  static nodes_iterator nodes_begin(const ConstraintGraph *CSG) {
+    return CSG->begin();
+  }
+
+  static nodes_iterator nodes_end(const ConstraintGraph *CSG) {
+    return CSG->end();
+  }
+};
+
+/**
+ * @struct PointerAnalysisCLIConfig
+ * @brief Configuration options for pointer analysis CLI.
+ *
+ * The tool's main() fills the three fields, in declaration order, from the
+ * `anders`, `debuginfo` and `entry` command-line options.
+ */
+struct PointerAnalysisCLIConfig {
+  bool UseAnders;
+  bool UseDebug;
+  std::string EntryFunction;
+};
+
+/// An analysis pass to compute the \c CallGraph for a \c Module using pointer
+/// analysis.
+///
+/// This class implements the concept of an analysis pass used by the \c
+/// ModuleAnalysisManager to run an analysis over a module and cache the
+/// resulting data.
+class PACallGraphAnalysis : public AnalysisInfoMixin {
+  friend AnalysisInfoMixin;
+
+  static AnalysisKey Key;
+
+  // Copy of the command-line configuration handed to the constructor.
+  PointerAnalysisCLIConfig Config;
+
+public:
+  explicit PACallGraphAnalysis(PointerAnalysisCLIConfig Config)
+      : Config(Config) {}
+
+  /// A formulaic type to inform clients of the result type.
+  using Result = CallGraph;
+
+  /// Compute the \c CallGraph for the module \c M.
+  CallGraph run(Module &M, ModuleAnalysisManager &);
+};
+
+using PAAnalysisDataflowFacts = std::vector;
+
+/**
+ * @class PointerAnalysisVisitor
+ * @brief A visitor class for performing pointer analysis on a given module.
+ *
+ * This class inherits from the DataflowVisitor class
+ * and is responsible for performing pointer analysis on a given module. 
It
+ * maintains various data structures and maps to store information related to
+ * constraint graphs, call graphs, points-to sets, unresolved arguments,
+ * function return value points-to sets, constraint-function mappings, and a
+ * deque of constraints.
+ *
+ * The main functionality of this class includes solving constraints, merging
+ * dataflow facts, computing dataflow values for instructions, and transferring
+ * dataflow facts for load, store, and call instructions. It also provides
+ * methods for propagating constraints, solving specific types of constraints,
+ * and printing the constraint graph and points-to set map.
+ *
+ * @see DataflowVisitor
+ * @see PAAnalysisDataflowFacts
+ */
+class PointerAnalysisVisitor : public DataflowVisitor {
+  // Constraint graph and call graph built up by this visitor.
+  ConstraintGraph CSG;
+  CallGraph CG;
+
+  // Bookkeeping maps named in the class comment above (points-to sets,
+  // unresolved arguments, function return values, constraint-to-function
+  // mapping) and the deque of pending constraints.
+  std::map> PointToSetMap;
+  std::map>
+      UnresolvedArgs;
+  std::map> FunctionReturnValueMap;
+  std::map ConstraintFunctionMap;
+  std::deque Worklist;
+
+public:
+  friend class PACallGraphAnalysis;
+  PointerAnalysisVisitor(Module &Module);
+  void solveConstraint();
+  // DataflowVisitor overrides used by the dataflow driver.
+  void merge(PAAnalysisDataflowFacts *Facts,
+             const PAAnalysisDataflowFacts &OtherFacts) override;
+  void compDFVal(Instruction *Inst, PAAnalysisDataflowFacts *Dfval) override;
+
+  void removeRedundantCallEdge();
+
+  // One transfer overload per handled instruction kind; each receives the
+  // instruction and the dataflow facts to update.
+  void transfer(LoadInst *Inst, PAAnalysisDataflowFacts *Dfval);
+  void transfer(StoreInst *Inst, PAAnalysisDataflowFacts *Dfval);
+  void transfer(CallInst *Inst, PAAnalysisDataflowFacts *Dfval);
+  void transfer(ReturnInst *Inst, PAAnalysisDataflowFacts *Dfval);
+  void transfer(GetElementPtrInst *Inst, PAAnalysisDataflowFacts *Dfval);
+  void transfer(BitCastInst *Inst, PAAnalysisDataflowFacts *Dfval);
+  void transfer(SelectInst *Inst, PAAnalysisDataflowFacts *Dfval);
+  void transfer(IntToPtrInst *Inst, PAAnalysisDataflowFacts *Dfval);
+  void transfer(PHINode *Inst, PAAnalysisDataflowFacts *Dfval);
+
+  // One solver per ConstraintKind; each receives the constraint being
+  // processed and a constraint-graph node.
+  void solveLoadConstraint(const Constraint *Cstrt,
+                           ConstraintGraphNode *CSNode);
+  void
solveStoreConstraint(const Constraint *Cstrt,
+                            ConstraintGraphNode *CSNode);
+  void solveGetAddrConstraint(const Constraint *Cstrt,
+                              ConstraintGraphNode *CSNode);
+  void solveSubsetConstraint(const Constraint *Cstrt,
+                             ConstraintGraphNode *CSNode);
+  void solveUnsolvedConstraint(const Constraint *Cstrt,
+                               ConstraintGraphNode *CSNode);
+
+private:
+  // Debug printers; both write to the given stream.
+  void printConstraintGraph(raw_ostream &OS);
+  void printPointToSetMap(raw_ostream &OS);
+};
+
+} // namespace llvm
+
+#endif // LLVM_TOOLS_STACKANALYZER_CALLGRAPHGEN_H
diff --git a/llvm/tools/stackanalyzer/StackUsage.cpp b/llvm/tools/stackanalyzer/StackUsage.cpp
new file mode 100644
index 000000000000..0fece62e6b9a
--- /dev/null
+++ b/llvm/tools/stackanalyzer/StackUsage.cpp
@@ -0,0 +1,297 @@
+//===--- StackUsage.cpp - Analyze the stack usage of functions inside a LLVM
+// bitcode file ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "StackUsage.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Host.h"
+#include
+
+using namespace llvm;
+using namespace llvm::sys;
+
+namespace {
+
+/// Builds a name of the form "<Prefix>_<ms since epoch>_<6-digit random>".
+///
+/// The timestamp comes from the system clock and the random part is drawn
+/// uniformly from [0, 999999], so two calls are unlikely, but not guaranteed,
+/// to produce different names.
+SmallString<128> generateUniqueName(StringRef Prefix) {
+  auto TimePoint = std::chrono::system_clock::now();
+  auto Duration = TimePoint.time_since_epoch();
+  auto Millis =
+      std::chrono::duration_cast(Duration).count();
+
+  // The generator is seeded once and shared by all calls.
+  static std::random_device RD;
+  static std::mt19937 RNG(RD());
+  std::uniform_int_distribution Dist(0, 999999);
+  unsigned RandomNum = Dist(RNG);
+
+  SmallString<128> UniqueName;
+  raw_svector_ostream OS(UniqueName);
+  // Stream the tick count directly: its integer type is platform dependent
+  // (long or long long), so a fixed "%lld" conversion is not portable.
+  OS << Prefix << "_" << Millis << "_" << format("%06u", RandomNum);
+
+  return UniqueName;
+}
+
+} // anonymous namespace
+
+namespace llvm {
+
+void parseStackSizeFromSU(Module &Module,
+                          MapVector &StackSizeMap,
+                          StringRef AnalysisTarget) {
+  // Emit the stack-usage data into a uniquely named temporary .su file and
+  // read it back.
+  std::string UniqueSuFilename = (generateUniqueName("su_file") + ".su").str();
+  emitSUFile(UniqueSuFilename, Module, AnalysisTarget);
+
+  auto BufferOrError = MemoryBuffer::getFile(UniqueSuFilename);
+  if (std::error_code EC = BufferOrError.getError()) {
+    errs() << "Error opening file " << UniqueSuFilename << ": " << EC.message()
+           << "\n";
+    return;
+  }
+
+  std::unique_ptr Buffer = std::move(BufferOrError.get());
+  StringRef Content = Buffer->getBuffer();
+
+  // Split the file content into lines
+  SmallVector Lines;
+  Content.split(Lines, '\n');
+
+  //
Iterate through each line
+  for (StringRef Line : Lines) {
+    if (Line.trim().empty())
+      continue; // Skip empty lines
+
+    // Split the line by tabs; at least three fields are required, of which
+    // the first (location and function name) and second (size) are used.
+    SmallVector Parts;
+    Line.split(Parts, '\t', -1, false);
+
+    if (Parts.size() < 3) {
+      errs() << "Invalid format in line: " << Line << "\n";
+      continue;
+    }
+
+    // Extract the function name and stack size
+    StringRef FullFunctionName = Parts[0];
+    StringRef StackSizeStr = Parts[1];
+
+    // Parse the stack size
+    unsigned StackSize;
+    if (StackSizeStr.getAsInteger(10, StackSize)) {
+      errs() << "Invalid stack size in line: " << Line << "\n";
+      continue;
+    }
+
+    // Extract the function name: drop the directory part, then keep what
+    // follows the last ':'.
+    StringRef FunctionName = sys::path::filename(FullFunctionName);
+    FunctionName = FunctionName.rsplit(':').second;
+
+    // Find the corresponding function in the module
+    Function *F = Module.getFunction(FunctionName);
+    if (!F) {
+      errs() << "Function " << FunctionName << " not found in module\n";
+      continue;
+    }
+
+    // Insert the function and its stack size into the map
+    StackSizeMap[F] = StackSize;
+  }
+
+  // Remove the .su file
+  auto EC = fs::remove(UniqueSuFilename);
+  if (EC) {
+    errs() << "Error removing SU file: " << EC.message() << "\n";
+  }
+}
+
+/// Compiles \p Module for the requested target so that the code generator
+/// writes its stack-usage report to \p SUFilename.
+///
+/// The module's data layout and target triple are overwritten with those of
+/// the selected target. The object code itself goes to a temporary file that
+/// is discarded again. Errors are reported on errs() and end the function
+/// early.
+///
+/// \param SUFilename Path of the stack-usage (.su) file to produce.
+/// \param Module The module to compile; it is modified.
+/// \param TargetTripleInput Target triple to compile for; when empty, the
+///        host's default triple is used.
+void emitSUFile(StringRef SUFilename, Module &Module,
+                StringRef TargetTripleInput) {
+  // Pick the triple exactly once: the default triple when none is given,
+  // otherwise the normalised form of the user's input.
+  std::string TargetTriple = TargetTripleInput.empty()
+                                 ? sys::getDefaultTargetTriple()
+                                 : Triple::normalize(TargetTripleInput);
+
+  InitializeAllTargetInfos();
+  InitializeAllTargets();
+  InitializeAllTargetMCs();
+  InitializeAllAsmParsers();
+  InitializeAllAsmPrinters();
+
+  std::string Error;
+  auto *Target = TargetRegistry::lookupTarget(TargetTriple, Error);
+  if (!Target) {
+    errs() << "Error: " << Error << "\n";
+    return;
+  }
+
+  auto *CPU = "generic";
+  auto *Features = "";
+
+  TargetOptions Opt;
+  // Ask the backend to write per-function stack usage to SUFilename.
+  Opt.StackUsageOutput = SUFilename;
+  auto RM = std::optional();
+  std::unique_ptr TargetMachine(
+      Target->createTargetMachine(TargetTriple, CPU, Features, Opt, RM));
+  // createTargetMachine() returns null when the target cannot provide one.
+  if (!TargetMachine) {
+    errs() << "Error: could not create a target machine for " << TargetTriple
+           << "\n";
+    return;
+  }
+
+  Module.setDataLayout(TargetMachine->createDataLayout());
+  Module.setTargetTriple(TargetTriple);
+
+  std::string UniqueObjectFilename =
+      (generateUniqueName("output-objectfile") + ".o").str();
+  auto TempObjectFile = sys::fs::TempFile::create(UniqueObjectFilename);
+  if (!TempObjectFile) {
+    errs() << "Error creating temp object file: "
+           << toString(TempObjectFile.takeError()) << "\n";
+    return;
+  }
+
+  // The stream borrows the temp file's descriptor and must not close it;
+  // discard() below does that.
+  raw_fd_ostream Dest(TempObjectFile->FD, false);
+
+  legacy::PassManager Pass;
+  auto FileType = CGFT_ObjectFile;
+  if (TargetMachine->addPassesToEmitFile(Pass, Dest, nullptr, FileType)) {
+    errs() << "TargetMachine can't emit a file of this type\n";
+    // A TempFile has to be kept or discarded before it goes out of scope.
+    consumeError(TempObjectFile->discard());
+    return;
+  }
+
+  Pass.run(Module);
+  Dest.flush();
+
+  // Discard the temporary object file
+  if (auto Err = TempObjectFile->discard()) {
+    errs() << "Error discarding object file: " << toString(std::move(Err))
+           << "\n";
+  }
+}
+
+/// Searches \p CG for a defined function named EntryFunction and, if found,
+/// runs the path traversal from it. When no such function exists an error is
+/// printed and no path is recorded.
+void StackOverflowDetector::analyze(
+    const CallGraph &CG,
+    const MapVector &StackSizes) {
+  Function *Entry = nullptr;
+  for (auto CGI = CG.begin(), CGE = CG.end(); CGI != CGE; ++CGI) {
+    Function *F = CGI->second->getFunction();
+    // Skip nodes without a function and mere declarations.
+    if (!F || F->isDeclaration())
+      continue;
+    if (F->getName() == EntryFunction) {
+      Entry = F;
+      break;
+    }
+  }
+  // Never dereference the end iterator: bail out if the entry is missing.
+  if (!Entry) {
+    errs() << "Entry function " << EntryFunction
+           << " not found in call graph\n";
+    return;
+  }
+  traverse(Entry, CG, StackSizes);
+}
+
+/// Prints either a "no path found" line or, for every recorded path, its
+/// call stack followed by a short analysis.
+void StackOverflowDetector::printResults(raw_ostream &OS) const {
+  if (OverflowPaths.empty()) {
+    OS << "No potential stack overflow path found(limit:" << Threshold
+       << " bytes).\n";
+  } else {
+    for (const auto &Path : OverflowPaths) {
+      OS << "Potential stack overflow path found(limit:" << Threshold
+         << " bytes): \n";
+      OS << "CallStack:\n";
+      for (auto *F :
Path.CallStack) {
+        OS << " " << F->getName() << "\n";
+      }
+      OS << "Analysis:\n";
+      // Paths recorded for a call cycle may stay within the limit; paths
+      // recorded by evaluateCurrentPath() always exceed it.
+      if (Path.StackSize <= Threshold) {
+        OS << "- Recursive call without proper base case check.\n";
+        OS << "- Unbounded recursion may lead to stack overflow.\n";
+      } else {
+        OS << "- Stack usage exceeds the limit along the call stack.\n";
+      }
+    }
+  }
+}
+
+/// Sums the stack sizes of all functions on PathStack. If the sum exceeds
+/// Threshold, the current path is appended to OverflowPaths.
+///
+/// \returns true if the current path was recorded as an overflow path.
+bool StackOverflowDetector::evaluateCurrentPath() {
+  unsigned CumulativeStackSize = 0;
+  for (auto &Entry : PathStack) {
+    CumulativeStackSize += Entry.second;
+  }
+  if (CumulativeStackSize > Threshold) {
+    std::vector CallStack;
+    for (auto &Entry : PathStack) {
+      CallStack.push_back(Entry.first);
+    }
+    OverflowPaths.push_back(Path({CallStack, CumulativeStackSize}));
+    return true;
+  }
+  return false;
+}
+
+/// Depth-first walk over the call graph starting at \p F. PathStack holds the
+/// functions on the current path together with their stack sizes.
+///
+/// \returns true if at least one overflow path was recorded at or below F.
+bool StackOverflowDetector::traverse(
+    Function *F, const CallGraph &CG,
+    const MapVector &StackSizes) {
+  // Check for loop detection: if we revisit a node that is in the PathStack,
+  // it's a loop
+  if (PathStack.count(F)) {
+    // Stack cost of one trip around the cycle, from F's first occurrence on.
+    unsigned LoopStackSize = 0;
+    for (auto PI = PathStack.find(F), PE = PathStack.end(); PI != PE; ++PI) {
+      LoopStackSize += PI->second;
+    }
+
+    // If the loop's stack cost is zero, treat it as a single node and evaluate
+    // current path
+    if (LoopStackSize == 0) {
+      return evaluateCurrentPath();
+    }
+    // Otherwise, consider it a potential overflow path
+    std::vector CallStack;
+    unsigned CumulativeStackSize = 0;
+    for (auto &Entry : PathStack) {
+      CallStack.push_back(Entry.first);
+      CumulativeStackSize += Entry.second;
+    }
+    // Add the called function to the call stack to give better diagnostics
+    CallStack.push_back(F);
+    OverflowPaths.push_back(Path({CallStack, CumulativeStackSize}));
+    return true;
+  }
+
+  Visited.insert(F);
+  unsigned CurrentStackSize = StackSizes.lookup(F);
+  PathStack.insert({F, CurrentStackSize});
+  if (evaluateCurrentPath()) {
+    // F must leave the path again before returning; otherwise it would stay
+    // on PathStack while the caller explores F's siblings and be counted in
+    // every later path.
+    PathStack.pop_back();
+    return true;
+  }
+  auto *CGNode = CG[F];
+
+  bool FindOverflowPath = false;
+  for (auto &Callee : *CGNode) {
+    Function *CalleeF =
Callee.second->getFunction();
+    // Only defined functions are followed; declarations have no body to
+    // analyse.
+    if (CalleeF && !CalleeF->isDeclaration()) {
+      FindOverflowPath = traverse(CalleeF, CG, StackSizes) || FindOverflowPath;
+    }
+  }
+
+  // Done with F: remove it from the current path.
+  PathStack.pop_back();
+  return FindOverflowPath;
+}
+} // namespace llvm
\ No newline at end of file
diff --git a/llvm/tools/stackanalyzer/StackUsage.h b/llvm/tools/stackanalyzer/StackUsage.h
new file mode 100644
index 000000000000..9220997a7971
--- /dev/null
+++ b/llvm/tools/stackanalyzer/StackUsage.h
@@ -0,0 +1,92 @@
+//===--- StackUsage.h - Analyze the stack usage of functions inside a LLVM
+// bitcode file ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_STACKANALYZER_STACKUSAGE_H
+#define LLVM_TOOLS_STACKANALYZER_STACKUSAGE_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include
+
+namespace llvm {
+/**
+ * @brief Computes the stack size of the functions in a module.
+ *
+ * This function calls emitSUFile() to write a temporary, uniquely named .su
+ * file for the module, parses that file line by line, stores the stack size
+ * of every listed function that exists in the module into the provided map,
+ * and finally removes the temporary file. Problems (unreadable file,
+ * malformed line, unknown function) are reported on errs(); malformed lines
+ * and unknown functions are skipped.
+ *
+ * @param Module The LLVM module containing the functions. It is passed on to
+ * emitSUFile(), which modifies it.
+ * @param StackSizeMap A map to be populated with the stack sizes for each
+ * function.
+ * @param AnalysisTarget The target triple for the analysis; forwarded to
+ * emitSUFile().
+ */
+void parseStackSizeFromSU(Module &Module,
+                          MapVector &StackSizeMap,
+                          StringRef AnalysisTarget);
+
+/**
+ * @brief Emits a stack usage file for the given module.
+ *
+ * This function generates a stack usage file for the specified module. The
+ * stack usage file contains information about the stack usage of the functions
+ * in the module. 
+ *
+ * The module is compiled to a temporary object file, which is discarded
+ * afterwards; the module's data layout and target triple are overwritten in
+ * the process. Errors are reported on errs().
+ *
+ * @param Filename The path to the output file.
+ * @param Module The LLVM module for which the stack usage file is generated.
+ * @param TargetTripleInput The target triple for the module; when empty, the
+ * host's default target triple is used.
+ */
+void emitSUFile(StringRef Filename, Module &Module,
+                StringRef TargetTripleInput);
+
+/**
+ * @class StackOverflowDetector
+ * @brief A class that detects stack overflow in a program.
+ *
+ * The StackOverflowDetector class analyzes the call graph of a program and
+ * detects potential stack overflow paths. It uses a depth-first search
+ * algorithm to traverse the call graph and keeps track of the stack sizes of
+ * each function. The class provides a method to analyze the call graph and
+ * print the results.
+ *
+ * @note This class assumes that the call graph and stack sizes have already
+ * been computed.
+ */
+class StackOverflowDetector {
+
+  // One reported path: the functions on it and their summed stack size.
+  struct Path {
+    std::vector CallStack;
+    unsigned StackSize;
+  };
+
+  // All paths reported so far, in discovery order.
+  SmallVector OverflowPaths;
+  // Functions on the path currently being explored, with their stack sizes.
+  MapVector PathStack;
+  // Every function traverse() has entered.
+  std::set Visited;
+  // Stack size limit in bytes; a path is reported when its sum exceeds it.
+  unsigned Threshold;
+  // Name of the function at which analyze() starts the traversal.
+  std::string EntryFunction;
+
+  // Recursive walk over the callees of F; returns true if a path was
+  // reported.
+  bool traverse(Function *F, const CallGraph &CG,
+                const MapVector &StackSizes);
+  // Reports the current PathStack if its summed stack size exceeds
+  // Threshold; returns true if it did.
+  bool evaluateCurrentPath();
+
+public:
+  StackOverflowDetector(unsigned Limit, const std::string &Entry)
+      : Threshold(Limit), EntryFunction(Entry) {}
+
+  // Looks up the entry function in the call graph and explores the paths
+  // that start there.
+  void analyze(const CallGraph &CG,
+               const MapVector &);
+
+  // Writes the recorded paths, or a "no path found" message, to OS.
+  void printResults(raw_ostream &OS) const;
+};
+} // namespace llvm
+
+#endif // LLVM_TOOLS_STACKANALYZER_STACKUSAGE_H
diff --git a/llvm/tools/stackanalyzer/stackanalyzer.cpp b/llvm/tools/stackanalyzer/stackanalyzer.cpp
new file mode 100644
index 000000000000..a7a4b6a6c027
--- /dev/null
+++ b/llvm/tools/stackanalyzer/stackanalyzer.cpp
@@ -0,0 +1,160 @@
+#include "CallGraphGen.h"
+#include "StackUsage.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
+#include
"llvm/Passes/PassPlugin.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include
+#include
+
+using namespace llvm;
+
+// All options of this tool live in one category so that main() can hide the
+// unrelated options from the help output.
+static cl::OptionCategory StackAnalyzerCategory("StackAnalyzerCategory");
+
+// Positional argument: the bitcode file to analyse.
+static cl::opt
+    InputFilename(cl::Positional, cl::desc("Input .bc file to be analyzed"),
+                  cl::cat(StackAnalyzerCategory));
+
+// -target: forwarded to parseStackSizeFromSU(); empty by default.
+static cl::opt
+    AnalysisTarget("target",
+                   cl::desc("The target backend for call stack analysis"),
+                   cl::init(""), cl::cat(StackAnalyzerCategory));
+
+// -callgraph: print the call graph.
+static cl::opt
+    UseCallGraph("callgraph",
+                 cl::desc("Output the callgraph given the .bc file"),
+                 cl::cat(StackAnalyzerCategory));
+
+// -analysis: run the stack overflow detector and print its findings.
+static cl::opt
+    UseAnalysis("analysis",
+                cl::desc("Output possible path of the callgraph which can "
+                         "possibly cause stack overflow"),
+                cl::cat(StackAnalyzerCategory));
+
+// -stacksize: limit handed to the stack overflow detector.
+static cl::opt
+    LimitSize("stacksize",
+              cl::desc("Max stack size of the limit of a path within the "
+                       "callgraph, given the .bc file. 
"
+                       "Should be used together with `analysis`."),
+              cl::init(1024), cl::cat(StackAnalyzerCategory));
+
+// -anders: stored in the pointer-analysis configuration.
+static cl::opt
+    UseAnders("anders",
+              cl::desc("Use Anders analysis to analyze the call graph"),
+              cl::init(false), cl::cat(StackAnalyzerCategory));
+
+// -o: file that receives the call graph in .dot format. It is only consulted
+// when -callgraph is given. The two literals are joined with ". " so the
+// help text reads as two sentences.
+static cl::opt OutputFilename(
+    "o",
+    cl::desc("Output callgraph in .dot format with stack cost information. "
+             "Should be used together with `callgraph`."),
+    cl::init(""), cl::cat(StackAnalyzerCategory));
+
+// -entry: function at which the analysis starts; defaults to "main".
+static cl::opt
+    EntryFunction("entry",
+                  cl::desc("The name of the entry function for the callgraph"),
+                  cl::init("main"), cl::cat(StackAnalyzerCategory));
+
+// Hidden options
+static cl::opt
+    UseDebug("debuginfo",
+             cl::desc("Enable debug output for the call graph analysis"),
+             cl::cat(StackAnalyzerCategory), cl::Hidden);
+
+/// Opens \p Path (or standard input) as a memory buffer.
+///
+/// \returns The buffer on success, otherwise the error that occurred while
+/// opening or reading the input.
+static Expected> openBitcodeFile(StringRef Path) {
+  return errorOrToExpected(MemoryBuffer::getFileOrSTDIN(Path));
+}
+
+/// Tool entry point: loads the bitcode file, computes the pointer-analysis
+/// based call graph and the per-function stack sizes, then prints the call
+/// graph and/or the stack overflow analysis as requested on the command line.
+int main(int argc, char **argv) {
+  InitLLVM X(argc, argv);
+  cl::HideUnrelatedOptions(StackAnalyzerCategory);
+  cl::ParseCommandLineOptions(argc, argv);
+  ExitOnError ExitOnErr("stackanalyzer: ");
+
+  // Load the module; any failure terminates the tool with a message.
+  LLVMContext Context;
+  auto MB = ExitOnErr(openBitcodeFile(InputFilename));
+  auto M = ExitOnErr(parseBitcodeFile(MB->getMemBufferRef(), Context));
+
+  auto Config = PointerAnalysisCLIConfig{UseAnders, UseDebug, EntryFunction};
+
+  // Register the call graph analysis and force it to run once so that its
+  // result is cached in the analysis manager.
+  ModuleAnalysisManager MAM;
+  PassBuilder PB;
+  PB.registerModuleAnalyses(MAM);
+  MAM.registerPass([Config] { return PACallGraphAnalysis(Config); });
+  ModulePassManager MPM;
+  MPM.addPass(RequireAnalysisPass());
+  MPM.run(*M, MAM);
+
+  // Every function starts with stack size 0; functions found in the stack
+  // usage report get their reported size.
+  MapVector StackSize;
+  for (auto &F : *M) {
+    StackSize.insert(std::make_pair(&F, 0));
+  }
+  parseStackSizeFromSU(*M, StackSize, AnalysisTarget);
+
+  const auto &Graph = MAM.getResult(*M);
+
+  if (UseCallGraph) {
+    if (!OutputFilename.empty()) {
+      std::error_code
EC;
+      raw_fd_ostream File(OutputFilename, EC, sys::fs::OF_Text);
+      if (!EC) {
+        File << "digraph \"CallGraph\" {\n";
+        // Pass 1: declare one node per non-intrinsic function.
+        for (auto &NodePair : Graph) {
+          CallGraphNode *Node = NodePair.second.get();
+          if (Function *F = Node->getFunction()) {
+            if (F->isIntrinsic())
+              continue;
+            File << " \"" << F->getName() << "\";\n";
+          }
+        }
+        // Pass 2: attach a label with the function's stack size.
+        for (auto &NodePair : Graph) {
+          CallGraphNode *Node = NodePair.second.get();
+          if (Function *F = Node->getFunction()) {
+            if (F->isIntrinsic())
+              continue;
+            File << " \"" << F->getName() << "\" [label=\"" << F->getName()
+                 << "\\nStack Size: " << StackSize[F] << " bytes\"];\n";
+          }
+        }
+        // Pass 3: one edge per caller/callee pair, intrinsics excluded on
+        // both ends.
+        for (auto &NodePair : Graph) {
+          CallGraphNode *Node = NodePair.second.get();
+          if (Function *F = Node->getFunction()) {
+            if (F->isIntrinsic())
+              continue;
+            for (auto &CallRecord : *Node) {
+              if (Function *Callee = CallRecord.second->getFunction()) {
+                if (Callee->isIntrinsic())
+                  continue;
+                File << " \"" << F->getName() << "\" -> \""
+                     << Callee->getName() << "\";\n";
+              }
+            }
+          }
+        }
+        File << "}\n";
+      } else {
+        // Do not pretend success when the requested output cannot be written.
+        errs() << "stackanalyzer: error opening " << OutputFilename << ": "
+               << EC.message() << "\n";
+        return 1;
+      }
+    } else {
+      // No output file requested: print the call graph to standard output.
+      Graph.print(outs());
+    }
+  }
+  if (UseAnalysis) {
+    StackOverflowDetector Detector{LimitSize, EntryFunction};
+    Detector.analyze(Graph, StackSize);
+    Detector.printResults(outs());
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn
index 2f46527b613a..5da4fdc912d5 100644
--- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn
@@ -316,6 +316,7 @@ group("test") {
     "//llvm/tools/opt",
     "//llvm/tools/sancov",
     "//llvm/tools/sanstats",
+    "//llvm/tools/stackanalyzer",
     "//llvm/tools/verify-uselistorder",
     "//llvm/tools/yaml2obj",
     "//llvm/unittests",
diff --git a/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn
new file mode 100644
index 000000000000..ceaec3167f21
--- /dev/null
+++ b/llvm/utils/gn/secondary/llvm/tools/stackanalyzer/BUILD.gn
@@
-0,0 +1,15 @@ +executable("stackanalyzer") { + deps = [ + "//llvm/lib/Analysis", + "//llvm/lib/Core", + "//llvm/lib/Passes", + "//llvm/lib/Bitcode/Reader", + "//llvm/lib/Support", + "//llvm/lib/Target:TargetsToBuild", + ] + sources = [ + "CallGraphGen.cpp", + "StackUsage.cpp", + "stackanalyzer.cpp", + ] +} diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index a7e9398ea8fd..9ac4af7a2762 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -4710,6 +4710,24 @@ cc_binary( ], ) +cc_binary( + name = "stackanalyzer", + srcs = glob([ + "tools/stackanalyzer/*.cpp", + "tools/stackanalyzer/*.h", + ]), + copts = llvm_copts, + stamp = 0, + deps = [ + ":AllTargetsCodeGens", + ":Analysis", + ":BitcodeReader", + ":Core", + ":Passes", + ":Support", + ], +) + cc_binary( name = "split-file", srcs = glob([ -- Gitee