基于LLVM的字符串加密的实现

前置要求

  • 对C++/C的入门知识
  • 高中及以上的英语水平
  • 一台电脑
  • 对命令行的基础使用
  • 对垃圾代码的忍耐能力
  • 会用Google
  • 会下载安装编译LLVM

基础知识

  • 每个源码文件(大致上)对应一个翻译单元(Translation Unit),LLVM体系中每个Module对应一个翻译单元
  • Module是LLVM中间表示的最外层封装,所有的函数,变量,元数据等等全部封装在对应的Module中
  • (几乎)所有跟LLVM中间表示有关的数据类型都继承自llvm::Value,下文中所有的值指代的都是该类所有子类的统称
  • 字符串在LLVM中间表示里用一个常数数组实现。(但有对应的基于字符串的构造方法)
  • 每个函数由一个或多个基本块组成,每个基本块有一个结束指令用于改变控制流。调用其他函数,跳转至其他基本块,返回void或其他值的指令都属于此列。

实现设计

设计上,我们搜索所有函数内对全局变量的引用,判断被引用的全局变量是否为常数数组;如果是,则在函数的起始位置插入解密代码,在函数的返回处插入重新加密的代码。这一点上优于现有的上海交通大学密码与计算机安全实验室的实现方案Armariris。

正文

首先我们导出一份任意包含字符串的源码的中间表示供参考。

我们使用的源码是如下我手动构造的示例文件:

#import <Foundation/Foundation.h>
#import <dlfcn.h>
#import <objc/runtime.h>
// File-scope C string; nothing else in this sample references it.
static char* foo1="GlobalVariable";
// Entry point of the sample: emits one C string literal via printf and one
// Objective-C string literal via NSLog, then returns 0.
int main(){
// Plain C string literal passed directly to printf.
printf("你好世界");
// Objective-C string literal (@"...") passed directly to NSLog.
NSLog(@"你好");
return 0;
}

使用clang来编译它:

clang -S -emit-llvm SOURCE.mm

产生了如下的LLVM中间表示:

; ModuleID = 'hw.m'
source_filename = "hw.m"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.13.0"

%struct.__NSConstantString_tag = type { i32*, i32, i8*, i64 }

@.str = private unnamed_addr constant [13 x i8] c"\E4\BD\A0\E5\A5\BD\E4\B8\96\E7\95\8C\00", align 1
@__CFConstantStringClassReference = external global [0 x i32]
@.str.1 = private unnamed_addr constant [3 x i16] [i16 20320, i16 22909, i16 0], section "__TEXT,__ustring", align 2
@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { i32* getelementptr inbounds ([0 x i32], [0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 2000, i8* bitcast ([3 x i16]* @.str.1 to i8*), i64 2 }, section "__DATA,__cfstring", align 8

; Function Attrs: noinline optnone ssp uwtable
; Definition of @main: takes no arguments, returns i32, uses attribute group #0.
define i32 @main() #0 {
; Stack slot for one i32, initialised to 0 (presumably clang's return-value
; slot -- nothing below reads it).
%1 = alloca i32, align 4
store i32 0, i32* %1, align 4
; printf is handed a constant GEP expression: the address of element 0 of @.str.
%2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0))
; NSLog is handed @_unnamed_cfstring_ through a constant bitcast to i8*.
notail call void (i8*, ...) @NSLog(i8* bitcast (%struct.__NSConstantString_tag* @_unnamed_cfstring_ to i8*))
ret i32 0
}

declare i32 @printf(i8*, ...) #1

declare void @NSLog(i8*, ...) #1

attributes #0 = { noinline optnone ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6}
!llvm.ident = !{!7}

!0 = !{i32 1, !"Objective-C Version", i32 2}
!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA,__objc_imageinfo,regular,no_dead_strip"}
!3 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
!4 = !{i32 1, !"Objective-C Class Properties", i32 64}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{i32 7, !"PIC Level", i32 2}
!7 = !{!"clang version 6.0.0 (trunk 318965) (llvm/trunk 318964)"}

基础知识:

LLVM的函数分为declare(声明)和definition(定义)两种。如上所示,declare指的是实现位于当前翻译单元之外的函数,definition则相反,指实现位于当前翻译单元之内的函数。

有了这些知识,接下来我们先搭建我们的Pass的大致结构.注意KeyMap是我们用于存储变量和对应Key的映射表:

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
#include <iostream>
#include <map>
#include <set>
#include <string>
/*
Unlike Armariris, which injects decryption code at llvm.global_ctors,
we try to find the containing Function of Users referencing our string GV.
Then we search for terminators.
We insert decryption code at the beginning of the function and encrypt it
back at terminators.

For Users where we can't find a Function, we then inject decryption code at
ctors.
*/
/*
Status: Currently we only handle strings passed in directly.
GV strings are not properly handled
*/
using namespace llvm;
using namespace std;
namespace llvm {
struct StringEncryption : public ModulePass {
static char ID;
map<GlobalVariable * /*Value*/, Constant * /*Key*/>
keymap; // Map GV to keys for encryption
StringEncryption() : ModulePass(ID) {}
StringRef getPassName() const override {
return StringRef("StringEncryption");
}
bool runOnModule(Module &M) override {
// in runOnModule. We simple iterate function list and dispatch functions
// to handlers
for (Module::iterator iter = M.begin(); iter != M.end(); iter++) {
Function &F = *iter;
HandleFunction(&F);
}
EncryptGVs(M);
return true;
} // End runOnModule
};
// Factory: returns a newly allocated StringEncryption pass as a generic Pass*.
Pass *createStringEncryptionPass() {
  Pass *Created = new StringEncryption();
  return Created;
}
} // namespace llvm

// Out-of-class definition of the static pass identifier declared in
// StringEncryption.
char StringEncryption::ID = 0;
// Static registration object for the pass, constructed with the strings
// "strenc" and "StringEncryption".
static RegisterPass<StringEncryption> X("strenc", "StringEncryption");

这里我们创建了一个ModulePass,顾名思义运行在每个Module之上。LLVM IR的Pass的入口点是对应的 runOnXXX 函数,这里即 runOnModule,并使用Module的迭代器来遍历所有的函数,并将对应的函数分发给HandleFunction()方法。

接下来我们开始实现HandleFunction函数,这个函数的主要作用是分析函数的相关信息并作处理,也就是所有的重要工作的所在地。首先我们使用 Function::isDeclaration 来过滤掉所有的declare函数。

然后我们依次遍历函数->基本块->指令->指令的参数:

// Globals referenced directly as instruction operands, and the return
// instructions of the function.
set<GlobalVariable *> Globals;
set<Instruction *> Terminators;
for (BasicBlock &Block : *Func) {
  for (Instruction &Inst : Block) {
    // Only return instructions are recorded as terminators here.
    if (isa<ReturnInst>(Inst)) {
      Terminators.insert(&Inst);
    }
    // Record every operand that is itself a global variable.
    for (unsigned Idx = 0, End = Inst.getNumOperands(); Idx != End; ++Idx) {
      if (GlobalVariable *Referenced =
              dyn_cast<GlobalVariable>(Inst.getOperand(Idx))) {
        Globals.insert(Referenced);
      }
    }
  }
}

但,这样的方式并不会收集到我们所用示例中的所有字符串,为什么呢?让我们倒回去看一下中间表示:

%2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0))
notail call void (i8*, ...) @NSLog(i8* bitcast (%struct.__NSConstantString_tag* @_unnamed_cfstring_ to i8*))

这里对NSLog传递的参数为一个BitCast的常量表达式.

LLVM使用的是非常严格的类型系统。比高级编程语言的类型检查更严格,例如,一个装有五个32位整数的数组和一个装有六个32位整数的数组在LLVM的类型体系中是不一样的,对他们的互相替换被视为非法操作。而BitCast指令/常量表达式就是LLVM的强制类型转换指令。在上面的例子中将类型为struct.__NSConstantString_tag指针的全局变量@_unnamed_cfstring_转换成了i8类型的指针,符合NSLog的函数声明。

而printf使用的GEP是LLVM中较为复杂难以理解的一条指令,简单的说GEP的作用是计算地址,详细的解释请参见上面的官方文档和The Often Misunderstood GEP Instruction

无论如何,在增加对应的处理之后我们的代码变成了:

for (BasicBlock &Block : *Func) {
  for (Instruction &Inst : Block) {
    // Only return instructions are recorded as terminators here.
    if (isa<ReturnInst>(Inst)) {
      Terminators.insert(&Inst);
    }
    for (const Use &U : Inst.operands()) {
      Value *Operand = U.get();
      // Either the operand is a global variable itself, or it is a constant
      // (such as a bitcast/GEP constant expression) that wraps one and is
      // peeled off with stripPointerCasts().
      GlobalVariable *Referenced = dyn_cast<GlobalVariable>(Operand);
      if (Referenced == nullptr) {
        if (Constant *Wrapped = dyn_cast<Constant>(Operand)) {
          Referenced = dyn_cast<GlobalVariable>(Wrapped->stripPointerCasts());
        }
      }
      if (Referenced != nullptr) {
        Globals.insert(Referenced);
      }
    }
  }
}

接下来我们需要过滤掉不能加密的全局变量,比如说外部的全局变量、类型不符合要求的全局变量、ObjC的元信息、LLVM的元信息等都属于此列。

直接上代码吧没啥好详解的:

// Decide whether GV holds an encryptable string. Skipped: globals without an
// initializer, globals in the "llvm.metadata" section or in any section whose
// name contains "__objc", and globals whose name contains "OBJC".
if (GV->hasInitializer() &&
    GV->getSection() != StringRef("llvm.metadata") &&
    GV->getSection().find(StringRef("__objc")) == string::npos &&
    GV->getName().find("OBJC") == string::npos) {
  if (isa<ConstantDataSequential>(GV->getInitializer()) ||
      isa<ConstantStruct>(GV->getInitializer())) {
    ConstantDataSequential *CDS = nullptr;
    if (ConstantDataSequential *Direct =
            dyn_cast<ConstantDataSequential>(GV->getInitializer())) {
      // Plain constant data array: the global itself is the string.
      CDS = Direct;
    } else {
      // Struct initializer: only struct.__NSConstantString_tag is understood.
      // Its operand 2 points at the global holding the actual characters.
      StructType *CFStringTy = Func->getParent()->getTypeByName(
          "struct.__NSConstantString_tag");
      ConstantStruct *CS = cast<ConstantStruct>(GV->getInitializer());
      if (CFStringTy == nullptr || CS->getType() != CFStringTy) {
        continue;
      }
      // Skip instead of asserting when the struct does not point at a global
      // with a constant-data initializer.
      GlobalVariable *Backing =
          dyn_cast<GlobalVariable>(CS->getOperand(2)->stripPointerCasts());
      if (Backing == nullptr || !Backing->hasInitializer()) {
        continue;
      }
      CDS = dyn_cast<ConstantDataSequential>(Backing->getInitializer());
      if (CDS == nullptr) {
        continue;
      }
      // From here on GV names the global that stores the characters.
      GV = Backing;
    }
    Type *memberType = CDS->getElementType();
    // Ignore non-IntegerType
    if (!isa<IntegerType>(memberType)) {
      continue;
    }
    // Drop `constant` only after every filter has passed, and on the global
    // that will really be rewritten in place: stores to a global still marked
    // constant are not allowed, and skipped globals must stay untouched.
    GV->setConstant(false);
  }

这里我们除了过滤之外,还负责了从struct.__NSConstantString_tag结构体中提取真实的字符串常量,判断数组所包含的数据类型是不是整数的操作。

接下来我们开始生成对应的解密Key.

if (keymap.count(GV) == 0) {
  // No key recorded for this global yet: build one key element per string
  // element, with the same integer width as the string's elements. Values
  // come from cryptoutils (presumably random -- defined outside this excerpt).
  auto &Ctx = GV->getParent()->getContext();
  Constant *KeyConst = nullptr;
  if (intType == Type::getInt8Ty(Ctx)) {
    vector<uint8_t> keys;
    for (unsigned i = 0; i < CDS->getNumElements(); i++) {
      keys.push_back(cryptoutils->get_uint8_t());
    }
    KeyConst = ConstantDataVector::get(Ctx, ArrayRef<uint8_t>(keys));
  } else if (intType == Type::getInt16Ty(Ctx)) {
    vector<uint16_t> keys;
    for (unsigned i = 0; i < CDS->getNumElements(); i++) {
      keys.push_back(cryptoutils->get_uint16_t());
    }
    KeyConst = ConstantDataVector::get(Ctx, ArrayRef<uint16_t>(keys));
  } else if (intType == Type::getInt32Ty(Ctx)) {
    vector<uint32_t> keys;
    for (unsigned i = 0; i < CDS->getNumElements(); i++) {
      keys.push_back(cryptoutils->get_uint32_t());
    }
    KeyConst = ConstantDataVector::get(Ctx, ArrayRef<uint32_t>(keys));
  } else if (intType == Type::getInt64Ty(Ctx)) {
    vector<uint64_t> keys;
    for (unsigned i = 0; i < CDS->getNumElements(); i++) {
      keys.push_back(cryptoutils->get_uint64_t());
    }
    KeyConst = ConstantDataVector::get(Ctx, ArrayRef<uint64_t>(keys));
  } else {
    // Element widths other than 8/16/32/64 bits are not supported.
    errs() << "Unsupported CDS Type\n";
    abort();
  }
  // The key is stored as a vector constant (not an array) so it can be used
  // directly as an operand of the XOR emitted later.
  keymap[GV] = KeyConst;
}

这样,和Armariris不同的是,我们为每一项都生成了一组对应的Key.

接下来我们在函数的第一个基本块(EntryBlock)寻找适合插入指令的没有Phi Node等杂项的位置。(实际上函数的EntryBlock不能也不会有PhiNode,具体意思我会在末尾补充)

使用IRBuilder<> IRB(Func->getEntryBlock().getFirstNonPHIOrDbgOrLifetime());创建IRBuilder。IRBuilder是LLVM提供的、用于方便地插入指令的辅助模板类。

我们首先创造一个GEP来获得原始变量的指针,然后将其从原来的[数量 x 单个字符大小]BitCast成我们需要的类型。最后加载,完成异或操作后写回原始变量。在Terminator处的处理同理。

注意这是一个Hack,并且只能在关闭断言的构建(例如MinSizeRel)下使用才不会触发Assert。按照规定,BinaryOperator的整数运算只接受整数或整数Vector作为操作数,不接受Array,而clang前端为字符串生成的是Array。再次强调,稳定的实现请参照Hikari的源码。

Value *zero = ConstantInt::get(
Type::getInt32Ty(GV->getParent()->getContext()), 0);//因为我们从头开始加密。所以gep的索引都是0
Value *zeroes[] = {zero,zero};
Value *GEP = IRB.CreateInBoundsGEP(GV, zeroes);
//BinaryOperations don't take CDAs,only CDVs
//FIXME: Figure out if CDA and CDV has same mem layout
Value* BCI=IRB.CreateBitCast(GEP,keymap[GV]->getType()->getPointerTo());
LoadInst *LI = IRB.CreateLoad(BCI);//ArrayType
Value *XOR = IRB.CreateXor(LI, keymap[GV]);
IRB.CreateStore(XOR, BCI);
for (Instruction *I : Terminators) {
IRBuilder<> IRB(I);
Value *zero = ConstantInt::get(
Type::getInt32Ty(GV->getParent()->getContext()), 0);
Value *zeroes[] = {zero, zero};
Value *GEP = IRB.CreateInBoundsGEP(GV, zeroes);
Value* BCI=IRB.CreateBitCast(GEP,keymap[GV]->getType()->getPointerTo());
LoadInst *LI = IRB.CreateLoad(BCI);//ArrayType
Value *XOR = IRB.CreateXor(LI, keymap[GV]);
IRB.CreateStore(XOR,BCI);
}

最后,我们在完成后调用我们的EncryptGVs。

我们遍历之前创建的映射表。对于unicode类型的字符串编译器默认会放在__TEXT这个不可写的段,导致我们的XOR触发操作系统保护异常,这需要我们进行修复:

void EncryptGVs(Module &M){
  // Replaces the initializer of every global recorded in keymap with
  // (original element XOR key element), so the emitted data is no longer
  // plaintext. Aborts on element widths other than 8/16/32/64 bits.
  auto &Ctx = M.getContext();
  for (auto &Entry : keymap) {
    GlobalVariable *GV = Entry.first;
    assert(GV->hasInitializer() && "Encrypted GV doesn't have initializer");
    ConstantDataSequential *Key = cast<ConstantDataSequential>(Entry.second);
    ConstantDataSequential *GVInitializer =
        cast<ConstantDataSequential>(GV->getInitializer());
    assert(Key->getNumElements() == GVInitializer->getNumElements() &&
           "Key and String size mismatch!");
    assert(Key->getElementType() == GVInitializer->getElementType() &&
           "Key and String type mismatch!");
    IntegerType *intType = cast<IntegerType>(Key->getElementType());

    // A global left in a __TEXT section would not be writable at run time
    // (EXC_i386_GPFLT or the platform's equivalent), so move it to
    // __DATA,__const.
    if (GV->getSection().find("__TEXT") != string::npos) {
      GV->setSection("__DATA,__const");
    }

    // Build the XOR-ed element list in the width matching the element type.
    Constant *newInit = nullptr;
    if (intType == Type::getInt8Ty(Ctx)) {
      vector<uint8_t> Encrypted;
      for (unsigned i = 0; i < Key->getNumElements(); i++) {
        Encrypted.push_back(Key->getElementAsInteger(i) ^
                            GVInitializer->getElementAsInteger(i));
      }
      newInit = ConstantDataArray::get(Ctx, ArrayRef<uint8_t>(Encrypted));
    } else if (intType == Type::getInt16Ty(Ctx)) {
      vector<uint16_t> Encrypted;
      for (unsigned i = 0; i < Key->getNumElements(); i++) {
        Encrypted.push_back(Key->getElementAsInteger(i) ^
                            GVInitializer->getElementAsInteger(i));
      }
      newInit = ConstantDataArray::get(Ctx, ArrayRef<uint16_t>(Encrypted));
    } else if (intType == Type::getInt32Ty(Ctx)) {
      vector<uint32_t> Encrypted;
      for (unsigned i = 0; i < Key->getNumElements(); i++) {
        Encrypted.push_back(Key->getElementAsInteger(i) ^
                            GVInitializer->getElementAsInteger(i));
      }
      newInit = ConstantDataArray::get(Ctx, ArrayRef<uint32_t>(Encrypted));
    } else if (intType == Type::getInt64Ty(Ctx)) {
      vector<uint64_t> Encrypted;
      for (unsigned i = 0; i < Key->getNumElements(); i++) {
        Encrypted.push_back(Key->getElementAsInteger(i) ^
                            GVInitializer->getElementAsInteger(i));
      }
      newInit = ConstantDataArray::get(Ctx, ArrayRef<uint64_t>(Encrypted));
    } else {
      errs() << "Unsupported CDS Type\n"<<*intType<<"\n";
      abort();
    }
    GV->setInitializer(newInit);

    // Log the old initializer next to the one just installed.
    errs()<<"Rewritten GlobalVariable:"<<*GVInitializer<<" To:"<<*(GV->getInitializer())<<"\n";
  }
}

收尾

  • PhiNode是一种取值随控制流来源而变化的值:它根据控制流是从哪个前驱基本块跳转而来,选择对应的数值。例如,一个PhiNode可以在控制流从基本块A跳转而来时代表1,从基本块B跳转而来时代表2。
  • 我们这里的实现实际上非常简陋,例如当一个全局变量引用另一个文本全局变量时我们没有处理会导致编译失败。这部分需要单独处理并在llvm.global_ctors中解密,类似于Theos的 %ctor或者__attribute__((constructor))
  • 这实际上同样不是最好的实现方式。
    例如,当字符串的指针被保存下来(比如保存在堆上)并在函数返回之后继续使用时,这种实现会在函数返回时把字符串重新加密,之后读到的就是密文。

示例