好久没弄ollvm了,可以继续了,今天给ollvm新增了一个pass,用来加密字符串,这个pass是从别的库里面扒出来的。
本文是基于在Windows 上使用VS2017编译出来的ollvm,在这个基础上来添加。
第一步:
准备这个pass的两份代码(头文件和源文件)
头文件
#ifndef _STRING_OBFUSCATION_H_
#define _STRING_OBFUSCATION_H_
// LLVM include
#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/CryptoUtils.h"
// Namespace
using namespace llvm;
using namespace std;
namespace llvm {
// Factory for the string-obfuscation module pass. `flag` is forwarded to the
// pass constructor; in the implementation listed below, a pass built with
// flag == false returns from runOnModule() without modifying the module.
Pass *createStringObfuscation(bool flag);
}
#endif
源文件
#define DEBUG_TYPE "objdiv"
#include <cstdint>
#include <cstdio>
#include <sstream>
#include <string>
#include <vector>
#include "llvm/ADT/Statistic.h"
#include "llvm/CryptoUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Obfuscation/StringObfuscation.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
STATISTIC(GlobalsEncoded, "Counts number of global variables encoded");
// Debug trace helper: writes "File : [<file>](<line>) " followed by the
// printf-style message to stdout.
// `##__VA_ARGS__` (accepted by GCC, Clang and MSVC) removes the preceding comma
// when the macro is invoked with a format string only.
#define ZooPrint(fmt, ...) fprintf(stdout, "File : [%s](%d) " fmt, __FILE__, __LINE__, ##__VA_ARGS__)
namespace llvm {
// Associates a global that holds XOR-encrypted bytes with the one-byte key
// that was used, so the generated decode routine can undo the encryption.
struct encVar {
  GlobalVariable *var = nullptr; // global whose initializer is encrypted
  uint8_t key = 0;               // XOR key applied to every byte of `var`
};
class StringObfuscationPass : public llvm::ModulePass {
public:
static char ID; // pass identification
bool is\_flag = false;
StringObfuscationPass() : ModulePass(ID) {}
StringObfuscationPass(bool flag) : ModulePass(ID)
{
is\_flag = flag;
}
virtual bool runOnModule(Module &M) {
ZooPrint(" Run On Module : %d \\n", is\_flag);
if (!is\_flag)
return false;
std::vector<GlobalVariable\*> toDelConstGlob;
//std::vector<GlobalVariable\*> encGlob;
std::vector<encVar\*> encGlob;
ZooPrint(" M.Size : %d \\n", M.size());
int i = ;
for (Module::global\_iterator gi = M.global\_begin(), ge = M.global\_end(); gi != ge; ++gi)
{
#if 0
// 老式代码,原来的样子
@.str = private unnamed_addr constant[ x i8] c"\E4\BD\A0\E5\A5\BD\E4\B8\96\E7\95\8C\00", align
@__CFConstantStringClassReference = external global[ x i32]
@.str. = private unnamed_addr constant[ x i16][i16 , i16 , i16 ], section "__TEXT,__ustring", align
// 新式字符串的样子
@"\01??_C@_07CHPFNFHA@123456?6?$AA@" = linkonce_odr unnamed_addr constant [ x i8] c"123456\0A\00", comdat, align
@"\01??_C@_03PMGGPEJJ@?$CFd?6?$AA@" = linkonce_odr unnamed_addr constant [ x i8] c"%d\0A\00", comdat, align
@__local_stdio_printf_options._OptionsStorage = internal global i64 , align
#endif
// Loop over all global variables
GlobalVariable* gv = &(*gi);
//errs() << "Global var " << gv->getName();
//std::string::size_type str_idx = gv->getName().str().find(".str.");
std::string section(gv->getSection());
ZooPrint(" %d : String : \\"%s\\" , section : \\"%s\\" , isConstant : %d , hasInitializer : %d , isa : %d , r : %d \\n", i++, gv->getName().str().c\_str(), section.c\_str(), gv->isConstant(), gv->hasInitializer(), isa<ConstantDataSequential>(gv->getInitializer()), gv->getName().str().substr(, ) == "\\"\\x01??\_C@\_");
// ZooPrint(" 0x%02X 0x%02X 0x%02X 0x%02X 0x%02X 0x%02X 0x%02X 0x%02X \\n", gv->getName()\[0\] & 0xFF, gv->getName()\[1\] & 0xFF, gv->getName()\[2\] & 0xFF, gv->getName()\[3\] & 0xFF, gv->getName()\[4\] & 0xFF, gv->getName()\[5\] & 0xFF, gv->getName()\[6\] & 0xFF, gv->getName()\[7\] & 0xFF);
// Let's encode the static ones
//if (gv->getName().str().substr(0, 4) == ".str"&&
if (gv->getName().str().substr(, ) == "\\x01??\_C@\_" &&
gv->isConstant() &&
gv->hasInitializer() &&
isa<ConstantDataSequential>(gv->getInitializer()) &&
section != "llvm.metadata" &&
section.find("\_\_objc\_methname") == std::string::npos
/\*&&gv->getType()->getArrayElementType()->getArrayElementType()->isIntegerTy()\*/)
{
ZooPrint(" In Global Encode \\n");
++GlobalsEncoded;
//errs() << " is constant";
// Duplicate global variable
GlobalVariable \*dynGV = new GlobalVariable(M,
gv->getType()->getElementType(),
!(gv->isConstant()), gv->getLinkage(),
(Constant\*), gv->getName(),
(GlobalVariable\*),
gv->getThreadLocalMode(),
gv->getType()->getAddressSpace());
// dynGV->copyAttributesFrom(gv);
dynGV->setInitializer(gv->getInitializer());
std::string tmp = gv->getName().str();
// errs()<<"GV: "<<\*gv<<"\\n";
Constant \*initializer = gv->getInitializer();
ConstantDataSequential \*cdata = dyn\_cast<ConstantDataSequential>(initializer);
if (cdata) {
const char \*orig = cdata->getRawDataValues().data();
unsigned len = cdata->getNumElements()\*cdata->getElementByteSize();
encVar \*cur = new encVar();
cur->var = dynGV;
cur->key = llvm::cryptoutils->get\_uint8\_t();
// casting away const is undef. behavior in C++
// TODO a clean implementation would retrieve the data, generate a new constant
// set the correct type, and copy the data over.
//char \*encr = new char\[len\];
//Constant \*initnew = ConstantDataArray::getString(M.getContext(), encr, true);
char \*encr = const\_cast<char \*>(orig);
// Simple xor encoding
for (unsigned i = ; i != len; ++i) {
encr\[i\] = orig\[i\] ^ cur->key;
}
// FIXME Second part of the unclean hack.
dynGV->setInitializer(initializer);
// Prepare to add decode function for this variable
encGlob.push\_back(cur);
}
else {
// just copying default initializer for now
dynGV->setInitializer(initializer);
}
// redirect references to new GV and remove old one
gv->replaceAllUsesWith(dynGV);
toDelConstGlob.push\_back(gv);
}
}
// actuallte delete marked globals
for (unsigned i = , e = toDelConstGlob.size(); i != e; ++i)
toDelConstGlob\[i\]->eraseFromParent();
addDecodeFunction(&M, &encGlob);
return true;
}
private:
void addDecodeFunction(Module \*mod, std::vector<encVar\*> \*gvars) {
ZooPrint(" Add Decode Function \\n");
// Declare and add the function definition
//errs()<<"Successful enter decode function"<<"\\n";
std::vector<Type\*>FuncTy\_args;
FunctionType\* FuncTy = FunctionType::get(
/\*Result=\*/Type::getVoidTy(mod->getContext()), // returning void
/\*Params=\*/FuncTy\_args, // taking no args
/\*isVarArg=\*/false);
uint64\_t StringObfDecodeRandomName = cryptoutils->get\_uint64\_t();
std::string random\_str;
std::stringstream random\_stream;
random\_stream << StringObfDecodeRandomName;
random\_stream >> random\_str;
StringObfDecodeRandomName++;
Constant\* c = mod->getOrInsertFunction(".datadiv\_decode" + random\_str, FuncTy);
Function\* fdecode = cast<Function>(c);
fdecode->setCallingConv(CallingConv::C);
BasicBlock\* entry = BasicBlock::Create(mod->getContext(), "entry", fdecode);
IRBuilder<> builder(mod->getContext());
builder.SetInsertPoint(entry);
for (unsigned i = , e = gvars->size(); i != e; ++i) {
GlobalVariable \*gvar = (\*gvars)\[i\]->var;
uint8\_t key = (\*gvars)\[i\]->key;
Constant \*init = gvar->getInitializer();
ConstantDataSequential \*cdata = dyn\_cast<ConstantDataSequential>(init);
unsigned len = cdata->getNumElements()\*cdata->getElementByteSize();
--len;
BasicBlock \*preHeaderBB = builder.GetInsertBlock();
BasicBlock\* for\_body = BasicBlock::Create(mod->getContext(), "for-body", fdecode);
BasicBlock\* for\_end = BasicBlock::Create(mod->getContext(), "for-end", fdecode);
builder.CreateBr(for\_body);
builder.SetInsertPoint(for\_body);
PHINode \*variable = builder.CreatePHI(Type::getInt32Ty(mod->getContext()), , "i");
Value \*startValue = builder.getInt32();
Value \*endValue = builder.getInt32(len);
variable->addIncoming(startValue, preHeaderBB);
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//LoadInst \*Load=builder.CreateLoad(gvar);
//errs()<<"Load: "<<\*(Load->getPointerOperand())<<"\\n";
Value\* indexList\[\] = { ConstantInt::get(variable->getType(), ), variable };
Value \*const\_key = builder.getInt8(key);
Value \*GEP = builder.CreateGEP(gvar, ArrayRef<Value\*>(indexList, ), "arrayIdx");
LoadInst \*loadElement = builder.CreateLoad(GEP, false);
loadElement->setAlignment();
//errs()<<"Type: "<<\*loadElement<<"\\n";
//CastInst\* extended = new ZExtInst(const\_key, loadElement->getType(), "extended", for\_body);
//Value\* extended = builder.CreateZExtOrBitCast(const\_key, loadElement->getType(),"extended");
Value \*Xor = builder.CreateXor(loadElement, const\_key, "xor");
StoreInst \*Store = builder.CreateStore(Xor, GEP, false);
Store->setAlignment();
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Value \*stepValue = builder.getInt32();
Value \*nextValue = builder.CreateAdd(variable, stepValue, "next-value");
Value \*endCondition = builder.CreateICmpULT(variable, endValue, "end-condition");
endCondition = builder.CreateICmpNE(endCondition, builder.getInt1(), "loop-condition");
BasicBlock \*loopEndBB = builder.GetInsertBlock();
builder.CreateCondBr(endCondition, loopEndBB, for\_end);
builder.SetInsertPoint(for\_end);
variable->addIncoming(nextValue, loopEndBB);
}
builder.CreateRetVoid();
appendToGlobalCtors(\*mod, fdecode, );
}
};
// Factory for StringObfuscationPass; defined after this namespace block.
Pass *createStringObfuscation(bool flag);
}
#if 0
RegisterPass(const char *PassArg, const char *Name, bool CFGOnly = false, bool is_analysis = false)
上面这个是RegisterPass的构造函数。
参数说明:
template<typename passName> :模板参数,即要注册的pass类;
PassArg :opt调用时所用的命令行参数;
Name :此pass的简要说明;
CFGOnly :如果一个遍历CFG而不修改它,那么这个参数被设置为true;
is_analysis :如果一个Pass是一个分析Pass,例如dominator tree pass,那么这个参数被设置为true。
例子:
static RegisterPass<Hello> X("hello", "Hello World Pass", false, false);
#endif
// Storage for the pass identifier declared in the class.
char StringObfuscationPass::ID = 0;
// Registers the pass under a command-line name.
// NOTE(review): the registration arguments were lost in the published listing.
// The name and description below follow the upstream pass this code was taken
// from; confirm them against the real source. The last two flags are false
// because the pass rewrites globals (not CFG-only) and is not an analysis.
static RegisterPass<StringObfuscationPass> X("GVDiv", "Global variable (i.e., const char*) diversification pass", false, false);
// Creates a heap-allocated StringObfuscationPass constructed with `flag` and
// returns the raw pointer to the caller.
Pass *llvm::createStringObfuscation(bool flag) {
  // The message is passed as a variadic argument so the macro expansion never
  // ends in a dangling comma on strictly conforming preprocessors.
  ZooPrint("%s", "new my pass \n");
  return new StringObfuscationPass(flag);
}
第二步:
将头文件放在如下位置:ollvm\obfuscator-llvm-4.0\include\llvm\Transforms\Obfuscation\StringObfuscation.h
将源文件放在如下位置:ollvm\obfuscator-llvm-4.0\lib\Transforms\Obfuscation\StringEncode.cpp
第三步:
将源文件放到如下工程中
第四步:
在此文件中新增代码:ollvm\obfuscator-llvm-4.0\lib\Transforms\IPO\PassManagerBuilder.cpp
新增导入头文件
#include "llvm/Transforms/Obfuscation/StringObfuscation.h"
新增全局变量代码如下
// -seed=<string>: seed for the random generator (empty by default).
static cl::opt<std::string> Seed("seed", cl::init(""),
cl::desc("seed for the random"));
// Global switch: set from the -sobf command-line option (off by default).
static cl::opt<bool> StringObf("sobf", cl::init(false),
cl::desc("Enable the string obfuscation"));
在:PassManagerBuilder::populateModulePassManager 函数中,新增挂载新的pass代码,如下
MPM.add(createStringObfuscation(StringObf));
意义为根据全局开关来判断是否启用当前pass
经过以上四步,问题全部解决了,直接重新编译ollvm即可。
后续可以修改pass代码,可以修改解密函数。
新增其他pass的步骤同上。
使用方式如下
G:\ollvm\Test>G:\ollvm\build\RelWithDebInfo\bin\clang.exe -mllvm -sobf -mllvm -fla main.c
含义是,开启字符串加密,并且启动代码扁平化
效果:
源代码如下
编译后如下
已经开启了代码扁平化,原始字符串也已经不一样了,具体情况,
看data段就好了:
已经完全没个人样了
重点在最后,忘了,补充一句,由于字符串在ollvm里面是以UTF8的格式保存的,所以中文字符串天然就是乱码,
有时间想办法来解决一下中文字符串的乱码问题。
这个pass挺简单的,我也没怎么改就拿来用了,后续有时间再改一下吧。
手机扫一扫
移动阅读更方便
你可能感兴趣的文章