ollvm 新增字符串加密功能
阅读原文时间:2023年07月11日阅读:3

好久没弄ollvm了,可以继续了,今天给ollvm新增了一个pass,用来加密字符串,这个pass是从别的库里面扒出来的。

本文是基于在Windows 上使用VS2017编译出来的ollvm,在这个基础上来添加。

第一步:

准备这个pass的两个代码文件(头文件和源文件)

头文件

#ifndef _STRING_OBFUSCATION_H_
#define _STRING_OBFUSCATION_H_

// LLVM include
#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/CryptoUtils.h"

// Namespace
using namespace llvm;
using namespace std;

namespace llvm {
/// Creates a new string-obfuscation module pass.
/// `flag` is stored by the pass as its enable switch: when it is false the
/// pass's runOnModule returns immediately without modifying the module.
/// The pass is allocated with `new` and returned as a raw pointer.
Pass *createStringObfuscation(bool flag);
}

#endif

源文件

#define DEBUG_TYPE "objdiv"
#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/CryptoUtils.h"
#include "llvm/Transforms/Obfuscation/StringObfuscation.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;

STATISTIC(GlobalsEncoded, "Counts number of global variables encoded");

// Debug trace helper: writes "File : [<file>](<line>) " followed by the
// printf-style message `fmt` (which must be a string literal) to stdout and
// evaluates to fprintf's return value.
// `##__VA_ARGS__` removes the preceding comma when no extra arguments are
// given, so both ZooPrint("text\n") and ZooPrint("%d\n", x) expand correctly.
#define ZooPrint(fmt, ...) fprintf(stdout, "File : [%s](%d) " fmt, __FILE__, __LINE__, ##__VA_ARGS__)

namespace llvm {

 /// Pairs an encrypted global with the key needed to decrypt it.
 struct encVar {
     GlobalVariable *var = nullptr; // global variable holding the XOR-encrypted bytes
     uint8_t key = 0;               // single-byte XOR key that was applied to `var`
 };

 class StringObfuscationPass : public llvm::ModulePass {  
 public:  
     static char ID; // pass identification  
     bool is\_flag = false;  
     StringObfuscationPass() : ModulePass(ID) {}  
     StringObfuscationPass(bool flag) : ModulePass(ID)  
     {  
         is\_flag = flag;  
     }

     virtual bool runOnModule(Module &M) {  
         ZooPrint(" Run On Module : %d \\n", is\_flag);  
         if (!is\_flag)  
             return false;  
         std::vector<GlobalVariable\*> toDelConstGlob;  
         //std::vector<GlobalVariable\*> encGlob;  
         std::vector<encVar\*> encGlob;  
         ZooPrint(" M.Size : %d \\n", M.size());  
         int i = ;  
         for (Module::global\_iterator gi = M.global\_begin(), ge = M.global\_end(); gi != ge; ++gi)  
         {

#if 0
// 老式代码,原来的样子
@.str = private unnamed_addr constant[ x i8] c"\E4\BD\A0\E5\A5\BD\E4\B8\96\E7\95\8C\00", align
@__CFConstantStringClassReference = external global[ x i32]
@.str. = private unnamed_addr constant[ x i16][i16 , i16 , i16 ], section "__TEXT,__ustring", align
// 新式字符串的样子
@"\01??_C@_07CHPFNFHA@123456?6?$AA@" = linkonce_odr unnamed_addr constant [ x i8] c"123456\0A\00", comdat, align
@"\01??_C@_03PMGGPEJJ@?$CFd?6?$AA@" = linkonce_odr unnamed_addr constant [ x i8] c"%d\0A\00", comdat, align
@__local_stdio_printf_options._OptionsStorage = internal global i64 , align
#endif
// Loop over all global variables
GlobalVariable* gv = &(*gi);
//errs() << "Global var " << gv->getName();
//std::string::size_type str_idx = gv->getName().str().find(".str.");
std::string section(gv->getSection());

             ZooPrint(" %d : String : \\"%s\\" , section : \\"%s\\" , isConstant : %d , hasInitializer : %d , isa : %d , r : %d \\n", i++, gv->getName().str().c\_str(), section.c\_str(), gv->isConstant(), gv->hasInitializer(), isa<ConstantDataSequential>(gv->getInitializer()), gv->getName().str().substr(, ) == "\\"\\x01??\_C@\_");  
             //    ZooPrint("      0x%02X 0x%02X 0x%02X 0x%02X 0x%02X 0x%02X 0x%02X 0x%02X \\n", gv->getName()\[0\] & 0xFF, gv->getName()\[1\] & 0xFF, gv->getName()\[2\] & 0xFF, gv->getName()\[3\] & 0xFF, gv->getName()\[4\] & 0xFF, gv->getName()\[5\] & 0xFF, gv->getName()\[6\] & 0xFF, gv->getName()\[7\] & 0xFF);

             // Let's encode the static ones  
             //if (gv->getName().str().substr(0, 4) == ".str"&&  
             if (gv->getName().str().substr(, ) == "\\x01??\_C@\_" &&  
                 gv->isConstant() &&  
                 gv->hasInitializer() &&  
                 isa<ConstantDataSequential>(gv->getInitializer()) &&  
                 section != "llvm.metadata" &&  
                 section.find("\_\_objc\_methname") == std::string::npos  
                 /\*&&gv->getType()->getArrayElementType()->getArrayElementType()->isIntegerTy()\*/)  
             {  
                 ZooPrint(" In Global Encode \\n");  
                 ++GlobalsEncoded;  
                 //errs() << " is constant";

                 // Duplicate global variable  
                 GlobalVariable \*dynGV = new GlobalVariable(M,  
                     gv->getType()->getElementType(),  
                     !(gv->isConstant()), gv->getLinkage(),  
                     (Constant\*), gv->getName(),  
                     (GlobalVariable\*),  
                     gv->getThreadLocalMode(),  
                     gv->getType()->getAddressSpace());  
                 // dynGV->copyAttributesFrom(gv);  
                 dynGV->setInitializer(gv->getInitializer());

                 std::string tmp = gv->getName().str();  
                 //  errs()<<"GV: "<<\*gv<<"\\n";

                 Constant \*initializer = gv->getInitializer();  
                 ConstantDataSequential \*cdata = dyn\_cast<ConstantDataSequential>(initializer);  
                 if (cdata) {  
                     const char \*orig = cdata->getRawDataValues().data();  
                     unsigned len = cdata->getNumElements()\*cdata->getElementByteSize();

                     encVar \*cur = new encVar();  
                     cur->var = dynGV;  
                     cur->key = llvm::cryptoutils->get\_uint8\_t();  
                     // casting away const is undef. behavior in C++  
                     // TODO a clean implementation would retrieve the data, generate a new constant  
                     // set the correct type, and copy the data over.  
                     //char \*encr = new char\[len\];  
                     //Constant \*initnew = ConstantDataArray::getString(M.getContext(), encr, true);  
                     char \*encr = const\_cast<char \*>(orig);  
                     // Simple xor encoding  
                     for (unsigned i = ; i != len; ++i) {  
                         encr\[i\] = orig\[i\] ^ cur->key;  
                     }

                     // FIXME Second part of the unclean hack.  
                     dynGV->setInitializer(initializer);

                     // Prepare to add decode function for this variable  
                     encGlob.push\_back(cur);  
                 }  
                 else {  
                     // just copying default initializer for now  
                     dynGV->setInitializer(initializer);  
                 }

                 // redirect references to new GV and remove old one  
                 gv->replaceAllUsesWith(dynGV);  
                 toDelConstGlob.push\_back(gv);

             }  
         }

         // actuallte delete marked globals  
         for (unsigned i = , e = toDelConstGlob.size(); i != e; ++i)  
             toDelConstGlob\[i\]->eraseFromParent();

         addDecodeFunction(&M, &encGlob);

         return true;  
     }

 private:  
     void addDecodeFunction(Module \*mod, std::vector<encVar\*> \*gvars) {  
         ZooPrint(" Add Decode Function \\n");  
         // Declare and add the function definition  
         //errs()<<"Successful enter decode function"<<"\\n";  
         std::vector<Type\*>FuncTy\_args;  
         FunctionType\* FuncTy = FunctionType::get(  
             /\*Result=\*/Type::getVoidTy(mod->getContext()),  // returning void  
             /\*Params=\*/FuncTy\_args,  // taking no args  
             /\*isVarArg=\*/false);  
         uint64\_t StringObfDecodeRandomName = cryptoutils->get\_uint64\_t();  
         std::string  random\_str;  
         std::stringstream random\_stream;  
         random\_stream << StringObfDecodeRandomName;  
         random\_stream >> random\_str;  
         StringObfDecodeRandomName++;  
         Constant\* c = mod->getOrInsertFunction(".datadiv\_decode" + random\_str, FuncTy);  
         Function\* fdecode = cast<Function>(c);  
         fdecode->setCallingConv(CallingConv::C);

         BasicBlock\* entry = BasicBlock::Create(mod->getContext(), "entry", fdecode);

         IRBuilder<> builder(mod->getContext());  
         builder.SetInsertPoint(entry);

         for (unsigned i = , e = gvars->size(); i != e; ++i) {  
             GlobalVariable \*gvar = (\*gvars)\[i\]->var;  
             uint8\_t key = (\*gvars)\[i\]->key;

             Constant \*init = gvar->getInitializer();  
             ConstantDataSequential \*cdata = dyn\_cast<ConstantDataSequential>(init);

             unsigned len = cdata->getNumElements()\*cdata->getElementByteSize();  
             --len;

             BasicBlock \*preHeaderBB = builder.GetInsertBlock();  
             BasicBlock\* for\_body = BasicBlock::Create(mod->getContext(), "for-body", fdecode);  
             BasicBlock\* for\_end = BasicBlock::Create(mod->getContext(), "for-end", fdecode);  
             builder.CreateBr(for\_body);  
             builder.SetInsertPoint(for\_body);  
             PHINode \*variable = builder.CreatePHI(Type::getInt32Ty(mod->getContext()), , "i");  
             Value \*startValue = builder.getInt32();  
             Value \*endValue = builder.getInt32(len);  
             variable->addIncoming(startValue, preHeaderBB);  
             /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

             //LoadInst \*Load=builder.CreateLoad(gvar);  
             //errs()<<"Load: "<<\*(Load->getPointerOperand())<<"\\n";  
             Value\* indexList\[\] = { ConstantInt::get(variable->getType(), ), variable };  
             Value \*const\_key = builder.getInt8(key);  
             Value \*GEP = builder.CreateGEP(gvar, ArrayRef<Value\*>(indexList, ), "arrayIdx");  
             LoadInst \*loadElement = builder.CreateLoad(GEP, false);  
             loadElement->setAlignment();  
             //errs()<<"Type: "<<\*loadElement<<"\\n";  
             //CastInst\* extended = new ZExtInst(const\_key, loadElement->getType(), "extended", for\_body);  
             //Value\* extended = builder.CreateZExtOrBitCast(const\_key, loadElement->getType(),"extended");  
             Value \*Xor = builder.CreateXor(loadElement, const\_key, "xor");  
             StoreInst \*Store = builder.CreateStore(Xor, GEP, false);  
             Store->setAlignment();

             ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////  
             Value \*stepValue = builder.getInt32();  
             Value \*nextValue = builder.CreateAdd(variable, stepValue, "next-value");  
             Value \*endCondition = builder.CreateICmpULT(variable, endValue, "end-condition");  
             endCondition = builder.CreateICmpNE(endCondition, builder.getInt1(), "loop-condition");  
             BasicBlock \*loopEndBB = builder.GetInsertBlock();  
             builder.CreateCondBr(endCondition, loopEndBB, for\_end);  
             builder.SetInsertPoint(for\_end);  
             variable->addIncoming(nextValue, loopEndBB);

         }  
         builder.CreateRetVoid();  
         appendToGlobalCtors(\*mod, fdecode, );

     }

 };

 // Factory for StringObfuscationPass; `flag` becomes the pass's enable switch.
 Pass *createStringObfuscation(bool flag);

}

#if 0
RegisterPass(const char *PassArg, const char *Name, bool CFGOnly = false, bool is_analysis = false)

上面这个是RegisterPass的构造函数。
参数说明:

template :YourPassName;
PassArg :opt调用时所用的命令行参数;
Name :此pass的简要说明;
CFGOnly :如果一个Pass只遍历CFG而不修改它,那么这个参数被设置为true;
is_analysis :如果一个Pass是一个分析Pass,例如dominator tree pass,那么这个参数被设置为true。
例子:

static RegisterPass<Hello> X("hello", "Hello World Pass", false, false);
#endif

// Definition of the pass's static ID member, which is passed to ModulePass
// by the constructors.
char StringObfuscationPass::ID = 0;
// Registers the pass under the "GVDiv" argument. The last two arguments are
// CFGOnly and is_analysis; this pass rewrites globals, so it is not an
// analysis pass and is_analysis is false.
static RegisterPass<StringObfuscationPass> X("GVDiv", "Global variable (i.e., const char*) diversification pass", false, false);

// Builds a StringObfuscationPass configured with `flag`, logging the creation.
// The returned object is heap-allocated and handed to the caller.
Pass *llvm::createStringObfuscation(bool flag) {
  ZooPrint("new my pass \n");
  StringObfuscationPass *const created = new StringObfuscationPass(flag);
  return created;
}

第二步:

将头文件放在如下位置:ollvm\obfuscator-llvm-4.0\include\llvm\Transforms\Obfuscation\StringObfuscation.h

将源文件放在如下位置:ollvm\obfuscator-llvm-4.0\lib\Transforms\Obfuscation\StringEncode.cpp

第三步:

将源文件放到如下工程中

第四步:

在此文件中新增代码:ollvm\obfuscator-llvm-4.0\lib\Transforms\IPO\PassManagerBuilder.cpp

新增导入头文件

#include "llvm/Transforms/Obfuscation/StringObfuscation.h"

新增全局变量代码如下

// Command-line option "-seed": string value, empty by default.
static cl::opt<std::string> Seed("seed", cl::init(""),
                                 cl::desc("seed for the random"));

// Global switch "-sobf": string obfuscation is enabled only when it is set.
static cl::opt<bool> StringObf("sobf", cl::init(false),
                               cl::desc("Enable the string obfuscation"));

在:PassManagerBuilder::populateModulePassManager 函数中,新增挂载新的pass代码,如下

MPM.add(createStringObfuscation(StringObf));

意义为根据全局开关来判断是否启用当前pass

经过以上四步,问题全部解决了,直接重新编译ollvm即可。

后续可以修改pass代码,可以修改解密函数。

新增其他pass新增步骤也如上。

使用方式如下

G:\ollvm\Test>G:\ollvm\build\RelWithDebInfo\bin\clang.exe -mllvm -sobf -mllvm -fla main.c

含义是,开启字符串加密,并且启动代码扁平化

效果:

源代码如下

编译后如下

已经开启了代码扁平化,原始字符串也已经不一样了,具体情况,

看data段就好了:

已经完全没个人样了

重点在最后,忘了,补充一句,由于字符串在ollvm里面是以UTF8的格式保存的,所以中文字符串天然就是乱码,

有时间想办法来解决一下中文字符串的乱码问题。

这个pass挺简单的,我也没怎么改就拿来用了,后续有时间再改一下吧。

手机扫一扫

移动阅读更方便

阿里云服务器
腾讯云服务器
七牛云服务器

你可能感兴趣的文章