上一篇文章写了如何使用Python写一个简单的爬虫,批量抓取APK的下载链接。这篇文章记录下如何批量拆包APK文件并提取想要的信息。
下载Androguard.png
下载完成后得到一个压缩包,解压后进入目录,把Androguard目录下所有的文件拷贝至Python的根目录下,合并同名文件夹即可。
from androguard.core.bytecodes import apk, dvm
在Python命令行中执行上面的导入语句,如下图所示,没有提示任何错误信息即说明安装成功。
检查环境.png
注意:在cmd下直接调python命令行需要将Python加入到环境变量中。
首先APK文件可以用普通解压缩的方式拆包,如下图。
解压完毕.png
熟悉安卓开发的人肯定对解压完毕后的文件很熟悉。这里主要介绍部分文件的作用。
assets.png
库文件.png
乱码.png
没错,是乱码。这就说明仅靠解压缩APK的方式,是无法获得我们关心的信息的。
到此为止,相信大家对APK的结构有了一定了解。下面以获取APP申请的权限为例,使用Python完成APK的拆包并提取信息。
from androguard.core.bytecodes import apk, dvm
# NOTE(review): `app` is not defined in this snippet; presumably it is an
# apk.APK instance, as in the full script further down -- confirm.
app.get_permissions()
__author__ = \'Administrator\'
#coding=utf-8
from androguard.core.bytecodes import apk, dvm
from androguard.core.analysis import analysis
import re
# Running index used by writeToTxt to name its output files (1.txt, 2.txt, ...).
# A module-level ``global`` statement has no effect, so only the assignment
# is kept; writeToTxt declares ``global count`` itself before updating it.
count = 1
def get_permissions(path, filename):
    """Read the permissions declared by an APK, print and save them.

    Args:
        path: Path passed to ``apk.APK``.
        filename: Forwarded unchanged to ``writeToTxt``.

    Returns:
        The value returned by ``APK.get_permissions()``.
    """
    label = "Permission:"
    package = apk.APK(path)
    requested = package.get_permissions()
    print(requested)
    writeToTxt(label, requested, filename)
    return requested
def get_apis(path, filename):
    """Collect the method references an APK makes to classes it does not define.

    Every instruction of every method that has code is scanned; an
    instruction whose textual output matches ``Lclass;->name(args)...`` is
    recorded when ``Lclass;`` is not one of the classes defined in the dex.

    Args:
        path: Path passed to ``apk.APK``.
        filename: Forwarded unchanged to ``writeToTxt``.

    Returns:
        A sorted list of the distinct matched reference strings.
    """
    app = apk.APK(path)
    app_dex = dvm.DalvikVMFormat(app.get_dex())
    app_x = analysis.newVMAnalysis(app_dex)

    # A set keeps the per-instruction membership test O(1); the original list
    # was searched linearly for every matching instruction.
    own_classes = set(cc.get_name() for cc in app_dex.get_classes())

    # Compiled once instead of being looked up for every instruction.
    call_pattern = re.compile(r'(L[^;]*;)->[^\(]*\([^\)]*\).*')

    found = set()
    for method in app_dex.get_methods():
        # Methods without code have no instructions to scan, so skip them
        # before asking the analysis object for anything.
        if method.get_code() is None:
            continue
        method_analysis = app_x.get_method(method)
        for block in method_analysis.get_basic_blocks().get():
            for ins in block.get_instructions():
                match = call_pattern.search(ins.get_output())
                if match and match.group(1) not in own_classes:
                    found.add(match.group())

    methods = sorted(found)
    print("methods:" + "\n")
    print(methods)
    writeToTxt("Methods:", methods, filename)
    return methods
def get_providers(path, filename):
    """Read the content providers declared by an APK, print and save them.

    Args:
        path: Path passed to ``apk.APK``.
        filename: Forwarded unchanged to ``writeToTxt``.

    Returns:
        The value returned by ``APK.get_providers()``.
    """
    package = apk.APK(path)
    declared = package.get_providers()
    print("providers:" + "\n")
    print(declared)
    label = "Providers:"
    writeToTxt(label, declared, filename)
    return declared
def get_package(path, filename):
    """Read the package name of an APK, print and save it.

    Args:
        path: Path passed to ``apk.APK``.
        filename: Forwarded unchanged to ``writeToTxt``.

    Returns:
        The value returned by ``APK.get_package()``.
    """
    app = apk.APK(path)
    packname = app.get_package()
    print("packageName:" + "\n")
    print(packname)
    # writeToTxt iterates over its second argument. Passing the bare string
    # made it process the name one character at a time, so wrap it in a list
    # to have it handled as a single entry. (writeToTxt still keeps only the
    # text after the last '.' of each entry.)
    writeToTxt("PackageName:", [packname], filename)
    return packname
def get_activities(path, filename):
    """Read the activities declared by an APK, print and save them.

    Args:
        path: Path passed to ``apk.APK``.
        filename: Forwarded unchanged to ``writeToTxt``.

    Returns:
        The value returned by ``APK.get_activities()``.
    """
    package = apk.APK(path)
    declared = package.get_activities()
    print("ActivityName:" + "\n")
    print(declared)
    label = "Activitys:"
    writeToTxt(label, declared, filename)
    return declared
def get_receivers(path, filename):
    """Read the broadcast receivers declared by an APK, print and save them.

    Args:
        path: Path passed to ``apk.APK``.
        filename: Forwarded unchanged to ``writeToTxt``.

    Returns:
        The value returned by ``APK.get_receivers()``.
    """
    package = apk.APK(path)
    declared = package.get_receivers()
    print("Receivers:" + "\n")
    print(declared)
    label = "Receivers:"
    writeToTxt(label, declared, filename)
    return declared
def get_services(path, filename):
    """Read the services declared by an APK, print and save them.

    Args:
        path: Path passed to ``apk.APK``.
        filename: Forwarded unchanged to ``writeToTxt``.

    Returns:
        The value returned by ``APK.get_services()``.
    """
    package = apk.APK(path)
    declared = package.get_services()
    print("Services:" + "\n")
    print(declared)
    label = "Services:"
    writeToTxt(label, declared, filename)
    return declared
def writeToTxt(str, file, filename):
    """Write the last dotted component of each entry to ``<count>.txt``.

    The output file is named after the module-level ``count`` (``1.txt``,
    ``2.txt``, ...), created in the current working directory, and ``count``
    is incremented after a successful write. Each entry is followed by a tab.

    Args:
        str: Section label; currently not written to the file.
        file: Iterable of dotted strings, e.g. ``android.permission.INTERNET``.
        filename: Currently unused; the file name is derived from ``count``.
    """
    global count
    # ``with`` closes the handle even when an entry fails mid-write; the
    # original left the file open in that case.
    with open('%d' % count + '.txt', 'w') as fm:
        #fm.write(str)
        #fm.write("\n")
        for entry in file:
            fm.write(entry.split('.')[-1])
            fm.write("\t")
    count += 1
def main(path, apkname):
    """Run the enabled feature extractors on one APK.

    Only the permission extractor is active; the other extractors are kept
    commented out so they can be switched on when needed.

    Args:
        path: Path of the APK, passed through to the extractor.
        apkname: Passed through to the extractor as its ``filename`` argument.
    """
    get_permissions(path, apkname)
    # Optional extractors -- uncomment to enable:
    #get_apis(path, apkname)
    #get_providers(path, apkname)
    #get_package(path, apkname)
    #get_activities(path, apkname)
    #get_receivers(path, apkname)
    #get_services(path, apkname)
if __name__ == '__main__':
    # Stand-alone run with hard-coded sample inputs.
    main("D:/sample/Good", "sampleInfo.txt")
__author__ = \'Administrator\'
#-*- coding:GBK -*-
import os
import os.path
import sys
import subprocess
import getFeatures
# Directory tree that Packages.check() walks looking for files.
rootdir = "D:/Sample/Good//"
# Prefix used when building per-APK output paths.
destdir = "D:/Sample/workSample/badDone//"
# apktool invocation; only referenced by commented-out code.
command = "java -jar D://apktool.jar"
class Packages:
    """Walk a directory tree and run feature extraction on every ``.apk`` file."""

    def __init__(self, srcdir, desdir):
        """Remember the source and destination directories.

        Args:
            srcdir: Root of the directory tree that ``check`` walks.
            desdir: Destination directory; stored but not used by ``check``.
        """
        self.sdir = srcdir
        self.ddir = desdir

    def check(self):
        """Print every file name under ``self.sdir`` and process the APKs.

        For each file whose extension is exactly ``.apk``,
        ``getFeatures.main`` is called with the file's full path and its
        base name (without extension). An ``IOError`` raised by that call is
        printed and then terminates the program via ``sys.exit()``.
        """
        print("--------------------starting unpackage!---------------------")
        # Walk the directory given to the constructor; the original walked the
        # module-level ``rootdir`` and silently ignored ``srcdir``.
        for dirpath, dirnames, filenames in os.walk(self.sdir):
            for filename in filenames:
                thefile = os.path.join(dirpath, filename)
                apkname, extension = os.path.splitext(filename)
                print(filename)
                if extension != ".apk":
                    continue
                try:
                    getFeatures.main(thefile, apkname)
                except IOError as err:
                    print(err)
                    sys.exit()
                print("******************well done******************")
if __name__ == "__main__":
    # Build the batch processor for the sample directory and run it.
    unpacker = Packages(rootdir, 'e:/')
    unpacker.check()
程序运行.png
处理前.png
输出的txt.png
txt中权限.png
到此为止,我们就把安卓中的权限信息提取出来了。这为后面使用机器学习方式对安卓应用进行检测提供了基本的数据。在接下来的文章中将会进一步介绍如何使用Python实现机器学习的方式检测安卓恶意应用。