update readme, format app and comment app, update reqs
This commit is contained in:
parent
14682a55b5
commit
c4b0cc744d
1
.gitignore
vendored
1
.gitignore
vendored
@ -6,3 +6,4 @@ __pycache__/
|
|||||||
rag_data
|
rag_data
|
||||||
.chromadb
|
.chromadb
|
||||||
*temp*
|
*temp*
|
||||||
|
*downloads*
|
@ -1,2 +1,32 @@
|
|||||||
###### David Westgate 17 May 2024
|
###### David Westgate 17 May 2024
|
||||||
## HW4 for gensec
|
## HW5 for gensec
|
||||||
|
This application attempts to automatically solve CTF levels for CS492/CS592 Malware Reverse Engineering.
|
||||||
|
It does so by prompting the user for a specific binary level, and automatically fetching the level from the web.
|
||||||
|
If it is able to do this, the application will perform an object dump of this binary, and send it to the LLM with a prompt,
|
||||||
|
to see if the LLM can respond with the correct password.
|
||||||
|
|
||||||
|
### Setup + Run
|
||||||
|
Install python3, then
|
||||||
|
```
|
||||||
|
cd hw5
|
||||||
|
pip install -r requirnments.txt
|
||||||
|
cp .env.example .env #fill in env file with key
|
||||||
|
python3 app.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Results
|
||||||
|
On average, this application is not very good at solving the CTF levels. This is not very surprising, however, as these malware reverse engineering levels are technically difficult, and often require special tooling, debugging, and subversion of anti-disassembly and anti-debugging techniques.
|
||||||
|
|
||||||
|
That said, I tested this application with a handful of the binary files, focusing on the earlier/easier levels of the various chapters (avoiding excessive API cost prevents me from testing all levels). A few succeeded, while others failed by returning an incorrect password, or failed and acknowledged that they could not figure it out due to limitations.
|
||||||
|
|
||||||
|
#### Success
|
||||||
|
* Ch01StatA_Readelf
|
||||||
|
* Ch08Dbg_GdbIntro
|
||||||
|
#### Failed (wrong answer)
|
||||||
|
* Ch15AntiDis_FakeCallInt
|
||||||
|
* Ch21x64_ParamsStack
|
||||||
|
#### Failed (acknowledged)
|
||||||
|
* Ch15AntiDis_FakeCond
|
||||||
|
* Ch18PackUnp_UnpackEasy
|
||||||
|
|
||||||
|
I imagine this program may perform better for CTF levels of other classes like CS205 Computer Systems Programming
|
113
hw5/app.py
113
hw5/app.py
@ -1,46 +1,31 @@
|
|||||||
from typing import Iterable, Literal
|
import subprocess
|
||||||
from langchain_community.document_loaders.generic import GenericLoader
|
|
||||||
from langchain_community.document_loaders.parsers import LanguageParser
|
|
||||||
from langchain_openai import ChatOpenAI
|
|
||||||
from langchain_core.runnables import RunnablePassthrough
|
|
||||||
from langchain_core.prompts import PromptTemplate
|
|
||||||
from langchain_core.output_parsers import StrOutputParser
|
|
||||||
from langchain_community.document_loaders import FileSystemBlobLoader
|
|
||||||
from langchain_community.document_loaders.blob_loaders.schema import Blob, BlobLoader
|
|
||||||
from langchain_community.document_loaders import AsyncHtmlLoader
|
|
||||||
from langchain.chains import LLMChain
|
|
||||||
from langchain.chains import SimpleSequentialChain
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
import requests
|
import requests
|
||||||
import tempfile
|
import tempfile
|
||||||
import zipfile
|
import zipfile
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
from validators import url
|
from langchain_openai import ChatOpenAI
|
||||||
|
from langchain_core.runnables import RunnablePassthrough
|
||||||
|
from langchain_core.prompts import PromptTemplate
|
||||||
|
from langchain_core.output_parsers import StrOutputParser
|
||||||
|
from dotenv import load_dotenv
|
||||||
from time import time
|
from time import time
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
This application attempts to automatically solve CTF levels for CS492/CS592 Malware Reverse Engineering.
|
This application attempts to automatically solve CTF levels for CS492/CS592 Malware Reverse Engineering.
|
||||||
|
It does so by prompting the user for a specific binary level, and automatically fetching the level from the web.
|
||||||
|
If it is able to do this, the application will perform an object dump of this binary, and send it to the LLM with a prompt,
|
||||||
|
to see if the LLM can respond with the correct password (copied from README.md).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
# def get_rag_chain():
|
|
||||||
# return (
|
|
||||||
# {"context": retriever | format_docs, "question": RunnablePassthrough()}
|
|
||||||
# | prompt
|
|
||||||
# | llm
|
|
||||||
# | StrOutputParser()
|
|
||||||
# )
|
|
||||||
|
|
||||||
|
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
|
|
||||||
url = "https://cs492.oregonctf.org/"
|
url = "https://cs492.oregonctf.org/"
|
||||||
|
|
||||||
|
"""
|
||||||
|
Binary levels are grouped into various zip archives on the origin. This helps us choose the right zip to download.
|
||||||
|
"""
|
||||||
def get_group(chapter: int):
|
def get_group(chapter: int):
|
||||||
if chapter >= 1 and chapter <= 8:
|
if chapter >= 1 and chapter <= 8:
|
||||||
return "Ch01-08"
|
return "Ch01-08"
|
||||||
@ -54,6 +39,9 @@ def get_group(chapter: int):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
The downloads for the binaries require an authenticated session. Here, we initiate that session with default provided credentials
|
||||||
|
"""
|
||||||
def start_session():
|
def start_session():
|
||||||
payload = {"username": "demo0", "passwd": "malware"}
|
payload = {"username": "demo0", "passwd": "malware"}
|
||||||
|
|
||||||
@ -64,6 +52,10 @@ def start_session():
|
|||||||
session.post(url, data=payload, headers=headers)
|
session.post(url, data=payload, headers=headers)
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Once we have the level name and group, we download the zip file of the appropriate group into a scope based temporary directory.
|
||||||
|
We scan those files to find the one whose name matches the chosen level. If it is found, we move it into a persistent downloads directory for later use.
|
||||||
|
"""
|
||||||
def get_file_path(level, group):
|
def get_file_path(level, group):
|
||||||
payload = {
|
payload = {
|
||||||
"setname": group,
|
"setname": group,
|
||||||
@ -78,65 +70,74 @@ def get_file_path(level, group):
|
|||||||
with zipfile.ZipFile(zip_file) as z:
|
with zipfile.ZipFile(zip_file) as z:
|
||||||
z.extractall(temp_dir)
|
z.extractall(temp_dir)
|
||||||
extracted_files = os.listdir(temp_dir)
|
extracted_files = os.listdir(temp_dir)
|
||||||
# print(f"Extracted files: {extracted_files}")
|
|
||||||
for file_name in extracted_files:
|
for file_name in extracted_files:
|
||||||
if file_name == level:
|
if file_name == level:
|
||||||
temp_file_path = os.path.join(temp_dir, file_name)
|
temp_file_path = os.path.join(temp_dir, file_name)
|
||||||
saved_file_path = os.path.join("./temp", file_name)
|
saved_file_path = os.path.join("./downloads", file_name)
|
||||||
os.rename(temp_file_path, saved_file_path)
|
os.rename(temp_file_path, saved_file_path)
|
||||||
return saved_file_path
|
return saved_file_path
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Object dump the binary file, and send it in a chain with our prompt to the LLM
|
||||||
|
"""
|
||||||
def execute(path: str):
|
def execute(path: str):
|
||||||
print("path ", path)
|
|
||||||
loader = FileSystemBlobLoader(path, show_progress=True)
|
|
||||||
prompt = PromptTemplate.from_template(
|
prompt = PromptTemplate.from_template(
|
||||||
"Describe the attached binary file: {file_content}"
|
"""
|
||||||
)
|
Below is an object dump for a GNU/Linux ELF 32-bit LSB executable Intel 80386 binary file recovered using objdump -s.
|
||||||
llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)
|
When run, this binary is a 'capture the flag' exercise, which tells the user some hints, then prompts "Enter the password:" :
|
||||||
blobs: Iterable[Blob] = loader.yield_blobs()
|
You are an expert reverse engineer. Reverse engineer the password by following the instruction flow of the program, subverting any tricks that may make it otherwise difficult to recover the password.
|
||||||
|
As necessary, utilize the hints given in the prompt of the program.
|
||||||
|
If you are able to recover the password reply with only the password and nothing else.
|
||||||
|
If you are not able to recover the password, explain in detail why.
|
||||||
|
|
||||||
for b in blobs:
|
***Object Dump Begin***
|
||||||
chain = SimpleSequentialChain(
|
|
||||||
prompt=prompt,
|
{objdump}
|
||||||
llm=llm,
|
"""
|
||||||
)
|
)
|
||||||
# print('b ',b)
|
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
|
||||||
result = chain.run(file_content=b)
|
chain = {"objdump": RunnablePassthrough()} | prompt | llm | StrOutputParser()
|
||||||
|
objdump = subprocess.run(
|
||||||
|
["objdump", "-s", path], capture_output=True, text=True, check=True
|
||||||
|
)
|
||||||
|
result = chain.invoke(objdump.stdout)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
print(
|
print(
|
||||||
"This program will attempt to automatically solve CS 492/592 Malware Reverse Engineering CTF levels."
|
"\n\nThis program will attempt to automatically solve CS 492/592 Malware Reverse Engineering CTF levels.\nEnter the name of the level (example: Ch01StatA_Readelf)"
|
||||||
)
|
)
|
||||||
print("Enter the name of the level (example: Ch03DynA_Ltrace)")
|
|
||||||
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
level: str = input("name of binary>> ")
|
level: str = input("\nname of binary>> ")
|
||||||
if level:
|
if level:
|
||||||
chapter = level[2:4]
|
chapter = level[2:4]
|
||||||
if chapter.strip().isdigit():
|
if chapter.strip().isdigit(): # parse binary for the chapter number
|
||||||
group = get_group(int(chapter))
|
group = get_group(int(chapter)) # get chapter group from chapter number
|
||||||
if group:
|
if group:
|
||||||
start_time = time()
|
start_time = time()
|
||||||
start_session()
|
start_session() #start session (get session token)
|
||||||
if os.path.isfile(os.path.join("./temp", level)):
|
if os.path.isfile(os.path.join("./downloads", level)): #If we already have this file, no need to fetch it again
|
||||||
print("found")
|
print("File already found")
|
||||||
file_path = os.path.join("./temp", level)
|
file_path = os.path.join("./downloads", level)
|
||||||
else:
|
else:
|
||||||
|
print("Fetching file...")
|
||||||
file_path = get_file_path(level, group)
|
file_path = get_file_path(level, group)
|
||||||
if file_path:
|
if file_path:
|
||||||
results = execute(file_path)
|
result = execute(file_path)
|
||||||
print(results)
|
end_time = time()
|
||||||
|
elapsed_time = round(end_time - start_time, 2)
|
||||||
|
print(
|
||||||
|
"\n", result, "\n\nElapsed time: ", elapsed_time, " seconds"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
raise Exception("Not a valid file")
|
raise Exception("Not a valid file")
|
||||||
else:
|
else:
|
||||||
raise Exception("Not a valid chapter of a possible file")
|
raise Exception("Not a valid chapter of a possible file")
|
||||||
else:
|
else:
|
||||||
raise Exception("Bad input format (example: Ch03DynA_Ltrace)")
|
raise Exception("Bad input format (example: Ch01StatA_Readelf)")
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
|
langchain_core
|
||||||
langchain-community
|
langchain-community
|
||||||
langchain_openai
|
langchain_openai
|
||||||
python-dotenv
|
python-dotenv
|
||||||
validators
|
Requests
|
||||||
esprima
|
|
Reference in New Issue
Block a user