146 lines
6.3 KiB
Python
146 lines
6.3 KiB
Python
import io
import os
import shutil
import subprocess
import tempfile
import zipfile
from time import time

import requests
from dotenv import load_dotenv
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
|
|
|
|
|
|
"""
|
|
This application attempts to automatically solve CTF levels for CS492/CS592 Malware Reverse Engineering.
|
|
It does so by prompting the user for a specific binary level, and automatically fetching the level from the web.
|
|
If it is able to do this, the application will perform an object dump of this binary, and send it to the LLM with a prompt,
|
|
to see if the LLM can respond with the correct password (copied from README.md).
|
|
"""
|
|
|
|
# Load variables from a local .env file into the process environment before
# anything reads them (presumably the OpenAI API key -- confirm).
load_dotenv()

# Base address of the CTF site: the login form is posted here and the
# download endpoint is derived from it.
url = "https://cs492.oregonctf.org/"

# One HTTP session shared by the login and download requests.
session = requests.Session()
|
|
|
|
"""
|
|
Binary levels are grouped into various zip archives on the origin. This helps us choose the right zip to download.
|
|
"""
|
|
def get_group(chapter: int):
    """Map a chapter number to the zip archive ("group") that contains it.

    Binary levels are grouped into several zip archives on the origin server;
    the returned name selects which archive to download.

    Args:
        chapter: Chapter number parsed from the level name.

    Returns:
        The archive name (for example ``"Ch01-08"``), or ``False`` when the
        chapter falls outside every known range (e.g. 9, 10, 14 or 17).
    """
    # Inclusive (first chapter, last chapter, archive name) ranges.
    chapter_groups = (
        (1, 8, "Ch01-08"),
        (11, 13, "Ch11-13"),
        (15, 16, "Ch15-16"),
        (18, 21, "Ch18-21"),
    )
    for first, last, group in chapter_groups:
        if first <= chapter <= last:
            return group
    return False
|
|
|
|
|
|
"""
|
|
The downloads for the binaries require an authenticated session. Here, we initiate that session with default provided credentials
|
|
"""
|
|
def start_session():
    """Log the shared module-level ``session`` in with the default credentials.

    The binary downloads require an authenticated session, so the login form
    is posted to ``url`` through ``session`` before any download is attempted.

    Raises:
        requests.HTTPError: If the server answers the login request with an
            error status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    payload = {"username": "demo0", "passwd": "malware"}
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    # Without a timeout requests can wait forever on an unresponsive server.
    response = session.post(url, data=payload, headers=headers, timeout=30)
    # Report a failed login here instead of as a confusing error in the
    # download step that follows.
    response.raise_for_status()
|
|
|
|
|
|
"""
|
|
Once we have the level name and group, we download the zip file of the appropriate group into a scope based temporary directory.
|
|
We scan those files to find the one whose name matches the chosen level. If it is found, we move it into a persistent downloads directory for later use.
|
|
"""
|
|
def get_file_path(level, group):
    """Download the archive for *group* and save the binary named *level*.

    The archive is unpacked into a temporary directory that is removed when
    the function returns. If one of its top-level entries is named exactly
    *level*, that entry is moved into the persistent ``./downloads``
    directory.

    Args:
        level: Exact file name of the binary to look for in the archive.
        group: Archive name sent to the server as ``setname``
            (for example ``"Ch01-08"``).

    Returns:
        The path of the saved binary inside ``./downloads``, or ``False``
        when the archive has no top-level entry named *level*.

    Raises:
        requests.HTTPError: If the download request returns an error status.
        requests.Timeout: If the server does not respond within the timeout.
        zipfile.BadZipFile: If the response body is not a zip archive.
    """
    payload = {"setname": group}
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    # `url` already ends with "/"; strip it so the request does not go to
    # ".../​/download" with a doubled slash.
    download_url = url.rstrip("/") + "/download"
    response = session.post(download_url, data=payload, headers=headers, timeout=60)
    response.raise_for_status()

    with tempfile.TemporaryDirectory() as temp_dir:
        with io.BytesIO(response.content) as buffer, zipfile.ZipFile(buffer) as archive:
            archive.extractall(temp_dir)

        if level not in os.listdir(temp_dir):
            return False

        # The downloads directory may not exist yet on a first run.
        os.makedirs("./downloads", exist_ok=True)
        saved_file_path = os.path.join("./downloads", level)
        # shutil.move falls back to copy+delete when the temporary directory
        # lives on a different filesystem, where os.rename would fail.
        shutil.move(os.path.join(temp_dir, level), saved_file_path)
        return saved_file_path
|
|
|
|
|
|
"""
|
|
Object dump the binary file, and send it in a chain with our prompt to the LLM
|
|
"""
|
|
def execute(path: str):
    """Dump the binary at *path* with ``objdump -s`` and ask the LLM for the password.

    The dump text is substituted into the prompt as ``{objdump}``, sent to
    the ``gpt-4o`` chat model at temperature 0, and the reply is parsed to a
    string.

    Args:
        path: Location of the binary file to dump.

    Returns:
        The output of the prompt | model | string-parser chain.

    Raises:
        subprocess.CalledProcessError: If ``objdump`` exits with a non-zero
            status (``check=True``).
    """
    template_text = """
Below is an object dump for a GNU/Linux ELF 32-bit LSB executable Intel 80386 binary file recovered using objdump -s.
When run, this binary is a 'capture the flag' exercise, which tells the user some hints, then prompts "Enter the password:" :
You are an expert reverse engineer. Reverse engineer the password by following the instruction flow of the program, subverting any tricks that may make it otherwise difficult to recover the password.
As necessary, utilize the hints given in the prompt of the program.
If you are able to recover the password reply with only the password and nothing else.
If you are not able to recover the password, explain in detail why.

***Object Dump Begin***

{objdump}
"""
    template = PromptTemplate.from_template(template_text)
    model = ChatOpenAI(model_name="gpt-4o", temperature=0)
    # The raw dump text is passed straight through as the "objdump" variable.
    pipeline = {"objdump": RunnablePassthrough()} | template | model | StrOutputParser()

    dump_command = ["objdump", "-s", path]
    dump = subprocess.run(dump_command, capture_output=True, text=True, check=True)
    return pipeline.invoke(dump.stdout)
|
|
|
|
|
|
# Interactive driver: prompt for level names until an empty line is entered
# or an error occurs. For each level: resolve its archive group, reuse or
# download the binary, run it through `execute`, and print the answer with
# the elapsed time.
print(
    "\n\nThis program will attempt to automatically solve CS 492/592 Malware Reverse Engineering CTF levels.\nEnter the name of the level (example: Ch01StatA_Readelf)"
)
while True:
    try:
        level: str = input("\nname of binary>> ")
        if not level:
            # An empty line ends the program.
            break

        # Level names look like "Ch01StatA_Readelf": characters 2-3 hold
        # the chapter number.
        chapter = level[2:4]
        if not chapter.strip().isdigit():
            raise Exception("Bad input format (example: Ch01StatA_Readelf)")

        group = get_group(int(chapter))  # archive that holds this chapter
        if not group:
            raise Exception("Not a valid chapter of a possible file")

        start_time = time()
        file_path = os.path.join("./downloads", level)
        if os.path.isfile(file_path):
            # Already downloaded: no login or network round-trip needed.
            print("File already found")
        else:
            print("Fetching file...")
            start_session()  # downloads require an authenticated session
            file_path = get_file_path(level, group)

        if not file_path:
            raise Exception("Not a valid file")

        result = execute(file_path)
        elapsed_time = round(time() - start_time, 2)
        print("\n", result, "\n\nElapsed time: ", elapsed_time, " seconds")
    except Exception as e:
        # Any failure is reported and ends the session.
        print(e)
        break
|