added zip fetch, extract

This commit is contained in:
David Westgate 2024-05-15 22:18:21 -07:00
parent da6217324a
commit 14682a55b5
3 changed files with 111 additions and 36 deletions

3
.gitignore vendored
View File

@ -4,4 +4,5 @@ __pycache__/
.env .env
*tokens* *tokens*
rag_data rag_data
.chromadb .chromadb
*temp*

View File

@ -1,73 +1,142 @@
from typing import Iterable, Literal
from langchain_community.document_loaders.generic import GenericLoader from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser from langchain_community.document_loaders.parsers import LanguageParser
from langchain_openai import ChatOpenAI from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import FileSystemBlobLoader
from langchain_community.document_loaders.blob_loaders.schema import Blob, BlobLoader
from langchain_community.document_loaders import AsyncHtmlLoader from langchain_community.document_loaders import AsyncHtmlLoader
from langchain.chains import LLMChain
from langchain.chains import SimpleSequentialChain
from dotenv import load_dotenv from dotenv import load_dotenv
import requests import requests
import tempfile
import zipfile
import io
import os
from validators import url from validators import url
from time import time from time import time
load_dotenv()
""" """
This application attempts to automatically solve CTF levels for CS492/CS592 Malware Reverse Engineering. This application attempts to automatically solve CTF levels for CS492/CS592 Malware Reverse Engineering.
""" """
load_dotenv()
promt = "You are a malware reverse engineer"
def get_rag_chain(): # def get_rag_chain():
return ( # return (
{"context": retriever | format_docs, "question": RunnablePassthrough()} # {"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt # | prompt
| llm # | llm
| StrOutputParser() # | StrOutputParser()
) # )
session = requests.Session() session = requests.Session()
llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)
url = "https://cs492.oregonctf.org/" url = "https://cs492.oregonctf.org/"
def get_group(chapter: int):
    """Map a chapter number to the name of the download group containing it.

    Args:
        chapter: Chapter number parsed from a level name.

    Returns:
        One of ``"Ch01-08"``, ``"Ch11-13"``, ``"Ch15-16"`` or ``"Ch18-21"``
        when *chapter* falls inside that inclusive range, otherwise ``False``
        (chapters outside every range, e.g. 9, 10, 14, 17, have no group).
    """
    if 1 <= chapter <= 8:
        return "Ch01-08"
    if 11 <= chapter <= 13:
        return "Ch11-13"
    if 15 <= chapter <= 16:
        return "Ch15-16"
    if 18 <= chapter <= 21:
        return "Ch18-21"
    # Callers test the result for truthiness, so keep the False sentinel.
    return False
def start_session():
    """Log in to the CTF site through the module-level ``session``.

    Sends the demo credentials as a URL-encoded form POST to ``url``.
    Presumably the server answers with cookies that ``session`` keeps for
    later requests -- confirm against the site.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so a
            failed login is reported here instead of surfacing later as an
            unreadable download.
    """
    payload = {"username": "demo0", "passwd": "malware"}
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    response = session.post(url, data=payload, headers=headers)
    response.raise_for_status()
def get_file_path(level, group):
    """Download the archive for *group* and save the entry named *level*.

    Posts ``setname=group`` to the site's ``/download`` endpoint with the
    module-level ``session``, opens the response body as a zip archive,
    extracts it into a temporary directory and moves the top-level entry
    whose name equals *level* into ``./temp``.

    Args:
        level: Name of the wanted entry inside the archive.
        group: Group name sent to the server as ``setname``.

    Returns:
        The path of the saved file under ``./temp``, or ``False`` when the
        archive has no top-level entry named *level*.

    Raises:
        requests.HTTPError: If the download request returns a 4xx/5xx status.
        zipfile.BadZipFile: If the response body is not a zip archive.
    """
    # Local import: shutil is not among the file's top-level imports.
    import shutil

    payload = {
        "setname": group,
    }
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    response = session.post(url + "/download", data=payload, headers=headers)
    response.raise_for_status()

    with tempfile.TemporaryDirectory() as temp_dir:
        with io.BytesIO(response.content) as zip_file:
            with zipfile.ZipFile(zip_file) as z:
                z.extractall(temp_dir)

        # Only top-level names are considered, as in the original listing loop.
        if level not in os.listdir(temp_dir):
            return False

        # "./temp" may not exist yet, and the system temp directory can live
        # on a different filesystem where os.rename would fail, so create the
        # target directory and let shutil.move fall back to copy + delete.
        os.makedirs("./temp", exist_ok=True)
        saved_file_path = os.path.join("./temp", level)
        shutil.move(os.path.join(temp_dir, level), saved_file_path)
        return saved_file_path
def execute(path: str):
    """Ask the LLM to describe the file(s) found at *path*.

    Builds a ``FileSystemBlobLoader`` for *path*, and for every blob it
    yields runs a prompt/LLM chain whose ``file_content`` variable is the
    blob.

    Args:
        path: Filesystem location handed to ``FileSystemBlobLoader``.

    Returns:
        The chain output for the last blob processed, or ``None`` when the
        loader yields no blobs.
    """
    print("path ", path)
    loader = FileSystemBlobLoader(path, show_progress=True)
    prompt = PromptTemplate.from_template(
        "Describe the attached binary file: {file_content}"
    )
    llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)
    # SimpleSequentialChain takes a list of chains, not prompt/llm; LLMChain
    # is the chain that pairs one prompt with one model. It does not depend
    # on the blob, so build it once instead of once per iteration.
    chain = LLMChain(prompt=prompt, llm=llm)

    result = None
    for blob in loader.yield_blobs():
        # NOTE(review): the Blob object itself is formatted into the prompt;
        # confirm whether its decoded contents were intended instead.
        result = chain.run(file_content=blob)
    return result
print(
"This program will attempt to automatically solve CS 492/592 Malware Reverse Engineering CTF levels."
)
print("Enter the name of the level (example: Ch03DynA_Ltrace)")
while True: while True:
try: try:
start_session() level: str = input("name of binary>> ")
print(session.cookies) if level:
download('Ch01-08') chapter = level[2:4]
line: str = input("llm>> ") if chapter.strip().isdigit():
if line: group = get_group(int(chapter))
start_time = time() if group:
start_time = time()
result: str = get_rag_chain.invoke(line) start_session()
end_time = time() if os.path.isfile(os.path.join("./temp", level)):
elapsed_time = round(end_time - start_time, 2) print("found")
print("\n", result, "\n\nElapsed time: ", elapsed_time, " seconds") file_path = os.path.join("./temp", level)
else:
file_path = get_file_path(level, group)
if file_path:
results = execute(file_path)
print(results)
else:
raise Exception("Not a valid file")
else:
raise Exception("Not a valid chapter of a possible file")
else:
raise Exception("Bad input format (example: Ch03DynA_Ltrace)")
else: else:
break break
except Exception as e: except Exception as e:

5
hw5/requirnments.txt Normal file
View File

@ -0,0 +1,5 @@
langchain-community
langchain_openai
python-dotenv
validators
esprima