From ca9d33801a9d95778f81b8d560f9fd7118b37ea3 Mon Sep 17 00:00:00 2001
From: David Westgate
Date: Mon, 6 May 2024 20:56:13 -0700
Subject: [PATCH] de-obfuscate from local file

---
Note: the blob ids on the "index" lines below come from the original commit
and do not match the revised file contents.

 hw4/app.py           | 90 ++++++++++++++++++++++++++++++++++++++++++++
 hw4/requirnments.txt |  7 +++++
 2 files changed, 97 insertions(+)
 create mode 100644 hw4/requirnments.txt

diff --git a/hw4/app.py b/hw4/app.py
index e69de29..84c309d 100644
--- a/hw4/app.py
+++ b/hw4/app.py
@@ -0,0 +1,90 @@
+#derived from https://github.com/wu4f/cs410g-src/blob/main/05_CodeSummarize/01_python_parser.py
+"""Interactive command-line JavaScript de-obfuscator backed by an OpenAI chat model."""
+from langchain_community.document_loaders.generic import GenericLoader
+from langchain_community.document_loaders.parsers import LanguageParser
+from langchain_text_splitters import Language
+from langchain_openai import ChatOpenAI
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from dotenv import load_dotenv
+try:
+    # Imported only for its side effect of giving input() line editing.
+    import readline
+except ImportError:
+    # Not every Python build ships readline (e.g. on Windows); run without it.
+    readline = None
+
+# Load settings from a local .env file before the client is built
+# (presumably this is where the OpenAI API key comes from -- confirm).
+load_dotenv()
+
+llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)
+
+
+def deobfuscate(path):
+    """De-obfuscate the JavaScript loaded from a local filesystem path.
+
+    Args:
+        path: Location passed to ``GenericLoader.from_filesystem``; only
+            files with a ``.js`` suffix are loaded from it.
+
+    Returns:
+        The model's reply as a string, or an ``Invalid Input:`` message
+        when nothing could be loaded from ``path``.
+    """
+    loader = GenericLoader.from_filesystem(
+        path,
+        glob="*",
+        suffixes=[".js"],
+        parser=LanguageParser(),
+    )
+    docs = loader.load()
+    if not docs:
+        # Nothing to analyze: skip the model call rather than send an empty
+        # prompt, and answer in the same format the prompt asks the model for.
+        return f"Invalid Input: no .js files found at {path!r}"
+
+    prompt1 = PromptTemplate.from_template(
+        """You are an expert javascript de-obfuscater. Carefully analyze the following code and de-obduscate, by applying proper formatting, commenting, and re-naming as necessary.
+
+        Along with this response, make a note of the changes made. If the input is anything other than obfuscated javascript, respond with "Invalid Input:" follwed by the reason why.
+
+        {text}
+        """
+    )
+
+    # Feed the raw input string into the prompt's {text} slot, send it to the
+    # model, and reduce the reply to plain text.
+    chain = (
+        {"text": RunnablePassthrough()}
+        | prompt1
+        | llm
+        | StrOutputParser()
+    )
+    output = "\n".join(d.page_content for d in docs)
+    return chain.invoke(output)
+
+
+def main():
+    """Run the interactive prompt until a blank line or end of input."""
+    print("Welcome to my javascript code de-obfuscator. \nSupply a local or web path to an obfuscated javascript file and I will attempt to de-obfuscate it.")
+
+    while True:
+        try:
+            line = input("llm>> ")
+        except EOFError:
+            # End of input (e.g. Ctrl-D): leave quietly.
+            break
+        if not line:
+            break
+        try:
+            print(deobfuscate(line))
+        except Exception as e:
+            # Report the failure but keep the session alive, so one bad path
+            # or failed request does not end the program.
+            print(e)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hw4/requirnments.txt b/hw4/requirnments.txt
new file mode 100644
index 0000000..b94f762
--- /dev/null
+++ b/hw4/requirnments.txt
@@ -0,0 +1,7 @@
+langchain
+langchain-community
+langchain_openai
+python-dotenv
+unstructured
+langchainhub
+esprima