finish app, update readme, update test inputs
This commit is contained in:
parent
ade1c61334
commit
ac80b0d0d6
38
hw4/README.md
Normal file
38
hw4/README.md
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
###### David Westgate 26 April 2024
|
||||||
|
## HW4 for gensec
|
||||||
|
This application is a LangChain agent intended to help de-obfuscate JavaScript code. On the web, JavaScript is commonly obfuscated both to minimize source file size before it is sent over the network to the client, and to limit the ability to reverse engineer the function of JavaScript running on the client in enterprise web applications. The application is programmed to ingest JavaScript source files both from the local machine and from URLs.
|
||||||
|
|
||||||
|
### Setup + Run
|
||||||
|
Install python3, then
|
||||||
|
```
|
||||||
|
cd hw4
|
||||||
|
pip install -r requirnments.txt
|
||||||
|
python3 app.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Running
|
||||||
|
```
|
||||||
|
python3 app.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example Input
|
||||||
|
Test the ability to de-obfuscate a local javascript source
|
||||||
|
```
|
||||||
|
test/test.js
|
||||||
|
```
|
||||||
|
*Test file included is sourced from `https://www.gstatic.com/feedback/js/a3hbqvnh5213/api.js` licensed under Apache 2.0*
|
||||||
|
|
||||||
|
Test the ability to de-obfuscate a javascript source from the web
|
||||||
|
```
|
||||||
|
https://www.gstatic.com/feedback/js/a3hbqvnh5213/api.js
|
||||||
|
```
|
||||||
|
|
||||||
|
Test the ability to identify a local non-JavaScript source file, which is ineligible for de-obfuscation
|
||||||
|
```
|
||||||
|
test/lorem.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
Test the ability to identify a non-JavaScript source file from the web, which is ineligible for de-obfuscation
|
||||||
|
```
|
||||||
|
https://www.lipsum.com/index.html
|
||||||
|
```
|
41
hw4/app.py
41
hw4/app.py
@ -1,24 +1,30 @@
|
|||||||
#derived from https://github.com/wu4f/cs410g-src/blob/main/05_CodeSummarize/01_python_parser.py
|
# derived from https://github.com/wu4f/cs410g-src/blob/main/05_CodeSummarize/01_python_parser.py
|
||||||
from langchain_community.document_loaders.generic import GenericLoader
|
from langchain_community.document_loaders.generic import GenericLoader
|
||||||
from langchain_community.document_loaders.parsers import LanguageParser
|
from langchain_community.document_loaders.parsers import LanguageParser
|
||||||
from langchain_text_splitters import Language
|
|
||||||
from langchain_openai import ChatOpenAI
|
from langchain_openai import ChatOpenAI
|
||||||
from langchain_core.runnables import RunnablePassthrough
|
from langchain_core.runnables import RunnablePassthrough
|
||||||
from langchain_core.prompts import PromptTemplate
|
from langchain_core.prompts import PromptTemplate
|
||||||
from langchain_core.output_parsers import StrOutputParser
|
from langchain_core.output_parsers import StrOutputParser
|
||||||
|
from langchain_community.document_loaders import AsyncHtmlLoader
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import readline
|
from validators import url
|
||||||
|
from time import time
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)
|
llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)
|
||||||
|
|
||||||
def deobfuscate(path):
|
|
||||||
loader = GenericLoader.from_filesystem(
|
def deobfuscate(path: str):
|
||||||
|
if url(path):
|
||||||
|
loader = AsyncHtmlLoader(path)
|
||||||
|
else:
|
||||||
|
loader = GenericLoader.from_filesystem(
|
||||||
path,
|
path,
|
||||||
glob="*",
|
glob="*",
|
||||||
suffixes=[".js"],
|
suffixes=[".js"],
|
||||||
parser=LanguageParser(),
|
parser=LanguageParser(),
|
||||||
)
|
)
|
||||||
docs = loader.load()
|
docs = loader.load()
|
||||||
prompt1 = PromptTemplate.from_template(
|
prompt1 = PromptTemplate.from_template(
|
||||||
"""You are an expert javascript de-obfuscater. Carefully analyze the following code and de-obduscate, by applying proper formatting, commenting, and re-naming as necessary.
|
"""You are an expert javascript de-obfuscater. Carefully analyze the following code and de-obduscate, by applying proper formatting, commenting, and re-naming as necessary.
|
||||||
@ -29,24 +35,25 @@ def deobfuscate(path):
|
|||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
chain = (
|
chain = {"text": RunnablePassthrough()} | prompt1 | llm | StrOutputParser()
|
||||||
{"text": RunnablePassthrough()}
|
|
||||||
| prompt1
|
|
||||||
| llm
|
|
||||||
| StrOutputParser()
|
|
||||||
)
|
|
||||||
output = "\n".join([d.page_content for d in docs])
|
output = "\n".join([d.page_content for d in docs])
|
||||||
result = chain.invoke(output)
|
result = chain.invoke(output)
|
||||||
return(result)
|
return result
|
||||||
|
|
||||||
print("Welcome to my javascript code de-obfuscator. Supply a local or web path to an obfuscated javascript file and I will attempt to de-obfuscate it.")
|
|
||||||
|
print(
|
||||||
|
"Welcome to my javascript code de-obfuscator. Supply a local or web path to an obfuscated javascript file and I will attempt to de-obfuscate it."
|
||||||
|
)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
line = input("llm>> ")
|
line: str = input("llm>> ")
|
||||||
if line:
|
if line:
|
||||||
result = deobfuscate(line)
|
start_time = time()
|
||||||
print(result)
|
result: str = deobfuscate(line)
|
||||||
|
end_time = time()
|
||||||
|
elapsed_time = round(end_time - start_time, 2)
|
||||||
|
print("\n",result,"\n\nElapsed time: ", elapsed_time, " seconds")
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
langchain
|
|
||||||
langchain-community
|
langchain-community
|
||||||
langchain_openai
|
langchain_openai
|
||||||
python-dotenv
|
python-dotenv
|
||||||
unstructured
|
validators
|
||||||
langchainhub
|
esprima
|
||||||
|
time
|
1
hw4/test/lorem.txt
Normal file
1
hw4/test/lorem.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
2486
hw4/test/test-fmt.js
2486
hw4/test/test-fmt.js
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user