# derived from https://github.com/wu4f/cs410g-src/blob/main/05_CodeSummarize/01_python_parser.py
"""Interactive JavaScript de-obfuscator backed by an OpenAI chat model.

Run as a script to get a small REPL: enter a local path or a URL, and the
loaded JavaScript is sent to the model together with a de-obfuscation prompt.
Submitting an empty line (or EOF / Ctrl-C at the prompt) ends the session.
"""
from time import time

from dotenv import load_dotenv
from langchain_community.document_loaders import AsyncHtmlLoader
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from validators import url

# Load environment variables from a .env file before the client is created.
load_dotenv()

llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)

# Built once at import time instead of on every call; the template never
# changes between requests.
# NOTE(review): the prompt text contains misspellings ("de-obfuscater",
# "de-obduscate", "follwed"). They are left untouched here because editing
# the prompt changes what is sent to the model.
_DEOBFUSCATE_PROMPT = PromptTemplate.from_template(
    """You are an expert javascript de-obfuscater. Carefully analyze the following code and de-obduscate, by applying proper formatting, commenting, and re-naming as necessary. Along with this response, make a note of the changes made. If the input is anything other than obfuscated javascript, respond with "Invalid Input:" follwed by the reason why.

{text}
"""
)


def deobfuscate(path: str) -> str:
    """Load JavaScript from *path* and ask the LLM to de-obfuscate it.

    Args:
        path: A URL (fetched with ``AsyncHtmlLoader``) or a filesystem path
            (loaded with ``GenericLoader.from_filesystem``, restricted to
            ``.js`` files).

    Returns:
        The model's response as a string. If nothing could be loaded from
        *path*, an ``"Invalid Input:"`` message is returned without calling
        the model, mirroring the reply format the prompt asks the model for.
    """
    # validators.url returns a falsy result object for non-URLs, so anything
    # that is not a valid URL is treated as a filesystem path.
    if url(path):
        loader = AsyncHtmlLoader(path)
    else:
        loader = GenericLoader.from_filesystem(
            path,
            glob="*",
            suffixes=[".js"],
            parser=LanguageParser(),
        )

    docs = loader.load()
    source = "\n".join(doc.page_content for doc in docs)

    # Avoid a pointless (and billable) model call when there is no content.
    if not source.strip():
        return f"Invalid Input: no JavaScript content could be loaded from {path!r}."

    # The chain is assembled per call so it always uses the current `llm`.
    chain = (
        {"text": RunnablePassthrough()}
        | _DEOBFUSCATE_PROMPT
        | llm
        | StrOutputParser()
    )
    return chain.invoke(source)


def main() -> None:
    """Run the interactive prompt until an empty line, EOF, or Ctrl-C."""
    print(
        "Welcome to my javascript code de-obfuscator. Supply a local or web path to an obfuscated javascript file and I will attempt to de-obfuscate it."
    )
    while True:
        try:
            # Strip so stray whitespace around a pasted path does not break
            # URL validation or the filesystem lookup.
            line: str = input("llm>> ").strip()
        except (EOFError, KeyboardInterrupt):
            # End of piped input or Ctrl-C at the prompt: leave quietly.
            print()
            break

        if not line:
            break

        start_time = time()
        try:
            result: str = deobfuscate(line)
        except Exception as e:
            # Top-level boundary: report the failure and keep the session
            # alive so one bad path does not end the whole program.
            print(e)
            continue
        end_time = time()
        elapsed_time = round(end_time - start_time, 2)
        print("\n",result,"\n\nElapsed time: ", elapsed_time, " seconds")


if __name__ == "__main__":
    main()