-
Notifications
You must be signed in to change notification settings - Fork 0
/
llm-rewrite-json.py
133 lines (99 loc) · 5.14 KB
/
llm-rewrite-json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import os
import json
import subprocess
import argparse
# Plain-text log, one file name per line, of inputs that were already
# processed; the relative path is resolved against the current working directory.
PROCESSED_FILES_LOG = 'processed_files.log'
def call_llm(file_path):
    """Ask the LLM for a new alias and description for one automation file.

    The file's contents are streamed to the ``llm`` command-line tool on
    standard input, together with a system prompt asking for the rewritten
    alias and description as JSON.

    Args:
        file_path: Path of the automation file to send to the model.

    Returns:
        The tool's standard output with surrounding whitespace stripped, or
        an empty string when the file cannot be opened, the ``llm``
        executable cannot be started, or it exits with a non-zero status.
    """
    system_prompt = (
        "Based on what this Home Assistant automation does and how it works, "
        "rewrite a new Alias and Description for it. "
        "In your response just give me the updated alias and description. "
        "Respond with JSON objects."
    )
    # Use an argv list and feed the file through stdin rather than building a
    # `cat <path> | llm ...` shell string: paths containing spaces, quotes or
    # shell metacharacters would otherwise break the command or be executed.
    command = ['llm', '-m', 'gpt-4o', '--no-stream', '-s', system_prompt]
    try:
        with open(file_path, 'rb') as source:
            result = subprocess.run(
                command, stdin=source, capture_output=True, text=True
            )
    except OSError as error:
        # Covers an unreadable input file as well as a missing `llm` binary.
        print(f"Error calling LLM: {error}")
        return ""
    if result.returncode != 0:
        print(f"Error calling LLM: {result.stderr}")
        return ""
    return result.stdout.strip()
def clean_response(response):
    """Reduce an LLM reply to a JSON string without damaging valid JSON.

    Removes a surrounding Markdown code fence (with or without the ``json``
    tag) and a pair of wrapping single quotes. Single quotes inside the
    payload are only touched when the payload does not already parse as JSON,
    so apostrophes in valid string values (e.g. "Don't") are preserved.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        The first of these that parses as JSON: the unwrapped text, the text
        with single quotes turned into double quotes, the text with single
        quotes removed. If none parses, the unwrapped text is returned
        unchanged so the caller can report it.
    """
    text = response.strip()
    if text.startswith("```") and text.endswith("```"):
        text = text[3:-3]
        if text.startswith("json"):
            text = text[4:]
        text = text.strip()
    # Remove leading/trailing single quotes wrapping the whole payload
    if text.startswith("'") and text.endswith("'"):
        text = text[1:-1].strip()
    # Ordered from least to most destructive; stop at the first valid JSON.
    candidates = (text, text.replace("'", '"'), text.replace("'", ''))
    for candidate in candidates:
        try:
            json.loads(candidate)
        except json.JSONDecodeError:
            continue
        return candidate
    return text
def update_json_file(input_file_path, output_file_path, new_data):
    """Write a copy of a JSON file with its alias and description replaced.

    Args:
        input_file_path: Path of the JSON file to read.
        output_file_path: Path the updated JSON is written to. Its parent
            directory is created when missing; a bare file name is written
            to the current working directory.
        new_data: Mapping that may contain "alias" and/or "description".
            A key that is absent leaves the input file's value in place.

    Raises:
        json.JSONDecodeError: If the input file is not valid JSON.
        OSError: If the input cannot be read or the output cannot be written.
    """
    with open(input_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    for key in ('alias', 'description'):
        data[key] = new_data.get(key, data.get(key))
    # os.path.dirname() is '' for a bare file name and os.makedirs('') raises,
    # so only create the directory when the path actually has one.
    output_dir = os.path.dirname(output_file_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)
def get_processed_files():
    """Return the set of file names recorded in the processed-files log.

    Each log line is stripped of surrounding whitespace. An empty set is
    returned when the log does not exist yet.
    """
    if os.path.exists(PROCESSED_FILES_LOG):
        with open(PROCESSED_FILES_LOG, 'r') as log:
            return {entry.strip() for entry in log}
    return set()
def mark_as_processed(file_name):
    """Append *file_name*, followed by a newline, to the processed-files log."""
    with open(PROCESSED_FILES_LOG, 'a') as log:
        log.writelines((file_name, '\n'))
def process_directory(directory):
    """Process every not-yet-processed ``.json`` file directly inside *directory*.

    Files whose names already appear in the processed-files log are skipped
    silently. Every remaining file is handed to ``process_single_file`` so
    that directory mode and single-file mode share one pipeline instead of
    two copies of the same logic.

    Args:
        directory: Path of the directory to scan (not recursive).
    """
    already_done = get_processed_files()
    # os.listdir() returns entries in arbitrary order; sorting makes runs
    # reproducible and an interrupted run easy to resume and reason about.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".json") or filename in already_done:
            continue
        # process_single_file re-reads the log for its own check; that cost is
        # negligible next to the LLM call made for each file.
        process_single_file(os.path.join(directory, filename))
def process_single_file(file_path):
    """Rewrite the alias and description of one automation JSON file.

    The file is skipped when its base name is already listed in the
    processed-files log. Otherwise the LLM is asked for a new alias and
    description, the updated automation is written to ``output/<file name>``
    and the file name is recorded in the log. Failures are reported on
    stdout and leave the file unrecorded, so it is retried on the next run.

    Args:
        file_path: Path of the JSON file to process.
    """
    filename = os.path.basename(file_path)
    if filename in get_processed_files():
        print(f"{filename} has already been processed.")
        return
    output_file_path = os.path.join('output', filename)
    print(f'Processing {file_path}')
    # Call LLM to get updated alias and description
    llm_response = call_llm(file_path)
    if not llm_response:
        print(f"No valid response from LLM for {filename}")
        return
    # Clean up possible Markdown code fences from the LLM's response
    cleaned_response = clean_response(llm_response)
    try:
        new_data = json.loads(cleaned_response)
    except json.JSONDecodeError as e:
        print(f'Failed to decode JSON response for {filename}: {e}\nResponse was: {cleaned_response}')
        return
    # The model may answer with valid JSON that is not an object (a list or a
    # bare string); update_json_file needs a mapping, so reject anything else
    # here instead of crashing on `.get`.
    if not isinstance(new_data, dict):
        print(f'Unexpected JSON response for {filename}: expected an object, got {type(new_data).__name__}\nResponse was: {cleaned_response}')
        return
    try:
        update_json_file(file_path, output_file_path, new_data)
    except json.JSONDecodeError as e:
        # Raised while reading the input file, not the LLM reply; report it
        # as such rather than blaming the response.
        print(f'Failed to parse {file_path} as JSON: {e}')
        return
    mark_as_processed(filename)
    print(f'Successfully updated {file_path} -> {output_file_path}')
if __name__ == "__main__":
    # Command-line entry point: exactly one of --directory / --file is required.
    cli = argparse.ArgumentParser(description='Process Home Assistant automation JSON files.')
    source = cli.add_mutually_exclusive_group(required=True)
    source.add_argument('-d', '--directory', help='Directory containing JSON files to process')
    source.add_argument('-f', '--file', help='Specific JSON file to process')
    options = cli.parse_args()
    target_directory, target_file = options.directory, options.file
    # Directory mode takes precedence; an empty-string value selects neither mode.
    if target_directory:
        process_directory(target_directory)
    elif target_file:
        process_single_file(target_file)