inference_open_source_llm.py
import re
import os
import csv
import json
import argparse
import pandas as pd
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    GenerationConfig,
    BertTokenizer,
    BertForSequenceClassification,
    pipeline,
    LlamaForCausalLM,
    LlamaTokenizerFast,
)
import torch
from peft import PeftModel
# dataset = "macro_indicator"
dataset = "firm_news"
data_path = f"./data/prompt/{dataset}.json"
# Function to load JSON data
def load_prompts_from_json(file_path):
    with open(file_path, 'r') as json_file:
        data = json.load(json_file)
    return data
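# Expected JSON layout (an assumption based on how the prompts are consumed in
# main() below): a flat mapping from date strings to fully formatted prompt
# strings, e.g. {"2023-01-03": "...prompt text... Only output: Strongly Bullish, ..."}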
# Function to extract prediction
def extract_prediction(text):
    prediction_mapping = {
        'Strongly Bullish': 3,
        'Bullish': 2,
        'Slightly Bullish': 1,
        'Flat': 0,
        'Fluctuating': 0,
        'Slightly Bearish': -1,
        'Bearish': -2,
        'Strongly Bearish': -3
    }
    match = re.search(r'\b(Strongly Bullish|Bullish|Slightly Bullish|Flat|Fluctuating|Slightly Bearish|Bearish|Strongly Bearish)\b', text, re.IGNORECASE)
    if match:
        # Normalize casing so case-insensitive matches still hit the mapping keys
        return prediction_mapping.get(match.group(1).title())
    return None
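# Illustrative usage (example strings are assumptions, not dataset output):
#   extract_prediction("Overall outlook: Slightly Bearish on weak guidance.")  -> -1
#   extract_prediction("The report gives no directional view.")                -> None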
def get_finbert_sentiment(probabilities):
    # ProsusAI/finbert label order: 0 = positive, 1 = negative, 2 = neutral,
    # mapped onto the prediction scale: positive -> 2 (Bullish), negative -> -2 (Bearish), neutral -> 0 (Flat)
    prediction = torch.argmax(probabilities).item()
    mapping = {0: 2, 1: -2, 2: 0}
    return mapping[prediction]
def get_input_ids_and_attention_mask_chunk(tokens):
    """
    This function splits the input_ids and attention_mask into chunks of size 'chunksize'.
    It also adds special tokens (101 for [CLS] and 102 for [SEP]) at the start and end of each chunk.
    If the length of a chunk is less than 'chunksize', it pads the chunk with zeros at the end.
    Returns:
        input_id_chunks (List[torch.Tensor]): List of chunked input_ids.
        attention_mask_chunks (List[torch.Tensor]): List of chunked attention_masks.
    """
    chunksize = 512
    input_id_chunks = list(tokens['input_ids'][0].split(chunksize - 2))
    attention_mask_chunks = list(tokens['attention_mask'][0].split(chunksize - 2))
    for i in range(len(input_id_chunks)):
        input_id_chunks[i] = torch.cat([
            torch.tensor([101]), input_id_chunks[i], torch.tensor([102])
        ])
        attention_mask_chunks[i] = torch.cat([
            torch.tensor([1]), attention_mask_chunks[i], torch.tensor([1])
        ])
        pad_length = chunksize - input_id_chunks[i].shape[0]
        if pad_length > 0:
            input_id_chunks[i] = torch.cat([
                input_id_chunks[i], torch.Tensor([0] * pad_length)
            ])
            attention_mask_chunks[i] = torch.cat([
                attention_mask_chunks[i], torch.Tensor([0] * pad_length)
            ])
    return input_id_chunks, attention_mask_chunks
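# Rough sizing example (illustrative numbers, not from the data): a 1,200-token
# article split with chunksize=512 gives ceil(1200 / 510) = 3 chunks of up to
# 510 content tokens each, wrapped in [CLS]/[SEP] and zero-padded to length 512.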
def main():
    # Create the parser
    parser = argparse.ArgumentParser(description="Finance LLM")
    # Add arguments
    parser.add_argument('--model', type=str, default='Llama-3-70B', help='Model to use (default: Llama-3-70B)')
    # Parse the arguments
    args = parser.parse_args()
    # Print all the arguments
    for arg, value in vars(args).items():
        print(f"{arg}: {value}")
    if args.model == "Llama-3-70B":
        model_name = "meta-llama/Meta-Llama-3-70B-Instruct"
    elif args.model == "Qwen2-72B-Instruct":
        model_name = "Qwen/Qwen2-72B-Instruct"
    elif args.model == "Mixtral-8x22B-Instruct-v0.1":
        model_name = "mistralai/Mixtral-8x22B-Instruct-v0.1"
    elif args.model == "Yi-1.5-34B-Chat":
        model_name = "01-ai/Yi-1.5-34B-Chat"
    elif args.model == "Phi-3-medium":
        model_name = "microsoft/Phi-3-medium-4k-instruct"
    elif args.model == "FinBERT":
        model_name = "ProsusAI/finbert"
    elif args.model == "FinGPT":
        base_model = "NousResearch/Llama-2-13b-hf"
        peft_model = "FinGPT/fingpt-sentiment_llama2-13b_lora"
    if args.model == "Phi-3-medium":
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="cuda",
            torch_dtype="auto",
            trust_remote_code=True,
        )
    elif args.model == "Qwen2-72B-Instruct":
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype="auto",
            device_map="auto",
        )
    elif args.model == "Yi-1.5-34B-Chat":
        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
    elif args.model == "FinBERT":
        tokenizer = BertTokenizer.from_pretrained(model_name)
        model = BertForSequenceClassification.from_pretrained(model_name)
    elif args.model == "FinGPT":
        tokenizer = LlamaTokenizerFast.from_pretrained(base_model, trust_remote_code=True)
        tokenizer.pad_token = tokenizer.eos_token
        model = LlamaForCausalLM.from_pretrained(base_model, trust_remote_code=True, device_map="cuda:0")
        model = PeftModel.from_pretrained(model, peft_model)
        model = model.eval()
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
    # Prepare CSV file for output
    output_csv_file = f"./data/output/shuffle1/{dataset}_{args.model}_greedy.csv"
    # Load the prompts from the JSON file
    prompts_dict = load_prompts_from_json(data_path)
    # print(f"Date: {date}\nPrompt: {prompt}\n")
    fieldnames = ['Date', 'Response', 'Prediction']
    with open(output_csv_file, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if csvfile.tell() == 0:  # Check if file is empty to write header
            writer.writeheader()
        for date, prompt in prompts_dict.items():
            # shuffle1
            prompt = prompt.replace("Only output: Strongly Bullish, Bullish, Slightly Bullish, Flat, Fluctuating, Slightly Bearish, Bearish, Strongly Bearish", "Only output: Flat, Slightly Bearish, Bullish, Bearish, Fluctuating, Slightly Bullish, Strongly Bullish, Strongly Bearish")
            # # shuffle2
            # prompt = prompt.replace("Only output: Strongly Bullish, Bullish, Slightly Bullish, Flat, Fluctuating, Slightly Bearish, Bearish, Strongly Bearish", "Only output: Strongly Bearish, Flat, Slightly Bearish, Slightly Bullish, Fluctuating, Bullish, Bearish, Strongly Bullish")
            # # shuffle3
            # prompt = prompt.replace("Only output: Strongly Bullish, Bullish, Slightly Bullish, Flat, Fluctuating, Slightly Bearish, Bearish, Strongly Bearish", "Only output: Slightly Bearish, Strongly Bullish, Bearish, Bullish, Slightly Bullish, Fluctuating, Flat, Strongly Bearish")
            # # shuffle4
            # prompt = prompt.replace("Only output: Strongly Bullish, Bullish, Slightly Bullish, Flat, Fluctuating, Slightly Bearish, Bearish, Strongly Bearish", "Only output: Bearish, Strongly Bullish, Fluctuating, Slightly Bearish, Slightly Bullish, Strongly Bearish, Bullish, Flat")
            # prompt style
            if args.model in ["Mixtral-8x22B-Instruct-v0.1", "Yi-1.5-34B-Chat", "Phi-3-medium"]:
                messages = [
                    {"role": "user", "content": prompt},
                ]
            elif args.model == "Qwen2-72B-Instruct":
                messages = [
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt},
                ]
            else:
                messages = [
                    {"role": "system", "content": ""},
                    {"role": "user", "content": prompt},
                ]
            # decoding setting
            if args.model == "Qwen2-72B-Instruct":
                text = tokenizer.apply_chat_template(
                    messages,
                    tokenize=False,
                    add_generation_prompt=True
                )
                model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
                generated_ids = model.generate(
                    model_inputs.input_ids,
                    max_length=4096,
                    do_sample=False,
                )
                generated_ids = [
                    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
                ]
                response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
            elif args.model == "Phi-3-medium":
                pipe = pipeline(
                    "text-generation",
                    model=model,
                    tokenizer=tokenizer,
                )
                generation_args = {
                    "max_new_tokens": 4096,
                    "return_full_text": False,
                    "do_sample": False,
                }
                output = pipe(messages, **generation_args)
                response = output[0]['generated_text']
elif args.model == "FinBERT":
# drop the instructions, only keep the data
prompt = prompt.split('\n\n')[1]
tokens = tokenizer.encode_plus(prompt, add_special_tokens=False, return_tensors = 'pt')
input_id_chunks, attention_mask_chunks = get_input_ids_and_attention_mask_chunk(tokens)
input_ids = torch.stack(input_id_chunks)
attention_mask = torch.stack(attention_mask_chunks)
input_dict = {
'input_ids' : input_ids.long(),
'attention_mask' : attention_mask.int()
}
outputs = model(**input_dict)
probabilities = torch.nn.functional.softmax(outputs[0], dim=-1)
mean_probabilities = probabilities.mean(dim=0)
response = mean_probabilities
elif args.model == "FinGPT":
def split_text(text, max_length):
tokens = tokenizer(text, return_tensors='pt')
input_ids = tokens['input_ids'][0]
chunks = []
for i in range(0, len(input_ids), max_length):
chunk = tokenizer.decode(input_ids[i:i+max_length], skip_special_tokens=True)
chunks.append(chunk)
return chunks
def sentiment_to_score(sentiment):
if 'positive' in sentiment and 'negative' in sentiment:
return 0
elif 'positive' in sentiment:
return 2
elif 'negative' in sentiment:
return -2
elif 'neutral' in sentiment:
return 0
return None # Default for unknown sentiments
def get_finbert_sentiment(prompt):
prompt_text = 'Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}\nInput: ' + prompt + '\nAnswer: '
tokens = tokenizer(prompt_text, return_tensors='pt', padding=True, max_length=512, truncation=True)
tokens = {key: val.to('cuda:0') for key, val in tokens.items()}
output = model.generate(**tokens, do_sample=False, max_length=512)
response = tokenizer.decode(output[0], skip_special_tokens=True)
if "Answer: " in response:
sentiment = response.split("Answer: ")[1].strip()
return sentiment
else:
None
prompt = prompt.split('\n\n')[1]
# prompt = 'Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}\nInput: ' + prompt + '\nAnswer: '
# # Generate results
# tokens = tokenizer(prompt, return_tensors='pt', padding=True, max_length=4096)
# tokens = {key: val.to('cuda:0') for key, val in tokens.items()}
# output = model.generate(**tokens, max_length=4096)
# # Decode results
# response = tokenizer.decode(output[0], skip_special_tokens=True)
# response = response.split("Answer: ")[1].strip()
# FinGPT generates the garbled text when the number of tokens is greater than 512
chunks = split_text(prompt, 470) # make sure prompt = (instruction + data) <= 500 max tokens
sentiments = []
scores = []
for chunk in chunks:
sentiment = get_finbert_sentiment(chunk)
if sentiment != None:
sentiments.append(sentiment)
score = sentiment_to_score(sentiment)
if score != None:
scores.append(score)
response = sentiments
average_score = sum(scores) / len(scores)
            else:
                if args.model == "Yi-1.5-34B-Chat":
                    input_ids = tokenizer.apply_chat_template(
                        conversation=messages,
                        tokenize=True,
                        return_tensors='pt'
                    ).to(model.device)
                else:
                    input_ids = tokenizer.apply_chat_template(
                        messages,
                        add_generation_prompt=True,
                        return_tensors="pt"
                    ).to(model.device)
                if args.model == "Llama-3-70B":
                    terminators = [
                        tokenizer.eos_token_id,
                        tokenizer.convert_tokens_to_ids("<|eot_id|>")
                    ]
                    outputs = model.generate(
                        input_ids,
                        max_length=4096,
                        eos_token_id=terminators,
                        do_sample=False,
                        num_beams=1,
                        temperature=None,
                        top_p=None,
                        pad_token_id=tokenizer.eos_token_id,
                    )
                else:
                    outputs = model.generate(
                        input_ids,
                        max_length=4096,
                        do_sample=False,
                        num_beams=1,
                        temperature=None,
                        top_p=None,
                        pad_token_id=tokenizer.eos_token_id,
                    )
                response = outputs[0][input_ids.shape[-1]:]
                response = tokenizer.decode(response, skip_special_tokens=True)
            if args.model == "FinBERT":
                prediction = get_finbert_sentiment(response)
            elif args.model == "FinGPT":
                prediction = average_score
            else:
                prediction = extract_prediction(response)
            # Write to CSV file
            writer.writerow({'Date': date, 'Response': response, 'Prediction': prediction})
            csvfile.flush()  # Flush data to disk after each write


if __name__ == "__main__":
    main()
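# Example invocation (paths follow the defaults defined above):
#   python inference_open_source_llm.py --model Qwen2-72B-Instruct
# This reads prompts from ./data/prompt/firm_news.json and appends one
# (Date, Response, Prediction) row per prompt to
# ./data/output/shuffle1/firm_news_Qwen2-72B-Instruct_greedy.csv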