Examples for E2E Mistral training & deployment #673

Open · wants to merge 5 commits into main
7 changes: 7 additions & 0 deletions aws-examples/README.md
@@ -0,0 +1,7 @@
# AWS Examples with 🤗 Optimum Neuron

These example scripts are used in conjunction with posts on the [AWS Machine Learning Blog](https://aws.amazon.com/blogs/machine-learning/).

## Current Examples

- End-to-end Fine-tuning and Deployment of Mistral (`mistral-e2e`)
98 changes: 98 additions & 0 deletions aws-examples/mistral-e2e/chat.py
@@ -0,0 +1,98 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team and Amazon Web Services, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
A simple command-line chat application with contextual memory.
"""

from transformers import AutoTokenizer
from optimum.neuron import NeuronModelForCausalLM

# Load the model compiled for AWS Neuron
model = NeuronModelForCausalLM.from_pretrained("./mistral_neuron", local_files_only=True)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("./mistral_neuron")
tokenizer.pad_token_id = tokenizer.eos_token_id

def format_chat_prompt(message, history, max_tokens):
    """Formats the entire chat history into a single prompt to enable contextual memory."""
    chat = []

    # Add each previous interaction to the chat list with alternating roles, user and assistant.
    for interaction in history:
        chat.append({"role": "user", "content": interaction[0]})
        chat.append({"role": "assistant", "content": interaction[1]})

    # Add the new (user) message to the chat flow.
    chat.append({"role": "user", "content": message})

    # Build the prompt, making sure it does not exceed max_tokens.
    for i in range(0, len(chat), 2):
        # Apply the chat template to the messages from index i onward,
        # i.e. drop the oldest user/assistant pair on each iteration.
        prompt = tokenizer.apply_chat_template(chat[i:], tokenize=False)

        # Check that the prompt does not exceed max_tokens.
        # If it does, the loop drops the oldest interaction and tries again,
        # so we never pass more than the allotted max_tokens to the model.
        tokens = tokenizer(prompt)
        if len(tokens.input_ids) <= max_tokens:
            return prompt

    # Reached only if even the newest message alone exceeds the token limit.
    raise SystemError("Prompt exceeds max_tokens even after truncating the chat history.")

def chat(history, max_tokens):
    """Takes user input and generates a response, recursing until the user types 'quit'."""
    # Take input from the user
    message = input("Enter input: ")

    # Stop the program if the user types "quit"
    if message == "quit":
        return

    # Tokenize the formatted prompt
    inputs = tokenizer(format_chat_prompt(message, history, max_tokens), return_tensors="pt")

    # Run inference to generate a response
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.9,
        top_k=50,
        top_p=0.9
    )

    # Decode the response to a string, stripping the prompt tokens from the output.
    response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)

    # Print the response
    print(response)

    # Add the message and response to the history
    history.append([message, response])

    # Repeat
    chat(history, max_tokens)

if __name__ == "__main__":
    # Define an empty history and the max number of tokens
    history = []
    max_tokens = 4096

    chat(history, max_tokens)
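For context on the truncation loop in `format_chat_prompt`, here is a minimal sketch (not part of the PR) of what `apply_chat_template` produces and how dropping the oldest user/assistant pair shrinks the prompt. The conversation content is made up, and it assumes the `./mistral_neuron` directory produced by `compile.py` already exists.

```python
# A minimal sketch (not part of the PR) of the truncation behavior in chat.py.
# Assumes ./mistral_neuron exists; the conversation content is made up.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./mistral_neuron")

history = [
    ["What is 2 + 2?", "2 + 2 = 4."],
    ["And doubled?", "Doubled, that is 8."],
]

# Rebuild the chat list exactly as format_chat_prompt does.
chat = []
for user_msg, assistant_msg in history:
    chat.append({"role": "user", "content": user_msg})
    chat.append({"role": "assistant", "content": assistant_msg})
chat.append({"role": "user", "content": "Now halve it."})

# With a generous budget, the full history fits in the prompt.
full_prompt = tokenizer.apply_chat_template(chat, tokenize=False)
print(len(tokenizer(full_prompt).input_ids))  # a few dozen tokens: well under 4096

# With a tiny budget, the loop would slice off the oldest user/assistant
# pair (chat[2:]) and re-check the token count before giving up.
truncated_prompt = tokenizer.apply_chat_template(chat[2:], tokenize=False)
print(truncated_prompt)  # the first interaction is gone
```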
35 changes: 35 additions & 0 deletions aws-examples/mistral-e2e/compile.py
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team and Amazon Web Services, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script compiles a model to be usable as an Optimum Neuron NeuronModelForCausalLM.
"""

from optimum.neuron import NeuronModelForCausalLM

# num_cores is the number of Neuron cores to shard the model across; find the available count with the neuron-ls command.
compiler_args = {"num_cores": 12, "auto_cast_type": "bf16"}
input_shapes = {"batch_size": 1, "sequence_length": 4096}

# Compiles an Optimum Neuron model from the previously trained (uncompiled) model
model = NeuronModelForCausalLM.from_pretrained(
    "mistral_trained",
    export=True,
    **compiler_args,
    **input_shapes
)

# Saves the compiled model to the directory mistral_neuron
model.save_pretrained("mistral_neuron")
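After compilation, a quick way to sanity-check the artifact is to load it back and generate a few tokens. This smoke-test sketch (not part of the PR) reuses only the APIs already used in `chat.py`; the prompt text is arbitrary.

```python
# A minimal smoke test (not part of the PR) for the compiled model.
# Reuses the same loading and generation APIs as chat.py.
from transformers import AutoTokenizer
from optimum.neuron import NeuronModelForCausalLM

model = NeuronModelForCausalLM.from_pretrained("./mistral_neuron", local_files_only=True)
tokenizer = AutoTokenizer.from_pretrained("./mistral_neuron")

inputs = tokenizer("<s>[INST] What is 7 times 6? [/INST]", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```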
42 changes: 42 additions & 0 deletions aws-examples/mistral-e2e/dataset.py
@@ -0,0 +1,42 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team and Amazon Web Services, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script formats the gsm8k dataset for use by the training script (run_clm.py) in this directory.
"""

from datasets import DatasetDict, load_dataset

# Wraps each sample in the Mistral instruction format.
# Named format_sample to avoid shadowing the built-in format().
def format_sample(sample):
    sample['text'] = f"<s>[INST] {sample['question']} [/INST]\n\n{sample['answer']}</s>"
    return sample

# Downloads the gsm8k dataset directly from Hugging Face.
dataset = load_dataset("gsm8k", "main")

# We need to split the dataset into training and validation sets.
# Note: gsm8k ships a 'test' split, which we rename to 'validation' for our training script.
train = dataset['train']
validation = dataset['test']

# Map the format function on all elements of the training and validation splits.
# Also removes the question and answer columns we no longer need.
train = train.map(format_sample, remove_columns=list(train.features))
validation = validation.map(format_sample, remove_columns=list(validation.features))

# Create a new DatasetDict with our train and validation splits.
dataset = DatasetDict({"train": train, "validation": validation})

dataset.save_to_disk('dataset_formatted')
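To confirm the formatting, the saved dataset can be reloaded and inspected. A minimal sketch (not part of the PR), assuming `dataset.py` has already been run in the working directory:

```python
# A minimal sketch (not part of the PR): reload the formatted dataset and
# inspect one sample, assuming dataset.py has already been run here.
from datasets import load_from_disk

dataset = load_from_disk('dataset_formatted')
print(dataset)                      # DatasetDict with 'train' and 'validation' splits
print(dataset['train'][0]['text'])  # "<s>[INST] <question> [/INST]\n\n<answer></s>"
```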
135 changes: 135 additions & 0 deletions aws-examples/mistral-e2e/hfon.yaml
@@ -0,0 +1,135 @@
# This is an AWS CloudFormation template to deploy the environment used in this example.
# It must be deployed in us-east-1, since the AMI ID below is region-specific.
AWSTemplateFormatVersion: '2010-09-09'
Description: Creates EC2 trn1.32xlarge and inf2.24xlarge instances and an S3 bucket for end-to-end training and deployment of LLMs.

# Selectable parameters in the AWS CloudFormation deployment
Parameters:
  VPC:
    Type: AWS::EC2::VPC::Id
    Description: The VPC ID where the resources will be deployed.
  Subnet:
    Type: AWS::EC2::Subnet::Id
    Description: The public subnet ID where the EC2 instances will be deployed.
  KeyPair:
    Type: AWS::EC2::KeyPair::KeyName
    Description: The key pair to use for SSH access to the EC2 instances.

Resources:
  # S3 Bucket
  S3Bucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Sub aws-hfon-${AWS::AccountId}

  # EC2 trn1.32xlarge Instance
  trn1Instance:
    Type: AWS::EC2::Instance
    Properties:
      ImageId: ami-0ce2d16d374f959dd
      InstanceType: trn1.32xlarge
      KeyName: !Ref KeyPair
      NetworkInterfaces:
        - AssociatePublicIpAddress: "true"
          DeviceIndex: "0"
          GroupSet:
            - !Ref InstanceSecurityGroup
          SubnetId: !Ref Subnet
      IamInstanceProfile: !Ref InstanceProfile
      BlockDeviceMappings:
        - DeviceName: /dev/sda1
          Ebs:
            VolumeSize: 512
            VolumeType: gp3
      UserData:
        Fn::Base64: !Sub |
          #!/bin/bash
          # The AMI is Ubuntu-based (note /home/ubuntu below), so use apt rather than yum.
          apt-get update -y
          apt-get install -y awscli
          echo "export S3_BUCKET=s3://${S3Bucket}" >> /home/ubuntu/.bashrc

  # EC2 inf2.24xlarge Instance
  inf2Instance:
    Type: AWS::EC2::Instance
    # Wait until the trn1 instance has been successfully provisioned
    DependsOn: trn1Instance
    Properties:
      ImageId: ami-0ce2d16d374f959dd
      InstanceType: inf2.24xlarge
      KeyName: !Ref KeyPair
      NetworkInterfaces:
        - AssociatePublicIpAddress: "true"
          DeviceIndex: "0"
          GroupSet:
            - !Ref InstanceSecurityGroup
          SubnetId: !Ref Subnet
      IamInstanceProfile: !Ref InstanceProfile
      BlockDeviceMappings:
        - DeviceName: /dev/sda1
          Ebs:
            VolumeSize: 256
            VolumeType: gp3
      UserData:
        Fn::Base64: !Sub |
          #!/bin/bash
          apt-get update -y
          apt-get install -y awscli
          echo "export S3_BUCKET=s3://${S3Bucket}" >> /home/ubuntu/.bashrc

  # Security Group
  InstanceSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Allows SSH access
      SecurityGroupIngress:
        - IpProtocol: tcp
          FromPort: 22
          ToPort: 22
          CidrIp: 0.0.0.0/0
      VpcId: !Ref VPC

  # Instance Role
  InstanceRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - ec2.amazonaws.com
            Action:
              - "sts:AssumeRole"
      Policies:
        - PolicyName: ec2-hfon-s3-bucket-access
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - s3:PutObject
                  - s3:GetObject
                  - s3:ListBucket
                Resource:
                  - !GetAtt S3Bucket.Arn
                  - !Sub "${S3Bucket.Arn}/*"

  # IAM Instance Profile
  InstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Properties:
      Roles:
        - !Ref InstanceRole

Outputs:
  S3BucketName:
    Description: The name of the S3 bucket
    Value: !Ref S3Bucket
  trn1InstanceId:
    Description: The ID of the trn1.32xlarge EC2 instance
    Value: !Ref trn1Instance
  inf2InstanceId:
    Description: The ID of the inf2.24xlarge EC2 instance
    Value: !Ref inf2Instance
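One way to deploy this template without the console is via boto3. A hedged sketch (not part of the PR): the stack name, file path, and parameter values below are placeholders, and `CAPABILITY_IAM` is required because the template creates an IAM role and instance profile.

```python
# A sketch (not part of the PR) deploying hfon.yaml with boto3.
# The stack name and parameter values below are placeholders.
import boto3

cfn = boto3.client("cloudformation", region_name="us-east-1")

with open("hfon.yaml") as f:
    template_body = f.read()

cfn.create_stack(
    StackName="hfon-example",
    TemplateBody=template_body,
    Parameters=[
        {"ParameterKey": "VPC", "ParameterValue": "vpc-0123456789abcdef0"},
        {"ParameterKey": "Subnet", "ParameterValue": "subnet-0123456789abcdef0"},
        {"ParameterKey": "KeyPair", "ParameterValue": "my-key-pair"},
    ],
    # Required because the template creates an IAM role and instance profile.
    Capabilities=["CAPABILITY_IAM"],
)
```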