You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
% pip show umap-learn
Name: umap-learn
Version: 0.5.3
Summary: Uniform Manifold Approximation and Projection
Home-page: http://github.com/lmcinnes/umap
Author:
Author-email:
License: BSD
Location: /Users/davidlaxer/anaconda3/envs/AI-Feynman/lib/python3.10/site-packages
Requires: numba, numpy, pynndescent, scikit-learn, scipy, tqdm
Required-by:
% ipython
Python 3.10.13 (main, Sep 11 2023, 08:21:04) [Clang 14.0.6 ]
Type 'copyright', 'credits' or 'license' for more information
IPython 8.20.0 -- An enhanced Interactive Python. Type '?' for help.
from transformers import GPT2Tokenizer, GPT2Model
import torch
import umap
import matplotlib.pyplot as plt
import numpy as np
# Load the pre-trained GPT-2 model and tokenizer
model_name = 'gpt2'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2Model.from_pretrained(model_name)
# Move the model to the 'mps' device if available
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
model.to(device)
# Define the words for which you want to extract embeddings
words = ["dog", "puppy", "dog-puppy"]
# Extract the embedding vectors for each word
embeddings = []
max_length = 0
for word in words:
input_ids = tokenizer.encode(word, add_special_tokens=False, return_tensors='pt')
input_ids = input_ids.to(device)
with torch.no_grad():
outputs = model(input_ids)
hidden_states = outputs.last_hidden_state
word_embedding = hidden_states.squeeze(0).cpu().numpy()
embeddings.append(word_embedding)
max_length = max(max_length, word_embedding.shape[0])
# Pad the embedding vectors to the maximum length
padded_embeddings = []
for embedding in embeddings:
padded_embedding = np.pad(embedding, ((0, max_length - embedding.shape[0]), (0, 0)), mode='constant')
padded_embeddings.append(padded_embedding)
# Reshape the padded embeddings to a 2-dimensional array
reshaped_embeddings = np.array(padded_embeddings).reshape(len(padded_embeddings), -1)
# Apply UMAP to reduce the dimensionality of the embeddings
reducer = umap.UMAP(n_components=2, random_state=42)
reduced_embeddings = reducer.fit_transform(reshaped_embeddings)
# Plot the reduced embeddings
plt.figure(figsize=(8, 6))
for i, word in enumerate(words):
plt.scatter(reduced_embeddings[i, 0], reduced_embeddings[i, 1], label=word)
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
plt.title('Embedding Vectors of Words')
plt.legend()
plt.show()
Fatal Python error: Fatal Python error: Segmentation faultSegmentation fault
Thread 0xThread 0x000070000239c000000070000239c000 (most recent call first):
(most recent call first):
File File ""/Users/davidlaxer/anaconda3/envs/AI-Feynman/lib/python3.10/threading.py/Users/davidlaxer/anaconda3/envs/AI-Feynman/lib/python3.10/threading.py"", line , line 320320 in in waitwait
File "/Users/davidlaxer/anaconda3/envs/AI-Feynman/lib/python3.10/threading.py", line 607 File in wait
"/Users/davidlaxer/anaconda3/envs/AI-Feynman/lib/python3.10/threading.py" File , line "607/Users/davidlaxer/anaconda3/envs/AI-Feynman/lib/python3.10/site-packages/IPython/core/history.py in zsh: segmentation fault ipython
The text was updated successfully, but these errors were encountered:
This code crashes on MacOS Sonoma
The text was updated successfully, but these errors were encountered: