|
35 | 35 | },
|
36 | 36 | "outputs": [],
|
37 | 37 | "source": [
|
38 |
| - "import os\n", |
39 | 38 | "import getpass\n",
|
| 39 | + "import os\n", |
40 | 40 | "\n",
|
41 | 41 | "\n",
|
42 | 42 | "# Function to securely get and set environment variables\n",
|
|
122 | 122 | "# Step 1: Data Loading\n",
|
123 | 123 | "import pandas as pd\n",
|
124 | 124 | "from datasets import load_dataset\n",
|
| 125 | + "\n", |
125 | 126 | "# Make sure you have a Hugging Face token (HF_TOKEN) in your development environment before running the code below\n",
|
126 | 127 | "# How to get a token: https://huggingface.co/docs/hub/en/security-tokens\n",
|
127 | 128 | "\n",
|
|
961 | 962 | " if isinstance(value, (pd.Series, np.ndarray, list)):\n",
|
962 | 963 | " # Handle array-like objects\n",
|
963 | 964 | " if len(value) > 0 and not pd.isna(value).all():\n",
|
964 |
| - " combined.append(f\"{attr.capitalize()}: {str(value)}\")\n", |
| 965 | + " combined.append(f\"{attr.capitalize()}: {value!s}\")\n", |
965 | 966 | " elif not pd.isna(value):\n",
|
966 |
| - " combined.append(f\"{attr.capitalize()}: {str(value)}\")\n", |
| 967 | + " combined.append(f\"{attr.capitalize()}: {value!s}\")\n", |
967 | 968 | " return \" \".join(combined)\n",
|
968 | 969 | "\n",
|
969 | 970 | " df[\"combined_info\"] = df.apply(combine_row, axis=1)\n",
|
|
1056 | 1057 | "outputs": [],
|
1057 | 1058 | "source": [
|
1058 | 1059 | "import tiktoken\n",
|
1059 |
| - "from tqdm import tqdm\n", |
1060 | 1060 | "from langchain_openai import OpenAIEmbeddings\n",
|
| 1061 | + "from tqdm import tqdm\n", |
1061 | 1062 | "\n",
|
1062 | 1063 | "MAX_TOKENS = 8191 # Maximum tokens for text-embedding-3-small\n",
|
1063 | 1064 | "OVERLAP = 50\n",
|
|
1116 | 1117 | " if isinstance(input_data, str):\n",
|
1117 | 1118 | " # Return list of embeddings for string input\n",
|
1118 | 1119 | " return chunk_embeddings[0]\n",
|
1119 |
| - " else:\n", |
1120 |
| - " # Create duplicated rows for each chunk with the respective embedding for row input\n", |
1121 |
| - " duplicated_rows = []\n", |
1122 |
| - " for embedding in chunk_embeddings:\n", |
1123 |
| - " new_row = input_data.copy()\n", |
1124 |
| - " new_row[\"embedding\"] = embedding\n", |
1125 |
| - " duplicated_rows.append(new_row)\n", |
1126 |
| - " return duplicated_rows" |
| 1120 | + " # Create duplicated rows for each chunk with the respective embedding for row input\n", |
| 1121 | + " duplicated_rows = []\n", |
| 1122 | + " for embedding in chunk_embeddings:\n", |
| 1123 | + " new_row = input_data.copy()\n", |
| 1124 | + " new_row[\"embedding\"] = embedding\n", |
| 1125 | + " duplicated_rows.append(new_row)\n", |
| 1126 | + " return duplicated_rows" |
1127 | 1127 | ]
|
1128 | 1128 | },
|
1129 | 1129 | {
|
|
2029 | 2029 | " # Connection successful\n",
|
2030 | 2030 | " print(\"Connection to MongoDB successful\")\n",
|
2031 | 2031 | " return client\n",
|
2032 |
| - " else:\n", |
2033 |
| - " print(\"Connection to MongoDB failed\")\n", |
| 2032 | + " print(\"Connection to MongoDB failed\")\n", |
2034 | 2033 | " return None\n",
|
2035 | 2034 | "\n",
|
2036 | 2035 | "\n",
|
|
2059 | 2058 | "outputs": [],
|
2060 | 2059 | "source": [
|
2061 | 2060 | "# Programmatically create vector search index for both collections\n",
|
2062 |
| - "import time\n", |
2063 | 2061 | "from pymongo.operations import SearchIndexModel\n",
|
2064 | 2062 | "\n",
|
2065 | 2063 | "\n",
|
|
2086 | 2084 | " # time.sleep(20) # Sleep for 20 seconds\n",
|
2087 | 2085 | " print(f\"New index '{index_name}' created successfully:\", result)\n",
|
2088 | 2086 | " except Exception as e:\n",
|
2089 |
| - " print(f\"Error creating new vector search index '{index_name}': {str(e)}\")" |
| 2087 | + " print(f\"Error creating new vector search index '{index_name}': {e!s}\")" |
2090 | 2088 | ]
|
2091 | 2089 | },
|
2092 | 2090 | {
|
|
2193 | 2191 | "outputs": [],
|
2194 | 2192 | "source": [
|
2195 | 2193 | "import pandas as pd\n",
|
2196 |
| - "from pymongo import MongoClient\n", |
2197 | 2194 | "from pymongo.errors import BulkWriteError\n",
|
2198 | 2195 | "\n",
|
2199 | 2196 | "\n",
|
|
2506 | 2503 | "outputs": [],
|
2507 | 2504 | "source": [
|
2508 | 2505 | "# Programmatically create search indexes\n",
|
2509 |
| - "from pymongo.operations import IndexModel\n", |
2510 | 2506 | "\n",
|
2511 | 2507 | "\n",
|
2512 | 2508 | "def create_collection_search_index(collection, index_definition, index_name):\n",
|
|
2531 | 2527 | " print(f\"Search index '{index_name}' created successfully\")\n",
|
2532 | 2528 | " return result\n",
|
2533 | 2529 | " except Exception as e:\n",
|
2534 |
| - " print(f\"Error creating search index: {str(e)}\")\n", |
| 2530 | + " print(f\"Error creating search index: {e!s}\")\n", |
2535 | 2531 | " return None\n",
|
2536 | 2532 | "\n",
|
2537 | 2533 | "\n",
|
|
2642 | 2638 | },
|
2643 | 2639 | "outputs": [],
|
2644 | 2640 | "source": [
|
2645 |
| - "from langchain_openai import OpenAIEmbeddings\n", |
2646 | 2641 | "from langchain_mongodb import MongoDBAtlasVectorSearch\n",
|
2647 | 2642 | "from langchain_mongodb.retrievers import MongoDBAtlasHybridSearchRetriever\n",
|
| 2643 | + "from langchain_openai import OpenAIEmbeddings\n", |
2648 | 2644 | "\n",
|
2649 | 2645 | "ATLAS_VECTOR_SEARCH_INDEX = \"vector_index_with_filter\"\n",
|
2650 | 2646 | "embedding_model = OpenAIEmbeddings(\n",
|
|
2802 | 2798 | "outputs": [],
|
2803 | 2799 | "source": [
|
2804 | 2800 | "import pickle\n",
|
| 2801 | + "from collections.abc import AsyncIterator\n", |
2805 | 2802 | "from contextlib import AbstractContextManager\n",
|
| 2803 | + "from datetime import datetime, timezone\n", |
2806 | 2804 | "from types import TracebackType\n",
|
2807 |
| - "from typing import Any, Dict, Optional, AsyncIterator, Union, List, Tuple\n", |
| 2805 | + "from typing import Any, Dict, List, Optional, Tuple, Union\n", |
2808 | 2806 | "\n",
|
2809 | 2807 | "from langchain_core.runnables import RunnableConfig\n",
|
2810 |
| - "from typing_extensions import Self\n", |
2811 |
| - "\n", |
2812 | 2808 | "from langgraph.checkpoint.base import (\n",
|
2813 | 2809 | " BaseCheckpointSaver,\n",
|
2814 | 2810 | " Checkpoint,\n",
|
|
2818 | 2814 | ")\n",
|
2819 | 2815 | "from langgraph.checkpoint.serde.jsonplus import JsonPlusSerializer\n",
|
2820 | 2816 | "from motor.motor_asyncio import AsyncIOMotorClient\n",
|
2821 |
| - "from datetime import datetime, timezone\n", |
| 2817 | + "from typing_extensions import Self\n", |
2822 | 2818 | "\n",
|
2823 | 2819 | "\n",
|
2824 | 2820 | "class JsonPlusSerializerCompat(JsonPlusSerializer):\n",
|
|
3015 | 3011 | },
|
3016 | 3012 | "outputs": [],
|
3017 | 3013 | "source": [
|
3018 |
| - "from typing import Dict, Any\n", |
| 3014 | + "from typing import Any, Dict\n", |
| 3015 | + "\n", |
3019 | 3016 | "from langchain.agents import tool\n",
|
3020 | 3017 | "\n",
|
3021 | 3018 | "\n",
|
|
3099 | 3096 | },
|
3100 | 3097 | "outputs": [],
|
3101 | 3098 | "source": [
|
3102 |
| - "from pydantic import BaseModel, Field, constr\n", |
3103 | 3099 | "from typing import List\n",
|
3104 |
| - "from datetime import datetime\n", |
| 3100 | + "\n", |
| 3101 | + "from pydantic import BaseModel, Field\n", |
3105 | 3102 | "\n",
|
3106 | 3103 | "\n",
|
3107 | 3104 | "class Step(BaseModel):\n",
|
|
3144 | 3141 | "\n",
|
3145 | 3142 | " return document\n",
|
3146 | 3143 | " except Exception as e:\n",
|
3147 |
| - " raise ValueError(f\"Invalid safety procedure data: {str(e)}\")\n", |
| 3144 | + " raise ValueError(f\"Invalid safety procedure data: {e!s}\")\n", |
3148 | 3145 | "\n",
|
3149 | 3146 | "\n",
|
3150 | 3147 | "# Tool to add new safety procedures\n",
|
|
3323 | 3320 | },
|
3324 | 3321 | "outputs": [],
|
3325 | 3322 | "source": [
|
3326 |
| - "from langchain_openai import ChatOpenAI\n", |
3327 | 3323 | "from langchain_anthropic import ChatAnthropic\n",
|
3328 |
| - "from langchain_groq import ChatGroq\n", |
3329 | 3324 | "\n",
|
3330 | 3325 | "# llm = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n",
|
3331 | 3326 | "llm = ChatAnthropic(model=\"claude-3-sonnet-20240229\", temperature=0)\n",
|
|
3356 | 3351 | },
|
3357 | 3352 | "outputs": [],
|
3358 | 3353 | "source": [
|
3359 |
| - "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n", |
3360 | 3354 | "from datetime import datetime\n",
|
3361 | 3355 | "\n",
|
| 3356 | + "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n", |
| 3357 | + "\n", |
3362 | 3358 | "\n",
|
3363 | 3359 | "def create_agent(llm, tools, system_message: str):\n",
|
3364 | 3360 | " \"\"\"Create an agent.\"\"\"\n",
|
|
3512 | 3508 | "outputs": [],
|
3513 | 3509 | "source": [
|
3514 | 3510 | "import operator\n",
|
3515 |
| - "from langchain_core.messages import BaseMessage\n", |
3516 | 3511 | "from typing import Annotated, TypedDict\n",
|
3517 | 3512 | "\n",
|
| 3513 | + "from langchain_core.messages import BaseMessage\n", |
| 3514 | + "\n", |
3518 | 3515 | "\n",
|
3519 | 3516 | "class AgentState(TypedDict):\n",
|
3520 | 3517 | " messages: Annotated[List[BaseMessage], operator.add]\n",
|
|
3539 | 3536 | "outputs": [],
|
3540 | 3537 | "source": [
|
3541 | 3538 | "import functools\n",
|
| 3539 | + "\n", |
3542 | 3540 | "from langchain_core.messages import AIMessage, ToolMessage\n",
|
3543 | 3541 | "\n",
|
3544 | 3542 | "\n",
|
|
3676 | 3674 | "outputs": [],
|
3677 | 3675 | "source": [
|
3678 | 3676 | "import asyncio\n",
|
3679 |
| - "from langchain_core.messages import HumanMessage, AIMessage\n", |
3680 |
| - "import time\n", |
| 3677 | + "\n", |
| 3678 | + "from langchain_core.messages import HumanMessage\n", |
3681 | 3679 | "\n",
|
3682 | 3680 | "\n",
|
3683 | 3681 | "async def chat_loop():\n",
|
|
3704 | 3702 | " for attempt in range(max_retries):\n",
|
3705 | 3703 | " try:\n",
|
3706 | 3704 | " async for chunk in graph.astream(state, config, stream_mode=\"values\"):\n",
|
3707 |
| - " if \"messages\" in chunk and chunk[\"messages\"]:\n", |
| 3705 | + " if chunk.get(\"messages\"):\n", |
3708 | 3706 | " last_message = chunk[\"messages\"][-1]\n",
|
3709 | 3707 | " if isinstance(last_message, AIMessage):\n",
|
3710 | 3708 | " last_message.name = (\n",
|
|
3719 | 3717 | " break\n",
|
3720 | 3718 | " except Exception as e:\n",
|
3721 | 3719 | " if attempt < max_retries - 1:\n",
|
3722 |
| - " print(f\"\\nAn unexpected error occurred: {str(e)}\")\n", |
| 3720 | + " print(f\"\\nAn unexpected error occurred: {e!s}\")\n", |
3723 | 3721 | " print(f\"\\nRetrying in {retry_delay} seconds...\")\n",
|
3724 | 3722 | " await asyncio.sleep(retry_delay)\n",
|
3725 | 3723 | " retry_delay *= 2\n",
|
3726 | 3724 | " else:\n",
|
3727 |
| - " print(f\"\\nMax retries reached. OpenAI API error: {str(e)}\")\n", |
| 3725 | + " print(f\"\\nMax retries reached. OpenAI API error: {e!s}\")\n", |
3728 | 3726 | " break\n",
|
3729 | 3727 | "\n",
|
3730 | 3728 | " print(\"\\n\") # New line after the complete response"
|
|
0 commit comments