LangChainWiseAgentVectorDB

Bases: WiseAgentVectorDB

An abstract class that makes use of a LangChain vector database.

Source code in wiseagents/vectordb/lang_chain_wise_agent_vector_db.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
class LangChainWiseAgentVectorDB(WiseAgentVectorDB):
    """
    Abstract base for vector databases that are backed by a LangChain vector store.

    The class keeps the embedding model name and the HuggingFaceEmbeddings object
    built from it; the collection and document operations are left abstract.
    """

    def __new__(cls, *args, **kwargs):
        """
        Allocate the instance and give the embedding attributes their default values.

        Because the attributes are assigned here, they exist even on an instance
        whose __init__ has not run.
        """
        instance = super().__new__(cls)
        instance._embedding_model_name = DEFAULT_EMBEDDING_MODEL_NAME
        # NOTE(review): this embedding function always targets the default model;
        # confirm that an instance restored without __init__ and carrying a
        # different model name gets its embedding function rebuilt somewhere.
        instance._embedding_function = HuggingFaceEmbeddings(
            model_name=DEFAULT_EMBEDDING_MODEL_NAME,
            model_kwargs={'tokenizer_kwargs': {"clean_up_tokenization_spaces": True}},
        )
        return instance

    def __init__(self, embedding_model_name: Optional[str] = DEFAULT_EMBEDDING_MODEL_NAME):
        """
        Set up the embedding model used by this vector DB.

        Args:
            embedding_model_name (Optional[str]): name of the embedding model to load;
                defaults to DEFAULT_EMBEDDING_MODEL_NAME
        """
        super().__init__()
        # Refuse direct instantiation of this abstract class.
        enforce_no_abstract_class_instances(self.__class__, LangChainWiseAgentVectorDB)
        self._embedding_model_name = embedding_model_name
        tokenizer_options = {"clean_up_tokenization_spaces": True}
        # Replaces the default embedding function assigned in __new__.
        self._embedding_function = HuggingFaceEmbeddings(
            model_name=self.embedding_model_name,
            model_kwargs={'tokenizer_kwargs': tokenizer_options},
        )

    @property
    def embedding_model_name(self):
        """Name of the embedding model this vector DB was configured with."""
        return self._embedding_model_name

    def convert_from_lang_chain_documents(self, documents: List[LangChainDocument]) -> List[Document]:
        """
        Convert LangChain documents into Document objects.

        Args:
            documents (List[LangChainDocument]): the LangChain documents to convert

        Returns:
            List[Document]: one Document per input, carrying the input's
                page_content as content and its metadata unchanged
        """
        converted = []
        for lang_chain_document in documents:
            converted.append(Document(content=lang_chain_document.page_content,
                                      metadata=lang_chain_document.metadata))
        return converted

    @abstractmethod
    def get_or_create_collection(self, collection_name: str):
        """Abstract: subclasses get or create the collection named collection_name."""
        ...

    @abstractmethod
    def delete_collection(self, collection_name: str):
        """Abstract: subclasses delete the collection named collection_name."""
        ...

    @abstractmethod
    def insert_documents(self, documents: List[Document], collection_name: str):
        """Abstract: subclasses insert the given documents into the named collection."""
        ...

    @abstractmethod
    def insert_or_update_documents(self, documents: List[Document], collection_name: str):
        """Abstract: subclasses insert or update the given documents in the named collection."""
        ...

    @abstractmethod
    def delete_documents(self, ids: List[str], collection_name: str):
        """Abstract: subclasses delete the documents with the given ids from the named collection."""
        ...

    @abstractmethod
    def query(self, queries: List[str], collection_name: str, k: Optional[int] = 4) -> List[List[Document]]:
        """
        Abstract: subclasses run each query string against the named collection.

        Declared to return one list of documents per query. ``k`` defaults to 4
        (presumably the number of documents per query -- confirm with the implementation).
        """
        ...

embedding_model_name property

Get the name of the embedding model.

__init__(embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME)

Initialize a new instance of LangChainWiseAgentVectorDB.

Parameters:
  • embedding_model_name (Optional[str], default: DEFAULT_EMBEDDING_MODEL_NAME ) –

    the optional name of the embedding model to use

Source code in wiseagents/vectordb/lang_chain_wise_agent_vector_db.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def __init__(self, embedding_model_name: Optional[str] = DEFAULT_EMBEDDING_MODEL_NAME):
    """
    Set up the embedding model used by this vector DB.

    Args:
        embedding_model_name (Optional[str]): name of the embedding model to load;
            defaults to DEFAULT_EMBEDDING_MODEL_NAME
    """
    super().__init__()
    # Refuse direct instantiation of this abstract class.
    enforce_no_abstract_class_instances(self.__class__, LangChainWiseAgentVectorDB)
    self._embedding_model_name = embedding_model_name
    tokenizer_options = {"clean_up_tokenization_spaces": True}
    self._embedding_function = HuggingFaceEmbeddings(
        model_name=self.embedding_model_name,
        model_kwargs={'tokenizer_kwargs': tokenizer_options},
    )

__new__(*args, **kwargs)

Create a new instance of the class, setting default values for the instance variables.

Source code in wiseagents/vectordb/lang_chain_wise_agent_vector_db.py
19
20
21
22
23
24
25
26
def __new__(cls, *args, **kwargs):
    """
    Allocate the instance and give the embedding attributes their default values.

    Because the attributes are assigned here, they exist even on an instance
    whose __init__ has not run.
    """
    instance = super().__new__(cls)
    instance._embedding_model_name = DEFAULT_EMBEDDING_MODEL_NAME
    instance._embedding_function = HuggingFaceEmbeddings(
        model_name=DEFAULT_EMBEDDING_MODEL_NAME,
        model_kwargs={'tokenizer_kwargs': {"clean_up_tokenization_spaces": True}},
    )
    return instance

PGVectorLangChainWiseAgentVectorDB

Bases: LangChainWiseAgentVectorDB

A LangChainWiseAgentVectorDB implementation that makes use of a LangChain PGVector database.

Source code in wiseagents/vectordb/lang_chain_wise_agent_vector_db.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
class PGVectorLangChainWiseAgentVectorDB(LangChainWiseAgentVectorDB):
    """
    A LangChainWiseAgentVectorDB implementation that makes use of a LangChain PGVector database.

    One PGVector object is created lazily per collection name and cached in
    ``_vector_dbs`` so that later operations on the same collection reuse it.
    """

    yaml_tag = u'!wiseagents.vectordb.PGVectorLangChainWiseAgentVectorDB'

    def __new__(cls, *args, **kwargs):
        """Create a new instance of the class with an empty per-collection cache."""
        instance = super().__new__(cls)
        instance._vector_dbs = {}
        return instance

    def __init__(self, connection_string: str, embedding_model_name: Optional[str] = DEFAULT_EMBEDDING_MODEL_NAME):
        """
        Initialize a new instance of PGVectorLangChainWiseAgentVectorDB.

        Args:
            connection_string (str): the connection string for the PGVector database
            embedding_model_name (Optional[str]): the optional name of the embedding model to use
        """
        super().__init__(embedding_model_name)
        self._vector_dbs = {}
        self._connection_string = connection_string

    def __repr__(self):
        """Return a string representation of the vector DB."""
        # NOTE(review): the connection string is included verbatim; if it embeds
        # credentials they will show up wherever this repr is logged -- confirm
        # that this is acceptable.
        class_name = self.__class__.__name__
        return (f"{class_name}(connection_string={self.connection_string},"
                f"embedding_model_name={self.embedding_model_name})")

    def __getstate__(self) -> object:
        """
        Return the state of the vector DB without the live objects.

        The 'vector_dbs' and 'embedding_function' entries are dropped from the
        parent's state so that PyYAML does not serialize/deserialize them.
        """
        state = super().__getstate__()
        for transient_key in ('vector_dbs', 'embedding_function'):
            del state[transient_key]
        return state

    @property
    def connection_string(self):
        """Get the connection string."""
        return self._connection_string

    def get_or_create_collection(self, collection_name: str):
        """
        Make sure a PGVector object for the named collection is cached.

        Args:
            collection_name (str): the name of the collection
        """
        if not hasattr(self, "_vector_dbs"):
            # instances populated from PyYAML won't have this set initially
            self._vector_dbs = {}
        if collection_name not in self._vector_dbs:
            self._vector_dbs[collection_name] = PGVector(embeddings=self._embedding_function,
                                                         collection_name=collection_name,
                                                         connection=self._connection_string)

    def _get_vector_db(self, collection_name: str):
        """Return the cached PGVector object for the named collection, creating it first if needed."""
        self.get_or_create_collection(collection_name)
        return self._vector_dbs[collection_name]

    def delete_collection(self, collection_name: str):
        """
        Delete the named collection and drop it from the cache.

        Args:
            collection_name (str): the name of the collection to delete
        """
        self._get_vector_db(collection_name).delete_collection()
        del self._vector_dbs[collection_name]

    def insert_documents(self, documents: List[Document], collection_name: str):
        """
        Add the documents' content, ids and metadata to the named collection.

        Args:
            documents (List[Document]): the documents to insert
            collection_name (str): the name of the target collection
        """
        self._get_vector_db(collection_name).add_texts(texts=[doc.content for doc in documents],
                                                       ids=[doc.id for doc in documents],
                                                       metadatas=[doc.metadata for doc in documents])

    def insert_or_update_documents(self, documents: List[Document], collection_name: str):
        """
        Insert or update documents by delegating to insert_documents.

        Args:
            documents (List[Document]): the documents to insert or update
            collection_name (str): the name of the target collection
        """
        # NOTE(review): the update half relies on add_texts replacing entries
        # that share an id -- confirm against the PGVector implementation in use.
        self.insert_documents(documents, collection_name)

    def delete_documents(self, document_ids: List[str], collection_name: str):
        """
        Delete the documents with the given ids from the named collection.

        Args:
            document_ids (List[str]): the ids of the documents to delete
            collection_name (str): the name of the collection
        """
        self._get_vector_db(collection_name).delete(ids=document_ids)

    def query(self, queries: List[str], collection_name: str, k: Optional[int] = 4) -> List[List[Document]]:
        """
        Run a similarity search for every query against the named collection.

        Args:
            queries (List[str]): the query strings
            collection_name (str): the name of the collection to search
            k (Optional[int]): value passed to similarity_search for each query (default 4)

        Returns:
            List[List[Document]]: one list of converted documents per query, in query order
        """
        vector_db = self._get_vector_db(collection_name)
        return [self.convert_from_lang_chain_documents(vector_db.similarity_search(query, k))
                for query in queries]

connection_string property

Get the connection string.

__getstate__()

Return the state of the vector DB, with _vector_dbs and _embedding_function removed so that they are not serialized/deserialized by PyYAML.

Source code in wiseagents/vectordb/lang_chain_wise_agent_vector_db.py
108
109
110
111
112
113
def __getstate__(self) -> object:
    """
    Return the state of the vector DB without the live objects.

    The 'vector_dbs' and 'embedding_function' entries are dropped from the
    parent's state so that PyYAML does not serialize/deserialize them.
    """
    state = super().__getstate__()
    for transient_key in ('vector_dbs', 'embedding_function'):
        del state[transient_key]
    return state

__init__(connection_string, embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME)

Initialize a new instance of PGVectorLangChainWiseAgentVectorDB.

Parameters:
  • connection_string (str) –

    the connection string for the PGVector database

  • embedding_model_name (Optional[str], default: DEFAULT_EMBEDDING_MODEL_NAME ) –

    the optional name of the embedding model to use

Source code in wiseagents/vectordb/lang_chain_wise_agent_vector_db.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def __init__(self, connection_string: str, embedding_model_name: Optional[str] = DEFAULT_EMBEDDING_MODEL_NAME):
    """
    Set up a PGVectorLangChainWiseAgentVectorDB.

    Args:
        connection_string (str): connection string of the PGVector database
        embedding_model_name (Optional[str]): name of the embedding model to use;
            defaults to DEFAULT_EMBEDDING_MODEL_NAME
    """
    super().__init__(embedding_model_name)
    # Start with an empty cache of per-collection vector DB objects.
    self._vector_dbs = {}
    self._connection_string = connection_string

__new__(*args, **kwargs)

Create a new instance of the class, setting default values for the instance variables.

Source code in wiseagents/vectordb/lang_chain_wise_agent_vector_db.py
83
84
85
86
87
def __new__(cls, *args, **kwargs):
    """Allocate the instance via the parent class and attach an empty ``_vector_dbs`` dict."""
    instance = super().__new__(cls)
    instance._vector_dbs = {}
    return instance

__repr__()

Return a string representation of the vector DB.

Source code in wiseagents/vectordb/lang_chain_wise_agent_vector_db.py
102
103
104
105
def __repr__(self):
    """Build a readable description containing the class name, connection string and embedding model name."""
    class_name = self.__class__.__name__
    return (f"{class_name}(connection_string={self.connection_string},"
            f"embedding_model_name={self.embedding_model_name})")