Skip to content

Vector redis

RedisVectorStore

A class for managing a vector store in Redis, including loading data and creating schemas for embeddings.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| redis_url | str | The URL for connecting to the Redis instance. |
| index | SearchIndex | The Redis search index instance. |
| keys | list | The keys loaded into the Redis vector store. |
| info | dict | Information about the current Redis search index. |


Source code in model/vector_redis.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
class RedisVectorStore:
    """
    A class for managing a vector store in Redis, including loading data and creating schemas for embeddings.

    The index name and key prefix are derived from ``config.user`` and
    ``config.task_id``, so each user/task pair gets its own index.

    Attributes:
        redis_url (str): The URL for connecting to the Redis instance.
        index (SearchIndex): The Redis search index instance. ``None`` until
            ``create_schema`` has run.
        keys (list): The keys loaded into the Redis vector store. ``None``
            until ``load_data`` has loaded data.
        info (dict): Information about the current Redis search index.
            ``None`` until ``create_schema`` has run.
    """

    def __init__(self, redis_url: str) -> None:
        """
        Initializes the RedisVectorStore instance with a Redis URL.

        Only stores the URL; nothing is connected or created until
        ``create_schema`` (or ``load_data``, which calls it) is invoked.

        Args:
            redis_url (str): The Redis connection URL.
        """

        self.redis_url = redis_url
        self.index = None
        self.keys = None
        self.info = None

    def load_data(self, emb_obj: InspectorEmbeddings, config: AppConfig) -> list:
        """
        Loads data from an embedding object into the Redis vector store.

        The index is (re)created through ``create_schema`` with its default
        ``overwrite=True`` and sized to ``emb_obj.dimensions`` before
        ``emb_obj.data_to_vectorstore`` is loaded into it.

        Args:
            emb_obj (InspectorEmbeddings): The embeddings object containing vector data and dimensions.
                May be ``None``, in which case nothing is done.
            config (AppConfig): The application configuration object.

        Returns:
            list: A list of keys corresponding to the loaded data in the Redis store,
            or ``None`` when ``emb_obj`` is ``None``.
        """

        # Identity check: ``!= None`` would dispatch to a user-defined
        # ``__ne__`` and can give a wrong or non-boolean answer.
        if emb_obj is None:
            return None

        # Read both attributes before touching Redis so that a malformed
        # embeddings object fails before create_schema() is called.
        data_to_vectorstore = emb_obj.data_to_vectorstore
        dimensions = emb_obj.dimensions

        self.create_schema(config, dimensions)
        self.keys = self.index.load(data_to_vectorstore)
        return self.keys

    @staticmethod
    def _build_schema(config: AppConfig, dimensions: int) -> dict:
        """Return the index schema dict for ``config``'s user/task and the given vector size."""

        return {
            "index": {
                "name": f"document-index:{config.user}:{config.task_id}",
                "prefix": f"doc:{config.user}:{config.task_id}",
                "storage_type": "hash",
            },
            "fields": [
                {
                    "name": "file_name",
                    "type": "tag"
                },
                {
                    "name": "section",
                    "type": "tag"
                },
                {
                    "name": "text",
                    "type": "text"
                },
                {
                    "name": "embedding",
                    "type": "vector",
                    "attrs": {
                        "dims": dimensions,
                        "distance_metric": "cosine",
                        "algorithm": "flat",
                        "datatype": "float32",
                    }
                },
            ],
        }

    def create_schema(self, config: AppConfig, dimensions: int = 3072, overwrite: bool = True) -> SearchIndex:
        """
        Creates a schema in Redis for storing document embeddings.

        Builds the schema, connects the resulting index to ``self.redis_url``,
        creates it, and caches both the index and its ``info()`` on the
        instance.

        Args:
            config (AppConfig): The application configuration object.
            dimensions (int): The number of dimensions for the vector embeddings. Defaults to 3072.
            overwrite (bool): Whether to overwrite an existing schema. Defaults to True.

        Returns:
            SearchIndex: The Redis search index instance.

        """

        schema = self._build_schema(config, dimensions)
        self.index = SearchIndex.from_dict(schema)
        self.index.connect(self.redis_url)
        self.index.create(overwrite=overwrite)
        self.info = self.index.info()
        return self.index

__init__(redis_url)

Initializes the RedisVectorStore instance with a Redis URL.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| redis_url | str | The Redis connection URL. | required |

Source code in model/vector_redis.py
23
24
25
26
27
28
29
30
31
32
33
34
def __init__(self, redis_url) -> None:
    """
    Set up a store bound to the given Redis URL.

    Only attributes are assigned here: ``index``, ``keys`` and ``info``
    start out as ``None`` placeholders.

    Args:
        redis_url (str): The Redis connection URL.
    """

    # Placeholders first, then the one value supplied by the caller.
    self.index = self.keys = self.info = None
    self.redis_url = redis_url

create_schema(config, dimensions=3072, overwrite=True)

Creates a schema in Redis for storing document embeddings.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| config | AppConfig | The application configuration object. | required |
| dimensions | int | The number of dimensions for the vector embeddings. Defaults to 3072. | 3072 |
| overwrite | bool | Whether to overwrite an existing schema. Defaults to True. | True |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| SearchIndex | SearchIndex | The Redis search index instance. |

Source code in model/vector_redis.py
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def create_schema(self, config: AppConfig, dimensions: int = 3072, overwrite: bool = True) -> SearchIndex:
    """
    Define the document-embedding index and create it in Redis.

    The index name and key prefix are scoped by ``config.user`` and
    ``config.task_id``. The created index and its ``info()`` are stored on
    the instance as ``self.index`` and ``self.info``.

    Args:
        config (AppConfig): The application configuration object.
        dimensions (int): The number of dimensions for the vector embeddings. Defaults to 3072.
        overwrite (bool): Whether to overwrite an existing schema. Defaults to True.

    Returns:
        SearchIndex: The Redis search index instance.

    """

    # Index-level settings: name/prefix are per user and task, stored as hashes.
    index_settings = {
        "name": f"document-index:{config.user}:{config.task_id}",
        "prefix": f"doc:{config.user}:{config.task_id}",
        "storage_type": "hash",
    }

    # Two tag fields, one full-text field, and the embedding vector itself.
    tag_fields = [{"name": tag_name, "type": "tag"} for tag_name in ("file_name", "section")]
    text_field = {"name": "text", "type": "text"}
    vector_field = {
        "name": "embedding",
        "type": "vector",
        "attrs": {
            "dims": dimensions,
            "distance_metric": "cosine",
            "algorithm": "flat",
            "datatype": "float32",
        },
    }

    schema = {
        "index": index_settings,
        "fields": [*tag_fields, text_field, vector_field],
    }

    # Publish the index on the instance before connecting, then create it.
    search_index = SearchIndex.from_dict(schema)
    self.index = search_index
    search_index.connect(self.redis_url)
    search_index.create(overwrite=overwrite)
    self.info = search_index.info()
    return search_index

load_data(emb_obj, config)

Loads data from an embedding object into the Redis vector store.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| emb_obj | InspectorEmbeddings | The embeddings object containing vector data and dimensions. | required |
| config | AppConfig | The application configuration object. | required |

Returns:

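| Name | Type | Description |
| --- | --- | --- |
| list | list | A list of keys corresponding to the loaded data in the Redis store. `None` is returned when `emb_obj` is `None`, in which case nothing is loaded. |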

Source code in model/vector_redis.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def load_data(self, emb_obj: InspectorEmbeddings, config: AppConfig) -> list:
    """
    Loads data from an embedding object into the Redis vector store.

    Calls ``self.create_schema(config, emb_obj.dimensions)`` and then loads
    ``emb_obj.data_to_vectorstore`` into the resulting index. The returned
    keys are also stored on ``self.keys``.

    Args:
        emb_obj (InspectorEmbeddings): The embeddings object containing vector data and dimensions.
            May be ``None``, in which case nothing is done.
        config (AppConfig): The application configuration object.

    Returns:
        list: A list of keys corresponding to the loaded data in the Redis store,
        or ``None`` when ``emb_obj`` is ``None``.
    """

    # Identity check: ``!= None`` would dispatch to a user-defined ``__ne__``
    # and can give a wrong or non-boolean answer.
    if emb_obj is None:
        return None

    # Read both attributes before touching Redis so that a malformed
    # embeddings object fails before create_schema() is called.
    data_to_vectorstore = emb_obj.data_to_vectorstore
    dimensions = emb_obj.dimensions

    self.create_schema(config, dimensions)
    self.keys = self.index.load(data_to_vectorstore)
    return self.keys