import React, { Component } from 'react';
import { Button, Icon } from "semantic-ui-react";
import { connect } from "react-redux";
import ReactGA4 from 'react-ga4';
import {
    changeCurrentSection,
} from "../../../actions/generalActions";
import arrowLeft from '../../images/arrow-left.svg';
import calendar from '../../images/calendar.svg';
import { Helmet } from 'react-helmet';

// Plain-text versions of the code snippets shown in the article, in display order.
// onClickCopy(n, ...) copies texts[n - 1] to the clipboard, so the order here must
// match the 1-based numbers passed by the copy buttons.
//
// The second entry is a JavaScript template literal: a backslash escape inside it is
// interpreted by JavaScript. The C# newline literal is therefore written as "\\n" so
// the copied code contains the two characters `\n` rather than a real line break
// (which would split the C# string literal across lines and break compilation).
const texts = [
    "docker run -d --name milvus-standalone -p 19530:19530 -p 9091:9091 milvusdb/milvus-standalone:latest",
    `private const int BATCH_SIZE = 5;

    public static async Task InsertEmbeddingsAsync(
        MilvusCollection collection,
        string recordId,
        Dictionary<int, (List<string> text, float[] embedding)> embeddings)
    {
        // Ensure the collection is loaded
        await collection.LoadAsync();
        await Task.Delay(2000); // Pause to ensure the collection is fully loaded

        // Process embeddings in batches
        for (int i = 0; i < embeddings.Count; i += BATCH_SIZE)
        {
            // Get a batch of identifiers
            var batchIds = embeddings
                .Skip(i)
                .Take(BATCH_SIZE)
                .Select(kvp => $"{recordId}_{kvp.Key}")
                .ToArray();

            // Get a batch of content
            var batchContents = embeddings
                .Skip(i)
                .Take(BATCH_SIZE)
                .Select(kvp => string.Join("\\n", kvp.Value.text))
                .ToArray();

            // Get a batch of embeddings
            var batchEmbeddings = embeddings
                .Skip(i)
                .Take(BATCH_SIZE)
                .Select(kvp => new ReadOnlyMemory<float>(kvp.Value.embedding))
                .ToArray();

            // Create field data for insertion
            var fields = new List<FieldData>
            {
                FieldData.CreateVarChar("id", batchIds),
                FieldData.CreateVarChar("entity_id", Enumerable.Repeat(recordId, batchIds.Length).ToArray()),
                FieldData.CreateVarChar("content", batchContents),
                FieldData.CreateFloatVector("embedding", batchEmbeddings)
            };

            // Insert data into the collection
            await collection.InsertAsync(fields);

            // Ensure the data is written to the database
            await collection.FlushAsync();

            // Pause before processing the next batch
            await Task.Delay(2000);
        }
    }`
];

/**
 * Returns a promise that settles (with no value) once `ms` milliseconds have elapsed.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
const delay = (ms) => new Promise((resolve) => {
    setTimeout(resolve, ms);
});

// Punctuation that cannot be typed directly as JSX text; the rendered code snippet
// interpolates these instead. (Identifiers are Spanish; English meaning alongside.)
const llaveizquierda = '{';      // left curly brace
const llavederecha = '}';        // right curly brace
const parentesisizquierdo = '('; // left parenthesis
const parentesisderecho = ')';   // right parenthesis
const menor = '<';               // less-than / opening angle bracket
const mayor = '>';               // greater-than / closing angle bracket

class IntegratingMilvus extends Component {
    constructor(props) {
        super(props);

        this.state = {
            codeCopied: 0
        }

        this.changeSection = this.changeSection.bind(this);
        this.onClickCopy = this.onClickCopy.bind(this);
    }

    changeSection() {
        this.props.changeCurrentSection("Blog");
        if (this.props.cookieUp)
            ReactGA4.send({ hitType: "pageview", title: "Blog", page: '/Blog' });
    }

    async onClickCopy(codeCopied, isJson) {
        if (isJson)
            navigator.clipboard.writeText(JSON.stringify(texts[codeCopied - 1], null, 2));
        else
            navigator.clipboard.writeText(texts[codeCopied - 1]);

        this.setState({ codeCopied: codeCopied });

        await delay(2000);

        this.setState({ codeCopied: 0 });
    }

    onClickSourceCode() {
        let element = document.getElementById("source-code");
        if (element != null)
            element.scrollIntoView({ behavior: "smooth" });
    }

    render() {
        return (
            <>
                <Helmet>
                    <title>Integrating Milvus: Enhancing Dataverse with Vector Search Capabilities</title>
                    <meta name="description" content="Learn how to enhance your Dataverse integration with Milvus for efficient vector similarity search capabilities." />
                    <meta name="keywords" content="milvus, dataverse, vector search, embeddings, AI, database, integration" />
                    <meta property="og:title" content="Integrating Milvus: Enhancing Dataverse with Vector Search Capabilities" />
                    <meta property="og:description" content="Learn how to enhance your Dataverse integration with Milvus for efficient vector similarity search capabilities." />
                    <meta property="og:type" content="article" />
                    <meta property="article:published_time" content="2025-01-20" />
                </Helmet>
                <section className="article-section">
                    <div className='article-container'>
                        <div className='article-header'>
                            <a href='/blog'>
                                <div className='back-button-container'>
                                    <img src={arrowLeft} />
                                    <label>Back to Articles</label>
                                </div>
                            </a>
                        </div>
                        <div className='article'>
                            <h1>Integrating Milvus: Enhancing Dataverse with Vector Search Capabilities</h1>
                            <div className='author-date-container'>
                                <div className='general-container'>
                                    <img src={calendar} />
                                    <label>January 20, 2025</label>
                                </div>
                            </div>
                            <div className='article-div'>
                                <p className='note'>
                                    <span className='fw600'>Note:</span> This article is part of the Ollama Dataverse Series, we continue to learn and work on code 
                                    repository from the previous articles <a href="/article#ollama-with-dataverse-technical-implementation" target="_blank">Integrating Ollama to Microsoft Dataverse: Technical Implementation</a>. 
                                    This time, we create a new branch named <span>feature/milvus-integration</span> which you can find in the <a href='/article#milvus-dataverse-integration' onClick={this.onClickSourceCode}>Source Code</a> section.
                                </p>
                                <p>
                                    Below we'll explore how to integrate Milvus, a vector database, with Microsoft Dataverse to enhance data search and retrieval capabilities.
                                    Milvus allows for efficient vector similarity search, making it ideal for applications that require fast and accurate search results.
                                </p>
                                <h2>Prerequisites</h2>
                                <div className='step'>
                                    <p className='mb-05'>
                                        Before you begin, make sure you have the following installed:
                                    </p>
                                    <ul class="custom-bullets">
                                        <li>.NET SDK (version 8.0)</li>
                                        <li>Visual Studio or any editor of your choice</li>
                                        <li>
                                            Ollama available and accessible. You can set it up locally, on a server, or in a container, depending on your preference.
                                            If you choose to install it on your machine, refer to our previous article, <a href='/article#install-and-run-ollama-net' target="_blank">Getting started with Ollama in .NET</a>, for detailed instructions.
                                        </li>
                                        <li>
                                            Milvus available and accessible. You can set it up locally, on a server, or in a container, depending on your preference.
                                            If you choose to install it on your machine, download Milvus from 
                                            the <a href="https://milvus.io/docs/quickstart.md" target="_blank">official website</a> and follow the installation instructions.
                                        </li>
                                    </ul>
                                    <p>You can use the following command to start the Milvus server for a standalone instance:</p>
                                    <div className='bash'>
                                        <div className='bash-header'>
                                            <label>bash</label>
                                            <Button onClick={() => this.onClickCopy(1, false)}>
                                                <Icon className={this.state.codeCopied == 1 ? 'check' : ''} name={this.state.codeCopied == 1 ? 'check' : 'copy outline'} />
                                                <p>{this.state.codeCopied == 1 ? ' Copied!' : 'Copy'}</p>
                                            </Button>
                                        </div>
                                        <div className='bash-body'>
                                            <p>docker run -d --name milvus-standalone -p 19530:19530 -p 9091:9091 milvusdb/milvus-standalone:latest</p>
                                        </div>
                                    </div>
                                    <p>For a cluster setup, refer to the <a href="https://milvus.io/docs" target="_blank">Milvus documentation</a>.</p>
                                </div>
                                <h3>Step 1: Creating Milvus Collection and Index</h3>
                                <div className='step'>
                                    <p>To store and search Dataverse data efficiently, we need to create a Milvus collection with the following key fields:</p>
                                    <ul class="custom-bullets mb-275">
                                        <li>A primary key field to uniquely identify each record</li>
                                        <li>A entity id field (record GUID in dataverse)</li>
                                        <li>A content field to store the actual record data (refer to Step 3 in article <a href="/article#ollama-with-dataverse-technical-implementation" target="_blank">Integrating Ollama to Microsoft Dataverse: Technical Implementation</a> where we specify the datasets)</li>
                                        <li>An embedding field to store the vector representations (768 dimensions)</li>
                                    </ul>
                                    <div className='mobile-relative'>
                                        <div className="blog-milvus-table-container">
                                            <div>
                                                <button>Fields</button>
                                            </div>
                                            <div className='button-table'>
                                                {
                                                    window.innerWidth <= 767 ? 
                                                        <button className='fields'>
                                                            E
                                                            <br></br>
                                                            N
                                                            <br></br>
                                                            T
                                                            <br></br>
                                                            I
                                                            <br></br>
                                                            T
                                                            <br></br>
                                                            I
                                                            <br></br>
                                                            E
                                                            <br></br>
                                                            S
                                                        </button>
                                                    : <button className='fields'>Entities</button>
                                                }
                                                <table>
                                                    <thead>
                                                        <tr>
                                                            <th>ID</th>
                                                            <th>Entity ID</th>
                                                            <th>Content</th>
                                                            <th>Embedding</th>
                                                        </tr>
                                                    </thead>
                                                    <tbody>
                                                        <tr className='grey'>
                                                            <td>1</td>
                                                            <td>9c58d75d-c3b0-4c35-b5ac-a34b9610d59a</td>
                                                            <td>
                                                                {
                                                                    "Flow: Flow run name 1 (ID: 9c58d75d-c3b0-4c35-b5ac-a34b9610d59a).\n" +
                                                                    "Status: Succeeded | Flow State: Microsoft.Xrm.Sdk.OptionSetValue. Execution: 2025..."
                                                                }
                                                            </td>
                                                            <td>[0.0497427657, 0.02053993, -0.103969239, -0.0311760083, 0.0945027247, -0.06946214, 0.0266473014, 0.064201735, 0.0566126369, -0.0188180879 ... ]</td>
                                                        </tr>
                                                        <tr>
                                                            <td>2</td>
                                                            <td>eb053395-7b6b-4737-85f9-917a8a4cfa7b</td>
                                                            <td>
                                                                {
                                                                    "Flow: Flow run name 2 (ID: eb053395-7b6b-4737-85f9-917a8a4cfa7b).\n" +
                                                                    "Status: Failed | Flow State: Microsoft.Xrm.Sdk.OptionSetValue. Execution: 2025..."
                                                                }
                                                            </td>
                                                            <td>[0.032457298, -0.015849732, 0.107652349, 0.025983174, -0.089321746, 0.071245163, -0.034786192, -0.058294317, -0.063512873, 0.022937451 ... ]</td>
                                                        </tr>
                                                        <tr className='grey'>
                                                            <td>3</td>
                                                            <td>22f0fe6f-3003-4241-9897-b5efe27afaff</td>
                                                            <td>
                                                                {
                                                                    "Flow: Flow run name 3 (ID: 22f0fe6f-3003-4241-9897-b5efe27afaff).\n" +
                                                                    "Status: Succeeded | Flow State: Microsoft.Xrm.Sdk.OptionSetValue. Execution: 2024..."
                                                                }
                                                            </td>
                                                            <td>[-0.047361298, 0.019854732, -0.102569348, -0.030549671, 0.092743651, -0.067945213, 0.029374182, 0.065743298, 0.053612871, -0.021845237 ... ]</td>
                                                        </tr>
                                                    </tbody>
                                                </table>
                                            </div>
                                        </div>
                                    </div>
                                    <p>
                                        For the index, we use <span>IVF_FLAT</span> with L2 distance metric, which provides a good balance between search speed and accuracy. <span>IVF_FLAT</span> divides 
                                        the vector space into clusters, making searches more efficient by first finding the most relevant clusters.
                                    </p>
                                    <p>The collection and index are automatically created if they don't exist when the application starts.</p>
                                </div>
                                <h3>Step 2: Inserting Embeddings in Milvus Collection</h3>
                                <div className='step'>
                                    <p>
                                        The system handles data insertion with several features:
                                    </p>
                                    <ul className="custom-bullets">
                                        <li><span>Batch Processing:</span> Records are inserted in batches of 5 to optimize performance and manage memory efficiently.</li>
                                        <li>
                                            <span>Flexible Storage Options:</span>
                                            <ul>
                                                <li>Direct insertion of new records</li>
                                                <li>Prevents duplicate entries by checking existence first</li>
                                            </ul>
                                        </li>
                                    </ul>
                                    <div className='bash'>
                                        <div className='bash-header'>
                                            <label>bash</label>
                                            <Button onClick={() => this.onClickCopy(2, false)}>
                                                <Icon className={this.state.codeCopied == 2 ? 'check' : ''} name={this.state.codeCopied == 2 ? 'check' : 'copy outline'} />
                                                <p>{this.state.codeCopied == 2 ? ' Copied!' : 'Copy'}</p>
                                            </Button>
                                        </div>
                                        <div className='bash-body'>
                                            <p><span className='blue'>const int</span> BATCH_SIZE = <span className='red'>5</span>;</p>
                                            <br></br>
                                            <p><span className='blue'>public static async</span>{" Task "}<span className='red'>InsertEmbeddingsAsync</span>{parentesisizquierdo}</p>
                                            <p className='one-space'>MilvusCollection collection,</p>
                                            <p className='one-space'><span className='orange'>string</span> recordId,</p> 
                                            <p className='one-space'>Dictionary{menor}<span className='orange'>int</span>, (List{menor}<span className='orange'>string</span>{mayor} text, <span className='orange'>float</span>[] embedding){mayor} embeddings{parentesisderecho}</p>
                                            <p>{llaveizquierda}</p>
                                            <p className='one-space'><span className='grey'>// Ensure the collection is loaded</span></p>
                                            <p className='one-space'><span className='blue'>await</span> collection.LoadAsync();</p>
                                            <p className='one-space'><span className='blue'>await</span> Task.Delay(<span className='red'>2000</span>); <span className='grey'>// Pause to ensure the collection is fully loaded</span></p>
                                            <br></br>
                                            <p className='one-space'><span className='grey'>// Process embeddings in batches</span></p>
                                            <p className='one-space'><span className='blue'>for</span> (<span className='orange'>int</span> i = <span className='red'>0</span>; i {menor} embeddings.Count; i += BATCH_SIZE)</p>
                                            <p className='one-space'>{llaveizquierda}</p>
                                            <p className='two-spaces'><span className='grey'>// Get a batch of identifiers</span></p>
                                            <p className='two-spaces'><span className='blue'>var</span> batchIds = embeddings</p>    
                                            <p className='three-spaces'>.Skip(i)</p>
                                            <p className='three-spaces'>.Take(BATCH_SIZE)</p>
                                            <p className='three-spaces'>.Select(kvp <code className='white'>={mayor}</code> <span className='green'>{'$"'}</span>{'{recordId}_{kvp.Key}'}<span className='green'>"</span>)</p>
                                            <p className='three-spaces'>.ToArray();</p>
                                            <br></br>
                                            <p className='two-spaces'><span className='grey'>// Get a batch of content</span></p>
                                            <p className='two-spaces'><span className='blue'>var</span> batchContents = embeddings</p>    
                                            <p className='three-spaces'>.Skip(i)</p>
                                            <p className='three-spaces'>.Take(BATCH_SIZE)</p>
                                            <p className='three-spaces'>.Select(kvp <code className='white'>={mayor}</code> <span className='orange'>string</span>.Join(<span className='green'>"\n"</span>, kvp.Value.text))</p>
                                            <p className='three-spaces'>.ToArray();</p>
                                            <br></br>
                                            <p className='two-spaces'><span className='grey'>// Get a batch of embeddings</span></p>
                                            <p className='two-spaces'><span className='blue'>var</span> batchEmbeddings = embeddings</p>    
                                            <p className='three-spaces'>.Skip(i)</p>
                                            <p className='three-spaces'>.Take(BATCH_SIZE)</p>
                                            <p className='three-spaces'>.Select(kvp <code className='white'>={mayor}</code> <span className='blue'>new</span>ReadOnlyMemory{menor}<span className='orange'>float</span>{mayor}(kvp.Value.embedding))</p>
                                            <p className='three-spaces'>.ToArray();</p>
                                            <br></br>
                                            <p className='two-spaces'><span className='grey'>// Create field data for insertion</span></p>
                                            <p className='two-spaces'><span className='blue'>var</span> fields = <span className='blue'>new</span>{"List<FieldData>"}</p>  
                                            <p className='two-spaces'>{llaveizquierda}</p>  
                                            <p className='three-spaces'>FieldData.CreateVarChar(<span className='green'>"id"</span>, batchIds),</p>
                                            <p className='three-spaces'>FieldData.CreateVarChar(<span className='green'>"entity_id"</span>, Enumerable.Repeat(entityId, batchIds.Length).ToArray()),</p>
                                            <p className='three-spaces'>FieldData.CreateVarChar(<span className='green'>"content"</span>, batchContents),</p>
                                            <p className='three-spaces'>FieldData.CreateFloatVector(<span className='green'>"embedding"</span>, batchEmbeddings)</p>
                                            <p className='two-spaces'>{llavederecha};</p>  
                                            <br></br>
                                            <p className='two-spaces'><span className='grey'>// Insert data into the collection</span></p>
                                            <p className='two-spaces'><span className='blue'>await</span> collection.InsertAsync(fields);</p>
                                            <br></br>
                                            <p className='two-spaces'><span className='grey'>// Ensure the data is written to the database</span></p>
                                            <p className='two-spaces'><span className='blue'>await</span> collection.FlushAsync();</p>
                                            <br></br>
                                            <p className='two-spaces'><span className='grey'>// Pause before processing the next batch</span></p>
                                            <p className='two-spaces'><span className='blue'>await</span> Task.Delay(<span className='red'>2000</span>);</p>
                                            <p className='one-space'>{llavederecha};</p>
                                            <p>{llavederecha}</p>
                                        </div>
                                    </div>
                                </div>
                                <h3>Step 3: Retrieving and Ranking Results</h3>
                                <div className='step'>
                                    <p><span className='bold'>Search Execution:</span></p>
                                    <ul className="custom-bullets">
                                        <li>Takes a query embedding vector as input</li>
                                        <li>Uses L2 (Euclidean) distance to measure similarity</li>
                                        <li>Configurable number of results (default: 3)</li>
                                        <li>Uses IVF-Flat index for efficient searching</li>
                                    </ul>

                                    <p><span className='bold'>Result Ranking:</span></p>
                                    <ul className="custom-bullets">
                                        <li>Scores are normalized (1.0 - distance) so higher values indicate better matches</li>
                                        <li>Returns both the original content and similarity scores</li>
                                        <li>Results are ordered by similarity, with the closest matches first</li>
                                    </ul>

                                    <p><span className='bold'>Performance Optimization:</span></p>
                                    <ul className="custom-bullets">
                                        <li>Collection is pre-loaded before searching</li>
                                        {/* <li>Search parameters include 'nprobe=16' for balance between speed and accuracy</li> */}
                                        <li>Results include only necessary fields (id and content) to minimize data transfer</li>
                                    </ul>

                                    <p>
                                        This approach enables fast and accurate semantic search across your stored documents,
                                        with results ranked by relevance to the query.
                                    </p>
                                </div>
                                <h3>Conclusion</h3>
                                <div className='step' id='source-code'>
                                    <p>
                                        Integrating Milvus with Dataverse opens up new possibilities for data search and retrieval.
                                        By leveraging vector search capabilities, we can provide now more accurate and relevant search results.
                                    </p>
                                    <p>
                                        We encourage you to experiment with this integration and share your findings.
                                        Together, we can continue to enhance the capabilities on this project!
                                    </p>
                                </div>
                                <h3>Source Code</h3>
                                <div className='last-step'>
                                    <p>
                                        You can find the complete source code for this project on GitHub. Feel free to explore, clone, and modify it for your own use: <a href='https://github.com/coowise/OllamaDataverseEntityChatApp/tree/feature/milvus-integration' target='_blank'>GitHub Repository: OllamaDataverseEntityChatApp</a>
                                    </p>
                                    <p>
                                        This repository includes all the files and configurations used in this tutorial, making it easy for you to replicate and extend the application. The code is well-structured and includes helpers for UI, Dataverse operations, Ollama and Milvus integration to simplify development and ensure modularity.
                                    </p>
                                </div>
                            </div>
                            <div className='footer'>
                                <span>AI Development</span>
                                <span>Dataverse</span>
                            </div>
                        </div>
                    </div>
                </section>
            </>
        );
    }
}

/**
 * Selects the props this component receives from the store: `language`,
 * `currentSection` and `cookieUp`, all read from the `general` slice.
 *
 * @param {object} state - Store state; must contain a `general` object.
 * @returns {{language: *, currentSection: *, cookieUp: *}}
 */
const mapStateToProps = ({ general }) => {
    const { language, currentSection, cookieUp } = general;

    return { language, currentSection, cookieUp };
};

/**
 * Builds the action-dispatching props for this component.
 *
 * @param {Function} dispatch - Store dispatch function.
 * @returns {{changeCurrentSection: Function}} `changeCurrentSection(section)` dispatches
 *   the action created by the imported `changeCurrentSection` action creator and
 *   returns whatever `dispatch` returns.
 */
const mapDispatchToProps = (dispatch) => {
    const dispatchSectionChange = (section) => dispatch(changeCurrentSection(section));

    return { changeCurrentSection: dispatchSectionChange };
};

export default connect(mapStateToProps, mapDispatchToProps)(IntegratingMilvus);