> ## Documentation Index
> Fetch the complete documentation index at: https://actianvectorai-docs-feedback-implementation.mintlify.site/llms.txt
> Use this file to discover all available pages before exploring further.

# Pure semantic search

> Search for documents by meaning using vector similarity.

Pure semantic search retrieves the most similar documents to your query using only vector similarity. No metadata filters are applied — all documents in the collection are candidates.

Use pure semantic search when you want broad retrieval across your entire document corpus without restricting results by category, date, or other metadata.

Before running this example, make sure you have a VectorAI DB instance running at `localhost:6574` and the relevant SDK installed. For setup instructions, see [Docker installation](/home/installation/instructions).

This example demonstrates the core semantic search pipeline:

1. **Create a collection** with cosine distance and a vector dimension matching your embeddings.
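2. **Create field indexes** on payload fields you plan to filter on later. This step is optional for pure semantic search; the search in this example does not apply any filter.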
3. **Embed and upsert documents** with their text, vector, and metadata payload.
4. **Search** with a query vector and retrieve the top-K most similar results with their payload.

Each result includes these fields:

* `id`: The unique identifier of the matching document.
* `score`: Cosine similarity score. Higher values indicate greater semantic similarity.
* `payload`: Metadata object containing the document text and attributes.

<CodeGroup>
  ```python Python theme={null}
  from __future__ import annotations

  import random

  from actian_vectorai import (
      Distance,
      FieldType,
      PointStruct,
      VectorAIClient,
      VectorParams,
  )

  # Address passed to VectorAIClient when connecting.
  SERVER = "localhost:6574"
  # Name of the demo collection; it is deleted and recreated on every run.
  COLLECTION = "semantic_demo"
  # Vector dimension: used as the collection's vector size and as the default
  # length of the placeholder embeddings.
  DIM = 64
  # Banner template for section headings; the title is padded to 50 columns.
  fmt = "\n=== {:50} ==="

  # Simulated document corpus.
  # Each entry has an integer "id", the "text" that gets embedded, and two
  # metadata attributes ("topic", "year") stored in the point payload.
  DOCUMENTS = [
      {
          "id": 1,
          "text": "Python is a popular programming language",
          "topic": "programming",
          "year": 2024,
      },
      {
          "id": 2,
          "text": "Machine learning transforms data into insights",
          "topic": "ml",
          "year": 2024,
      },
      {
          "id": 3,
          "text": "Vector databases enable semantic search",
          "topic": "databases",
          "year": 2024,
      },
      {"id": 4, "text": "Neural networks learn hierarchical features", "topic": "ml", "year": 2023},
      {
          "id": 5,
          "text": "SQL is the language of relational databases",
          "topic": "databases",
          "year": 2020,
      },
      {"id": 6, "text": "Deep learning requires large datasets", "topic": "ml", "year": 2023},
      {"id": 7, "text": "Graph databases model relationships", "topic": "databases", "year": 2022},
      {"id": 8, "text": "Transformers revolutionized NLP", "topic": "ml", "year": 2023},
      {
          "id": 9,
          "text": "Rust is a memory-safe systems language",
          "topic": "programming",
          "year": 2024,
      },
      {"id": 10, "text": "Embeddings represent meaning as vectors", "topic": "ml", "year": 2024},
  ]


  def fake_embed(text: str, dim: int = DIM) -> list[float]:
      """Return a reproducible pseudo-embedding for ``text``.

      The vector is drawn from a private random generator seeded with a
      SHA-256 digest of the text, so the same text maps to the same vector in
      every process. The built-in ``hash()`` is salted per interpreter run for
      strings, so seeding from it would yield different vectors on each run.
      Using a private ``random.Random`` also leaves the module-level
      generator's state untouched.

      The result carries no semantic meaning; it only stands in for a real
      embedding model.

      Args:
          text: Text to embed.
          dim: Number of components in the returned vector.

      Returns:
          ``dim`` floats sampled from a standard normal distribution.
      """
      import hashlib  # local import keeps the snippet's import block unchanged

      digest = hashlib.sha256(text.encode("utf-8")).digest()
      # Keep a 32-bit seed, matching the seed range of the earlier version.
      rng = random.Random(int.from_bytes(digest[:4], "big"))
      return [rng.gauss(0, 1) for _ in range(dim)]


  def main() -> None:
      """Index the sample corpus, run one unfiltered top-5 search, then drop the collection."""
      with VectorAIClient(SERVER) as client:
          # Start from a clean slate: drop a collection left over from an earlier run.
          if client.collections.exists(COLLECTION):
              client.collections.delete(COLLECTION)
          cosine_vectors = VectorParams(size=DIM, distance=Distance.Cosine)
          client.collections.create(COLLECTION, vectors_config=cosine_vectors)

          # Index the metadata fields; the search below does not filter on them.
          indexed_fields = (
              ("topic", FieldType.FieldTypeKeyword),
              ("year", FieldType.FieldTypeInteger),
          )
          for field_name, field_type in indexed_fields:
              client.points.create_field_index(COLLECTION, field_name, field_type)

          # One point per document: id, placeholder embedding, metadata payload.
          points = []
          for doc in DOCUMENTS:
              metadata = {key: doc[key] for key in ("text", "topic", "year")}
              points.append(
                  PointStruct(
                      id=doc["id"],
                      vector=fake_embed(doc["text"]),
                      payload=metadata,
                  )
              )
          client.points.upsert(COLLECTION, points)
          print(f"✓ Indexed {len(DOCUMENTS)} documents")

          # Unfiltered similarity search: every document is a candidate.
          print(fmt.format("Semantic: 'how do vector databases work?'"))
          query_vec = fake_embed("how do vector databases work?")
          hits = client.points.search(
              COLLECTION,
              vector=query_vec,
              limit=5,
              with_payload=True,
          )
          for hit in hits:
              score = hit.score
              text = hit.payload["text"]
              print(f"  score={score:.4f} | {text}")

          # Remove the demo collection so repeated runs leave nothing behind.
          client.collections.delete(COLLECTION)
          print("\n✓ Cleaned up")


  # Run the demo only when this file is executed as a script, not when imported.
  if __name__ == "__main__":
      main()
  ```

  ```javascript JavaScript theme={null}
  import { VectorAIClient } from '@actian/vectorai-client';

  // Address passed to VectorAIClient when connecting.
  const SERVER = 'localhost:6574';
  // Name of the demo collection; it is deleted and recreated on every run.
  const COLLECTION = 'semantic_demo';
  // Vector dimension: used for the collection and as the default length of
  // the placeholder embeddings.
  const DIM = 64;

  // Simulated document corpus.
  // Each entry has a numeric id, the text that gets embedded, and two
  // metadata attributes (topic, year) stored in the point payload.
  const DOCUMENTS = [
    { id: 1, text: 'Python is a popular programming language', topic: 'programming', year: 2024 },
    { id: 2, text: 'Machine learning transforms data into insights', topic: 'ml', year: 2024 },
    { id: 3, text: 'Vector databases enable semantic search', topic: 'databases', year: 2024 },
    { id: 4, text: 'Neural networks learn hierarchical features', topic: 'ml', year: 2023 },
    { id: 5, text: 'SQL is the language of relational databases', topic: 'databases', year: 2020 },
    { id: 6, text: 'Deep learning requires large datasets', topic: 'ml', year: 2023 },
    { id: 7, text: 'Graph databases model relationships', topic: 'databases', year: 2022 },
    { id: 8, text: 'Transformers revolutionized NLP', topic: 'ml', year: 2023 },
    { id: 9, text: 'Rust is a memory-safe systems language', topic: 'programming', year: 2024 },
    { id: 10, text: 'Embeddings represent meaning as vectors', topic: 'ml', year: 2024 },
  ];

  /**
   * Build a repeatable placeholder embedding from a string.
   *
   * A 31-multiplier rolling hash over the UTF-16 code units (wrapped to a
   * signed 32-bit integer at every step) provides the seed; each component is
   * the fractional part of `sin(seed * k) * 10000` for k = 1, 2, ...
   * The values carry no semantic meaning.
   *
   * @param {string} text - Text to embed.
   * @param {number} [dim=DIM] - Number of components to generate.
   * @returns {number[]} Array of `dim` numbers.
   */
  function fakeEmbed(text, dim = DIM) {
    let acc = 0;
    let pos = 0;
    while (pos < text.length) {
      // `| 0` truncates to a signed 32-bit integer so the hash wraps around.
      acc = (acc * 31 + text.charCodeAt(pos)) | 0;
      pos += 1;
    }
    const seed = Math.abs(acc);

    const fractionalPart = (value) => value - Math.floor(value);
    const embedding = [];
    let slot = 0;
    while (slot < dim) {
      slot += 1;
      embedding.push(fractionalPart(Math.sin(seed * slot) * 10000));
    }
    return embedding;
  }

  /** Index the sample corpus, run one unfiltered top-5 search, then drop the collection. */
  async function main() {
    const client = new VectorAIClient(SERVER);
    try {
      // Best-effort reset: any error from this delete is swallowed
      // (presumably to cover the case where the collection does not exist yet).
      await client.collections.delete(COLLECTION).catch(() => {});
      const collectionConfig = { dimension: DIM, distanceMetric: 'COSINE' };
      await client.collections.create(COLLECTION, collectionConfig);

      // Index the metadata fields; the search below does not filter on them.
      const indexedFields = [
        ['topic', 'KEYWORD'],
        ['year', 'INTEGER'],
      ];
      for (const [fieldName, fieldType] of indexedFields) {
        await client.points.createFieldIndex(COLLECTION, fieldName, { fieldType });
      }

      // One point per document: id, placeholder embedding, metadata payload.
      const points = DOCUMENTS.map(({ id, text, topic, year }) => ({
        id,
        vector: fakeEmbed(text),
        payload: { text, topic, year },
      }));
      await client.points.upsert(COLLECTION, points, { wait: true });
      console.log(`Indexed ${DOCUMENTS.length} documents`);

      // Unfiltered similarity search: every document is a candidate.
      console.log("\n=== Semantic: 'how do vector databases work?' ===");
      const queryVec = fakeEmbed('how do vector databases work?');
      const searchOptions = { limit: 5, withPayload: true };
      const hits = await client.points.search(COLLECTION, queryVec, searchOptions);
      for (const hit of hits) {
        const formattedScore = hit.score.toFixed(4);
        console.log(`  score=${formattedScore} | ${hit.payload.text}`);
      }

      // Remove the demo collection so repeated runs leave nothing behind.
      await client.collections.delete(COLLECTION);
      console.log('\nCleaned up');
    } finally {
      // Close the client whether or not the steps above succeeded.
      client.close();
    }
  }

  // Start the demo; a rejection from main() is logged with console.error.
  main().catch(console.error);
  ```
</CodeGroup>

<Tip>
  The placeholder embedding functions in this example (`fake_embed` in Python, `fakeEmbed` in JavaScript) produce pseudo-random vectors that carry no semantic meaning, so the ranking they return is arbitrary. In production, replace them with a real embedding model such as OpenAI, Cohere, or an open-source model like Sentence Transformers. Use the same model for both indexing and querying.
</Tip>
