<?php

use boru\openai\embeddings\distance\Cosine;
use boru\output\Output;
use boru\openai\embeddings\Document;
use boru\openai\embeddings\vectorstore\FileVectorStore;
use boru\openai\embeddings\vectorstore\MemoryVectorStore;
use boru\openai\embeddings\vectorstore\RedisVectorStore;
use boru\openai\embeddings\vectorstore\VectorStore;
use boru\openai\models\Embedding;
use boru\openai\OpenAI;

// Pull in the project bootstrap before any library class is touched.
require __DIR__."/init.php";

// This is a command-line tool; bail out when invoked through any other SAPI.
if(php_sapi_name() !== "cli") {
    Output::outLine("This script is intended to be run from the command line");
    exit(1);
}

// Discard the script name so that $argv holds only the words typed by the user.
array_shift($argv);
if($argv === []) {
    Output::outLine("Usage: php VectorSearch.php <query>");
    exit(1);
}

// All remaining arguments form a single space-separated query string.
$query = implode(" ",$argv);

// Locations of the persisted vector store and of the query-embedding cache.
$storePath = __DIR__."/codeAnalyzer/store.json";
$embedCacheFile = __DIR__."/codeAnalyzer/embed_cache.json";
$vectorStore = new FileVectorStore($storePath);

// Ask the store for its size once and reuse the value for both the guard and the report.
$documentCount = $vectorStore->countDocuments();
if($documentCount <= 0) {
    Output::outLine("please use php -f vector/generate.php to generate some test documents");
    // An empty store means the search cannot run, so signal failure to the
    // calling shell with a non-zero status, like the other early exits in this script.
    exit(1);
}

Output::outLine("Vector store has",$documentCount,"documents");

// Use an on-disk embed cache for queries so a repeated query does not have to be re-embedded.
// The cache is a JSON object keyed by the raw query string; its location is $embedCacheFile.
$embedCache = [];
if(is_file($embedCacheFile)) {
    $rawCache = file_get_contents($embedCacheFile);
    $decodedCache = $rawCache === false ? null : json_decode($rawCache,true);
    // An unreadable or corrupt file, or one that decodes to a scalar, is treated
    // as an empty cache: writing a key into a scalar would otherwise be a fatal error.
    if(is_array($decodedCache)) {
        $embedCache = $decodedCache;
    }
}

if(isset($embedCache[$query])) {
    // Cache hit: reuse the stored embedding for this exact query text.
    $embeddedQuery = $embedCache[$query];
} else {
    // Cache miss: embed the query and remember the result for next time.
    $embedding = new Embedding($query);
    $embeddedQuery = $embedding->embedding();
    $embedCache[$query] = $embeddedQuery;

    // Persisting the cache is best-effort: a failure is reported but does not
    // stop the search. Never write when encoding failed, since that would
    // replace the existing cache file with an empty one.
    $encodedCache = json_encode($embedCache);
    if($encodedCache === false || file_put_contents($embedCacheFile,$encodedCache) === false) {
        Output::outLine("Warning: could not write embed cache to",$embedCacheFile);
    }
}
// Select the Cosine distance implementation, then query the store with the embedded query.
$vectorStore->distance(new Cosine());
$results = $vectorStore->similarity($embeddedQuery);

// Print one summary line per result: its key followed by the first line of its content.
foreach($results as $position=>$match) {
    // Only the text before the first newline is needed, so split at most once.
    [$headline] = explode("\n",$match->content(),2);
    echo "Result {$position}: {$headline}\n";
}
