<?php
namespace boru\openai\tools;

use boru\openai\api\endpoints\Assistants;
use boru\openai\api\endpoints\Files;
use boru\openai\models\Assistant;
use boru\openai\models\Content;
use boru\openai\models\File;
use boru\openai\OpenAI;
use boru\openai\OpenAIConfig;

/**
 * OCR helper: sends an image to an assistant and returns the text it produces.
 *
 * The image may be referenced by an already-uploaded file id, by a URL, or by a
 * local path that is uploaded through File::upload(). run() retries the assistant
 * while its answer starts with "##ERROR##" and finally falls back to the OCR.space
 * HTTP API when an "ocrspace.api_key" configuration value is present.
 */
class OCR {
    /** @var string Image detail level passed with image-file content: "high" or "low". */
    private $detail = "high";
    /** @var string|null Id of a file that is already uploaded. */
    private $fileId;
    /** @var File|mixed|null File produced by File::upload() (or whatever non-string value was given to file()). */
    private $file;
    /** @var string|null URL of the image. */
    private $fileUrl;
    /** @var Assistant|null Private clone of the assistant used for the OCR request. */
    private $assistant;
    /** @var bool Whether the file held in $file is deleted after processing. */
    private $deleteFile = false;
    /** @var array Known prompt types and the value used when the assistant has none. */
    private $defaultPrompts = [
        "before" => "",
        "after" => "",
    ];
    /** @var int Maximum number of assistant attempts made by run(). */
    private $retryLimit = 3;
    /** @var float Seconds to wait between two assistant attempts. */
    private $retryDelay = 0.6;
    /** @var string|null Text produced by the last completed attempt or fallback. */
    private $result;
    /** @var mixed Cached output of Assistant::encode() for $result. */
    private $tokens;

    /** @var string|null Directory (with trailing slash) used for temporary conversion files. */
    private $outputDir;

    /** @var string[] Temporary files that cleanup() must remove. */
    private $cleanupFiles = [];

    /** @var array Assistants resolved through Assistant::fromInput(), keyed by the input given. */
    private static $assistantCache = [];

    /**
     * @param array $options Recognised keys: file, fileId, fileUrl, detail, deleteFile (alias: delete),
     *                       prompts (array of type => text), outputDir, retryLimit, retryDelay,
     *                       before, after, assistant (defaults to "ocr").
     * @throws \Exception when $options is not an array or an option value is rejected by its setter
     */
    public function __construct($options=[]) {
        if(!is_array($options)) {
            throw new \Exception("Options must be an array");
        }
        // The assistant has to exist before any prompt option is applied:
        // prompt() stores and reads prompts on $this->assistant.
        $this->assistant(isset($options["assistant"]) ? $options["assistant"] : "ocr");

        if(isset($options["file"])) {
            $this->file($options["file"]);
        }
        if(isset($options["fileId"])) {
            $this->fileId($options["fileId"]);
        }
        if(isset($options["fileUrl"])) {
            $this->fileUrl($options["fileUrl"]);
        }
        if(isset($options["detail"])) {
            $this->detail($options["detail"]);
        }
        if(isset($options["deleteFile"])) {
            $this->deleteFile($options["deleteFile"]);
        } elseif(isset($options["delete"])) {
            $this->deleteFile($options["delete"]);
        }
        if(isset($options["prompts"]) && is_array($options["prompts"])) {
            foreach($options["prompts"] as $type => $text) {
                $this->prompt($type,$text);
            }
        }
        if(isset($options["outputDir"])) {
            $this->outputDir($options["outputDir"]);
        }
        if(isset($options["retryLimit"])) {
            $this->retryLimit($options["retryLimit"]);
        }
        if(isset($options["retryDelay"])) {
            $this->retryDelay($options["retryDelay"]);
        }
        // Explicit before/after options are applied last, so they override the "prompts" array.
        if(isset($options["before"])) {
            $this->prompt("before",$options["before"]);
        }
        if(isset($options["after"])) {
            $this->prompt("after",$options["after"]);
        }
    }

    /**
     * Get (and optionally set) one prompt on the assistant.
     *
     * @param string $type one of the keys of $defaultPrompts ("before" or "after")
     * @param string|null $data new prompt text; falsy values leave the current prompt untouched
     * @return string the prompt now stored on the assistant
     * @throws \Exception when $type is not a known prompt type
     */
    public function prompt($type,$data=null) {
        if(!isset($this->defaultPrompts[$type])) {
            throw new \Exception("Prompt type not found");
        }
        if($data) {
            $this->assistant->setPrompt($type,$data);
        }
        $prompt = $this->assistant->getPrompt($type);
        if(!$prompt) {
            // Nothing stored yet: seed the assistant with the default for this type.
            $this->assistant->setPrompt($type,$this->defaultPrompts[$type]);
            $prompt = $this->assistant->getPrompt($type);
        }
        return $prompt;
    }

    /**
     * Set several prompts at once and return what the assistant reports via prompts().
     *
     * @param array|null $prompts map of prompt type => text
     * @return mixed the value of Assistant::prompts()
     * @throws \Exception when $prompts is truthy but not an array, or contains an unknown type
     */
    public function prompts($prompts=null) {
        if($prompts) {
            if(!is_array($prompts)) {
                throw new \Exception("Prompts must be an array");
            }
            foreach($prompts as $type => $text) {
                $this->prompt($type,$text);
            }
        }
        return $this->assistant->prompts();
    }

    /**
     * Get or set whether the file held by this instance is deleted after processing.
     *
     * @param mixed $deleteFile any non-null value is converted to a boolean
     * @return bool
     */
    public function deleteFile($deleteFile=null) {
        if($deleteFile!==null) {
            $this->deleteFile = $deleteFile ? true : false;
        }
        return $this->deleteFile;
    }

    /**
     * Get or set the image detail level.
     *
     * @param string|null $detail "high" selects high detail, any other non-null value selects "low"
     * @return $this|string this instance when setting, otherwise "high" or "low"
     */
    public function detail($detail=null) {
        if ($detail !== null) {
            $this->detail = $detail == "high" ? "high" : "low";
            return $this;
        }
        return $this->detail == "high" ? "high" : "low";
    }

    /** @return $this */
    public function highDetail() {
        return $this->detail("high");
    }

    /** @return $this */
    public function lowDetail() {
        return $this->detail("low");
    }

    /**
     * Get or set the file to OCR.
     *
     * A File instance only contributes its id (stored through fileId()); a string is
     * handed to File::upload() with the purpose "vision"; any other value is stored as given.
     *
     * @param File|string|mixed|null $file
     * @return File|mixed|null the stored file, which stays null when a File instance was given
     */
    public function file($file=null) {
        if($file!==null) {
            if($file instanceof File) {
                $this->fileId($file->id());
            } elseif(is_string($file)) {
                $this->file = File::upload($file,"vision");
            } else {
                $this->file = $file;
            }
        }
        return $this->file;
    }

    /**
     * @param string|null $fileId
     * @return string|null
     */
    public function fileId($fileId=null) {
        if($fileId!==null) {
            $this->fileId = $fileId;
        }
        return $this->fileId;
    }

    /**
     * @param string|null $fileUrl
     * @return string|null
     */
    public function fileUrl($fileUrl=null) {
        if($fileUrl!==null) {
            $this->fileUrl = $fileUrl;
        }
        return $this->fileUrl;
    }

    /**
     * Get or set the assistant. The instance always works on its own clone, so
     * prompts and messages added here do not touch the object that was passed in
     * or the cached one.
     *
     * @param Assistant|string|null $assistant an Assistant, or an input for Assistant::fromInput()
     * @return Assistant|null
     */
    public function assistant($assistant=null) {
        if($assistant!==null) {
            if($assistant instanceof Assistant) {
                $this->assistant = clone $assistant;
            } else {
                if(!isset(self::$assistantCache[$assistant])) {
                    self::$assistantCache[$assistant] = Assistant::fromInput($assistant);
                }
                $this->assistant = clone self::$assistantCache[$assistant];
            }
        }
        return $this->assistant;
    }

    /**
     * @param int|string|null $retryLimit
     * @return int
     * @throws \Exception when the value is not numeric
     */
    public function retryLimit($retryLimit=null) {
        if($retryLimit!==null) {
            if(!is_numeric($retryLimit)) {
                throw new \Exception("Retry limit must be a number");
            }
            $this->retryLimit = (int) $retryLimit;
        }
        return $this->retryLimit;
    }

    /**
     * @param float|int|string|null $retryDelay delay in seconds
     * @return float
     * @throws \Exception when the value is not numeric
     */
    public function retryDelay($retryDelay=null) {
        if($retryDelay!==null) {
            if(!is_numeric($retryDelay)) {
                throw new \Exception("Retry delay must be a number");
            }
            $this->retryDelay = (float) $retryDelay;
        }
        if(!is_numeric($this->retryDelay)) {
            $this->retryDelay = 0.2;
        }
        return $this->retryDelay;
    }

    /**
     * Get or set the directory used for temporary files. When none was set, the
     * "outputdir" configuration value is used. The returned path always ends with "/".
     *
     * @param string|null $outputDir
     * @return string
     */
    public function outputDir($outputDir=null) {
        if($outputDir!==null) {
            $this->outputDir = $outputDir;
        }
        if(!$this->outputDir) {
            $this->outputDir = OpenAIConfig::get("outputdir", __DIR__.'/../../output/');
        }
        if(substr($this->outputDir,-1) != "/") {
            $this->outputDir .= "/";
        }
        return $this->outputDir;
    }

    /**
     * Remove every registered temporary file and forget the whole list, including
     * entries whose file no longer exists.
     */
    public function cleanup() {
        foreach($this->cleanupFiles as $path) {
            if(file_exists($path)) {
                unlink($path);
            }
        }
        $this->cleanupFiles = [];
    }

    /**
     * Build the Content objects for one request: optional "before" prompt, the
     * image with the OCR instruction, optional "after" prompt.
     *
     * The image source is picked in this order: fileId, fileUrl, uploaded file.
     *
     * @return Content[]
     * @throws \Exception when no image source is set
     */
    public function makeMessages() {
        $messages = [];

        $before = $this->prompt("before");
        if(!empty($before)) {
            $content = new Content();
            $content->addText($before);
            $messages[] = $content;
        }

        $content = new Content();
        $content->addText("OCR This Image.");
        if($this->fileId()) {
            $content->addImageFile($this->fileId(),$this->detail());
        } elseif($this->fileUrl()) {
            $content->addImageUrl($this->fileUrl());
        } elseif($this->file()) {
            $content->addImageFile($this->file->id(),$this->detail());
        } else {
            throw new \Exception("fileId, fileUrl must be provided.. or a file to upload must be provided");
        }
        $messages[] = $content;

        $after = $this->prompt("after");
        if(!empty($after)) {
            $content = new Content();
            $content->addText($after);
            $messages[] = $content;
        }
        return $messages;
    }

    /**
     * Run the OCR: up to retryLimit() assistant attempts (at least one), waiting
     * retryDelay() seconds between attempts, then the OCR.space fallback when every
     * attempt answered with "##ERROR##".
     *
     * When deleteFile() is enabled the uploaded file is deleted only once, after the
     * successful attempt or after the fallback, so retries and the fallback can
     * still read it.
     *
     * @param Assistant|string|null $assistant optional assistant to use instead of the current one
     * @return string the recognised text, or "" when the fallback produced nothing
     */
    public function run($assistant=null) {
        OpenAI::printDebug("OCR","Running OCR");
        if($assistant) {
            $this->assistant($assistant);
        }
        $limit = max(1, $this->retryLimit());
        // usleep() needs a non-negative integer number of microseconds.
        $delayMicros = (int) round(max(0, $this->retryDelay()) * 1000000);

        for($attempt = 1; $attempt <= $limit; $attempt++) {
            $result = $this->attempt(false);
            if(!self::isErrorResult($result)) {
                $this->deleteUploadedFile();
                return $result;
            }
            // Only wait when another assistant attempt follows.
            if($attempt < $limit && $delayMicros > 0) {
                usleep($delayMicros);
            }
        }

        try {
            $result = $this->runFallback();
        } finally {
            $this->deleteUploadedFile();
        }
        return $result;
    }

    /**
     * Add the image reference for the OCR.space request to $parameters: "url" for a
     * file URL, otherwise "base64Image" with the file content converted to JPEG.
     *
     * @param array $parameters
     * @return array
     */
    private function makeOcrSpaceParameters($parameters=[]) {
        // Same precedence as makeMessages(): fileId, then fileUrl, then uploaded file.
        $id = null;
        if($this->fileId()) {
            $id = $this->fileId();
        } elseif($this->fileUrl()) {
            $parameters['url'] = $this->fileUrl();
        } elseif($this->file()) {
            $id = $this->file->id();
        }
        if($id !== null) {
            $content = Files::content($id);
            $parameters['base64Image'] = "data:image/jpeg;base64,".$this->convertToJpg($content,$id);
        }
        return $parameters;
    }

    /**
     * Convert raw image bytes to a JPEG no larger than "ocrspace.max_file_size_megs"
     * (default 1) megabytes, lowering the compression quality in steps of 5.
     *
     * Two temporary files are written to outputDir(); both are registered for
     * cleanup() before any work is done so they are removed even when conversion fails.
     *
     * @param string $content raw image bytes
     * @param string $fileId used as the base name of the temporary files
     * @return string base64-encoded JPEG
     * @throws \Exception when a temporary file cannot be written or read, or the image cannot be shrunk enough
     */
    private function convertToJpg($content,$fileId) {
        $maxFileSizeMegs = OpenAIConfig::get("ocrspace.max_file_size_megs", 1);
        $maxFileSize = $maxFileSizeMegs * 1024 * 1024;
        $inputFile = $this->outputDir().$fileId.'.webp';
        $outFile = $this->outputDir().$fileId.'.jpg';
        $this->cleanupFiles[] = $inputFile;
        $this->cleanupFiles[] = $outFile;

        if(file_put_contents($inputFile, $content) === false) {
            throw new \Exception("Unable to write temporary file: ".$inputFile);
        }

        $image = new \Imagick();
        try {
            $image->setResolution(300,300);
            $image->readimage($inputFile);
            $image->setImageFormat('JPEG');
            $image->setImageAlphaChannel(\imagick::ALPHACHANNEL_ACTIVATE);
            $image->setBackgroundColor(new \ImagickPixel('#ffffff'));
            $image->writeImage($outFile);

            $quality = 95;
            while(strlen($image->getImageBlob()) > $maxFileSize) {
                $quality -= 5;
                if($quality < 5) {
                    throw new \Exception("File size too large");
                }
                $image->setImageCompressionQuality($quality);
                $image->writeImage($outFile);
            }
        } finally {
            // Release the image resources whether or not conversion succeeded.
            $image->clear();
        }

        $imageContents = file_get_contents($outFile);
        if($imageContents === false) {
            throw new \Exception("Unable to read temporary file: ".$outFile);
        }
        return base64_encode($imageContents);
    }

    /**
     * OCR the image through the OCR.space API.
     *
     * Configuration read: "ocrspace.api_key" (required), "ocrspace.url",
     * "ocrspace.engine". Temporary files created for the request are always removed.
     *
     * @return string the parsed text, or "" when no API key is configured or the
     *                response holds no ParsedResults[0].ParsedText
     */
    public function runFallback(){
        $ocrSpaceApiKey = OpenAIConfig::get("ocrspace.api_key", false);
        if(empty($ocrSpaceApiKey)) {
            // API key not found
            return "";
        }
        $ocrUrl = OpenAIConfig::get("ocrspace.url", "https://api.ocr.space/parse/image");
        $ocrEngine = OpenAIConfig::get("ocrspace.engine", 2);

        try {
            $postFields = $this->makeOcrSpaceParameters([
                'apikey' => $ocrSpaceApiKey,
                'OCREngine' => $ocrEngine,
            ]);
            $curlHandle = curl_init();
            curl_setopt($curlHandle, CURLOPT_URL, $ocrUrl);
            curl_setopt($curlHandle, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($curlHandle, CURLOPT_POST, true);
            curl_setopt($curlHandle, CURLOPT_POSTFIELDS, $postFields);
            $returnedData = curl_exec($curlHandle);
            curl_close($curlHandle);
        } finally {
            // The temporary JPEG is only needed while the request is built and sent.
            $this->cleanup();
        }

        // curl_exec() returns false on transport errors.
        $response = is_string($returnedData) ? json_decode($returnedData,true) : null;
        if(!is_array($response) || !isset($response['ParsedResults'][0]['ParsedText'])) {
            return "";
        }
        $this->setResult($response['ParsedResults'][0]['ParsedText']);
        return $this->result;
    }

    /**
     * Perform a single assistant attempt. When deleteFile() is enabled the uploaded
     * file is deleted right after the attempt, whatever the outcome.
     *
     * @return string the parsed assistant output
     */
    public function runOnce() {
        return $this->attempt(true);
    }

    /**
     * Send the messages to the assistant once and store the parsed output.
     *
     * @param bool $deleteAfter delete the uploaded file (when configured) after this attempt
     * @return string
     */
    private function attempt($deleteAfter) {
        $assistant = $this->assistant();
        foreach($this->makeMessages() as $message) {
            $assistant->addMessage("user",$message);
        }

        $result = self::parseResult($assistant->output("\n"));

        if($deleteAfter) {
            $this->deleteUploadedFile();
        }
        $this->setResult($result);
        OpenAI::printDebug("OCR","Completed");
        return $result;
    }

    /** Delete the file held in $file when deleteFile() is enabled and a file is present. */
    private function deleteUploadedFile() {
        if($this->deleteFile() && $this->file()) {
            OpenAI::printDebug("OCR","Deleting file: ".$this->file()->id());
            $this->file()->delete();
        }
    }

    /** Store a new result and drop the token cache that belonged to the previous one. */
    private function setResult($result) {
        $this->result = $result;
        $this->tokens = null;
    }

    /** Whether an assistant answer starts with the "##ERROR##" marker. */
    private static function isErrorResult($result) {
        return is_string($result) && strpos($result, "##ERROR##") === 0;
    }

    /**
     * Tokens of the last result as returned by Assistant::encode(); cached until the
     * result changes.
     *
     * @return mixed
     * @throws \Exception when there is no result yet or encoding yields nothing
     */
    public function tokens() {
        if(!$this->result) {
            throw new \Exception("No result to get tokens from");
        }
        if(!$this->tokens) {
            $this->tokens = $this->assistant->encode($this->result);
        }
        if(!$this->tokens) {
            throw new \Exception("No tokens found");
        }
        return $this->tokens;
    }

    /** @return int */
    public function tokenCount() {
        return count($this->tokens());
    }

    /**
     * Quick OCR of a file.. WILL DELETE the file from OpenAI after processing. If you want to keep the file, you can set the deleteFile option to false.
     * @param string $fileName
     * @param array $options an array of options to pass to the OCR constructor. Options include:
     *     - before (string) [default: ""] - text to add before the OCR prompt
     *     - after (string) [default: ""] - text to add after the OCR prompt
     *     - detail (high/low) [default: high]
     *     - deleteFile (true/false) [default: true]
     *     - assistant (Assistant object or string name of assistant) [default: ocr]
     * @return string
     */
    public static function ocrFile($fileName,$options=[]) {
        $ocr = new OCR($options);
        $ocr->file($fileName);
        if(!isset($options["deleteFile"]) && !isset($options["delete"])) {
            $ocr->deleteFile(true);
        }
        return $ocr->run();
    }

    /**
     * Quick OCR of a file that is already uploaded to OpenAI. The file is not deleted: deletion is only
     * performed for a file uploaded through file(), so the deleteFile option has no effect when only an id is given.
     * @param string $fileId
     * @param array $options an array of options to pass to the OCR constructor. Options include:
     *     - before (string) [default: ""] - text to add before the OCR prompt
     *     - after (string) [default: ""] - text to add after the OCR prompt
     *     - detail (high/low) [default: high]
     *     - assistant (Assistant object or string name of assistant) [default: ocr]
     * @return string
     */
    public static function ocrFileId($fileId,$options=[]) {
        $ocr = new OCR($options);
        $ocr->fileId($fileId);
        return $ocr->run();
    }

    /**
     * Quick OCR of a file using a publicly accessible URL. No file will be uploaded or stored on OpenAI. The deleteFile option has no effect.
     * @param string $fileUrl
     * @param array $options an array of options to pass to the OCR constructor. Options include:
     *      - before (string) [default: ""] - text to add before the OCR prompt
     *      - after (string) [default: ""] - text to add after the OCR prompt
     *      - detail (high/low) [default: high]
     *      - assistant (Assistant object or string name of assistant) [default: ocr]
     * @return string
     */
    public static function ocrFileUrl($fileUrl,$options=[]) {
        $ocr = new OCR($options);
        $ocr->fileUrl($fileUrl);
        return $ocr->run();
    }

    /**
     * Strip the wrapping the assistant may put around the text: a leading "```"
     * fence with an optional "plaintext", "markdown" or "json" tag, a trailing
     * "```", and a leading "##SUCCESS##" marker.
     *
     * @param string $result raw assistant output
     * @return string
     */
    public static function parseResult($result) {
        if(substr($result,0,3) == "```") {
            $result = substr($result,3);
            // Each tag is checked in turn against whatever is left after the previous one.
            foreach(["plaintext","markdown","json"] as $tag) {
                $tagLength = strlen($tag);
                if(substr($result,0,$tagLength) == $tag) {
                    $result = substr($result,$tagLength);
                }
            }
            if(substr($result,-3) == "```") {
                $result = substr($result,0,-3);
            }
        }
        if(substr($result,0,11) == "##SUCCESS##") {
            $result = substr($result,11);
        }
        return $result;
    }

}
