<?php
namespace boru\boruai\OCR;


use boru\boruai\OCR\Methods\Tesseract;
use boru\boruai\OCR\Methods\AIAgent;
use boru\boruai\OCR\Agents\CompareAgent;

/**
 * Extracts text from a file by dispatching on its extension.
 *
 * PDFs and images go through Tesseract and/or the AI agent (optionally merged
 * by CompareAgent), spreadsheets are converted to a CSV string, and CSV files
 * are returned verbatim. Which handler runs is decided by
 * {@see OCR::$extensionToMethod}.
 */
class OCR {

    /**
     * Lower-case file extension => handler.
     *
     * A handler is either the name of a method on this class or any PHP
     * callable; both are invoked as handler($filePath, $reference).
     *
     * @var array
     */
    public static $extensionToMethod = [
        'pdf' => 'runPdf',
        "jpg" => 'runImage',
        "jpeg" => 'runImage',
        "png" => 'runImage',
        "bmp" => 'runImage',
        'xls' => 'runExcell',
        'xlsx' => 'runExcell',
        'csv' => 'runCsv',
    ];

    /** @var string|callable Handler used when the extension is not in the map. */
    public static $defaultMethod = 'runPdf';

    /** @var AIAgent|null Created lazily by aiAgent(). */
    private $ocrAgent = null;

    /** @var Tesseract|null Created or loaded lazily by tesseract(). */
    private $tesseract = null;

    /** @var mixed Reference value forwarded to the agents' run() calls. */
    private $reference = null;

    /** @var string Path of the input file (not necessarily a PDF, despite the name). */
    private $pdfFilePath = null;

    /** @var bool When true, a cached Tesseract result on disk is ignored. */
    private $forceTesseract = false;

    /** @var array Options handed to Tesseract; caller overrides are merged on top. */
    private $tesseractOptions = [
        'tsvMode'           => true,
        'tilePixelThreshold'=> 12000000, // 12 MP
        'tileCols'          => 5,
        'tileRows'          => 5,
        'tileOverlap'       => 20,
        'opts'              => [
            '--psm' => '6',    // often nicer for tiles
        ],
    ];

    /** @var bool Merge the Tesseract and AI results through CompareAgent when both exist. */
    private $shouldCombine = true;

    /** @var bool Run the Tesseract pass. */
    private $useTesseract = true;

    /** @var bool Run the AI agent pass. */
    private $useAI = true;

    /**
     * Triggers the Tesseract helper's own cleanup when this object goes away.
     *
     * NOTE(review): invoking __destruct() by hand does not stop PHP from
     * calling it again when the Tesseract instance is finally released, so
     * that cleanup has to tolerate running twice — confirm.
     */
    public function __destruct() {
        if($this->tesseract) {
            $this->tesseract->__destruct();
        }
    }

    /**
     * Path of the cached Tesseract result that sits next to the input file.
     *
     * @return string
     */
    public function tFile() {
        return $this->pdfFilePath . ".tesseract.json";
    }

    /**
     * @param string     $pdfPath          Path of the file to process; must exist.
     * @param mixed      $reference        Reference forwarded to the agents.
     * @param array|bool $tesseractOptions Array: overrides merged over the defaults.
     *                                     Bool: sets the "force Tesseract" flag
     *                                     (ignore the on-disk cache) and keeps the
     *                                     default options. Anything else is ignored.
     * @throws \Exception When $pdfPath is not an existing file.
     */
    public function __construct($pdfPath,$reference="",$tesseractOptions=[]) {
        // Fail fast, before any state is set up.
        if(!is_file($pdfPath)) {
            throw new \Exception("PDF file does not exist: ".$pdfPath);
        }
        $this->reference = $reference;
        $this->pdfFilePath = $pdfPath;
        if(!is_array($tesseractOptions)) {
            if(is_bool($tesseractOptions)) {
                $this->forceTesseract = $tesseractOptions;
            }
            $tesseractOptions = [];
        }
        if(!empty($tesseractOptions)) {
            // array_merge() is shallow: passing an 'opts' key replaces the
            // default 'opts' array as a whole rather than merging into it.
            $this->tesseractOptions = array_merge($this->tesseractOptions, $tesseractOptions);
        }
    }

    /**
     * Lazily creates the AI agent for the input file.
     *
     * The agent is constructed once, with the reference that is current at
     * that moment, and then reused.
     *
     * @return AIAgent
     */
    public function aiAgent() {
        if($this->ocrAgent === null) {
            $this->ocrAgent = new AIAgent($this->pdfFilePath, $this->reference);
        }
        return $this->ocrAgent;
    }

    /**
     * Lazily creates the Tesseract helper.
     *
     * When a cached result file exists and the force flag is off, the helper
     * is restored from that file and the current options are re-applied;
     * otherwise a fresh helper is built for the input file.
     *
     * @return Tesseract
     */
    public function tesseract() {
        if($this->tesseract === null) {
            if(file_exists($this->tFile()) && !$this->forceTesseract) {
                $this->tesseract = Tesseract::load($this->tFile());
                $this->tesseract->setOptions($this->tesseractOptions);
            } else {
                $this->tesseract = new Tesseract($this->pdfFilePath, $this->tesseractOptions);
            }
        }
        return $this->tesseract;
    }

    /**
     * Forwards instructions to the AI agent (creating it if necessary).
     *
     * @param mixed $instructions
     * @return void
     */
    public function setInstructions($instructions) {
        $this->aiAgent()->instructions($instructions);
    }

    /**
     * Forwards a message to the AI agent (creating it if necessary).
     *
     * @param mixed $message
     * @return void
     */
    public function setMessage($message) {
        $this->aiAgent()->message($message);
    }

    /**
     * Gets, and optionally sets, whether both OCR results are merged.
     *
     * @param bool|null $combine New value, or null to only read the flag.
     * @return bool Current value.
     */
    public function shouldCombine($combine=null) {
        if($combine !== null) {
            $this->shouldCombine = $combine;
        }
        return $this->shouldCombine;
    }

    /**
     * Gets, and optionally sets, whether the Tesseract pass runs.
     *
     * @param bool|null $use New value, or null to only read the flag.
     * @return bool Current value.
     */
    public function useTesseract($use=null) {
        if($use !== null) {
            $this->useTesseract = $use;
        }
        return $this->useTesseract;
    }

    /**
     * Gets, and optionally sets, whether the AI agent pass runs.
     *
     * @param bool|null $use New value, or null to only read the flag.
     * @return bool Current value.
     */
    public function useAI($use=null) {
        if($use !== null) {
            $this->useAI = $use;
        }
        return $this->useAI;
    }

    /**
     * Looks up the handler registered for an extension (case-insensitive).
     *
     * @param string $ext Extension without the leading dot.
     * @return string|callable The registered handler, or the default one.
     */
    private function methodForExtension($ext) {
        $ext = strtolower($ext);
        if(isset(self::$extensionToMethod[$ext])) {
            return self::$extensionToMethod[$ext];
        }
        return self::$defaultMethod;
    }

    /**
     * Run the OCR process for the given reference.
     *
     * The handler is picked from the input file's extension. A handler that
     * names a method of this object is called on it; any other callable
     * (function name, closure, array callable) is called directly.
     *
     * @param mixed $reference Replaces the stored reference when not null.
     * @return mixed Whatever the handler returns (a string for the built-in ones).
     * @throws \Exception When the resolved handler is neither a method nor a callable.
     */
    public function run($reference = null) {
        if($reference !== null) {
            $this->reference = $reference;
        }
        //decide file type by extension
        $ext = strtolower(pathinfo($this->pdfFilePath, PATHINFO_EXTENSION));
        $method = $this->methodForExtension($ext);

        // method_exists() only accepts a string name; guarding with is_string()
        // lets closures and array callables reach the generic branch below
        // instead of raising a TypeError.
        if(is_string($method) && method_exists($this, $method)) {
            return call_user_func([$this, $method], $this->pdfFilePath, $reference);
        }
        if(is_callable($method)) {
            return call_user_func($method, $this->pdfFilePath, $reference);
        }
        throw new \Exception("No method found for file extension: ".$ext);
    }

    /**
     * Runs the Tesseract and/or AI passes and returns the best available text.
     *
     * Result priority: the CompareAgent merge when combining is enabled and
     * both passes produced text, otherwise the Tesseract text, otherwise the
     * AI text (an empty string when both passes are disabled).
     *
     * The Tesseract result is saved to the cache file after each run.
     *
     * @param string $filePath  Not used: both passes operate on the path given
     *                          to the constructor.
     * @param mixed  $reference Replaces the stored reference when not null.
     * @return string
     */
    public function runPdf($filePath, $reference = null) {
        $tesseractOcr = "";
        $firstPassOcr = "";
        if($reference !== null) {
            $this->reference = $reference;
        }
        if($this->useTesseract) {
            $tesseractOcr = $this->tesseract()->ocr();
            $this->tesseract()->save($this->tFile());
        }
        if($this->useAI) {
            $firstPassOcr = $this->aiAgent()->run($this->reference);
        }
        if($this->shouldCombine() && $tesseractOcr && $firstPassOcr) {
            $compare = new CompareAgent($firstPassOcr, $tesseractOcr);
            return $compare->run($this->reference);
        }
        if($tesseractOcr) {
            return $tesseractOcr;
        }
        return $firstPassOcr;
    }

    /**
     * Handler for the image extensions listed in $extensionToMethod.
     *
     * Delegates to the same pipeline as runPdf(), which is also what unmapped
     * extensions fall back to through $defaultMethod.
     *
     * NOTE(review): assumes Tesseract and AIAgent accept image files as input —
     * confirm against those classes.
     *
     * @param string $filePath  Not used, see runPdf().
     * @param mixed  $reference Replaces the stored reference when not null.
     * @return string
     */
    public function runImage($filePath, $reference = null) {
        return $this->runPdf($filePath, $reference);
    }

    /**
     * Converts a spreadsheet to a CSV string.
     *
     * @param string $filePath  Spreadsheet to convert.
     * @param mixed  $reference Replaces the stored reference when not null.
     * @return string
     */
    public function runExcell($filePath, $reference = null) {
        if($reference !== null) {
            $this->reference = $reference;
        }
        $excell = new Methods\Excell();
        return $excell->convertSpreadsheetToCsvString($filePath);
    }

    /**
     * Returns the contents of a CSV file unchanged.
     *
     * @param string $filePath  CSV file to read.
     * @param mixed  $reference Replaces the stored reference when not null.
     * @return string
     * @throws \Exception When the file cannot be read.
     */
    public function runCsv($filePath, $reference = null) {
        if($reference !== null) {
            $this->reference = $reference;
        }
        // Already CSV: just read it, but do not leak a boolean false to callers.
        $contents = file_get_contents($filePath);
        if($contents === false) {
            throw new \Exception("Unable to read CSV file: ".$filePath);
        }
        return $contents;
    }
}