<?php
namespace boru\boruai\OCR;


use boru\boruai\OCR\Methods\Tesseract;
use boru\boruai\OCR\Methods\AIAgent;
use boru\boruai\OCR\Agents\CompareAgent;

/**
 * Dispatches a file to an extraction routine chosen by its extension.
 *
 * PDFs and images go through a two-engine pipeline (Tesseract plus an AI
 * agent, reconciled by a CompareAgent); spreadsheets are converted to a CSV
 * string; CSV files are returned verbatim.
 */
class OCR {

    /**
     * Maps a lower-case file extension to the name of the public method
     * that handles it.
     */
    public static $extensionToMethod = [
        'pdf' => 'runPdf',
        "jpg" => 'runImage',
        "jpeg" => 'runImage',
        "png" => 'runImage',
        "bmp" => 'runImage',
        'xls' => 'runExcell',
        'xlsx' => 'runExcell',
        'csv' => 'runCsv',
    ];

    /** Handler used when the extension is not present in $extensionToMethod. */
    public static $defaultMethod = 'runPdf';

    /** AIAgent instance used for the first OCR pass. */
    private $firstPass = null;
    /** Tesseract instance, either restored from the sidecar file or freshly built. */
    private $tesseract = null;

    /** Reference value handed to the agents when they run. */
    private $reference = null;
    /** Path of the input file (any supported type, not only PDFs). */
    private $pdfFilePath = null;

    /**
     * Forwards destruction to the Tesseract helper.
     *
     * NOTE(review): PHP will invoke the helper's destructor again when the
     * object itself is released, so Tesseract::__destruct() presumably has to
     * tolerate being called twice - confirm against that class.
     */
    public function __destruct() {
        if($this->tesseract) {
            $this->tesseract->__destruct();
        }
    }

    /**
     * Path of the sidecar file used to persist the Tesseract state.
     *
     * @return string Input file path with ".tesseract.json" appended.
     */
    public function tFile() {
        return $this->pdfFilePath . ".tesseract.json";
    }

    /**
     * @param string $pdfPath        Path to the input file; must exist.
     * @param mixed  $reference      Reference value passed on to the agents.
     * @param bool   $forceTesseract When true, ignore an existing sidecar file
     *                               and build a fresh Tesseract instance.
     * @throws \Exception When $pdfPath is not an existing regular file.
     */
    public function __construct($pdfPath,$reference="",$forceTesseract=false) {
        // Validate first so a failed construction leaves no state behind.
        if(!is_file($pdfPath)) {
            throw new \Exception("PDF file does not exist: ".$pdfPath);
        }
        $this->reference = $reference;
        $this->pdfFilePath = $pdfPath;

        // Reuse the persisted Tesseract state unless the caller forces a new
        // run. The instance chosen here must not be reassigned afterwards,
        // otherwise the sidecar file and $forceTesseract have no effect.
        if(!$forceTesseract && file_exists($this->tFile())) {
            $this->tesseract = Tesseract::load($this->tFile());
        } else {
            $this->tesseract = new Tesseract($pdfPath);
        }
        $this->firstPass = new AIAgent($pdfPath, $reference);
    }

    /**
     * Resolve the handler method name for a file extension.
     *
     * @param string $ext File extension without the dot, in any case.
     * @return string Method name from $extensionToMethod, or $defaultMethod.
     */
    private function methodForExtension($ext) {
        $ext = strtolower($ext);
        if(isset(self::$extensionToMethod[$ext])) {
            return self::$extensionToMethod[$ext];
        }
        return self::$defaultMethod;
    }

    /**
     * Run the handler that matches the input file's extension.
     *
     * @param mixed $reference Optional replacement for the stored reference.
     * @return mixed Whatever the selected handler returns.
     */
    public function run($reference = null) {
        if($reference !== null) {
            $this->reference = $reference;
        }
        // Decide the file type by extension; methodForExtension() normalises case.
        $ext = pathinfo($this->pdfFilePath, PATHINFO_EXTENSION);
        $method = $this->methodForExtension($ext);
        return $this->$method($this->pdfFilePath, $reference);
    }

    /**
     * Run both OCR engines and let a CompareAgent reconcile their output.
     *
     * Operates on the instances built in the constructor, so $filePath is
     * accepted only for signature parity with the other handlers.
     *
     * @param string $filePath  Unused.
     * @param mixed  $reference Optional replacement for the stored reference.
     * @return mixed Result of CompareAgent::run().
     */
    public function runPdf($filePath, $reference = null) {
        if($reference !== null) {
            $this->reference = $reference;
        }
        $tesseractOcr = Tesseract::frame($this->tesseract->ocr());
        // Persist the Tesseract state so a later instance can load it from the sidecar.
        $this->tesseract->save($this->tFile());
        $firstPassOcr = $this->firstPass->run($this->reference);

        $compare = new CompareAgent($firstPassOcr, $tesseractOcr);
        return $compare->run($this->reference);
    }

    /**
     * Convert a spreadsheet file to a CSV string.
     *
     * @param string $filePath  Path of the spreadsheet to convert.
     * @param mixed  $reference Optional replacement for the stored reference.
     * @return mixed Result of Excell::convertSpreadsheetToCsvString().
     */
    public function runExcell($filePath, $reference = null) {
        if($reference !== null) {
            $this->reference = $reference;
        }
        // Convert the spreadsheet to a CSV string and return that.
        $excell = new Methods\Excell();
        return $excell->convertSpreadsheetToCsvString($filePath);
    }

    /**
     * Return the contents of a CSV file unchanged.
     *
     * @param string $filePath  Path of the CSV file to read.
     * @param mixed  $reference Optional replacement for the stored reference.
     * @return string|false File contents, or false if the read fails.
     */
    public function runCsv($filePath, $reference = null) {
        if($reference !== null) {
            $this->reference = $reference;
        }
        // The file is already CSV, so just read it.
        return file_get_contents($filePath);
    }
}