<?php
namespace boru\openai\tools;

use boru\dhprocess\Threader;
use boru\openai\models\document\Document;
use boru\openai\models\document\Page;
use boru\output\Output;

/**
 * Drives the document pipeline stages (paging, OCR, OCR tokens, summarizing,
 * parsing) while holding a PID-based lock file so that only one processor per
 * lock name runs at a time.
 */
class DocProcessor {
    /** @var string Path of the PID lock file guarding this processor. */
    public $lockFile;
    /** @var string Stage name run() falls back to when it is called without one. */
    public $what = "all";
    /** @var int|false Worker count for the OCR stage; false forces sequential OCR. */
    public $threads = 5;

    /**
     * Builds the lock file path, acquires the lock and stores the lock name as
     * the default stage.
     *
     * @param string $lockName Suffix of the lock file name; also becomes the default stage.
     * @throws \Exception "Already Running" when a live process holds the lock.
     */
    public function __construct($lockName="all") {
        $this->lockFile = __DIR__."/../../tmp/".basename(__FILE__)."_".$lockName.".lock";
        $this->lock();
        $this->what = $lockName;
    }

    /**
     * Releases the lock when the object is destroyed.
     */
    public function __destruct() {
        $this->unlock();
    }

    /**
     * Combined getter/setter for the default stage name.
     *
     * @param string|null $what New default stage, or null to only read the current one.
     * @return string The default stage after the optional update.
     */
    public function what($what=null) {
        if($what !== null) {
            $this->what = $what;
        }
        return $this->what;
    }

    /**
     * Combined getter/setter for the OCR worker count.
     *
     * Numeric input is normalised to an integer; anything that is not numeric,
     * or does not yield a positive integer, is stored as false (sequential OCR).
     *
     * @param int|string|null $threads New worker count, or null to only read the current one.
     * @return int|false The worker count after the optional update.
     */
    public function threads($threads=null) {
        if($threads !== null) {
            // Cast before the range check so that fractional input such as 0.5
            // cannot slip through as a "positive" value that truncates to zero.
            $workers = is_numeric($threads) ? (int) $threads : 0;
            $this->threads = $workers > 0 ? $workers : false;
        }
        return $this->threads;
    }

    /**
     * Runs a single named stage, or every stage for "all".
     *
     * Accepted names: "all", "ocr", "tokens", "paging"/"page",
     * "summarizing"/"summarize"/"summary", "parsing"/"parse".
     * Any other name does nothing.
     *
     * @param string|null $what        Stage to run; null uses the stored default.
     * @param bool        $multiThread Currently unused; kept for backward compatibility.
     * @return mixed Whatever the selected stage method returns; null for unknown stages.
     */
    public function run($what=null,$multiThread=false) {
        if($what === null) {
            $what = $this->what;
        }
        // switch compares loosely, exactly like the == chain it replaces.
        switch($what) {
            case "all":
                return $this->all();
            case "ocr":
                return $this->ocr();
            case "tokens":
                return $this->ocrTokens();
            case "paging":
            case "page":
                return $this->paging();
            case "summarizing":
            case "summarize":
            case "summary":
                return $this->summarizing();
            case "parsing":
            case "parse":
                return $this->parsing();
        }
        return null;
    }

    /**
     * Runs every stage in order: paging, OCR, OCR tokens, summarizing, parsing.
     */
    public function all() {
        $this->paging();
        $this->ocr();
        $this->ocrTokens();
        $this->summarizing();
        $this->parsing();
    }

    /**
     * Calls upload() on every document returned by Document::getNeedsPaged().
     * A failure on one document is logged and does not stop the others.
     */
    public function paging() {
        Output::outLine("Starting Paging process");
        $docs = Document::getNeedsPaged();
        if(count($docs) == 0) {
            Output::outLine("No documents need paging");
            return;
        }
        foreach($docs as $doc) {
            try {
                $doc->upload();
                Output::outLine("- Document uploaded to OpenAI: ".$doc->id());
            } catch (\Exception $e) {
                Output::outLine("! Error converting/uploading document: ".$doc->id()."\t".$e->getMessage());
            }
        }
        Output::outLine("Finished Paging process");
    }

    /**
     * Calls ocrTokens() on every page returned by Page::getNeedsOCRTokens().
     * A failure on one page is logged and does not stop the others.
     */
    public function ocrTokens() {
        Output::outLine("Starting OCR Tokens process");
        $pages = Page::getNeedsOCRTokens();
        if(count($pages) == 0) {
            Output::outLine("No documents need OCR Tokens");
            return;
        }
        foreach($pages as $page) {
            try {
                $page->ocrTokens();
                Output::outLine("- Page OCR Tokens doc:",$page->docid(),"page:",$page->id());
            } catch (\Exception $e) {
                Output::outLine("! Error OCR/Tokenizing document: ".$page->id()."\t".$e->getMessage());
            }
        }
        Output::outLine("Finished OCR Tokens process");
    }

    /**
     * Calls ocr() on every page returned by Page::getNeedsOCR().
     *
     * Pages are processed one by one when threading is disabled
     * ($this->threads === false) or when there is at most one page; otherwise
     * the page ids are handed to Threader with $this->threads workers.
     */
    public function ocr() {
        Output::outLine("Starting OCR process");
        $pages = Page::getNeedsOCR();
        if(count($pages) == 0) {
            Output::outLine("No documents need OCR");
            return;
        }
        if($this->threads === false || count($pages) <= 1) {
            $this->ocrSequential($pages);
        } else {
            $this->ocrThreaded($pages);
        }
        Output::outLine("Finished OCR process");
    }

    /**
     * OCRs the given pages one after another, logging each success or failure.
     *
     * @param iterable $pages Page objects to OCR.
     */
    private function ocrSequential($pages) {
        foreach($pages as $page) {
            try {
                $page->ocr();
                Output::outLine("- Page OCR'd:",$page->docid(),"page:",$page->id());
            } catch (\Exception $e) {
                Output::outLine("! Error OCRing document: ".$page->id()."\t".$e->getMessage());
            }
        }
    }

    /**
     * OCRs the given pages through Threader and logs every worker result.
     *
     * Only page ids are passed to the workers; each worker rebuilds its Page
     * from the id and reports back [pageId, true] or [pageId, errorMessage].
     *
     * @param iterable $pages Page objects to OCR.
     */
    private function ocrThreaded($pages) {
        Threader::option("numWorkers",$this->threads);
        $pageIds = [];
        foreach($pages as $page) {
            $pageIds[] = $page->id();
        }
        Output::outLine("Starting OCR process with ".$this->threads." threads");
        $threader = Threader::execute($pageIds,function($page) {
            $page = new Page($page);
            try {
                $page->ocr();
                return [$page->id(),true];
            } catch (\Exception $e) {
                return [$page->id(),$e->getMessage()];
            }
        });
        foreach($threader->results() as $result) {
            if(!is_array($result) || count($result) != 2) {
                // Anything but a two-element array is not a result the worker produced.
                Output::outLine("! Error OCRing document\n",json_encode($result,JSON_PRETTY_PRINT));
                continue;
            }
            if($result[1] === true) {
                // The Page is only needed to report the document id on success.
                $page = new Page($result[0]);
                Output::outLine("- Page OCR'd:",$page->docid(),"page:",$page->id());
            } else {
                Output::outLine("! Error OCRing document: ".$result[0]."\t".$result[1]);
            }
        }
    }

    /**
     * Calls summarize() on every document returned by Document::getNeedsSummarized().
     * A failure on one document is logged and does not stop the others.
     */
    public function summarizing() {
        Output::outLine("Starting Summarizing process");
        $docs = Document::getNeedsSummarized();
        if(count($docs) == 0) {
            Output::outLine("No documents need summarizing");
            return;
        }
        foreach($docs as $doc) {
            try {
                $doc->summarize();
                Output::outLine("- Document summarized: ".$doc->id());
            } catch (\Exception $e) {
                Output::outLine("! Error summarizing document: ".$doc->id()."\t".$e->getMessage());
            }
        }
        Output::outLine("Finished Summarizing process");
    }

    /**
     * Calls parse() on every document returned by Document::getNeedsParsed().
     * A failure on one document is logged and does not stop the others.
     */
    public function parsing() {
        Output::outLine("Starting Parsing process");
        $docs = Document::getNeedsParsed();
        if(count($docs) == 0) {
            Output::outLine("No documents need parsing");
            return;
        }
        foreach($docs as $doc) {
            try {
                $doc->parse();
                Output::outLine("- Document parsed: ".$doc->id());
            } catch (\Exception $e) {
                Output::outLine("! Error parsing document: ".$doc->id()."\t".$e->getMessage());
            }
        }
        Output::outLine("Finished Parsing process");
    }

    /**
     * Acquires the lock (isLocked() writes the lock file as a side effect).
     *
     * @throws \Exception "Already Running" when a live process holds the lock.
     */
    public function lock() {
        if($this->isLocked()) {
            throw new \Exception("Already Running");
        }
    }

    /**
     * Removes the lock file, but only when it records the current process id.
     *
     * The ownership check keeps an instance that does not hold the lock (for
     * example a copy of this object living in another process) from deleting
     * the lock of the process that is still running.
     */
    public function unlock() {
        if(!file_exists($this->lockFile)) {
            return;
        }
        $ownerPID = trim((string) file_get_contents($this->lockFile));
        if($ownerPID === (string) getmypid()) {
            unlink($this->lockFile);
        }
    }

    /**
     * Reports whether a live process holds the lock, claiming it otherwise.
     *
     * If the lock file exists and the PID stored in it appears in the output
     * of `ps -e`, returns true. Otherwise a stale file is removed, a new lock
     * file containing the current PID is written and false is returned.
     *
     * NOTE: the existence check and the write are separate steps, so two
     * processes starting at the same instant can both see "not locked".
     *
     * @return bool True when another live process holds the lock, false when
     *              the lock has just been (re)created for this process.
     */
    public function isLocked(){
        if (file_exists($this->lockFile)) {
            // PID recorded by whoever created the lock file.
            $lockingPID = trim((string) file_get_contents($this->lockFile));

            // First column of `ps -e`: the ids of all active processes.
            $pids = explode("\n", trim((string) shell_exec('ps -e | awk \'{print $1}\'')));

            // Strict comparison: both sides are strings, and "0123" must not match "123".
            if (in_array($lockingPID, $pids, true)) return true;

            // Lock file is stale, so remove it and fall through to re-create it.
            echo "Removing stale lock file.\n";
            unlink($this->lockFile);
        }

        file_put_contents($this->lockFile, getmypid() . "\n");
        return false;
    }
}