<?php

namespace boru\ocr\Source;

/**
 * Produces a CSV file on disk for a given spreadsheet file.
 *
 * CSV inputs are passed through untouched (the original path is returned);
 * XLS / XLSX inputs are converted with ExcelCsvConverter and the result is
 * written to a new file inside the configured temp directory.
 *
 * This is intentionally non-AI and deterministic.
 */
class ExcelTextExtractor
{
    /** @var string Directory that receives generated CSV files (no trailing slash). */
    protected $tmpDir;

    /**
     * @param string $tmpDir Existing directory in which generated CSV files are created.
     * @throws \Exception If $tmpDir (with trailing slashes removed) is not a directory.
     */
    public function __construct($tmpDir)
    {
        $this->tmpDir = rtrim($tmpDir, '/');
        if (!is_dir($this->tmpDir)) {
            throw new \Exception("Temp dir not found: " . $this->tmpDir);
        }
    }

    /**
     * Returns the path of a CSV file holding the contents of $filePath.
     *
     * The format is decided purely by the (case-insensitive) file extension.
     *
     * @param string $filePath
     * @return string path to generated CSV file ($filePath itself for CSV input)
     * @throws \Exception If the extension is not csv/xls/xlsx, or the CSV file cannot be written.
     */
    public function extractToCsvFile($filePath)
    {
        $ext = strtolower(pathinfo($filePath, PATHINFO_EXTENSION));

        // Already CSV: nothing to convert, hand the caller's path straight back.
        if ($ext === 'csv') {
            return $filePath;
        }

        if (!in_array($ext, ['xls', 'xlsx'], true)) {
            throw new \Exception("Unsupported spreadsheet format: $ext");
        }

        $converter = new \boru\ocr\Source\Spreadsheet\ExcelCsvConverter();
        $csvString = $converter->convertSpreadsheetToCsvString($filePath);

        // The timestamp is mixed into the hash so repeated conversions of the
        // same source file land in different output files.
        $csvPath = $this->tmpDir . '/spreadsheet_' . md5($filePath . microtime(true)) . '.csv';

        // file_put_contents() signals failure by returning false; without this
        // check the caller would receive a path to a file that was never written.
        if (file_put_contents($csvPath, $csvString) === false) {
            throw new \Exception("Failed to write CSV file: " . $csvPath);
        }

        return $csvPath;
    }
}
