<?php
namespace boru\openai\tiktoken\vocab;

use Countable;
use InvalidArgumentException;
use OutOfBoundsException;
use RuntimeException;
use boru\openai\tiktoken\util\EncodeUtil;

/**
 * Two-way lookup table between tokens (raw byte strings) and their integer ranks.
 *
 * Instances are built through {@see Vocab::fromFile()} or {@see Vocab::fromStream()};
 * the constructor is private.
 *
 * @psalm-import-type NonEmptyByteVector from EncodeUtil
 */
class Vocab implements Countable {
    /** @var array<string, int> Token bytes => rank. */
    private $tokenToRankMap;

    /** @var array<int, string> Rank => token bytes. */
    private $rankToTokenMap;

    /**
     * Stores the token => rank map and derives the reverse rank => token map.
     *
     * @param array<string, int> $tokenRankMap
     *
     * @throws InvalidArgumentException When two tokens share the same rank.
     */
    private function __construct(array $tokenRankMap)
    {
        $this->tokenToRankMap = $tokenRankMap;
        // PHP casts numeric-string array keys (e.g. the token "7") to integers, so
        // after flipping those tokens come back as ints; strval() restores strings.
        /** @psalm-suppress PropertyTypeCoercion */
        $this->rankToTokenMap = array_map('strval', array_flip($tokenRankMap));

        // array_flip() collapses equal values into one key, so a smaller reverse
        // map means at least one rank was assigned to more than one token.
        if (count($this->tokenToRankMap) !== count($this->rankToTokenMap)) {
            throw new InvalidArgumentException('The map of tokens and ranks has duplicates of rank');
        }
    }

    /**
     * Loads a vocabulary from a file on disk.
     *
     * The file is opened in binary mode, handed to {@see Vocab::fromStream()} and
     * always closed afterwards, even when parsing fails.
     *
     * @param string $bpeFile Path to the vocabulary file.
     * @return static
     *
     * @throws RuntimeException When the path does not exist, is a directory,
     *                          cannot be opened, or contains a malformed entry.
     * @throws InvalidArgumentException When two tokens share the same rank.
     */
    public static function fromFile(string $bpeFile)
    {
        if (! file_exists($bpeFile)) {
            throw new RuntimeException(sprintf('File "%s" does not exist', $bpeFile));
        }

        // file_exists() is also true for directories; reject them explicitly so a
        // directory path can never be parsed into an empty vocabulary.
        if (is_dir($bpeFile)) {
            throw new RuntimeException(sprintf('Could not open file: %s', $bpeFile));
        }

        $stream = fopen($bpeFile, 'rb');

        if ($stream === false) {
            throw new RuntimeException(sprintf('Could not open file: %s', $bpeFile));
        }

        try {
            return self::fromStream($stream);
        } finally {
            fclose($stream);
        }
    }

    /**
     * Parses a vocabulary from an open stream.
     *
     * Each non-blank line must have the form "<base64 token> <rank>", where the
     * rank consists of decimal digits only. Blank lines are skipped. A seekable
     * stream is rewound first; the stream is not closed by this method.
     *
     * @param resource $stream
     *
     * @return static
     *
     * @throws RuntimeException When a line is malformed, a token is not valid
     *                          base64, or a token decodes to an empty string.
     * @throws InvalidArgumentException When two tokens share the same rank.
     */
    public static function fromStream($stream)
    {
        $meta = stream_get_meta_data($stream);

        if ($meta['seekable']) {
            rewind($stream);
        }

        $map = [];
        $lineNo = 0;

        while (($line = fgets($stream)) !== false) {
            $lineNo++;
            $entry = trim($line);

            // Tolerate blank lines (typically a trailing newline at end of file).
            if ($entry === '') {
                continue;
            }

            $parts = explode(' ', $entry);

            // Without this check a missing or non-numeric rank would silently
            // be cast to 0 and corrupt the table.
            if (count($parts) < 2 || preg_match('/^\d+$/D', $parts[1]) !== 1) {
                throw new RuntimeException(sprintf(
                    'Malformed vocabulary entry at line %d: expected "<base64 token> <rank>"',
                    $lineNo
                ));
            }

            [$encodedToken, $rank] = $parts;
            $token = base64_decode($encodedToken, true);

            if ($token === false) {
                throw new RuntimeException(
                    sprintf('Could not decode token "%s" at line %d', $encodedToken, $lineNo)
                );
            }

            // Checked explicitly instead of via assert(), which is a no-op when
            // assertions are disabled.
            if ($token === '') {
                throw new RuntimeException(sprintf('Empty token at line %d', $lineNo));
            }

            $map[$token] = (int) $rank;
        }

        return new self($map);
    }

    /**
     * Returns the rank of the given token, or null when the token is unknown.
     *
     * @param string $binary Non-empty token bytes.
     * @return int|null
     *
     * @throws InvalidArgumentException When $binary is an empty string.
     */
    public function tryGetRank(string $binary)
    {
        if ($binary === '') {
            throw new InvalidArgumentException('Argument $binary cannot be an empty string');
        }

        return $this->tokenToRankMap[$binary] ?? null;
    }

    /**
     * Returns the rank of the given token.
     *
     * @param string $binary Non-empty token bytes.
     * @return int
     *
     * @throws InvalidArgumentException When $binary is an empty string.
     * @throws OutOfBoundsException When the token has no rank.
     */
    public function getRank(string $binary)
    {
        if ($binary === '') {
            throw new InvalidArgumentException('Argument $binary cannot be an empty string');
        }

        $rank = $this->tokenToRankMap[$binary] ?? null;

        if ($rank === null) {
            throw new OutOfBoundsException(sprintf(
                'No rank for bytes vector: [%s]',
                implode(', ', EncodeUtil::toBytes($binary))
            ));
        }

        return $rank;
    }

    /**
     * Returns the token bytes stored for the given rank.
     *
     * @param int $rank
     * @return string
     *
     * @throws OutOfBoundsException When no token has this rank.
     */
    public function getToken(int $rank)
    {
        if (! isset($this->rankToTokenMap[$rank])) {
            throw new OutOfBoundsException(sprintf('No token for rank: %d', $rank));
        }

        return $this->rankToTokenMap[$rank];
    }

    /**
     * Returns the number of tokens in the vocabulary.
     *
     * @return int
     */
    public function count(): int
    {
        return count($this->tokenToRankMap);
    }
}
