vendor/twig/twig/src/Lexer.php line 121

Open in your IDE?
  <?php
  /*
   * This file is part of Twig.
   *
   * (c) Fabien Potencier
   * (c) Armin Ronacher
   *
   * For the full copyright and license information, please view the LICENSE
   * file that was distributed with this source code.
   */
  namespace Twig;
  use Twig\Error\SyntaxError;
  /**
   * Lexes a template string.
   *
   * The lexer is a small state machine: tokenize() loops until the cursor
   * reaches the end of the code and dispatches to one lex*() method per state
   * (data, block, variable, string, interpolation). Each lex*() method pushes
   * tokens and advances the cursor.
   *
   * @author Fabien Potencier <fabien@symfony.com>
   */
  class Lexer
  {
      private $isInitialized = false;

      private $tokens;
      private $code;
      private $cursor;
      private $lineno;
      private $end;
      private $state;
      private $states;
      private $brackets;
      private $env;
      private $source;
      private $options;
      private $regexes;
      private $position;
      private $positions;
      private $currentVarBlockLine;

      public const STATE_DATA = 0;
      public const STATE_BLOCK = 1;
      public const STATE_VAR = 2;
      public const STATE_STRING = 3;
      public const STATE_INTERPOLATION = 4;

      public const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
      public const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?([Ee][\+\-][0-9]+)?/A';
      public const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
      public const REGEX_DQ_STRING_DELIM = '/"/A';
      public const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
      public const PUNCTUATION = '()[]{}?:.,|';

      /**
       * @param Environment $env     Environment queried for the unary/binary operators
       * @param array       $options Overrides for the default delimiters and trim markers
       */
      public function __construct(Environment $env, array $options = [])
      {
          $this->env = $env;

          $this->options = array_merge([
              'tag_comment' => ['{#', '#}'],
              'tag_block' => ['{%', '%}'],
              'tag_variable' => ['{{', '}}'],
              'whitespace_trim' => '-',
              'whitespace_line_trim' => '~',
              'whitespace_line_chars' => ' \t\0\x0B',
              'interpolation' => ['#{', '}'],
          ], $options);
      }

      /**
       * Builds the delimiter regexes from the options, once per instance.
       *
       * Done lazily (from tokenize()) rather than in the constructor because
       * the operator regex is built from the environment's operators.
       */
      private function initialize()
      {
          if ($this->isInitialized) {
              return;
          }

          $this->isInitialized = true;

          // when PHP 7.3 is the min version, we will be able to remove the '#' part in preg_quote as it's part of the default
          $this->regexes = [
              // }}
              'lex_var' => '{
                  \s*
                  (?:'.
                      preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '#').'\s*'. // -}}\s*
                      '|'.
                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_variable'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~}}[ \t\0\x0B]*
                      '|'.
                      preg_quote($this->options['tag_variable'][1], '#'). // }}
                  ')
              }Ax',

              // %}
              'lex_block' => '{
                  \s*
                  (?:'.
                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*\n?'. // -%}\s*\n?
                      '|'.
                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
                      '|'.
                      preg_quote($this->options['tag_block'][1], '#').'\n?'. // %}\n?
                  ')
              }Ax',

              // {% endverbatim %}
              'lex_raw_data' => '{'.
                  preg_quote($this->options['tag_block'][0], '#'). // {%
                  '('.
                      $this->options['whitespace_trim']. // -
                      '|'.
                      $this->options['whitespace_line_trim']. // ~
                  ')?\s*endverbatim\s*'.
                  '(?:'.
                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}
                      '|'.
                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
                      '|'.
                      preg_quote($this->options['tag_block'][1], '#'). // %}
                  ')
              }sx',

              'operator' => $this->getOperatorRegex(),

              // #}
              'lex_comment' => '{
                  (?:'.
                      preg_quote($this->options['whitespace_trim'].$this->options['tag_comment'][1], '#').'\s*\n?'. // -#}\s*\n?
                      '|'.
                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_comment'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~#}[ \t\0\x0B]*
                      '|'.
                      preg_quote($this->options['tag_comment'][1], '#').'\n?'. // #}\n?
                  ')
              }sx',

              // verbatim %}
              'lex_block_raw' => '{
                  \s*verbatim\s*
                  (?:'.
                      preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'. // -%}\s*
                      '|'.
                      preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'. // ~%}[ \t\0\x0B]*
                      '|'.
                      preg_quote($this->options['tag_block'][1], '#'). // %}
                  ')
              }Asx',

              'lex_block_line' => '{\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '#').'}As',

              // {{ or {% or {#
              'lex_tokens_start' => '{
                  ('.
                      preg_quote($this->options['tag_variable'][0], '#'). // {{
                      '|'.
                      preg_quote($this->options['tag_block'][0], '#'). // {%
                      '|'.
                      preg_quote($this->options['tag_comment'][0], '#'). // {#
                  ')('.
                      preg_quote($this->options['whitespace_trim'], '#'). // -
                      '|'.
                      preg_quote($this->options['whitespace_line_trim'], '#'). // ~
                  ')?
              }sx',
              'interpolation_start' => '{'.preg_quote($this->options['interpolation'][0], '#').'\s*}A',
              'interpolation_end' => '{\s*'.preg_quote($this->options['interpolation'][1], '#').'}A',
          ];
      }

      /**
       * Turns the code of a template source into a token stream.
       *
       * @return TokenStream The tokens, terminated by an EOF token
       *
       * @throws SyntaxError When a bracket or string is still open at the end of the code,
       *                     or when one of the lex*() methods meets invalid input
       */
      public function tokenize(Source $source)
      {
          $this->initialize();

          $this->source = $source;
          // normalize line endings so that line counting only has to look for "\n"
          $this->code = str_replace(["\r\n", "\r"], "\n", $source->getCode());
          $this->cursor = 0;
          $this->lineno = 1;
          $this->end = \strlen($this->code);
          $this->tokens = [];
          $this->state = self::STATE_DATA;
          $this->states = [];
          $this->brackets = [];
          $this->position = -1;

          // find all token starts in one go
          preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, \PREG_OFFSET_CAPTURE);
          $this->positions = $matches;

          while ($this->cursor < $this->end) {
              // dispatch to the lexing functions depending
              // on the current state
              switch ($this->state) {
                  case self::STATE_DATA:
                      $this->lexData();
                      break;

                  case self::STATE_BLOCK:
                      $this->lexBlock();
                      break;

                  case self::STATE_VAR:
                      $this->lexVar();
                      break;

                  case self::STATE_STRING:
                      $this->lexString();
                      break;

                  case self::STATE_INTERPOLATION:
                      $this->lexInterpolation();
                      break;
              }
          }

          $this->pushToken(/* Token::EOF_TYPE */ -1);

          if (!empty($this->brackets)) {
              list($expect, $lineno) = array_pop($this->brackets);
              throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
          }

          return new TokenStream($this->tokens, $this->source);
      }

      /**
       * Lexes plain template text up to the next tag start, then enters that tag.
       */
      private function lexData()
      {
          // if no matches are left we return the rest of the template as simple text token
          if ($this->position == \count($this->positions[0]) - 1) {
              $this->pushToken(/* Token::TEXT_TYPE */ 0, substr($this->code, $this->cursor));
              $this->cursor = $this->end;

              return;
          }

          // Find the first token after the current cursor
          $position = $this->positions[0][++$this->position];
          while ($position[1] < $this->cursor) {
              if ($this->position == \count($this->positions[0]) - 1) {
                  return;
              }
              $position = $this->positions[0][++$this->position];
          }

          // push the template text first
          // $textContent keeps the untrimmed text: the cursor must advance over
          // everything that was consumed, not only over what ends up in the token
          $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);

          // trim?
          if (isset($this->positions[2][$this->position][0])) {
              if ($this->options['whitespace_trim'] === $this->positions[2][$this->position][0]) {
                  // whitespace_trim detected ({%-, {{- or {#-)
                  $text = rtrim($text);
              } elseif ($this->options['whitespace_line_trim'] === $this->positions[2][$this->position][0]) {
                  // whitespace_line_trim detected ({%~, {{~ or {#~)
                  // don't trim \r and \n
                  $text = rtrim($text, " \t\0\x0B");
              }
          }
          $this->pushToken(/* Token::TEXT_TYPE */ 0, $text);
          $this->moveCursor($textContent.$position[0]);

          switch ($this->positions[1][$this->position][0]) {
              case $this->options['tag_comment'][0]:
                  $this->lexComment();
                  break;

              case $this->options['tag_block'][0]:
                  // raw data?
                  if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
                      $this->moveCursor($match[0]);
                      $this->lexRawData();
                  // {% line \d+ %}
                  } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
                      $this->moveCursor($match[0]);
                      $this->lineno = (int) $match[1];
                  } else {
                      $this->pushToken(/* Token::BLOCK_START_TYPE */ 1);
                      $this->pushState(self::STATE_BLOCK);
                      $this->currentVarBlockLine = $this->lineno;
                  }
                  break;

              case $this->options['tag_variable'][0]:
                  $this->pushToken(/* Token::VAR_START_TYPE */ 2);
                  $this->pushState(self::STATE_VAR);
                  $this->currentVarBlockLine = $this->lineno;
                  break;
          }
      }

      /**
       * Lexes inside a block tag: closes it when the end delimiter is next
       * and no bracket is open, otherwise lexes one expression token.
       */
      private function lexBlock()
      {
          if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor)) {
              $this->pushToken(/* Token::BLOCK_END_TYPE */ 3);
              $this->moveCursor($match[0]);
              $this->popState();
          } else {
              $this->lexExpression();
          }
      }

      /**
       * Lexes inside a variable tag: closes it when the end delimiter is next
       * and no bracket is open, otherwise lexes one expression token.
       */
      private function lexVar()
      {
          if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor)) {
              $this->pushToken(/* Token::VAR_END_TYPE */ 4);
              $this->moveCursor($match[0]);
              $this->popState();
          } else {
              $this->lexExpression();
          }
      }

      /**
       * Lexes a single expression token at the cursor.
       *
       * @throws SyntaxError On an unclosed tag, an unbalanced bracket or an unlexable character
       */
      private function lexExpression()
      {
          // whitespace
          if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
              $this->moveCursor($match[0]);

              if ($this->cursor >= $this->end) {
                  throw new SyntaxError(sprintf('Unclosed "%s".', self::STATE_BLOCK === $this->state ? 'block' : 'variable'), $this->currentVarBlockLine, $this->source);
              }
          }

          // arrow function
          // the bounds check keeps the look-ahead from reading past the end of
          // the code when the template ends with a lone "="
          if ('=' === $this->code[$this->cursor] && $this->cursor + 1 < $this->end && '>' === $this->code[$this->cursor + 1]) {
              $this->pushToken(Token::ARROW_TYPE, '=>');
              $this->moveCursor('=>');
          }
          // operators
          elseif (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
              $this->pushToken(/* Token::OPERATOR_TYPE */ 8, preg_replace('/\s+/', ' ', $match[0]));
              $this->moveCursor($match[0]);
          }
          // names
          elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
              $this->pushToken(/* Token::NAME_TYPE */ 5, $match[0]);
              $this->moveCursor($match[0]);
          }
          // numbers
          elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
              $number = (float) $match[0];  // floats
              if (ctype_digit($match[0]) && $number <= \PHP_INT_MAX) {
                  $number = (int) $match[0]; // integers lower than the maximum
              }
              $this->pushToken(/* Token::NUMBER_TYPE */ 6, $number);
              $this->moveCursor($match[0]);
          }
          // punctuation
          elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
              // opening bracket
              if (false !== strpos('([{', $this->code[$this->cursor])) {
                  $this->brackets[] = [$this->code[$this->cursor], $this->lineno];
              }
              // closing bracket
              elseif (false !== strpos(')]}', $this->code[$this->cursor])) {
                  if (empty($this->brackets)) {
                      throw new SyntaxError(sprintf('Unexpected "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
                  }

                  list($expect, $lineno) = array_pop($this->brackets);
                  if ($this->code[$this->cursor] != strtr($expect, '([{', ')]}')) {
                      throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
                  }
              }

              $this->pushToken(/* Token::PUNCTUATION_TYPE */ 9, $this->code[$this->cursor]);
              ++$this->cursor;
          }
          // strings
          elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
              $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes(substr($match[0], 1, -1)));
              $this->moveCursor($match[0]);
          }
          // opening double quoted string
          elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
              $this->brackets[] = ['"', $this->lineno];
              $this->pushState(self::STATE_STRING);
              $this->moveCursor($match[0]);
          }
          // unlexable
          else {
              throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
          }
      }

      /**
       * Lexes the body of a verbatim block up to its endverbatim tag as one text token.
       *
       * @throws SyntaxError When no endverbatim tag follows the cursor
       */
      private function lexRawData()
      {
          if (!preg_match($this->regexes['lex_raw_data'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor)) {
              throw new SyntaxError('Unexpected end of file: Unclosed "verbatim" block.', $this->lineno, $this->source);
          }

          $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
          // advance before trimming so the cursor covers the untrimmed text and the end tag
          $this->moveCursor($text.$match[0][0]);

          // trim?
          if (isset($match[1][0])) {
              if ($this->options['whitespace_trim'] === $match[1][0]) {
                  // whitespace_trim detected ({%-, {{- or {#-)
                  $text = rtrim($text);
              } else {
                  // whitespace_line_trim detected ({%~, {{~ or {#~)
                  // don't trim \r and \n
                  $text = rtrim($text, " \t\0\x0B");
              }
          }

          $this->pushToken(/* Token::TEXT_TYPE */ 0, $text);
      }

      /**
       * Skips a comment: moves the cursor past its end delimiter without pushing a token.
       *
       * @throws SyntaxError When the comment is never closed
       */
      private function lexComment()
      {
          if (!preg_match($this->regexes['lex_comment'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor)) {
              throw new SyntaxError('Unclosed comment.', $this->lineno, $this->source);
          }

          $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
      }

      /**
       * Lexes inside a double-quoted string: an interpolation start, a literal
       * part, or the closing quote.
       *
       * @throws SyntaxError On a character that fits none of the three
       */
      private function lexString()
      {
          if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
              $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
              $this->pushToken(/* Token::INTERPOLATION_START_TYPE */ 10);
              $this->moveCursor($match[0]);
              $this->pushState(self::STATE_INTERPOLATION);
          } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && \strlen($match[0]) > 0) {
              $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes($match[0]));
              $this->moveCursor($match[0]);
          } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
              list($expect, $lineno) = array_pop($this->brackets);
              if ('"' != $this->code[$this->cursor]) {
                  throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
              }

              $this->popState();
              ++$this->cursor;
          } else {
              // unlexable
              throw new SyntaxError(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
          }
      }

      /**
       * Lexes inside a string interpolation: closes it when the innermost open
       * bracket is the interpolation start and the end delimiter is next,
       * otherwise lexes one expression token.
       */
      private function lexInterpolation()
      {
          $bracket = end($this->brackets);
          if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor)) {
              array_pop($this->brackets);
              $this->pushToken(/* Token::INTERPOLATION_END_TYPE */ 11);
              $this->moveCursor($match[0]);
              $this->popState();
          } else {
              $this->lexExpression();
          }
      }

      /**
       * Appends a token carrying the current line number.
       *
       * @param int   $type  One of the Token::*_TYPE values
       * @param mixed $value The token value, if any
       */
      private function pushToken($type, $value = '')
      {
          // do not push empty text tokens
          if (/* Token::TEXT_TYPE */ 0 === $type && '' === $value) {
              return;
          }

          $this->tokens[] = new Token($type, $value, $this->lineno);
      }

      /**
       * Advances the cursor by the byte length of $text and the line counter
       * by the number of newlines it contains.
       *
       * @param string $text The text that has just been consumed
       */
      private function moveCursor($text)
      {
          $this->cursor += \strlen($text);
          $this->lineno += substr_count($text, "\n");
      }

      /**
       * Builds one anchored alternation matching "=" and every unary and binary
       * operator of the environment.
       *
       * @return string The operator regex
       */
      private function getOperatorRegex()
      {
          $operators = array_merge(
              ['='],
              array_keys($this->env->getUnaryOperators()),
              array_keys($this->env->getBinaryOperators())
          );

          // sort by descending length so that longer operators come first in the alternation
          $operators = array_combine($operators, array_map('strlen', $operators));
          arsort($operators);

          $regex = [];
          foreach ($operators as $operator => $length) {
              // an operator that ends with a character must be followed by
              // a whitespace, a parenthesis, an opening map [ or sequence {
              $r = preg_quote($operator, '/');
              if (ctype_alpha($operator[$length - 1])) {
                  $r .= '(?=[\s()\[{])';
              }

              // an operator that begins with a character must not have a dot or pipe before
              if (ctype_alpha($operator[0])) {
                  $r = '(?<![\.\|])'.$r;
              }

              // an operator with a space can be any amount of whitespaces
              $r = preg_replace('/\s+/', '\s+', $r);

              $regex[] = $r;
          }

          return '/'.implode('|', $regex).'/A';
      }

      /**
       * Enters $state, remembering the current state so popState() can restore it.
       *
       * @param int $state One of the STATE_* constants
       */
      private function pushState($state)
      {
          $this->states[] = $this->state;
          $this->state = $state;
      }

      /**
       * Returns to the state that was active before the last pushState().
       *
       * @throws \LogicException When there is no previous state
       */
      private function popState()
      {
          if (0 === \count($this->states)) {
              throw new \LogicException('Cannot pop state without a previous state.');
          }

          $this->state = array_pop($this->states);
      }
  }
  // Keep the pre-namespace class name usable for legacy code.
  class_alias('Twig\Lexer', 'Twig_Lexer');