Cómo mejorar la legibilidad de expresiones regulares en PHP

Las expresiones regulares son una herramienta muy poderosa, pero la sabiduría convencional es que una vez que están escritas, son muy difíciles de entender, por lo que mantenerlas no es una experiencia agradable. Aquí se recopilan algunos consejos que ayudarán a que sean más legibles.





PHP PCRE — PHP 7.3, PCRE2 — . PHP , , . PHP , ctype*, URL-, — . IDE , , , .





, , . . , - PHP ( PHP 7.3). , . , PHP, JavaScript , ES2018.





:





  • -;





  • ;





  • ;





  • ;





  • ;





  • .





-

— . -, — . :





/(foo|bar)/i
      
      



(foo|bar)



— , i



— , , /



— . /



, . , ~, !, @, #, $



. , , \



— . : {}, (), [], <>



, . , , , . - , . , , . (, ^, $,



, ), . , , , . , /



, — , , URL-. : 





preg_match('/^https:\/\/example.com\/path/i', $uri);
      
      



“#”, , :





preg_match('#^https://example.com/path#i', $uri);
      
      



- . . , .



, *



, +



, $



. , /Username: @[a-z\.0-9]/



“.” , . 





, , . , -



. , , , , .





, /[A-Z]/



, A Z. (/[A\-Z]/)



, — , A, Z . , , , . , /[AZ-]/



, /[A\-Z]/



, .





( , ), . , :





/Price: [0-9\-\$\.\+]+/
      
      







/Price: [0-9$.+-]+/
      
      



X



, ,   , . , , , , . — :





preg_match('/x\yz/X', ''); // "y" has no special meaning, so the backslash before it is reported as an error
      
      



:





Warning: preg_match(): Compilation failed: unrecognized character follows \ at offset 2 in ... on line ...
      
      



, ()



, , ,   , , , .





, “Price: €24



”.





$pattern = '/Price: (£|€)(\d+)/';
$text    = 'Price: €24';
preg_match($pattern, $text, $matches);
      
      



2 , , ((£|€))



, — . , $matches



, ,  :





var_dump($matches);

array(3) {
  [0]=> string(12) "Price: €24"
  [1]=> string(3) "€"
  [2]=> string(2) "24"
}
      
      



, . , , ?:



. , , . , , (£|€)



, , : (?:£|€)



.





$pattern = '/Price: (?:£|€)(\d+)/';
$text    = 'Price: €24';
preg_match($pattern, $text, $matches);
var_dump($matches);
      
      



$matches



1 — :





array(2) {
  [0]=> string(12) "Price: €24"
  [1]=> string(2) "24"
}
      
      



, , , , , .





, . , , ,  





, , :





/Price: (?<currency>£|€)(?<price>\d+)/
      
      



, (?



, , . , (?<currency>£|€)



  — currency, (?<price>\d+)



price. , , — . , :





$pattern = '/Price: (?<currency>£|€)(?<price>\d+)/';
$text    = 'Price: €24';
preg_match($pattern, $text, $matches);
var_dump($matches);
      
      



:





array(5) {
  [0]=> string(12) "Price: €24"
  ["currency"]=> string(3) "€"
  [1]=> string(3) "€"
  ["price"]=> string(2) "24"
  [2]=> string(2) "24"
}
      
      



, $matches



, , .





 , , ["currency"]=> "€"



, [1]=> "€"



.





PHP , : 





Warning: preg_match(): Compilation failed: two named subpatterns have the same name (PCRE2_DUPNAMES not set) at offset ... in ... on line ....
      
      



, J



(UPD: , PHP 7.2.0, ?J



):





/Price: (?<currency>£|€)?(?<price>\d+)(?<currency>£|€)?/J
      
      



2 currency, , J



. , currency , . , , :





$pattern = '/Price: (?<currency>£|€)?(?<price>\d+)(?<currency>£|€)?/J';
$text    = 'Price: €24£';
preg_match($pattern, $text, $matches);
var_dump($matches);

array(6) {
  [0]=> string(14) "Price: €24£"
  ["currency"]=> string(2) "£"
  [1]=> string(3) "€"
  ["price"]=> string(2) "24"
  [2]=> string(2) "24"
  [3]=> string(2) "£"
}
      
      



, . , , PHP-, , . — . .





, :





$pattern  = '/Price: (?<currency>£|€)(?<price>\d+)/i';
      
      



:





$pattern  = '/Price: ';
$pattern .= '(?<currency>£|€)'; // Capture currency symbols £ or €
$pattern .= '(?<price>\d+)'; // Capture price without decimals.
$pattern .= '/i'; // Flags: Case-insensitive
      
      



. x



, , . , . :





/Price: (?<currency>£|€)(?<price>\d+)/i
      
      







/Price:  \s  (?<currency>£|€)  (?<price>\d+)  /ix
      
      



, , x



. , , , . , , \s



.





x



, #



, PHP . , . , :





/Price: (?<currency>£|€)(?<price>\d+)/i
      
      



:





/Price:           # Check for the label "Price:"
\s                # Ensure a white-space after.
(?<currency>£|€)  # Capture currency symbols £ or €
(?<price>\d+)     # Capture price without decimals.
/ix
      
      



PHP, Heredoc Nowdoc . , :





// Flags after the closing delimiter: i = case-insensitive, x = extended (free-spacing) mode.
$pattern = <<<PATTERN
  /Price:           # Check for the label "Price:"
  \s                # Ensure a white-space after.
  (?<currency>£|€)  # Capture currency symbols £ or €
  (?<price>\d+)     # Capture price without decimals.
  /ix
PATTERN;

preg_match($pattern, 'Price: £42', $matches);

      
      



, , , . , — \d



,  , [0-9]



. \D



, — , [^0-9]



. , , , , , : 





/Number: [0-9][^0-9]/
      
      



:





/Number: \d\D/
      
      



, . :





  • \w



    — , , [A-Za-z0-9_]



    ,





 





/[A-Za-z0-9_]/
      
      



:





/\w/
      
      



  • [:xdigit:]



    — , [A-Fa-f0-9]



    ,









/[A-Fa-f0-9]/
      
      



:





/[[:xdigit:]]/
      
      



  • \s



    — ,  [ \t\r\n\v\f]



    ,









/[ \t\r\n\v\f]/
      
      







/\s/
      
      



/u



, , . \p{_}



, _



— . \p



"p" , \P{FOO}



, — , . , , , \p{Sc}



, , , , , . , : \p{Currency_Symbol}



, PHP.





:





$pattern = '/Price: \p{Sc}\d+/u';
      
      



:





$text = 'Price: ¥42';
      
      



, . , , , . , . , \p{Sinhala}



, \x{0D80}-\x{0DFF}



. , :





$pattern = '/[\x{0D80}-\x{0DFF}]/u';
      
      



, :





$pattern = '/\p{Sinhala}/u';
      
      



,





$text = 'පීඑච්පී.වොච්';
$contains_sinhala = preg_match($pattern, $text);
      
      



, , , !





P.S. — - . , .








All Articles