Hola, mi nombre es Dmitry Karlovsky y antes también usaba Perl para el desarrollo de frontend. Mire qué código tan conciso permite analizar, por ejemplo, una dirección de correo electrónico:
/^(?:((?:[\w!#\$%&'\*\+\/=\?\^`\{\|\}~-]){1,}(?:\.(?:[\w!#\$%&'\*\+\/=\?\^`\{\|\}~-]){1,}){0,})|("(?:((?:(?:([\u{1}-\u{8}\u{b}\u{c}\u{e}-\u{1f}\u{21}\u{23}-\u{5b}\u{5d}-\u{7f}])|(\\[\u{1}-\u{9}\u{b}\u{c}\u{e}-\u{7f}]))){0,}))"))@(?:((?:[\w!#\$%&'\*\+\/=\?\^`\{\|\}~-]){1,}(?:\.(?:[\w!#\$%&'\*\+\/=\?\^`\{\|\}~-]){1,}){0,}))$/gsu
Aquí, sin embargo, se colaron varios errores. Bueno, no pasa nada: ¡los arreglaremos en la próxima versión!
Fuera de bromas
A medida que crecen, las expresiones regulares pierden rápidamente su claridad. No en vano hay decenas de servicios en Internet para depurarlas. Estos son solo algunos de ellos:
- https://regex101.com/
- https://regexr.com/
- https://www.debuggex.com/
- https://extendsclass.com/regex-tester.html
, :
/(?<palabra>(?<letra>\p{Script=Cyrillic})\p{Script=Cyrillic}+)/gimsu
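Además, una misma cosa se puede escribir de varias formas. Por ejemplo, estas 5 expresiones regulares son equivalentes y todas reconocen una tabulación: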
/\t/
/\ci/
/\x09/
/\u0009/
/\u{9}/u
JS , ?
const text = 'lol;)'
// SyntaxError: Invalid regular expression: /^(lol;)){2}$/: Unmatched ')'
const regexp = new RegExp( `^(${ text }){2}$` )
, , :
const VISA = /(?<type>4)\d{12}(?:\d{3})?/
const MasterCard = /(?<type>5)[12345]\d{14}/
// Invalid regular expression: /(?<type>4)\d{12}(?:\d{3})?|(?<type>5)[12345]\d{14}/: Duplicate capture group name
const CardNumber = new RegExp( VISA.source + '|' + MasterCard.source )
, , , ! ?
JS. XRegExp:
, , , .
DSL, JS . PEG.js:
- .
- — .
- .
- IDE.
- 2 .
, . .
TypeScript $mol_regexp:
. - ..
- , .
const {
char_only, latin_only, decimal_only,
begin, tab, line_end, end,
repeat, repeat_greedy, from,
} = $mol_regexp
, NPM
// An ES `import` clause cannot contain a nested destructuring pattern
// (`import { a: { b } } from '…'` is a syntax error), so the named binding is
// imported first and its members are destructured in a separate statement.
import { $mol_regexp } from 'mol_regexp'

const {
    char_only, decimal_only,
    begin, tab, line_end,
    repeat, from,
} = $mol_regexp
// /4(?:\d){12,}?(?:(?:\d){3,}?){0,1}/gsu
const VISA = from([
'4',
repeat( decimal_only, 12 ),
[ repeat( decimal_only, 3 ) ],
])
// /5[12345](?:\d){14,}?/gsu
const MasterCard = from([
'5',
char_only( '12345' ),
repeat( decimal_only, 14 ),
])
:
- .
- .
- .
- . .
- ( ).
// /(?:(4(?:\d){12,}?(?:(?:\d){3,}?){0,1})|(5[12345](?:\d){14,}?))/gsu
const CardNumber = from({ VISA, MasterCard })
// /^(?:\t){0,}?(?:((?:(4(?:\d){12,}?(?:(?:\d){3,}?){0,1})|(5[12345](?:\d){14,}?))))(?:((?:\r){0,1}\n)|(\r))/gmsu
const CardRow = from(
[ begin, repeat( tab ), {CardNumber}, line_end ],
{ multiline: true },
)
const cards = `
3123456789012
4123456789012
551234567890123
5512345678901234
`
// Walk every token produced by matching `cards` against `CardRow` and print it.
for( const token of cards.matchAll( CardRow ) ) {

    const groups = token.groups

    if( groups ) {
        // The label is chosen by whichever named alternative holds captured text.
        const type = ''
            || groups.VISA && ' VISA'
            || groups.MasterCard && 'MasterCard'
        console.log( type, groups.CardNumber )
        continue
    }

    // Token without groups: print its trimmed text unless it is blank.
    const text = token[0].trim()
    if( text ) console.log( ' ', text )

}
, , . matchAll
, . $mol_regexp
. groups
. , , .
3123456789012 VISA 4123456789012 551234567890123 MasterCard 5512345678901234
:
const {
begin, end,
char_only, char_range,
latin_only, slash_back,
repeat_greedy, from,
} = $mol_regexp
//
const atom_char = char_only( latin_only, "!#$%&'*+/=?^`{|}~-" )
const atom = repeat_greedy( atom_char, 1 )
const dot_atom = from([ atom, repeat_greedy([ '.', atom ]) ])
//
const name_letter = char_only(
char_range( 0x01, 0x08 ),
0x0b, 0x0c,
char_range( 0x0e, 0x1f ),
0x21,
char_range( 0x23, 0x5b ),
char_range( 0x5d, 0x7f ),
)
//
const quoted_pair = from([
slash_back,
char_only(
char_range( 0x01, 0x09 ),
0x0b, 0x0c,
char_range( 0x0e, 0x7f ),
)
])
//
const name = repeat_greedy({ name_letter, quoted_pair })
const quoted_name = from([ '"', {name}, '"' ])
// :
const local_part = from({ dot_atom, quoted_name })
const domain = dot_atom
// ,
const mail = from([ begin, local_part, '@', {domain}, end ])
— . !
// SyntaxError: Wrong param: dot_atom=foo..bar
mail.generate({
dot_atom: 'foo..bar',
domain: 'example.org',
})
, … :
// foo.bar@example.org
mail.generate({
dot_atom: 'foo.bar',
domain: 'example.org',
})
:
// "foo..bar"@example.org
mail.generate({
name: 'foo..bar',
domain: 'example.org',
})
, "" /snjat-dvushku/s-remontom/v-vihino
. , :
const translit = char_only( latin_only, '-' )
const place = repeat_greedy( translit )
const action = from({ rent: 'snjat', buy: 'kupit' })
const repaired = from( 's-remontom' )
const rooms = from({
one_room: 'odnushku',
two_room: 'dvushku',
any_room: 'kvartiru',
})
const route = from([
begin,
'/', {action}, '-', {rooms},
[ '/', {repaired} ],
[ '/v-', {place} ],
end,
])
:
// `/snjat-dvushku/v-vihino`.matchAll(route).next().value.groups
{
action: "snjat",
rent: "snjat",
buy: "",
rooms: "dvushku",
one_room: "",
two_room: "dvushku",
any_room: "",
repaired: "",
place: "vihino",
}
, :
// /kupit-kvartiru/v-moskve
route.generate({
buy: true,
any_room: true,
repaired: false,
place: 'moskve',
})
true
, . false
, .
?
, , . 2 , . . groups
:
// Nested named groups: `time` wraps the whole pattern, while `hours` and
// `minutes` name its two numeric parts.
// time.source == "((\d{2}):(\d{2}))"
// time.groups == [ 'time', 'hours', 'minutes' ]
const time = from({
    time: [
        { hours: repeat( decimal_only, 2 ) },
        ':',
        { minutes: repeat( decimal_only, 2 ) },
    ],
}) // the object literal must be closed before the call's parenthesis
, exec
- groups
:
{
time: '12:34',
hours: '12',
minutes: '34',
}
, , , , :
// Counter-example: a native RegExp with an anonymous capturing group is mixed
// in with named groups, so the names list below has only two entries for the
// three capturing groups present in the source.
// time.source == "((\d{2}):(\d{2}))"
// time.groups == [ 'time', 'minutes' ]
const time = wrong_from({
    time: [
        /(\d{2})/,
        ':',
        { minutes: repeat( decimal_only, 2 ) },
    ],
}) // the object literal must be closed before the call's parenthesis
{
time: '12:34',
hours: '34',
minutes: undefined,
}
, , "" "0", "1" . — , , :
new RegExp( '|' + regexp.source ).exec('').length - 1
, String..match
String..matchAll
exec
. , , Symbol.match
Symbol.matchAll
. :
/**
 * Yields the successive results of `this.exec( str )`, scanning from the
 * start of the string until `exec` returns nothing or the end is reached.
 *
 * The regexp's own `lastIndex` serves as the scan cursor, so the caller's
 * value is saved first and put back afterwards. The restore lives in
 * `finally` so that it also runs when the consumer stops early (`break`,
 * `return`, destructuring, an explicit `.return()`) or when `exec` throws;
 * without it such exits would leave `lastIndex` clobbered.
 *
 * @param str Text to scan.
 */
*[Symbol.matchAll] (str:string) {
    const index = this.lastIndex
    this.lastIndex = 0
    try {
        while ( this.lastIndex < str.length ) {
            const found = this.exec(str)
            if( !found ) break
            yield found
        }
    } finally {
        this.lastIndex = index
    }
}
, , :
interface RegExpMatchArray { groups?: { [key: string]: string } }
, :
// Augments the `String` interface with generic overloads of `match` and
// `matchAll` whose result type is taken from the regexp that is passed in.
interface String {
// Result type is whatever the given regexp's own `[Symbol.match]` method returns.
match< RE extends RegExp >( regexp: RE ): ReturnType<
RE[ typeof Symbol.match ]
>
// Result type is whatever the given regexp's own `[Symbol.matchAll]` method returns.
matchAll< RE extends RegExp >( regexp: RE ): ReturnType<
RE[ typeof Symbol.matchAll ]
>
}
TypeScript groups
, - .
- $mol_regexp.
- — MarkedText: $hyoo_marked.
- MAM NPM.
- $mol, .
— , , , - ( ) .