Ya basta de escribir estas expresiones regulares

Hola, mi nombre es Dmitry Karlovsky y antes también usaba Perl para el desarrollo de frontend. Mire qué código tan conciso basta para analizar, por ejemplo, una dirección de correo electrónico:







/^(?:((?:[\w!#\$%&'\*\+\/=\?\^`\{\|\}~-]){1,}(?:\.(?:[\w!#\$%&'\*\+\/=\?\^`\{\|\}~-]){1,}){0,})|("(?:((?:(?:([\u{1}-\u{8}\u{b}\u{c}\u{e}-\u{1f}\u{21}\u{23}-\u{5b}\u{5d}-\u{7f}])|(\\[\u{1}-\u{9}\u{b}\u{c}\u{e}-\u{7f}]))){0,}))"))@(?:((?:[\w!#\$%&'\*\+\/=\?\^`\{\|\}~-]){1,}(?:\.(?:[\w!#\$%&'\*\+\/=\?\^`\{\|\}~-]){1,}){0,}))$/gsu
      
      





Aquí, sin embargo, aparecieron varios errores. Bueno, nada, ¡lo arreglaremos en la próxima versión!







Fuera de bromas









A medida que crecen, las expresiones regulares pierden rápidamente su claridad. No en vano hay decenas de servicios en Internet para depurarlas. Estos son solo algunos de ellos:









, :







/(?<слово>(?<буква>\p{Script=Cyrillic})\p{Script=Cyrillic}+)/gimsu
      
      





, , . 5 :







/\t/
/\ci/
/\x09/
/\u0009/
/\u{9}/u
      
      





JS , ?







const text = 'lol;)'

// SyntaxError: Invalid regular expression: /^(lol;)){2}$/: Unmatched ')'
const regexp = new RegExp( `^(${ text }){2}$` )
      
      





, , :







const VISA = /(?<type>4)\d{12}(?:\d{3})?/
const MasterCard = /(?<type>5)[12345]\d{14}/

// Invalid regular expression: /(?<type>4)\d{12}(?:\d{3})?|(?<type>5)[12345]\d{14}/: Duplicate capture group name
const CardNumber = new RegExp( VISA.source + '|' + MasterCard.source )
      
      





, , , ! ?









JS. XRegExp:







  • API .
  • .
  • .
  • .
  • .
  • IDE.


, , , .









DSL, JS . PEG.js:







  • .
  • — .
  • .
  • IDE.
  • 2 .


.







, . .









TypeScript $mol_regexp:







  • .
  • IDE.
  • .
  • , .


. - ..











- , .







const {
    char_only, latin_only, decimal_only,
    begin, tab, line_end, end,
    repeat, repeat_greedy, from,
} = $mol_regexp
      
      





, NPM



// ES `import` clauses cannot destructure nested members, so the
// `$mol_regexp` binding is imported first and unpacked in a separate statement.
import { $mol_regexp } from 'mol_regexp'

const {
    char_only, decimal_only,
    begin, tab, line_end,
    repeat, from,
} = $mol_regexp
      
      







// /4(?:\d){12,}?(?:(?:\d){3,}?){0,1}/gsu
const VISA = from([
    '4',
    repeat( decimal_only, 12 ),
    [ repeat( decimal_only, 3 ) ],
])

// /5[12345](?:\d){14,}?/gsu
const MasterCard = from([
    '5',
    char_only( '12345' ),
    repeat( decimal_only, 14 ),
])
      
      





:







  • .
  • .
  • .
  • . .
  • ( ).




// /(?:(4(?:\d){12,}?(?:(?:\d){3,}?){0,1})|(5[12345](?:\d){14,}?))/gsu
const CardNumber = from({ VISA, MasterCard })
      
      







// /^(?:\t){0,}?(?:((?:(4(?:\d){12,}?(?:(?:\d){3,}?){0,1})|(5[12345](?:\d){14,}?))))(?:((?:\r){0,1}\n)|(\r))/gmsu
const CardRow = from(
    [ begin, repeat( tab ), {CardNumber}, line_end ],
    { multiline: true },
)
      
      







const cards = `
    3123456789012
    4123456789012
    551234567890123
    5512345678901234
`
      
      







// Walk every token produced by matching `cards` against `CardRow`.
for( const token of cards.matchAll( CardRow ) ) {

    // Tokens that carry no `groups` object are reported as-is.
    if( !token.groups ) {
        // Skip tokens that contain nothing but whitespace.
        if( !token[0].trim() ) continue
        console.log( ' ', token[0].trim() )
        continue
    }

    // The label is taken from the first named alternative with a non-empty capture.
    const type = ''
        || token.groups.VISA && ' VISA'
        || token.groups.MasterCard && 'MasterCard'

    console.log( type, token.groups.CardNumber )

}
      
      





, , . matchAll



, . $mol_regexp



. groups



. , , .









  3123456789012
 VISA 4123456789012
  551234567890123
MasterCard 5512345678901234
      
      





.







E-Mail



:







// Building blocks taken from $mol_regexp for the e-mail pattern below.
const {
    begin, end,
    char_only, char_range,
    latin_only, slash_back,
    repeat_greedy, from,
} = $mol_regexp

// Unquoted form: atoms made of `latin_only` characters plus the listed
// punctuation, joined by single dots.
const atom_char = char_only( latin_only, "!#$%&'*+/=?^`{|}~-" )
const atom = repeat_greedy( atom_char, 1 )
const dot_atom = from([ atom, repeat_greedy([ '.', atom ]) ])

// Character codes allowed unescaped inside a quoted name: the listed ranges
// leave out 0x09, 0x0a, 0x0d, 0x20, 0x22 (") and 0x5c (\).
const name_letter = char_only(
    char_range( 0x01, 0x08 ),
    0x0b, 0x0c,
    char_range( 0x0e, 0x1f ),
    0x21,
    char_range( 0x23, 0x5b ),
    char_range( 0x5d, 0x7f ),
)

// Escaped pair: `slash_back` followed by one character code from the listed
// ranges (everything in 0x01-0x7f except 0x0a and 0x0d).
const quoted_pair = from([
    slash_back,
    char_only(
        char_range( 0x01, 0x09 ),
        0x0b, 0x0c,
        char_range( 0x0e, 0x7f ),
    )
])

// Quoted form: any run of plain letters or escaped pairs between double quotes.
const name = repeat_greedy({ name_letter, quoted_pair })
const quoted_name = from([ '"', {name}, '"' ])

// The local part is one of the two forms; the domain reuses the dotted-atom form.
const local_part = from({ dot_atom, quoted_name })
const domain = dot_atom

// Whole address, anchored at both ends: local part, '@', then the domain
// under the `domain` key.
const mail = from([ begin, local_part, '@', {domain}, end ])
      
      





— . !







//  SyntaxError: Wrong param: dot_atom=foo..bar
mail.generate({
    dot_atom: 'foo..bar',
    domain: 'example.org',
})
      
      





, … :







// foo.bar@example.org
mail.generate({
    dot_atom: 'foo.bar',
    domain: 'example.org',
})
      
      





:







// "foo..bar"@example.org
mail.generate({
    name: 'foo..bar',
    domain: 'example.org',
})
      
      





.









, "" /snjat-dvushku/s-remontom/v-vihino



. , :







const translit = char_only( latin_only, '-' )
const place = repeat_greedy( translit )

const action = from({ rent: 'snjat', buy: 'kupit' })
const repaired = from( 's-remontom' )

const rooms = from({
    one_room: 'odnushku',
    two_room: 'dvushku',
    any_room: 'kvartiru',
})

const route = from([
    begin,
    '/', {action}, '-', {rooms},
    [ '/', {repaired} ],
    [ '/v-', {place} ],
    end,
])
      
      





:







// `/snjat-dvushku/v-vihino`.matchAll(route).next().value.groups
{
    action: "snjat",
    rent: "snjat",
    buy: "",
    rooms: "dvushku",
    one_room: "",
    two_room: "dvushku",
    any_room: "",
    repaired: "",
    place: "vihino",
}
      
      





, :







// /kupit-kvartiru/v-moskve
route.generate({
    buy: true,
    any_room: true,
    repaired: false,
    place: 'moskve',
})
      
      





true



, . false



, .







, , , .







?



, , . 2 , . . groups



:







// Pattern for `HH:MM`: an outer `time` entry wrapping nested `hours` and
// `minutes` entries, each built as `repeat( decimal_only, 2 )`.
// time.source == "((\d{2}):(\d{2}))"
// time.groups == [ 'time', 'hours', 'minutes' ]
const time = from({
    time: [
        { hours: repeat( decimal_only, 2 ) },
        ':',
        { minutes: repeat( decimal_only, 2 ) },
    ],
})
      
      





, exec



- groups



:







{
    time: '12:34',
    hours: '12',
    minutes: '34',
}
      
      





, , , , :







// Counter-example: the hours part is a raw RegExp with an anonymous capture
// group instead of a named entry, so only `time` and `minutes` are listed
// as group names while the source still contains three capture groups.
// time.source == "((\d{2}):(\d{2}))"
// time.groups == [ 'time', 'minutes' ]
const time = wrong_from({
    time: [
        /(\d{2})/,
        ':',
        { minutes: repeat( decimal_only, 2 ) },
    ],
})
      
      





{
    time: '12:34',
    hours: '34',
    minutes: undefined,
}
      
      





, , "" "0", "1" . — , , :







new RegExp( '|' + regexp.source ).exec('').length - 1
      
      





, String..match



String..matchAll



exec



. , , Symbol.match



Symbol.matchAll



. :







/**
 * Yields every result that `this.exec` produces for `str`, always scanning
 * from the start of the string regardless of the current `lastIndex`.
 *
 * The caller's `lastIndex` is saved before scanning and put back afterwards.
 * The restore sits in `finally` so that it also runs when the consumer stops
 * iterating early (`break` / `return()` on the iterator) or when `exec`
 * throws; otherwise the regexp would be left with a stale scan position.
 *
 * @param str Text to scan.
 * @yields Each non-null result of `this.exec( str )`, in order.
 */
*[Symbol.matchAll] (str:string) {
    const index = this.lastIndex
    this.lastIndex = 0
    try {
        while ( this.lastIndex < str.length ) {
            const found = this.exec(str)
            if( !found ) break
            yield found
        }
    } finally {
        this.lastIndex = index
    }
}
      
      





, , :







/** Typing of a match result as far as its `groups` property is concerned. */
interface RegExpMatchArray {
    /** Optional dictionary from string keys to string values; may be absent. */
    groups?: {
        [key: string]: string
    }
}
      
      





, :







/**
 * `String` overloads whose result type is derived from the regexp argument's
 * own `Symbol.match` / `Symbol.matchAll` method instead of a fixed type.
 */
interface String {

    /** Result type is whatever `RE`'s `[Symbol.match]` method is declared to return. */
    match< RE extends RegExp >( regexp: RE ): ReturnType<
        RE[ typeof Symbol.match ]
    >

    /** Result type is whatever `RE`'s `[Symbol.matchAll]` method is declared to return. */
    matchAll< RE extends RegExp >( regexp: RE ): ReturnType<
        RE[ typeof Symbol.matchAll ]
    >

}
      
      





TypeScript groups



, - .







, .











— , , , - ( ) .




















All Articles