A veces surge la tarea de analizar un DSL arbitrario para seguir trabajando con él a nivel de código PHP. Y quiero compartir mi experiencia de resolver este problema con ejemplos.
Durante bastante tiempo he estado usando el servicio dbdiagram para diseñar una estructura de base de datos para proyectos futuros o existentes. Elegí este servicio porque es bastante fácil de usar. Describimos la estructura de las tablas en DBML e inmediatamente vemos el resultado.
, : PHP , , Laravel .
, GO, , , PHP.
: , ( () , «»). () .
// Source line
// full_name varchar [not null, unique, default: 1]
// Resulting tokens as JSON
[
{"name": "IDENT", "string": "full_name", "pos": [0, 9]},
{"name": "IDENT", "string": "varchar", "pos": [0, 17]},
{"name": "LBRACK", "string": "[", "pos": [0, 18]},
{"name": "NOT", "string": "not", "pos": [0, 22]},
{"name": "NULL", "string": "null", "pos": [0, 27]},
{"name": "COMMA", "string": ",", "pos": [0, 27]},
{"name": "UNIQUE", "string": "unique", "pos": [0, 35]},
{"name": "COMMA", "string": ",", "pos": [0, 35]},
{"name": "DEFAULT", "string": "default", "pos": [0, 44]},
{"name": "COLON", "string": ":", "pos": [0, 44]},
{"name": "INT", "string": "1", "pos": [0, 46]},
{"name": "RBRACK", "string": "]", "pos": [0, 47]}
]
// DBML
Project test {
database_type: 'PostgreSQL'
Note: 'Description of the project'
}
[
{"name":"PROJECT","string":"Project","pos":[0,7]},
{"name":"IDENT","string":"test","pos":[0,12]},
{"name":"LBRACE","string":"{","pos":[0,13]},
{"name":"IDENT","string":"database_type","pos":[1,15]},
{"name":"COLON","string":":","pos":[1,15]},
{"name":"DSTRING","string":"PostgreSQL","pos":[1,18]},
{"name":"NOTE","string":"Note","pos":[2,6]},
{"name":"COLON","string":":","pos":[2,6]},
{"name":"DSTRING","string":"Description of the project","pos":[2,9]},
{"name":"RBRACE","string":"}","pos":[3,0]}
]
PROJECT
, .
<?php
// Token stream to parse.
// NOTE(review): `TokenCollection(...)` is a placeholder; a real constructor call would need `new` — confirm.
$tokens = TokenCollection(...);
// Read the next token; presumably the PROJECT keyword itself was consumed by the caller — TODO confirm.
$token = $tokens->nextToken();
// The project name may be either a bare identifier or a quoted string.
if (!$token->is(Token::IDENT) && !$token->is(Token::DSTRING)) {
throw new ParserException('Project does not have a name');
}
$name = $token->getString();
// The name must be followed by LBRACE, which opens the project body.
// NOTE(review): 'LBRACE' is compared as a raw string while other token names use Token::* constants.
$token = $tokens->nextToken();
if (!$token->is('LBRACE')) {
throw new ParserException('Expects {');
}
$project = new Project($name);
// Walk the body token by token until the closing brace is reached.
// NOTE(review): if the stream runs out before RBRACE, the loop ends without returning or throwing.
do {
$token = $tokens->nextToken();
switch ($token->getName()) {
case Token::IDENT:
switch ($token->getString()) {
case 'database_type':
$project->setDbType(...);
break;
default:
throw new ParserException('Expects database_type');
}
break;
// A NOTE token starts the project note.
case Token::NOTE:
$project->setNote(...);
break;
// The closing brace ends the project definition; hand the result back.
case 'RBRACE':
return $project;
default:
throw new ParserException(sprintf('Invalid token %s', $token->getString()));
}
} while ($tokens->valid());
50% DBML, , , , .
phplrt
PHP Russia 2021 @SerafimArts phplrt, AST. .. .
, , , DBML, EBNF. EBNF phplrt , AST. .
phplrt EBNF php , .
\G(?|(?:(?:\s+)(*MARK:T_WHITESPACE))|(?:(?:\/\/[^\n]*\n)(*MARK:T_COMMENT))|(?:(?:(?<=\b)true\b)(*MARK:T_BOOL_TRUE))|(?:(?:(?<=\b)false\b)(*MARK:T_BOOL_FALSE))|(?:(?:(?<=\b)null\b)(*MARK:T_NULL))|(?:(?:(?<=\b)Project\b)(*MARK:T_PROJECT))|(?:(?:(?<=\b)Table\b)(*MARK:T_TABLE))|(?:(?:(?<=\b)as\b)(*MARK:T_TABLE_ALIAS))|(?:(?:(?<=\b)(Indexes|indexes)\b)(*MARK:T_TABLE_INDEXES))|(?:(?:(Ref|ref))(*MARK:T_TABLE_REF))|(?:(?:(?<=\b)TableGroup\b)(*MARK:T_TABLE_GROUP))|(?:(?:(?<=\b)(Enum|enum)\b)(*MARK:T_ENUM))|(?:(?:(?<=\b)(primary\skey|pk)\b)(*MARK:T_TABLE_SETTING_PK))|(?:(?:(?<=\b)unique\b)(*MARK:T_TABLE_SETTING_UNIQUE))|(?:(?:(?<=\b)increment\b)(*MARK:T_TABLE_SETTING_INCREMENT))|(?:(?:(?<=\b)default\b)(*MARK:T_TABLE_SETTING_DEFAULT))|(?:(?:(?<=\b)null\b)(*MARK:T_TABLE_SETTING_NULL))|(?:(?:(?<=\b)not\snull\b)(*MARK:T_TABLE_SETTING_NOT_NULL))|(?:(?:(?<=\b)cascade\b)(*MARK:T_REF_ACTION_CASCADE))|(?:(?:(?<=\b)restrict\b)(*MARK:T_REF_ACTION_RESTRICT))|(?:(?:(?<=\b)set\snull\b)(*MARK:T_REF_ACTION_SET_NULL))|(?:(?:(?<=\b)set\sdefault\b)(*MARK:T_REF_ACTION_SET_DEFAULT))|(?:(?:(?<=\b)no\saction\b)(*MARK:T_REF_ACTION_NO_ACTION))|(?:(?:(?<=\b)delete\b)(*MARK:T_REF_ACTION_DELETE))|(?:(?:(?<=\b)update\b)(*MARK:T_REF_ACTION_UPDATE))|(?:(?:note:)(*MARK:T_SETTING_NOTE))|(?:(?:(?<=\b)Note\b)(*MARK:T_NOTE))|(?:(?:[0-9]+\.[0-9]+)(*MARK:T_FLOAT))|(?:(?:[0-9]+)(*MARK:T_INT))|(?:(?:('{3}|["']{1})([^'"][\s\S]*?)\1)(*MARK:T_QUOTED_STRING))|(?:(?:(`{1})([\s\S]+?)\1)(*MARK:T_EXPRESSION))|(?:(?:[a-zA-Z0-9_]+)(*MARK:T_WORD))|(?:(?:\\n)(*MARK:T_EOL))|(?:(?:\()(*MARK:T_LPAREN))|(?:(?:\))(*MARK:T_RPAREN))|(?:(?:{)(*MARK:T_LBRACE))|(?:(?:})(*MARK:T_RBRACE))|(?:(?:\[)(*MARK:T_LBRACK))|(?:(?:\])(*MARK:T_RBRACK))|(?:(?:\>)(*MARK:T_GT))|(?:(?:\<)(*MARK:T_LT))|(?:(?:,)(*MARK:T_COMMA))|(?:(?::)(*MARK:T_COLON))|(?:(?:\-)(*MARK:T_MINUS))|(?:(?:\.)(*MARK:T_DOT))|(?:(?:.+?)(*MARK:T_UNKNOWN)))
//
Project test {
database_type: 'PostgreSQL'
Note: 'Description of the project'
}
, .. , .
//
// Keyword that opens a project definition
%token T_PROJECT (?<=\b)Project\b
// Keyword that introduces a note
%token T_NOTE (?<=\b)Note\b
// Quoted string: opened by ''' or by a single ' or ", and closed by the same delimiter
%token T_QUOTED_STRING ('{3}|["']{1})([^'"][\s\S]*?)\1
// Bare word made of ASCII letters and underscores
%token T_WORD [a-zA-Z_]+
// Punctuation and line terminator
%token T_LBRACE {
%token T_RBRACE }
%token T_COLON :
// NOTE(review): as written this pattern is a backslash followed by `n`; confirm it is meant to match a line break
%token T_EOL \\n
// Whitespace is declared with %skip instead of %token; presumably it is matched and discarded before parsing — confirm against the phplrt docs
%skip T_WHITESPACE \s+
,
// #Project - rule name; the leading # presumably makes the rule emit its own
// AST node — TODO confirm against the phplrt docs
#Project
:
::T_PROJECT:: <T_WORD> ::T_LBRACE:: ::T_EOL::
// the project body (settings and note) goes here
::T_RBRACE:: ::T_EOL::
;
// ::TOKEN_NAME:: - the token must be present but is not kept in the AST
// <TOKEN_NAME> - the token must be present and is kept in the AST
, , .
// Example: database_type: 'PostgreSQL'
// A setting is a key, a colon and a value; the colon is matched but not kept in the AST
#ProjectSetting
:
<T_WORD> ::T_COLON:: (<T_WORD> | <T_QUOTED_STRING>)
;
// Example: Note: 'Description of the project'
// The Note keyword and the colon are matched but not kept; only the text is
#Note
:
::T_NOTE:: ::T_COLON:: (<T_WORD> | <T_QUOTED_STRING>)
;
// ( <T_WORD> | <T_QUOTED_STRING> ) - alternation: the value may be either
// a bare word or a quoted string
#DBML
:
// Root rule: a DBML document is a sequence of top-level definitions
// (...)* - the group may repeat zero or more times
(
Project()
// Alternatives for the remaining top-level definitions, not enabled yet:
// Project() |
// Table() |
// TableGroup() |
// Enum() |
// Ref()
)*
;
#Project
:
::T_PROJECT:: <T_WORD> ::T_LBRACE:: ::T_EOL::
// Zero or more body lines. The alternation is grouped so that every line,
// whether a setting or a note, must be terminated by T_EOL; without the
// inner parentheses T_EOL would belong to the Note() alternative only.
((ProjectSetting() | Note()) ::T_EOL::)*
::T_RBRACE:: ::T_EOL::
;
xml .
<DBML offset="0">
<Project offset="0">
<T_WORD offset="8">project_name</T_WORD>
<ProjectSetting offset="27">
<T_WORD offset="27">database_type</T_WORD>
<T_QUOTED_STRING offset="42">'PostgreSQL'</T_QUOTED_STRING>
</ProjectSetting>
<Note offset="59">
<T_QUOTED_STRING offset="65">'Description of the project'</T_QUOTED_STRING>
</Note>
</Project>
</DBML>
, , XML , ?
. phplrt PHP
!!!!!! . XML , .
, .
// Builds the AST node for a matched Project rule.
#Project -> {
return new ProjectNode(
// $children - the child nodes of this rule, i.e. instances of
// \Butschster\Dbml\Ast\Project\SettingNode
// \Butschster\Dbml\Ast\NoteNode
$token->getOffset(), $children
);
}
// Builds the AST node for a matched ProjectSetting rule.
#ProjectSetting -> {
return new SettingNode(
// \current($children) - the first child (presumably the setting key)
// \end($children) - the last child (presumably the setting value)
$token->getOffset(), \current($children), \end($children)
);
}
// Builds the AST node for a matched Note rule.
#Note -> {
return new NoteNode(
// \end($children) - the last child (presumably the note text)
$token->getOffset(), \end($children)
);
}
PHP
<?php
/**
 * AST node for a `Project` definition.
 *
 * Collects the project name, its optional note and its settings from the
 * child nodes passed to the constructor.
 */
class ProjectNode
{
    private ?string $note = null;

    /** @var array<string, SettingNode> Settings indexed by their key. */
    private array $settings = [];

    /** Stays null when no NameNode is among the children. */
    private ?string $name = null;

    /**
     * @param int $offset Offset value supplied by the caller for this node.
     * @param array $children Child nodes; NoteNode, SettingNode and NameNode
     *                        instances are consumed, anything else is ignored.
     */
    public function __construct(
        private int $offset,
        array $children
    ) {
        foreach ($children as $child) {
            if ($child instanceof NoteNode) {
                $this->note = $child->getDescription();
            } elseif ($child instanceof SettingNode) {
                // A later setting with the same key replaces the earlier one.
                $this->settings[$child->getKey()] = $child;
            } elseif ($child instanceof NameNode) {
                $this->name = $child->getValue();
            }
        }
    }

    /**
     * Returns the offset this node was created with.
     */
    public function getOffset(): int
    {
        return $this->offset;
    }

    /**
     * Returns the project name.
     *
     * @throws \LogicException When the node was built without a NameNode child.
     */
    public function getName(): string
    {
        // Fail with an explicit message instead of the engine's
        // "must not be accessed before initialization" error.
        if ($this->name === null) {
            throw new \LogicException('Project node does not have a name');
        }

        return $this->name;
    }

    /**
     * Returns the project note, or null when the project has none.
     */
    public function getNote(): ?string
    {
        return $this->note;
    }

    /**
     * Returns the project settings indexed by setting key.
     *
     * @return array<string, SettingNode>
     */
    public function getSettings(): array
    {
        return $this->settings;
    }
}
/**
 * AST node for a note; keeps the note text as a plain string.
 */
class NoteNode
{
    private string $description;

    /**
     * @param int $offset Offset value supplied by the caller for this node.
     * @param StringNode $string Node whose value becomes the note text.
     */
    public function __construct(private int $offset, StringNode $string)
    {
        $this->description = $string->getValue();
    }

    /**
     * Returns the offset this node was created with.
     */
    public function getOffset(): int
    {
        return $this->offset;
    }

    /**
     * Returns the note text taken from the string node.
     */
    public function getDescription(): string
    {
        return $this->description;
    }
}
/**
 * AST node for a single `key: value` setting.
 */
class SettingNode
{
    private string $key;

    private string $value;

    /**
     * @param int $offset Offset value supplied by the caller for this node.
     * @param SettingKeyNode $key Node whose value becomes the setting key.
     * @param StringNode $value Node whose value becomes the setting value.
     */
    public function __construct(
        private int $offset,
        SettingKeyNode $key,
        StringNode $value
    ) {
        $this->key = $key->getValue();
        $this->value = $value->getValue();
    }

    /**
     * Returns the offset this node was created with.
     */
    public function getOffset(): int
    {
        return $this->offset;
    }

    /**
     * Returns the setting key.
     */
    public function getKey(): string
    {
        return $this->key;
    }

    /**
     * Returns the setting value.
     */
    public function getValue(): string
    {
        return $this->value;
    }
}
. , DBML , .
phplrt EBNF :
DBML
EBNF
XML ,
<?php
/**
 * Parser tests for the `Project` definition: each case feeds a DBML snippet
 * to assertAst() together with the AST dump it is expected to produce.
 *
 * The DBML fixtures are indented with four spaces per nesting level; the
 * offsets asserted in the expected AST (27, 33, 42, 59, 65, 66, 74) only add
 * up with that indentation.
 *
 * NOTE(review): the expected AST dumps are kept unindented exactly as found;
 * confirm that assertAst() does not compare their whitespace.
 */
class ProjectParserTest extends TestCase
{
    /**
     * A single-line `Note: '...'` placed before a setting.
     */
    public function test_project_with_single_line_note_should_be_parsed(): void
    {
        $this->assertAst(<<<DBML
Project project_name {
    Note: 'Description of the project'
    database_type: 'PostgreSQL'
}
DBML
            , <<<AST
<Schema offset="0">
<Project offset="0">
<ProjectName offset="8">
<String offset="8">
<T_WORD offset="8">project_name</T_WORD>
</String>
</ProjectName>
<Note offset="27">
<String offset="33">
<T_QUOTED_STRING offset="33">'Description of the project'</T_QUOTED_STRING>
</String>
</Note>
<ProjectSetting offset="66">
<ProjectSettingKey offset="66">
<T_WORD offset="66">database_type</T_WORD>
</ProjectSettingKey>
<String offset="81">
<T_QUOTED_STRING offset="81">'PostgreSQL'</T_QUOTED_STRING>
</String>
</ProjectSetting>
</Project>
</Schema>
AST
        );
    }

    /**
     * A triple-quoted note spanning several lines.
     */
    public function test_project_with_multi_line_note_should_be_parsed(): void
    {
        $this->assertAst(<<<DBML
Project project_name {
    database_type: 'PostgreSQL'
    Note: '''
# DBML - Database Markup Language
(database markup language) is a simple, readable DSL language designed to define database structures.
## Benefits
* It is simple, flexible and highly human-readable
* It is database agnostic, focusing on the essential database structure definition without worrying about the detailed syntaxes of each database
* Comes with a free, simple database visualiser at [dbdiagram.io](http://dbdiagram.io)
'''
}
DBML
            , <<<AST
<Schema offset="0">
<Project offset="0">
<ProjectName offset="8">
<String offset="8">
<T_WORD offset="8">project_name</T_WORD>
</String>
</ProjectName>
<ProjectSetting offset="27">
<ProjectSettingKey offset="27">
<T_WORD offset="27">database_type</T_WORD>
</ProjectSettingKey>
<String offset="42">
<T_QUOTED_STRING offset="42">'PostgreSQL'</T_QUOTED_STRING>
</String>
</ProjectSetting>
<Note offset="59">
<String offset="65">
<T_QUOTED_STRING offset="65">'''
# DBML - Database Markup Language
(database markup language) is a simple, readable DSL language designed to define database structures.
## Benefits
* It is simple, flexible and highly human-readable
* It is database agnostic, focusing on the essential database structure definition without worrying about the detailed syntaxes of each database
* Comes with a free, simple database visualiser at [dbdiagram.io](http://dbdiagram.io)
'''</T_QUOTED_STRING>
</String>
</Note>
</Project>
</Schema>
AST
        );
    }

    /**
     * A note written in block form: `Note { '...' }`.
     */
    public function test_project_with_block_note_should_be_parsed(): void
    {
        $this->assertAst(<<<DBML
Project project_name {
    database_type: 'PostgreSQL'
    Note {
        'This is a note of this table'
    }
}
DBML
            , <<<AST
<Schema offset="0">
<Project offset="0">
<ProjectName offset="8">
<String offset="8">
<T_WORD offset="8">project_name</T_WORD>
</String>
</ProjectName>
<ProjectSetting offset="27">
<ProjectSettingKey offset="27">
<T_WORD offset="27">database_type</T_WORD>
</ProjectSettingKey>
<String offset="42">
<T_QUOTED_STRING offset="42">'PostgreSQL'</T_QUOTED_STRING>
</String>
</ProjectSetting>
<Note offset="59">
<String offset="74">
<T_QUOTED_STRING offset="74">'This is a note of this table'</T_QUOTED_STRING>
</String>
</Note>
</Project>
</Schema>
AST
        );
    }
}
DBML EBNF, phplrt php , ( ).
Yet another DBML parser written in PHP 8: https://github.com/butschster/dbml-parser
Se completó la primera etapa de mi plan. Ahora queda por hacer un generador de modelos y migraciones.
Tras trabajar con la herramienta phplrt, quiero expresar mi respeto y agradecimiento a @SerafimArts por ella: me ayudó a cambiar radicalmente mi enfoque del análisis sintáctico de lenguajes y a resolver mi problema.
Un agradecimiento especial a @greabock y @SerafimArts por su ayuda en la preparación del material y asistencia en el desarrollo del analizador.