Desarrollo de una máquina virtual de pila y de un compilador para ella (parte I)

Dio la casualidad de que, durante los últimos 18 años, no tuve que escribir en C/C++. En el trabajo se utilizaba Java y, por los cargos que ocupaba, mis actividades estaban más relacionadas con el ámbito empresarial: negociaciones, ventas corporativas, operaciones de producción de edificios y estructuración de transacciones de inversión. Quería recuperar mis habilidades en mi tiempo libre, ejercitar la parte del cerebro que no había usado durante esos 18 años y, naturalmente, empezar por lo más básico. Solo quedaba plantear una tarea.





, 70-80 , - (, ) . "", (Go, Kotlin ) .



32-bit C , . Computer Science . , , , . . . .





, :





CPU: de 32 bits, basada en pila, con los registros IP (Instruction Pointer) y SP (Stack Pointer); la palabra de máquina es un entero de 32 bits (__int32).





RAM: 65536 palabras de 32 bits. Una misma memoria contiene el código del programa (code/text), los datos (data, heap) y la pila (stack).





:





  // Machine word of the VM: a signed 32-bit integer.
  // The original `__int32` is a Microsoft-specific keyword (there it is a
  // synonym for `int`). Plain `int` guarded by the static_assert below is the
  // same type on MSVC and also builds with other compilers.
  typedef int WORD;
  static_assert(sizeof(WORD) == 4, "WORD must be exactly 4 bytes wide");

  // Masks for splitting an instruction word into fields.
  // OP_CODE_MASK selects bits 0..7 (the operation code).
  // OP_TYPE_MASK selects bits 9..11.
  // NOTE(review): bit 8 is not covered by either mask; presumably it is the
  // "immediate" flag mentioned in the accompanying text - confirm.
  constexpr WORD OP_CODE_MASK = 0b00000000000000000000000011111111;
  constexpr WORD OP_TYPE_MASK = 0b00000000000000000000111000000000;

  // Stack operations (OP_DUP is defined further down, next to the call group).
  constexpr WORD OP_HALT      = 0b00000000000000000000000000000000;
  constexpr WORD OP_CONST     = 0b00000000000000000000000000000001;
  constexpr WORD OP_PUSH      = 0b00000000000000000000000000000010;
  constexpr WORD OP_POP       = 0b00000000000000000000000000000011;

  // Arithmetic operations.
  constexpr WORD OP_INC       = 0b00000000000000000000000000000100;
  constexpr WORD OP_DEC       = 0b00000000000000000000000000000101;
  constexpr WORD OP_ADD       = 0b00000000000000000000000000000110;
  constexpr WORD OP_SUB       = 0b00000000000000000000000000000111;
  constexpr WORD OP_MUL       = 0b00000000000000000000000000001000;
  constexpr WORD OP_DIV       = 0b00000000000000000000000000001001;

  // Bitwise operations.
  constexpr WORD OP_AND       = 0b00000000000000000000000000001010;
  constexpr WORD OP_OR        = 0b00000000000000000000000000001011;
  constexpr WORD OP_XOR       = 0b00000000000000000000000000001100;
  constexpr WORD OP_NOT       = 0b00000000000000000000000000001101;
  constexpr WORD OP_SHL       = 0b00000000000000000000000000001110;
  constexpr WORD OP_SHR       = 0b00000000000000000000000000001111;

  // Flow control. Value 0b10000 (16) is not assigned to any opcode:
  // the numbering jumps from OP_SHR (15) to OP_JMP (17).
  constexpr WORD OP_JMP       = 0b00000000000000000000000000010001;
  constexpr WORD OP_CMPJE     = 0b00000000000000000000000000010010;
  constexpr WORD OP_CMPJNE    = 0b00000000000000000000000000010011;
  constexpr WORD OP_CMPJG     = 0b00000000000000000000000000010100;
  constexpr WORD OP_CMPJGE    = 0b00000000000000000000000000010101;
  constexpr WORD OP_CMPJL     = 0b00000000000000000000000000010110;
  constexpr WORD OP_CMPJLE    = 0b00000000000000000000000000010111;

  // Stack duplicate, procedure calls and system calls.
  constexpr WORD OP_DUP       = 0b00000000000000000000000000011000;
  constexpr WORD OP_CALL      = 0b00000000000000000000000000011001;
  constexpr WORD OP_RET       = 0b00000000000000000000000000011010;
  constexpr WORD OP_SYSCALL   = 0b00000000000000000000000000011011;

  // Reserved opcode values.
  constexpr WORD OP_RESERVED1 = 0b00000000000000000000000000011100;
  constexpr WORD OP_RESERVED2 = 0b00000000000000000000000000011101;
  constexpr WORD OP_RESERVED3 = 0b00000000000000000000000000011110;
  constexpr WORD OP_RESERVED4 = 0b00000000000000000000000000011111;

  // Size of the VM memory, counted in WORDs.
  constexpr WORD MAX_MEMORY = 65536;
      
      



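De los 32 bits de la palabra de instrucción, 8 bits se reservan para el código de operación (opcode), 1 bit para la marca de operando inmediato (immediate) y 3 bits para el tipo de dato (byte, short, int, long, char, float, double, etc.); el resto queda reservado.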





// Runtime of the stack-based virtual machine.
// Owns a single array of MAX_MEMORY words and the IP/SP/FP registers.
// Only the declaration is shown here; the member definitions live elsewhere.
class VMRuntime {
	public:
		VMRuntime();                                // Constructor
		~VMRuntime();                               // Destructor
		// NOTE(review): the unit of `size` (bytes vs. words) and the meaning of
		// the bool result are not visible in this declaration - confirm.
		bool loadImage(void* image, size_t size);   // Load executable image
		void run();                                 // Runs image from address 0
		WORD readWord(WORD address);                // Read WORD from memory
		void writeWord(WORD address, WORD value);   // Write WORD to memory
		WORD getMaxAddress();                       // Get max address in 32-bit words
		WORD getIP();                               // Get Instruction Pointer address
		WORD getSP();                               // Get Stack Pointer address
	private:
		WORD  memory[MAX_MEMORY];                   // Random access memory array (MAX_MEMORY words)
		WORD  ip;                                   // Instruction pointer (word index into memory)
		WORD  sp;                                   // Stack pointer (word index into memory)
		WORD  fp;                                   // Frame pointer
		void systemCall(WORD n);                    // System call number n
		void printState();                          // Print current VM state
	};
      
      



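La clase permite cargar una imagen ejecutable (loadImage), ejecutarla (run), leer y escribir palabras de memoria (readWord, writeWord) y consultar los registros IP y SP. Los métodos privados son printState (imprime el estado actual de la máquina) y systemCall, que atiende las llamadas al sistema (una especie de API de la máquina virtual).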





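El método run es el núcleo de la máquina: un bucle que lee la siguiente instrucción, la decodifica y la ejecuta, y así sucesivamente hasta encontrar la instrucción HALT.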





void VMRuntime::run() {
	WORD a, b;
	WORD opcode;

	ip = 0;
	sp = MAX_MEMORY - 1;

	while (1) {

		opcode = memory[ip++];
		
		switch (opcode) {
		//------------------------------------------------------------------------
		// STACK OPERATIONS
		//------------------------------------------------------------------------
		case OP_CONST: 
		    memory[--sp] = memory[ip++]; 
			break;
		case OP_PUSH:
			memory[--sp] = memory[memory[ip++]];
			break;
		case OP_POP:  
		  memory[memory[ip++]] = memory[sp++]; 
			break;
		case OP_DUP:
			a = memory[sp];
			memory[--sp] = a;
			break;
		//------------------------------------------------------------------------
		// ARITHMETIC OPERATIONS
		//------------------------------------------------------------------------
		case OP_INC:
			memory[sp]++;
			break;
		case OP_DEC:
			memory[sp]--;
			break;
		case OP_ADD:  
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a + b;
			break;
		case OP_SUB:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a - b;
			break;
		case OP_MUL:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a * b;
			break;
		case OP_DIV:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a / b;
			break;
		//------------------------------------------------------------------------
		// BITWISE OPERATIONS
		//------------------------------------------------------------------------
		case OP_AND:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a & b;
			break;
		case OP_OR:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a | b;
			break;
		case OP_XOR:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a ^ b;
			break;
		case OP_NOT:
			a = memory[sp++];
			memory[--sp] = ~a;
			break;
		case OP_SHL:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a << b;
			break;
		case OP_SHR:
			b = memory[sp++];
			a = memory[sp++];
			memory[--sp] = a >> b;
			break;
		//------------------------------------------------------------------------
		// FLOW CONTROL OPERATIONS
		//------------------------------------------------------------------------
		case OP_JMP:
			ip = memory[ip];
			break;
		case OP_CMPJE:
			b = memory[sp++];
			a = memory[sp++];
			if (a == b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJNE:
			b = memory[sp++];
			a = memory[sp++];
			if (a != b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJG:
			b = memory[sp++];
			a = memory[sp++];
			if (a > b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJGE:
			a = memory[sp++];
			b = memory[sp++];
			if (a >= b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJL:
			b = memory[sp++];
			a = memory[sp++];
			if (a < b) ip = memory[ip]; else ip++;
			break;
		case OP_CMPJLE:
			b = memory[sp++];
			a = memory[sp++];
			if (a <= b) ip = memory[ip]; else ip++;
			break;
		//------------------------------------------------------------------------
		// PROCEDURE CALL OPERATIONS
		//------------------------------------------------------------------------
		case OP_CALL:
			a = memory[ip++];
			memory[--sp] = ip;       
			ip = a;                  
			break;
		case OP_RET:
			ip = memory[sp++];       
			break;
		case OP_SYSCALL:
			a = memory[ip++];
			systemCall(a);
			break;
		case OP_HALT: 
			printState();
		  return;
		default:
			cout << "Runtime error - unknown opcode=" << opcode << endl;
			printState();
			return;
		}
	}
  
  // Dispatches system call number `n`.
  //
  // Only one system call is implemented - print string (0x20). It pops a
  // word address from the VM stack and writes the NUL-terminated byte
  // string stored at that address to standard output.
  //
  // The address comes from the executed image, so it is validated, and the
  // scan for the terminating NUL never runs past the end of VM memory.
  // Unknown system call numbers are reported instead of silently ignored.
  void VMRuntime::systemCall(WORD n) {
    switch (n) {
    case 0x20: {  // print C style string
      const WORD ptr = memory[sp++];
      if (ptr < 0 || ptr >= MAX_MEMORY) {
        std::cout << "Runtime error - invalid string address=" << ptr << std::endl;
        break;
      }
      const char* const text = reinterpret_cast<const char*>(&memory[ptr]);
      const char* const limit = reinterpret_cast<const char*>(memory + MAX_MEMORY);
      const char* stop = text;
      while (stop != limit && *stop != '\0') {
        ++stop;
      }
      std::cout.write(text, stop - text);
      break;
    }
    default:
      std::cout << "Runtime error - unknown system call=" << n << std::endl;
      break;
    }
  }
      
      



, , , ( , ).





. , , "" .





// Builder for an executable image: a MAX_MEMORY-word buffer into which
// instructions and data are written before the image is handed to the VM.
// Only the declaration is shown here; the notes below on behaviour are taken
// from how the class is used and should be confirmed against the definitions.
class VMImage {
	public:
		VMImage();
		~VMImage();
		void clear();
		// Presumably moves the position at which the next emit() writes.
		WORD setEmitPointer(WORD address);
		WORD getEmitPointer();
		// Emit an instruction without / with one operand. The returned WORD is
		// used by callers as the address of the emitted instruction - confirm.
		WORD emit(WORD opcode);
		WORD emit(WORD opcode, WORD operand);
		WORD readWord(WORD address);
		void writeWord(WORD address, WORD value);
		// Copies `length` units of `data` to `address`.
		// NOTE(review): `length` looks like a byte count (callers pass the
		// size of a C string including its terminator) - confirm.
		WORD writeData(WORD address, void* data, size_t length);
		void* getImage();
		size_t getImageSize();
		// Name is spelled "dissasemble" in the public interface; kept as is.
		void dissasemble();
	private:
		WORD memory[MAX_MEMORY];    // Image contents, MAX_MEMORY words
		WORD imageSize;             // NOTE(review): unit (bytes vs. words) not visible here
		WORD ep;                    // Presumably the emit pointer used by emit()

	};
      
      



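Como ejemplo, escribamos un programa que imprima "Hello, world from VM!" 10 veces, usando una variable como contador, una llamada a función y un salto condicional. En el ensamblador (hipotético, todavía sin implementar) de la máquina virtual tendría este aspecto: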





start:                           // program entry, address [0]
push   iVar                      // push the value stored at iVar onto the stack
dec                              // decrement the top of the stack
call   fn                        // call function fn
dup                              // duplicate the top of the stack (Top Of Stack)
pop    iVar                      // pop the duplicate from the stack into iVar
const  0                         // push constant 0 onto the stack
cmpjg  start                     // if iVar > 0, jump to start:
halt                             // end of program


fn:                              // function fn, address [64]
const  myStr                     // push the address of the string
syscall 0x20                     // system call 0x20: print C style string to standard output
ret                              // return to the caller


dataSeg:                         // data segment, address [128]
iVar = 10                       
myStr = "Hello, world from VM!\n"
      
      



Por ahora me da pereza escribir un traductor del ensamblador de la máquina virtual solo para esta tarea, porque más adelante crearemos un lenguaje de alto nivel que compilaremos directamente a instrucciones de la máquina virtual. Para escribir este programa en la imagen que ejecutará la máquina virtual, usaremos la clase VMImage:





// Fills `img` with the demo program: a countdown loop that calls a small
// function printing "Hello, world from VM!" on every iteration.
//
// Layout used (word addresses): main code is emitted first (presumably
// starting at address 0 - the emit pointer's initial value is not visible
// here), the print function sits at 64 and the data segment starts at 128.
void createExecutableImage(VMImage* img) {

	const WORD dataBase = 128;                      // Data segment starts at 128

	// Data: the loop counter followed by the message text.
	const WORD counterAddr = dataBase;
	const WORD messageAddr = dataBase + 1;
	img->writeWord(counterAddr, 10);
	img->writeData(messageAddr, "Hello, world from VM!\n", 23);   // 22 characters + terminating NUL

	const WORD printFnAddr = 64;                    // Address of function fn()

	// Main loop.
	const WORD loopStart = img->emit(OP_PUSH, counterAddr);  // stack <- [counter]
	img->emit(OP_DEC);                              // decrement the value on top of the stack
	img->emit(OP_CALL, printFnAddr);                // call fn()
	img->emit(OP_DUP);                              // duplicate the decremented value
	img->emit(OP_POP, counterAddr);                 // store one copy back into [counter]
	img->emit(OP_CONST, 0);                         // push constant 0 as the second operand
	img->emit(OP_CMPJG, loopStart);                 // loop again while the counter is > 0
	img->emit(OP_HALT);                             // end of program

	// Function fn(): print the message and return.
	img->setEmitPointer(printFnAddr);
	img->emit(OP_CONST, messageAddr);               // push the address of the message
	img->emit(OP_SYSCALL, 0x20);                    // system call 0x20: print C style string
	img->emit(OP_RET);                              // return to the caller

}
      
      



Y luego comenzaremos la ejecución de nuestra imagen en una máquina virtual, midiendo el tiempo:





// Builds the demo image, loads it into a fresh VM, runs it and prints the
// wall-clock time spent inside run().
// Returns 0 on success and 1 when the image could not be loaded.
int main() {
	// Each object embeds a MAX_MEMORY-word array, so both are kept on the heap.
	VMImage* img = new VMImage();
	createExecutableImage(img);
	VMRuntime* vm = new VMRuntime();

	int exitCode = 0;
	// The result of loadImage() used to be ignored.
	// NOTE(review): assumes `true` means the image was loaded - confirm.
	if (vm->loadImage(img->getImage(), img->getImageSize())) {
		const auto start = std::chrono::high_resolution_clock::now();

		vm->run();

		const auto end = std::chrono::high_resolution_clock::now();
		const auto elapsedNs =
			std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
		std::cout << "EXECUTION TIME: " << elapsedNs / 1000000000.0 << "s" << std::endl;
	} else {
		std::cout << "Failed to load executable image" << std::endl;
		exitCode = 1;
	}

	delete vm;
	delete img;
	return exitCode;
}
      
      



Obtenemos en la consola:





¡Hurra! ¡Genial! ¡Las operaciones de pila, la aritmética, las instrucciones de salto condicional y las llamadas a funciones funcionan! Esto es alentador. Al parecer, seguiré desarrollando esta historia...








All Articles