Commit bcfc3fcb authored by Kai Rese's avatar Kai Rese
Browse files

Add specification, assembler and description

parent 95e94f73
**/target
**/.idea
**/*.iml
**/*.out
**/*.rs.bk
**/Cargo.lock
MIT License
Copyright (c) 2021 Kai Rese
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# arschitek_zero
Arschitek Zero
===
Arschitek Zero is a simple 16-bit instruction set architecture. It is designed to provide a minimal amount of functionality while still being reasonably convenient to write programs in.
Have a look at the [specification](./specification.md) or the [assembler](./assembler).
Roadmap:
- [x] General ISA specification
- [x] State
- [x] Instructions
- [x] Assembly language
- [ ] System specification
- [ ] Executables
- [ ] I/O
- [ ] BIOS/OS
- [ ] Tooling
- [x] Basic assembler
- [ ] Public emulator
- [ ] Viewer for executables?
[package]
name = "arschitek_zero_assembler"
version = "0.1.0"
authors = ["Kai Rese <Kulasko@users.noreply.github.com>"]
edition = "2018"
[dependencies]
:Initialization
add reg1 1
add reg0 1
shftl reg1 reg1 11
stg reg3 :ListStart
:NextNumber
add reg0 1
stg reg2 :ListStart
:ListLoop
sub reg7 reg3 reg2
bez reg7 :FoundNumber
load reg4 reg2 reg15
stg reg6 +6
store reg6 reg14 reg15
add reg14 -2
jmp :Modulo
bez reg8 :NextNumber
add reg2 2
jmp :ListLoop
:FoundNumber
store reg0 reg2 reg15
add reg1 -1
add reg3 2
bgz reg1 :NextNumber
stop
:Modulo
add reg10 reg4 reg15
add reg11 reg15 reg15
:ShiftLeft
shftl reg10 reg10 1
add reg11 1
sub reg8 reg0 reg10
bgz reg8 :ShiftLeft
bez reg8 :ShiftLeft
shftr reg10 reg10 1
add reg8 reg0 reg15
:ShiftRight
sub reg8 reg8 reg10
bgz reg8 :Skip
bez reg8 :Skip
add reg8 reg8 reg10
:Skip
add reg11 -1
shftr reg10 reg10 1
bgz reg11 :ShiftRight
add reg14 2
load reg9 reg14 reg15
jmp reg9 0
:ListStart
\ No newline at end of file
pub mod error;
pub mod instruction;
pub mod token;
pub mod statement;
pub mod scanner;
pub mod parser;
pub mod assembler;
use crate::assembler::error::*;
use crate::assembler::instruction::InstructionStatement;
use crate::assembler::statement::Statement;
/// Assembles parsed statements into machine code, two bytes per instruction or data word.
///
/// The work is done in two passes:
///
/// 1. Every label (a `Statement::SetLabel` or a named `InstructionStatement::Data`) is
///    recorded together with the byte offset of the next emitted word.
/// 2. Every instruction is encoded. Relative targets are computed against the offset of the
///    word *following* the instruction being encoded.
///
/// The index of a statement plus one is used as the line number in reported errors, in both
/// passes.
///
/// # Errors
///
/// Returns an `ErrorVec` holding every problem found (redefined or undefined labels, unknown
/// register names, immediates out of range). No output is returned if any error occurred.
pub fn assemble<'a>(statements: &Vec<Statement<'a>>) -> Result<Vec<u8>, ErrorVec> {
    let mut output = Vec::<u8>::with_capacity(256);
    let mut errors = ErrorVec::new();
    let mut register_symbols: Vec<(&str, u8)> = vec![
        ("reg0", 0), ("reg1", 1), ("reg2", 2), ("reg3", 3),
        ("reg4", 4), ("reg5", 5), ("reg6", 6), ("reg7", 7),
        ("reg8", 8), ("reg9", 9), ("reg10", 10), ("reg11", 11),
        ("reg12", 12), ("reg13", 13), ("reg14", 14), ("reg15", 15)
    ];
    // The literal above is in numeric order; lookups use a binary search on the name, so the
    // table has to be in string order ("reg1" < "reg10" < "reg2").
    register_symbols.sort_unstable_by(|a, b| a.0.cmp(b.0));

    // First pass: collect label addresses. `define_label` keeps the table sorted, so it can be
    // binary-searched both here (duplicate detection) and in the second pass (lookup).
    let mut label_symbols = Vec::<(&str, usize)>::new();
    let mut curr_position: usize = 0;
    for (index, statement) in statements.iter().enumerate() {
        let line_number = index + 1;
        if let Statement::SetLabel(name) = statement {
            define_label(&mut label_symbols, &mut errors, line_number, name, curr_position);
        } else if let Statement::Instruction(instruction) = statement {
            if let InstructionStatement::Data(_, Some(name)) = instruction {
                define_label(&mut label_symbols, &mut errors, line_number, name, curr_position);
            }
            curr_position += 2;
        }
    }

    // Second pass: encode every instruction.
    curr_position = 0;
    for (index, statement) in statements.iter().enumerate() {
        if let Statement::Instruction(instruction) = statement {
            let line_number = index + 1;
            // Advance first: relative targets are measured from the next word.
            curr_position += 2;
            let code = match instruction {
                InstructionStatement::AddReg(first, second, third) =>
                    encode_3_reg(line_number, &register_symbols, 0x0, first, second, third),
                InstructionStatement::AddImm(first, second) =>
                    encode_reg_imm(line_number, &register_symbols, 0x1, first, *second,
                                   true),
                InstructionStatement::Sub(first, second, third) =>
                    encode_3_reg(line_number, &register_symbols, 0x2, first, second, third),
                InstructionStatement::SetTarget(first, second, third) =>
                    encode_reg_target(line_number, &register_symbols, &label_symbols,
                                      curr_position, 0x3, first, *second, *third),
                InstructionStatement::LoadReg(first, second, third) =>
                    encode_3_reg(line_number, &register_symbols, 0x4, first, second, third),
                InstructionStatement::LoadImm(first, second, third) =>
                    encode_reg_target(line_number, &register_symbols, &label_symbols,
                                      curr_position, 0x5, first, *second, *third),
                InstructionStatement::StoreReg(first, second, third) =>
                    encode_3_reg(line_number, &register_symbols, 0x6, first, second, third),
                InstructionStatement::StoreImm(first, second, third) =>
                    encode_reg_target(line_number, &register_symbols, &label_symbols,
                                      curr_position, 0x7, first, *second, *third),
                InstructionStatement::JumpImm(first, second) =>
                    encode_target(line_number, &label_symbols,
                                  curr_position, 0x8, *first, *second),
                InstructionStatement::JumpReg(first, second) =>
                    encode_reg_imm(line_number, &register_symbols, 0x9, first, *second,
                                   true),
                InstructionStatement::BranchEqualZero(first, second, third) =>
                    encode_reg_target(line_number, &register_symbols, &label_symbols,
                                      curr_position, 0xa, first, *second, *third),
                InstructionStatement::BranchGreaterZero(first, second, third) =>
                    encode_reg_target(line_number, &register_symbols, &label_symbols,
                                      curr_position, 0xb, first, *second, *third),
                InstructionStatement::ShiftLeft(first, second, third) =>
                    encode_2_reg_imm(line_number, &register_symbols, 0xc, first, second,
                                     *third),
                InstructionStatement::ShiftRight(first, second, third) =>
                    encode_2_reg_imm(line_number, &register_symbols, 0xd, first, second,
                                     *third),
                // NOTE(review): this used to be 0xd, which made it indistinguishable from
                // ShiftRight. 0xe is the only opcode not used by another instruction;
                // confirm against the ISA specification.
                InstructionStatement::BoolTable(first, second, third) =>
                    encode_2_reg_imm(line_number, &register_symbols, 0xe, first, second,
                                     *third),
                InstructionStatement::Stop => Ok((0x0f, 0x00)),
                // Data words are emitted with bits 8..16 first, then bits 0..8.
                InstructionStatement::Data(first, _) =>
                    Ok((((*first as usize >> 8) & 0xff) as u8, (*first as usize & 0xff) as u8))
            };
            match code {
                Ok((low, high)) => {
                    output.push(low);
                    output.push(high);
                }
                Err(error) => errors.push(error)
            }
        }
    }

    if errors.len() > 0 {
        Err(errors)
    } else {
        Ok(output)
    }
}

/// Records `name` as a label located at byte offset `position`.
///
/// `label_symbols` is kept sorted by name at all times, which is what makes the binary search
/// used for duplicate detection valid. A name that is already present is reported as
/// `ErrorKind::RedefinedLabel` and the earlier definition is kept.
fn define_label<'s>(
    label_symbols: &mut Vec<(&'s str, usize)>,
    errors: &mut ErrorVec,
    line_number: usize,
    name: &'s str,
    position: usize,
) {
    match label_symbols.binary_search_by(|entry| entry.0.cmp(name)) {
        Ok(_) => errors.push(Error {
            line_number,
            kind: ErrorKind::RedefinedLabel(name.to_string())
        }),
        Err(insert_at) => label_symbols.insert(insert_at, (name, position))
    }
}
/// Encodes an instruction that takes three register operands.
///
/// The first byte carries `code` in its lower nibble and the first register in its upper
/// nibble; the second byte carries the second register (lower nibble) and the third register
/// (upper nibble). Fails with the error of the first register name that cannot be resolved.
fn encode_3_reg<'a>(line_number: usize, register_symbols: &'a Vec<(&str, u8)>, code: u8,
                    first: &'a str, second: &'a str, third: &'a str)
    -> Result<(u8, u8), Error> {
    // Resolve operands left to right so the first unknown name is the one reported.
    let first_reg = get_identifier(line_number, register_symbols, first)?;
    let second_reg = get_identifier(line_number, register_symbols, second)?;
    let third_reg = get_identifier(line_number, register_symbols, third)?;
    Ok((code | (first_reg << 4), second_reg | (third_reg << 4)))
}
/// Encodes an instruction that takes one register and an 8-bit immediate.
///
/// The first byte carries `code` (lower nibble) and the register (upper nibble); the second
/// byte is the immediate. `signed` selects which 8-bit range the immediate is checked against.
/// The register is resolved before the immediate is range-checked.
fn encode_reg_imm<'a>(line_number: usize, register_symbols: &'a Vec<(&str, u8)>, code: u8,
                      first: &'a str, second: isize, signed: bool) -> Result<(u8, u8), Error> {
    let register = get_identifier(line_number, register_symbols, first)?;
    assert_range(line_number, second, 8, signed)?;
    Ok((code | (register << 4), second as u8))
}
fn encode_reg_target<'a>(line_number: usize, register_symbols: &'a Vec<(&str, u8)>,
label_symbols: &'a Vec<(&str, usize)>, ip: usize, code: u8, first: &'a str,
second: Option<&'a str>, third: isize) -> Result<(u8, u8), Error> {
let target = get_target_address(line_number, label_symbols, second, ip)?
as isize + third;
assert_range(line_number, target, 8, true)?;
let low_byte: u8 = code | (get_identifier(line_number, &register_symbols, first)? << 4);
let high_byte: u8 = target as u8;
Ok((low_byte, high_byte))
}
/// Encodes an instruction whose only operand is a 12-bit signed relative target.
///
/// The target is the distance from `ip` to the optional label `first`, plus the immediate
/// `second`. Its lowest four bits share the first byte with `code` (upper nibble); the
/// remaining eight bits form the second byte.
fn encode_target<'a>(line_number: usize, label_symbols: &Vec<(&str, usize)>, ip: usize, code: u8,
                     first: Option<&'a str>, second: isize) -> Result<(u8, u8), Error> {
    let label_distance = get_target_address(line_number, label_symbols, first, ip)?;
    let offset = label_distance as isize + second;
    assert_range(line_number, offset, 12, true)?;
    let low_nibble = ((offset & 0xf) << 4) as u8;
    let upper_bits = (offset >> 4) as u8;
    Ok((code | low_nibble, upper_bits))
}
/// Encodes an instruction that takes two registers and a 4-bit unsigned immediate.
///
/// The first byte carries `code` (lower nibble) and the first register (upper nibble); the
/// second byte carries the second register (lower nibble) and the immediate (upper nibble).
/// Checks run in the order: first register, immediate range, second register.
fn encode_2_reg_imm<'a>(line_number: usize, register_symbols: &Vec<(&str, u8)>, code: u8,
                        first: &'a str, second: &'a str, third: isize)
    -> Result<(u8, u8), Error> {
    let first_reg = get_identifier(line_number, register_symbols, first)?;
    assert_range(line_number, third, 4, false)?;
    let second_reg = get_identifier(line_number, register_symbols, second)?;
    let immediate = (third << 4) as u8;
    Ok((code | (first_reg << 4), second_reg | immediate))
}
/// Computes the distance from `ip` to `label`.
///
/// Without a label the distance is zero. With a label, the result is the label's address
/// minus `ip`, reinterpreted as `usize` (callers cast it back to `isize`, so backwards
/// distances survive the round trip). Fails if the label is not defined.
fn get_target_address<'a>(line_number: usize, label_symbols: &Vec<(&str, usize)>,
                          label: Option<&'a str>, ip: usize) -> Result<usize, Error> {
    match label {
        None => Ok(0),
        Some(key) => {
            let base = get_label_address(line_number, label_symbols, key)?;
            let distance = base as isize - ip as isize;
            Ok(distance as usize)
        }
    }
}
fn get_identifier<'a>(line_number: usize, register_symbols: &Vec<(&str, u8)>, key: &'a str)
-> Result<u8, Error> {
if let Ok(index) = register_symbols.binary_search_by(
|entry| entry.0.cmp(key)) {
Ok(register_symbols[index].1)
} else {
Err(Error {
line_number,
kind: ErrorKind::UndefinedIdentifier(key.to_string())
})
}
}
fn get_label_address<'a>(line_number: usize, label_symbols: &Vec<(&str, usize)>, key: &'a str)
-> Result<usize, Error> {
if let Ok(index) = label_symbols.binary_search_by(
|entry| entry.0.cmp(key)) {
Ok(label_symbols[index].1)
} else {
Err(Error {
line_number,
kind: ErrorKind::UndefinedLabel(key.to_string())
})
}
}
/// Checks that `test_value` fits into an immediate field of `bits` bits.
///
/// * `signed == true`: accepts `-2^(bits-1) ..= 2^(bits-1) - 1`.
/// * `signed == false`: accepts `0 ..= 2^bits - 1`. Negative values are rejected; previously
///   they slipped through because only the upper bound was tested.
///
/// On failure returns `ErrorKind::ValueOutOfRange` carrying `bits`, `signed` and the offending
/// value, tagged with `line_number`.
fn assert_range<'a>(line_number: usize, test_value: isize, bits: u8, signed: bool)
    -> Result<(), Error> {
    let in_range = if signed {
        let limit = 1isize << (bits - 1);
        -limit <= test_value && test_value < limit
    } else {
        0 <= test_value && test_value < (1isize << bits)
    };

    if in_range {
        Ok(())
    } else {
        Err(Error {
            line_number,
            kind: ErrorKind::ValueOutOfRange{ bits, signed, value: test_value }
        })
    }
}
\ No newline at end of file
use std::fmt::Display;
/// A single problem found while processing a source file, tagged with the line it refers to.
#[derive(Debug)]
pub struct Error {
/// What went wrong.
pub kind: ErrorKind,
/// Line number printed as `Line {n}` in the formatted message.
pub line_number: usize,
}
/// A collection of `Error`s, so that several problems can be reported in one result.
#[derive(Debug)]
pub struct ErrorVec {
// Errors in the order they were pushed; formatted one per line.
errors: Vec<Error>
}
/// The different kinds of problems an `Error` can describe.
#[derive(Debug, PartialEq)]
pub enum ErrorKind {
/// An operation or label was expected; the payload describes what was found instead.
InvalidStatement(&'static str),
/// An operand has the wrong type.
OperandType{
// Zero-based operand index; the formatter maps 0, 1, 2 to "first", "second", "third"
// and indexes out of bounds for anything larger.
position: usize,
// Description of the expected operand type.
expected: &'static str,
// Optional second acceptable operand type.
other_expected: Option<&'static str>,
// Description of the operand type that was found.
actual: &'static str
},
/// The number of operands does not match what the operation accepts.
OperandCount{
// Expected number of operands.
expected: usize,
// Optional second acceptable number of operands.
other_expected: Option<usize>,
// Number of operands that were found.
actual: usize
},
/// An immediate value does not fit into a field of `bits` bits with the given signedness.
ValueOutOfRange{ bits: u8, signed: bool, value: isize},
/// The named label is defined more than once.
RedefinedLabel(String),
/// The named identifier has no definition.
UndefinedIdentifier(String),
/// The named label is referenced but cannot be found.
UndefinedLabel(String)
}
impl std::error::Error for Error {
fn source(&self) -> Option<&'static std::error::Error> { None }
}
/// Formats the error as a single human-readable sentence prefixed with `Line {n}: `.
impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let line = self.line_number;
        match &self.kind {
            ErrorKind::InvalidStatement(token) => {
                write!(f, "Line {}: Expected operation or label, found {}", line, token)
            }
            ErrorKind::OperandType { position, expected, other_expected, actual } => {
                // `position` is a zero-based operand index; values above 2 panic here.
                let ordinal = ["first", "second", "third"][*position];
                match other_expected {
                    Some(alternative) => write!(
                        f, "Line {}: Expected {} or {} as {} operand, found {}",
                        line, expected, alternative, ordinal, actual),
                    None => write!(
                        f, "Line {}: Expected {} as {} operand, found {}",
                        line, expected, ordinal, actual),
                }
            }
            ErrorKind::OperandCount { expected, other_expected, actual } => {
                match other_expected {
                    Some(alternative) => write!(
                        f, "Line {}: Expected {} or {} operands, found {}",
                        line, expected, alternative, actual),
                    None => write!(
                        f, "Line {}: Expected {} operands, found {}",
                        line, expected, actual),
                }
            }
            ErrorKind::ValueOutOfRange { bits, signed, value } => {
                match (*signed, *value >= 0) {
                    // Signed field: print the full two's-complement range.
                    (true, _) => {
                        let shift = *bits - 1;
                        write!(f, "Line {}: The immediate value is out of range, range is from \
                                   {} to {}", line, -1 << shift, (1 << shift) - 1)
                    }
                    // Unsigned field, value too large.
                    (false, true) => {
                        write!(f, "Line {}: The unsigned immediate value is too big, maximum value \
                                   is {}", line, (1 << *bits) - 1)
                    }
                    // Unsigned field, negative value.
                    (false, false) => {
                        write!(f, "Line {}: The immediate value is unsigned and therefore must be \
                                   positive", line)
                    }
                }
            }
            ErrorKind::RedefinedLabel(name) => {
                write!(f, "Line {}: The label \"{}\" has already been defined", line, name)
            }
            ErrorKind::UndefinedLabel(name) => {
                write!(f, "Line {}: Can't find the label \"{}\"", line, name)
            }
            ErrorKind::UndefinedIdentifier(name) => {
                write!(f, "Line {}: Can't find the definition for \"{}\"", line, name)
            }
        }
    }
}
/// Two errors are equal when they refer to the same line and have equal kinds.
impl PartialEq for Error {
    fn eq(&self, other: &Error) -> bool {
        // Cheap integer comparison first; the kind may hold strings.
        if self.line_number != other.line_number {
            return false;
        }
        self.kind == other.kind
    }
}
impl ErrorVec {
pub fn new() -> ErrorVec {
ErrorVec{ errors: Vec::<Error>::new() }
}
pub fn len(&self) -> usize { self.errors.len() }
pub fn push(&mut self, error: Error) {
self.errors.push(error)
}
}
impl std::error::Error for ErrorVec {
fn source(&self) -> Option<&'static std::error::Error> { None }
}
/// Prints every collected error on its own line, in insertion order.
impl std::fmt::Display for ErrorVec {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Stops at the first formatter failure and reports it.
        self.errors
            .iter()
            .try_for_each(|error| writeln!(f, "{}", error))
    }
}
/// Prints the short category name of the error kind, without any of its payload.
impl Display for ErrorKind {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let category = match self {
            ErrorKind::InvalidStatement(..) => "Invalid statement",
            ErrorKind::OperandType { .. } => "Operand type",
            ErrorKind::OperandCount { .. } => "Operand count",
            ErrorKind::ValueOutOfRange { .. } => "Value out of range",
            ErrorKind::UndefinedIdentifier(_) => "Undefined identifier",
            ErrorKind::UndefinedLabel(_) => "Undefined label",
            ErrorKind::RedefinedLabel(_) => "Redefined label",
        };
        f.write_str(category)
    }
}
\ No newline at end of file
use std::fmt::Display;
/// Maps a mnemonic to its `InstructionToken`, ignoring ASCII case.
///
/// Returns `None` when `name` is not a known mnemonic. The lookup is a binary search over
/// `INSTRUCTION_CODE_TABLE`, which therefore has to stay sorted by mnemonic.
pub fn get_instruction_type(name: &str) -> Option<InstructionToken> {
    // Lowercase once up front instead of allocating a new string for every comparison.
    let mnemonic = name.to_ascii_lowercase();
    INSTRUCTION_CODE_TABLE
        .binary_search_by(|entry| entry.0.cmp(mnemonic.as_str()))
        .ok()
        .map(|index| INSTRUCTION_CODE_TABLE[index].1)
}
/// The operation named by a mnemonic, before its operands are known.
///
/// Each variant corresponds to one entry of `INSTRUCTION_CODE_TABLE`.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum InstructionToken {
// Mnemonic "add".
Add,
// Mnemonic "sub".
Sub,
// Mnemonic "stg".
SetTarget,
// Mnemonic "load".
Load,
// Mnemonic "store".
Store,
// Mnemonic "jmp".
Jump,
// Mnemonic "bez".
BranchEqual,
// Mnemonic "bgz".
BranchGreater,
// Mnemonic "shftl".
ShiftLeft,
// Mnemonic "shftr".
ShiftRight,
// Mnemonic "botab".
BoolTable,
// Mnemonic "stop".
Stop,
// Mnemonic "data".
Data
}
/// A fully parsed instruction together with its operands.
///
/// `&str` operands are register names that the assembler resolves through its register table.
/// An `Option<&str>` operand is an optional label name and the `isize` next to it is an
/// immediate offset added to the label distance. A lone `isize` is a plain immediate.
#[derive(Debug, PartialEq)]
pub enum InstructionStatement<'a> {
// Three registers.
AddReg(&'a str, &'a str, &'a str),
// Register and immediate.
AddImm(&'a str, isize),
// Three registers.
Sub(&'a str, &'a str, &'a str),
// Register, optional label, offset.
SetTarget(&'a str, Option<&'a str>, isize),
// Three registers.
LoadReg(&'a str, &'a str, &'a str),
// Register, optional label, offset.
LoadImm(&'a str, Option<&'a str>, isize),
// Three registers.
StoreReg(&'a str, &'a str, &'a str),
// Register, optional label, offset.
StoreImm(&'a str, Option<&'a str>, isize),
// Optional label and offset.
JumpImm(Option<&'a str>, isize),
// Register and immediate.
JumpReg(&'a str, isize),
// Register, optional label, offset.
BranchEqualZero(&'a str, Option<&'a str>, isize),
// Register, optional label, offset.
BranchGreaterZero(&'a str, Option<&'a str>, isize),
// Two registers and an immediate.
ShiftLeft(&'a str, &'a str, isize),
// Two registers and an immediate.
ShiftRight(&'a str, &'a str, isize),
// Two registers and an immediate.
BoolTable(&'a str, &'a str, isize),
// No operands.
Stop,
// Raw value to emit, plus an optional label name that the assembler defines at this position.
Data(isize, Option<&'a str>)
}
/// Prints a human-readable name for the operation.
impl Display for InstructionToken {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let description = match self {
            InstructionToken::Add => "Add",
            InstructionToken::Sub => "Sub",
            InstructionToken::SetTarget => "Set Target",
            InstructionToken::Load => "Load",
            InstructionToken::Store => "Store",
            InstructionToken::Jump => "Jump",
            InstructionToken::BranchEqual => "Branch when equal to zero",
            InstructionToken::BranchGreater => "Branch when greater than zero",
            InstructionToken::ShiftLeft => "Shift left",
            InstructionToken::ShiftRight => "Shift right",
            InstructionToken::BoolTable => "Bool table",
            InstructionToken::Stop => "Stop",
            InstructionToken::Data => "Data",
        };
        f.write_str(description)
    }
}
// Mnemonic -> token lookup table used by `get_instruction_type`.
//
// The entries MUST stay sorted by mnemonic (byte-wise string order), because the lookup is a
// binary search. Mnemonics are stored in lower case; the lookup lowercases its input.
const INSTRUCTION_CODE_TABLE: [(&str, InstructionToken); 13] = [
("add", InstructionToken::Add),
("bez", InstructionToken::BranchEqual),
("bgz", InstructionToken::BranchGreater),
("botab", InstructionToken::BoolTable),
("data", InstructionToken::Data),
("jmp", InstructionToken::Jump),
("load", InstructionToken::Load),
("shftl", InstructionToken::ShiftLeft),
("shftr", InstructionToken::ShiftRight),
("stg", InstructionToken::SetTarget),
("stop", InstructionToken::Stop),
("store", InstructionToken::Store),
("sub", InstructionToken::Sub),
];
\ No newline at end of file
use super::error::*;
use super::token::Token;
use super::statement::Statement;
use super::instruction::*;
/// Parses every token line into a statement.
///
/// Lines are numbered starting at 1 for error reporting. Parsing continues after a failing
/// line so that all errors are collected; if any line failed, only the errors are returned.
pub fn parse<'a>(tokens: &'a Vec<Vec<Token>>) -> Result<Vec<Statement<'a>>, ErrorVec> {
    let mut statements = Vec::<Statement>::new();
    let mut errors = ErrorVec::new();

    for (line_number, token_line) in (1..).zip(tokens.iter()) {
        match parse_line(line_number, token_line) {
            Err(error) => errors.push(error),
            Ok(new_statement) => statements.push(new_statement)
        }
    }

    if errors.len() > 0 {
        return Err(errors);
    }
    Ok(statements)
}
pub fn parse_line<'a>(line_number: usize, tokens: &'a Vec<Token>)